author     dim <dim@FreeBSD.org>  2011-10-20 21:10:27 +0000
committer  dim <dim@FreeBSD.org>  2011-10-20 21:10:27 +0000
commit     7b3392326c40c3c20697816acae597ba7b3144eb (patch)
tree       2cbcf22585e99f8a87d12d5ff94f392c0d266819 /lib
parent     1176aa52646fe641a4243a246aa7f960c708a274 (diff)
Vendor import of llvm release_30 branch r142614:
http://llvm.org/svn/llvm-project/llvm/branches/release_30@142614
Diffstat (limited to 'lib')
-rw-r--r--  lib/Analysis/AliasAnalysis.cpp | 50
-rw-r--r--  lib/Analysis/AliasAnalysisEvaluator.cpp | 6
-rw-r--r--  lib/Analysis/AliasSetTracker.cpp | 107
-rw-r--r--  lib/Analysis/Analysis.cpp | 3
-rw-r--r--  lib/Analysis/BasicAliasAnalysis.cpp | 136
-rw-r--r--  lib/Analysis/BlockFrequency.cpp | 59
-rw-r--r--  lib/Analysis/BlockFrequencyInfo.cpp | 63
-rw-r--r--  lib/Analysis/BranchProbabilityInfo.cpp | 276
-rw-r--r--  lib/Analysis/CMakeLists.txt | 8
-rw-r--r--  lib/Analysis/ConstantFolding.cpp | 126
-rw-r--r--  lib/Analysis/DIBuilder.cpp | 286
-rw-r--r--  lib/Analysis/DbgInfoPrinter.cpp | 2
-rw-r--r--  lib/Analysis/DebugInfo.cpp | 275
-rw-r--r--  lib/Analysis/IPA/CMakeLists.txt | 6
-rw-r--r--  lib/Analysis/IPA/CallGraphSCCPass.cpp | 7
-rw-r--r--  lib/Analysis/IPA/FindUsedTypes.cpp | 4
-rw-r--r--  lib/Analysis/IVUsers.cpp | 3
-rw-r--r--  lib/Analysis/InlineCost.cpp | 124
-rw-r--r--  lib/Analysis/InstructionSimplify.cpp | 173
-rw-r--r--  lib/Analysis/LazyValueInfo.cpp | 6
-rw-r--r--  lib/Analysis/Lint.cpp | 8
-rw-r--r--  lib/Analysis/Loads.cpp | 12
-rw-r--r--  lib/Analysis/LoopDependenceAnalysis.cpp | 8
-rw-r--r--  lib/Analysis/LoopInfo.cpp | 296
-rw-r--r--  lib/Analysis/LoopPass.cpp | 108
-rw-r--r--  lib/Analysis/MemDepPrinter.cpp | 80
-rw-r--r--  lib/Analysis/MemoryBuiltins.cpp | 16
-rw-r--r--  lib/Analysis/MemoryDependenceAnalysis.cpp | 48
-rw-r--r--  lib/Analysis/PHITransAddr.cpp | 8
-rw-r--r--  lib/Analysis/PathNumbering.cpp | 2
-rw-r--r--  lib/Analysis/RegionPass.cpp | 6
-rw-r--r--  lib/Analysis/ScalarEvolution.cpp | 844
-rw-r--r--  lib/Analysis/ScalarEvolutionExpander.cpp | 393
-rw-r--r--  lib/Analysis/ScalarEvolutionNormalization.cpp | 101
-rw-r--r--  lib/Analysis/ValueTracking.cpp | 39
-rw-r--r--  lib/Archive/CMakeLists.txt | 6
-rw-r--r--  lib/AsmParser/CMakeLists.txt | 5
-rw-r--r--  lib/AsmParser/LLLexer.cpp | 30
-rw-r--r--  lib/AsmParser/LLParser.cpp | 352
-rw-r--r--  lib/AsmParser/LLParser.h | 26
-rw-r--r--  lib/AsmParser/LLToken.h | 15
-rw-r--r--  lib/Bitcode/Reader/BitcodeReader.cpp | 316
-rw-r--r--  lib/Bitcode/Reader/BitcodeReader.h | 8
-rw-r--r--  lib/Bitcode/Reader/CMakeLists.txt | 5
-rw-r--r--  lib/Bitcode/Writer/BitcodeWriter.cpp | 156
-rw-r--r--  lib/Bitcode/Writer/CMakeLists.txt | 5
-rw-r--r--  lib/Bitcode/Writer/ValueEnumerator.cpp | 6
-rw-r--r--  lib/Bitcode/Writer/ValueEnumerator.h | 8
-rw-r--r--  lib/CMakeLists.txt | 4
-rw-r--r--  lib/CodeGen/Analysis.cpp | 16
-rw-r--r--  lib/CodeGen/AsmPrinter/ARMException.cpp | 2
-rw-r--r--  lib/CodeGen/AsmPrinter/AsmPrinter.cpp | 155
-rw-r--r--  lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp | 2
-rw-r--r--  lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp | 15
-rw-r--r--  lib/CodeGen/AsmPrinter/CMakeLists.txt | 9
-rw-r--r--  lib/CodeGen/AsmPrinter/DIE.cpp | 2
-rw-r--r--  lib/CodeGen/AsmPrinter/DwarfCFIException.cpp | 5
-rw-r--r--  lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp | 407
-rw-r--r--  lib/CodeGen/AsmPrinter/DwarfCompileUnit.h | 20
-rw-r--r--  lib/CodeGen/AsmPrinter/DwarfDebug.cpp | 1403
-rw-r--r--  lib/CodeGen/AsmPrinter/DwarfDebug.h | 133
-rw-r--r--  lib/CodeGen/AsmPrinter/DwarfException.cpp | 100
-rw-r--r--  lib/CodeGen/AsmPrinter/Win64Exception.cpp | 1
-rw-r--r--  lib/CodeGen/BranchFolding.cpp | 29
-rw-r--r--  lib/CodeGen/CMakeLists.txt | 17
-rw-r--r--  lib/CodeGen/CalcSpillWeights.cpp | 32
-rw-r--r--  lib/CodeGen/CodeGen.cpp | 1
-rw-r--r--  lib/CodeGen/DwarfEHPrepare.cpp | 67
-rw-r--r--  lib/CodeGen/ELFCodeEmitter.cpp | 2
-rw-r--r--  lib/CodeGen/ELFCodeEmitter.h | 4
-rw-r--r--  lib/CodeGen/ELFWriter.cpp | 12
-rw-r--r--  lib/CodeGen/ExecutionDepsFix.cpp (renamed from lib/Target/X86/SSEDomainFix.cpp) | 99
-rw-r--r--  lib/CodeGen/ExpandPostRAPseudos.cpp (renamed from lib/CodeGen/LowerSubregs.cpp) | 114
-rw-r--r--  lib/CodeGen/IfConversion.cpp | 39
-rw-r--r--  lib/CodeGen/InlineSpiller.cpp | 434
-rw-r--r--  lib/CodeGen/InterferenceCache.cpp | 5
-rw-r--r--  lib/CodeGen/InterferenceCache.h | 3
-rw-r--r--  lib/CodeGen/IntrinsicLowering.cpp | 10
-rw-r--r--  lib/CodeGen/LLVMTargetMachine.cpp | 96
-rw-r--r--  lib/CodeGen/LexicalScopes.cpp | 335
-rw-r--r--  lib/CodeGen/LiveDebugVariables.cpp | 71
-rw-r--r--  lib/CodeGen/LiveInterval.cpp | 35
-rw-r--r--  lib/CodeGen/LiveIntervalAnalysis.cpp | 29
-rw-r--r--  lib/CodeGen/LiveIntervalUnion.cpp | 230
-rw-r--r--  lib/CodeGen/LiveIntervalUnion.h | 78
-rw-r--r--  lib/CodeGen/LiveRangeCalc.cpp | 270
-rw-r--r--  lib/CodeGen/LiveRangeCalc.h | 226
-rw-r--r--  lib/CodeGen/LiveRangeEdit.cpp | 5
-rw-r--r--  lib/CodeGen/LiveRangeEdit.h | 2
-rw-r--r--  lib/CodeGen/LiveStackAnalysis.cpp | 5
-rw-r--r--  lib/CodeGen/LiveVariables.cpp | 2
-rw-r--r--  lib/CodeGen/MachineBasicBlock.cpp | 5
-rw-r--r--  lib/CodeGen/MachineBlockFrequencyInfo.cpp (renamed from lib/CodeGen/MachineBlockFrequency.cpp) | 32
-rw-r--r--  lib/CodeGen/MachineCSE.cpp | 11
-rw-r--r--  lib/CodeGen/MachineFunction.cpp | 2
-rw-r--r--  lib/CodeGen/MachineInstr.cpp | 366
-rw-r--r--  lib/CodeGen/MachineLICM.cpp | 156
-rw-r--r--  lib/CodeGen/MachineModuleInfo.cpp | 37
-rw-r--r--  lib/CodeGen/MachineRegisterInfo.cpp | 44
-rw-r--r--  lib/CodeGen/MachineSink.cpp | 31
-rw-r--r--  lib/CodeGen/MachineVerifier.cpp | 70
-rw-r--r--  lib/CodeGen/PHIElimination.cpp | 3
-rw-r--r--  lib/CodeGen/PeepholeOptimizer.cpp | 17
-rw-r--r--  lib/CodeGen/ProcessImplicitDefs.cpp | 6
-rw-r--r--  lib/CodeGen/PrologEpilogInserter.cpp | 14
-rw-r--r--  lib/CodeGen/RegAllocBasic.cpp | 10
-rw-r--r--  lib/CodeGen/RegAllocGreedy.cpp | 523
-rw-r--r--  lib/CodeGen/RegAllocLinearScan.cpp | 3
-rw-r--r--  lib/CodeGen/RegAllocPBQP.cpp | 2
-rw-r--r--  lib/CodeGen/RegisterClassInfo.cpp | 7
-rw-r--r--  lib/CodeGen/RegisterClassInfo.h | 15
-rw-r--r--  lib/CodeGen/RegisterCoalescer.cpp | 630
-rw-r--r--  lib/CodeGen/RegisterCoalescer.h | 205
-rw-r--r--  lib/CodeGen/RegisterScavenging.cpp | 1
-rw-r--r--  lib/CodeGen/ScheduleDAG.cpp | 3
-rw-r--r--  lib/CodeGen/ScheduleDAGInstrs.cpp | 3
-rw-r--r--  lib/CodeGen/ScheduleDAGInstrs.h | 7
-rw-r--r--  lib/CodeGen/ScoreboardHazardRecognizer.cpp | 1
-rw-r--r--  lib/CodeGen/SelectionDAG/CMakeLists.txt | 10
-rw-r--r--  lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 121
-rw-r--r--  lib/CodeGen/SelectionDAG/FastISel.cpp | 48
-rw-r--r--  lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp | 47
-rw-r--r--  lib/CodeGen/SelectionDAG/InstrEmitter.cpp | 186
-rw-r--r--  lib/CodeGen/SelectionDAG/InstrEmitter.h | 6
-rw-r--r--  lib/CodeGen/SelectionDAG/LegalizeDAG.cpp | 341
-rw-r--r--  lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp | 27
-rw-r--r--  lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp | 184
-rw-r--r--  lib/CodeGen/SelectionDAG/LegalizeTypes.cpp | 33
-rw-r--r--  lib/CodeGen/SelectionDAG/LegalizeTypes.h | 33
-rw-r--r--  lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp | 44
-rw-r--r--  lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp | 46
-rw-r--r--  lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp | 118
-rw-r--r--  lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp | 36
-rw-r--r--  lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 184
-rw-r--r--  lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp | 526
-rw-r--r--  lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h | 39
-rw-r--r--  lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp | 37
-rw-r--r--  lib/CodeGen/SelectionDAG/TargetLowering.cpp | 48
-rw-r--r--  lib/CodeGen/ShadowStackGC.cpp | 52
-rw-r--r--  lib/CodeGen/SjLjEHPrepare.cpp | 514
-rw-r--r--  lib/CodeGen/SpillPlacement.cpp | 17
-rw-r--r--  lib/CodeGen/SpillPlacement.h | 14
-rw-r--r--  lib/CodeGen/SplitKit.cpp | 718
-rw-r--r--  lib/CodeGen/SplitKit.h | 171
-rw-r--r--  lib/CodeGen/Splitter.cpp | 4
-rw-r--r--  lib/CodeGen/StackProtector.cpp | 4
-rw-r--r--  lib/CodeGen/StrongPHIElimination.cpp | 3
-rw-r--r--  lib/CodeGen/TailDuplication.cpp | 1
-rw-r--r--  lib/CodeGen/TargetInstrInfoImpl.cpp | 76
-rw-r--r--  lib/CodeGen/TargetLoweringObjectFileImpl.cpp | 535
-rw-r--r--  lib/CodeGen/TwoAddressInstructionPass.cpp | 13
-rw-r--r--  lib/CodeGen/VirtRegMap.cpp | 32
-rw-r--r--  lib/CompilerDriver/Action.cpp | 134
-rw-r--r--  lib/CompilerDriver/BuiltinOptions.cpp | 61
-rw-r--r--  lib/CompilerDriver/CMakeLists.txt | 12
-rw-r--r--  lib/CompilerDriver/CompilationGraph.cpp | 655
-rw-r--r--  lib/CompilerDriver/Main.cpp | 146
-rw-r--r--  lib/CompilerDriver/Makefile | 20
-rw-r--r--  lib/CompilerDriver/Tool.cpp | 95
-rw-r--r--  lib/DebugInfo/CMakeLists.txt | 16
-rw-r--r--  lib/DebugInfo/DIContext.cpp | 24
-rw-r--r--  lib/DebugInfo/DWARFAbbreviationDeclaration.cpp | 83
-rw-r--r--  lib/DebugInfo/DWARFAbbreviationDeclaration.h | 54
-rw-r--r--  lib/DebugInfo/DWARFAttribute.h | 30
-rw-r--r--  lib/DebugInfo/DWARFCompileUnit.cpp | 238
-rw-r--r--  lib/DebugInfo/DWARFCompileUnit.h | 111
-rw-r--r--  lib/DebugInfo/DWARFContext.cpp | 167
-rw-r--r--  lib/DebugInfo/DWARFContext.h | 118
-rw-r--r--  lib/DebugInfo/DWARFDebugAbbrev.cpp | 106
-rw-r--r--  lib/DebugInfo/DWARFDebugAbbrev.h | 73
-rw-r--r--  lib/DebugInfo/DWARFDebugArangeSet.cpp | 150
-rw-r--r--  lib/DebugInfo/DWARFDebugArangeSet.h | 75
-rw-r--r--  lib/DebugInfo/DWARFDebugAranges.cpp | 223
-rw-r--r--  lib/DebugInfo/DWARFDebugAranges.h | 98
-rw-r--r--  lib/DebugInfo/DWARFDebugInfoEntry.cpp | 444
-rw-r--r--  lib/DebugInfo/DWARFDebugInfoEntry.h | 135
-rw-r--r--  lib/DebugInfo/DWARFDebugLine.cpp | 475
-rw-r--r--  lib/DebugInfo/DWARFDebugLine.h | 190
-rw-r--r--  lib/DebugInfo/DWARFFormValue.cpp | 427
-rw-r--r--  lib/DebugInfo/DWARFFormValue.h | 78
-rw-r--r--  lib/DebugInfo/Makefile | 14
-rw-r--r--  lib/ExecutionEngine/CMakeLists.txt | 7
-rw-r--r--  lib/ExecutionEngine/ExecutionEngine.cpp | 34
-rw-r--r--  lib/ExecutionEngine/Interpreter/CMakeLists.txt | 8
-rw-r--r--  lib/ExecutionEngine/Interpreter/Execution.cpp | 133
-rw-r--r--  lib/ExecutionEngine/Interpreter/ExternalFunctions.cpp | 34
-rw-r--r--  lib/ExecutionEngine/Interpreter/Interpreter.h | 30
-rw-r--r--  lib/ExecutionEngine/JIT/CMakeLists.txt | 8
-rw-r--r--  lib/ExecutionEngine/JIT/Intercept.cpp | 1
-rw-r--r--  lib/ExecutionEngine/JIT/JIT.cpp | 8
-rw-r--r--  lib/ExecutionEngine/JIT/JIT.h | 5
-rw-r--r--  lib/ExecutionEngine/JIT/JITDwarfEmitter.cpp | 10
-rw-r--r--  lib/ExecutionEngine/JIT/JITDwarfEmitter.h | 4
-rw-r--r--  lib/ExecutionEngine/JIT/JITEmitter.cpp | 5
-rw-r--r--  lib/ExecutionEngine/MCJIT/CMakeLists.txt | 8
-rw-r--r--  lib/ExecutionEngine/MCJIT/Intercept.cpp | 1
-rw-r--r--  lib/ExecutionEngine/MCJIT/MCJIT.cpp | 7
-rw-r--r--  lib/ExecutionEngine/RuntimeDyld/CMakeLists.txt | 5
-rw-r--r--  lib/ExecutionEngine/RuntimeDyld/RuntimeDyldImpl.h | 2
-rw-r--r--  lib/ExecutionEngine/TargetSelect.cpp | 13
-rw-r--r--  lib/Linker/CMakeLists.txt | 8
-rw-r--r--  lib/Linker/LinkModules.cpp | 112
-rw-r--r--  lib/Linker/Linker.cpp | 8
-rw-r--r--  lib/MC/CMakeLists.txt | 19
-rw-r--r--  lib/MC/ELFObjectWriter.cpp | 124
-rw-r--r--  lib/MC/ELFObjectWriter.h | 32
-rw-r--r--  lib/MC/MCAsmBackend.cpp (renamed from lib/MC/TargetAsmBackend.cpp) | 10
-rw-r--r--  lib/MC/MCAsmInfo.cpp | 12
-rw-r--r--  lib/MC/MCAsmInfoCOFF.cpp | 7
-rw-r--r--  lib/MC/MCAsmInfoDarwin.cpp | 8
-rw-r--r--  lib/MC/MCAsmStreamer.cpp | 57
-rw-r--r--  lib/MC/MCAssembler.cpp | 6
-rw-r--r--  lib/MC/MCAtom.cpp | 97
-rw-r--r--  lib/MC/MCCodeGenInfo.cpp | 21
-rw-r--r--  lib/MC/MCContext.cpp | 13
-rw-r--r--  lib/MC/MCDisassembler/CMakeLists.txt | 24
-rw-r--r--  lib/MC/MCDisassembler/Disassembler.cpp | 82
-rw-r--r--  lib/MC/MCDisassembler/Disassembler.h | 25
-rw-r--r--  lib/MC/MCDisassembler/EDDisassembler.cpp | 70
-rw-r--r--  lib/MC/MCDisassembler/EDDisassembler.h | 19
-rw-r--r--  lib/MC/MCDisassembler/EDInst.h | 2
-rw-r--r--  lib/MC/MCDisassembler/EDToken.cpp | 8
-rw-r--r--  lib/MC/MCDisassembler/EDToken.h | 2
-rw-r--r--  lib/MC/MCDwarf.cpp | 95
-rw-r--r--  lib/MC/MCELF.cpp | 1
-rw-r--r--  lib/MC/MCELFStreamer.cpp | 17
-rw-r--r--  lib/MC/MCELFStreamer.h | 7
-rw-r--r--  lib/MC/MCExpr.cpp | 1
-rw-r--r--  lib/MC/MCInstPrinter.cpp | 11
-rw-r--r--  lib/MC/MCInstrAnalysis.cpp | 21
-rw-r--r--  lib/MC/MCLoggingStreamer.cpp | 5
-rw-r--r--  lib/MC/MCMachOStreamer.cpp | 20
-rw-r--r--  lib/MC/MCModule.cpp | 45
-rw-r--r--  lib/MC/MCNullStreamer.cpp | 4
-rw-r--r--  lib/MC/MCObjectFileInfo.cpp | 554
-rw-r--r--  lib/MC/MCObjectStreamer.cpp | 6
-rw-r--r--  lib/MC/MCParser/AsmLexer.cpp | 32
-rw-r--r--  lib/MC/MCParser/AsmParser.cpp | 198
-rw-r--r--  lib/MC/MCParser/CMakeLists.txt | 7
-rw-r--r--  lib/MC/MCParser/COFFAsmParser.cpp | 47
-rw-r--r--  lib/MC/MCParser/ELFAsmParser.cpp | 59
-rw-r--r--  lib/MC/MCParser/MCAsmParser.cpp | 6
-rw-r--r--  lib/MC/MCParser/MCTargetAsmParser.cpp (renamed from lib/MC/MCParser/TargetAsmParser.cpp) | 8
-rw-r--r--  lib/MC/MCPureStreamer.cpp | 9
-rw-r--r--  lib/MC/MCStreamer.cpp | 113
-rw-r--r--  lib/MC/MCTargetAsmLexer.cpp (renamed from lib/Target/TargetAsmLexer.cpp) | 10
-rw-r--r--  lib/MC/MCWin64EH.cpp | 34
-rw-r--r--  lib/MC/MachObjectWriter.cpp | 25
-rw-r--r--  lib/MC/WinCOFFObjectWriter.cpp | 2
-rw-r--r--  lib/MC/WinCOFFStreamer.cpp | 22
-rw-r--r--  lib/Makefile | 2
-rw-r--r--  lib/Object/Archive.cpp | 172
-rw-r--r--  lib/Object/Binary.cpp | 11
-rw-r--r--  lib/Object/CMakeLists.txt | 6
-rw-r--r--  lib/Object/COFFObjectFile.cpp | 239
-rw-r--r--  lib/Object/ELFObjectFile.cpp | 773
-rw-r--r--  lib/Object/MachOObject.cpp | 38
-rw-r--r--  lib/Object/MachOObjectFile.cpp | 316
-rw-r--r--  lib/Object/Object.cpp | 4
-rw-r--r--  lib/Object/ObjectFile.cpp | 1
-rw-r--r--  lib/Support/APFloat.cpp | 31
-rw-r--r--  lib/Support/APInt.cpp | 56
-rw-r--r--  lib/Support/Atomic.cpp | 10
-rw-r--r--  lib/Support/BlockFrequency.cpp | 126
-rw-r--r--  lib/Support/BranchProbability.cpp | 3
-rw-r--r--  lib/Support/CMakeLists.txt | 4
-rw-r--r--  lib/Support/CommandLine.cpp | 66
-rw-r--r--  lib/Support/ConstantRange.cpp | 25
-rw-r--r--  lib/Support/CrashRecoveryContext.cpp | 68
-rw-r--r--  lib/Support/DataExtractor.cpp | 175
-rw-r--r--  lib/Support/Disassembler.cpp | 2
-rw-r--r--  lib/Support/Dwarf.cpp | 49
-rw-r--r--  lib/Support/DynamicLibrary.cpp | 99
-rw-r--r--  lib/Support/FoldingSet.cpp | 6
-rw-r--r--  lib/Support/Host.cpp | 4
-rw-r--r--  lib/Support/IncludeFile.cpp | 2
-rw-r--r--  lib/Support/Memory.cpp | 6
-rw-r--r--  lib/Support/MemoryBuffer.cpp | 8
-rw-r--r--  lib/Support/MemoryObject.cpp | 7
-rw-r--r--  lib/Support/Mutex.cpp | 2
-rw-r--r--  lib/Support/Path.cpp | 2
-rw-r--r--  lib/Support/PathV2.cpp | 36
-rw-r--r--  lib/Support/PrettyStackTrace.cpp | 2
-rw-r--r--  lib/Support/RWMutex.cpp | 2
-rw-r--r--  lib/Support/SearchForAddressOfSpecialSymbol.cpp | 15
-rw-r--r--  lib/Support/StringExtras.cpp | 9
-rw-r--r--  lib/Support/StringRef.cpp | 19
-rw-r--r--  lib/Support/TargetRegistry.cpp | 32
-rw-r--r--  lib/Support/ThreadLocal.cpp | 2
-rw-r--r--  lib/Support/Threading.cpp | 41
-rw-r--r--  lib/Support/Triple.cpp | 44
-rw-r--r--  lib/Support/Twine.cpp | 60
-rw-r--r--  lib/Support/Unix/Host.inc | 1
-rw-r--r--  lib/Support/Unix/Path.inc | 4
-rw-r--r--  lib/Support/Unix/PathV2.inc | 22
-rw-r--r--  lib/Support/Unix/Process.inc | 4
-rw-r--r--  lib/Support/Windows/DynamicLibrary.inc | 76
-rw-r--r--  lib/Support/Windows/Memory.inc | 57
-rw-r--r--  lib/Support/Windows/PathV2.inc | 51
-rw-r--r--  lib/Support/Windows/Process.inc | 5
-rw-r--r--  lib/Support/Windows/RWMutex.inc | 98
-rw-r--r--  lib/Support/Windows/Signals.inc | 224
-rw-r--r--  lib/Support/Windows/Windows.h | 6
-rw-r--r--  lib/Support/raw_ostream.cpp | 7
-rw-r--r--  lib/TableGen/CMakeLists.txt | 16
-rw-r--r--  lib/TableGen/Error.cpp | 39
-rw-r--r--  lib/TableGen/Main.cpp | 124
-rw-r--r--  lib/TableGen/Makefile | 18
-rw-r--r--  lib/TableGen/Record.cpp | 2019
-rw-r--r--  lib/TableGen/TGLexer.cpp | 435
-rw-r--r--  lib/TableGen/TGLexer.h | 125
-rw-r--r--  lib/TableGen/TGParser.cpp | 2194
-rw-r--r--  lib/TableGen/TGParser.h | 137
-rw-r--r--  lib/TableGen/TableGenBackend.cpp | 25
-rw-r--r--  lib/Target/ARM/ARM.h | 21
-rw-r--r--  lib/Target/ARM/ARM.td | 23
-rw-r--r--  lib/Target/ARM/ARMAsmPrinter.cpp | 194
-rw-r--r--  lib/Target/ARM/ARMBaseInstrInfo.cpp | 522
-rw-r--r--  lib/Target/ARM/ARMBaseInstrInfo.h | 161
-rw-r--r--  lib/Target/ARM/ARMBaseRegisterInfo.cpp | 287
-rw-r--r--  lib/Target/ARM/ARMBaseRegisterInfo.h | 19
-rw-r--r--  lib/Target/ARM/ARMCodeEmitter.cpp | 35
-rw-r--r--  lib/Target/ARM/ARMConstantIslandPass.cpp | 23
-rw-r--r--  lib/Target/ARM/ARMConstantPoolValue.cpp | 317
-rw-r--r--  lib/Target/ARM/ARMConstantPoolValue.h | 189
-rw-r--r--  lib/Target/ARM/ARMExpandPseudoInsts.cpp | 61
-rw-r--r--  lib/Target/ARM/ARMFastISel.cpp | 93
-rw-r--r--  lib/Target/ARM/ARMFrameLowering.cpp | 31
-rw-r--r--  lib/Target/ARM/ARMGlobalMerge.cpp | 10
-rw-r--r--  lib/Target/ARM/ARMISelDAGToDAG.cpp | 336
-rw-r--r--  lib/Target/ARM/ARMISelLowering.cpp | 1295
-rw-r--r--  lib/Target/ARM/ARMISelLowering.h | 47
-rw-r--r--  lib/Target/ARM/ARMInstrFormats.td | 341
-rw-r--r--  lib/Target/ARM/ARMInstrInfo.cpp | 26
-rw-r--r--  lib/Target/ARM/ARMInstrInfo.td | 2748
-rw-r--r--  lib/Target/ARM/ARMInstrNEON.td | 153
-rw-r--r--  lib/Target/ARM/ARMInstrThumb.td | 489
-rw-r--r--  lib/Target/ARM/ARMInstrThumb2.td | 1957
-rw-r--r--  lib/Target/ARM/ARMInstrVFP.td | 113
-rw-r--r--  lib/Target/ARM/ARMLoadStoreOptimizer.cpp | 49
-rw-r--r--  lib/Target/ARM/ARMMCInstLower.cpp | 2
-rw-r--r--  lib/Target/ARM/ARMRegisterInfo.td | 24
-rw-r--r--  lib/Target/ARM/ARMSelectionDAGInfo.cpp | 15
-rw-r--r--  lib/Target/ARM/ARMSelectionDAGInfo.h | 17
-rw-r--r--  lib/Target/ARM/ARMSubtarget.cpp | 5
-rw-r--r--  lib/Target/ARM/ARMSubtarget.h | 18
-rw-r--r--  lib/Target/ARM/ARMTargetMachine.cpp | 91
-rw-r--r--  lib/Target/ARM/ARMTargetMachine.h | 16
-rw-r--r--  lib/Target/ARM/AsmParser/ARMAsmLexer.cpp | 37
-rw-r--r--  lib/Target/ARM/AsmParser/ARMAsmParser.cpp | 4039
-rw-r--r--  lib/Target/ARM/AsmParser/CMakeLists.txt | 9
-rw-r--r--  lib/Target/ARM/CMakeLists.txt | 48
-rw-r--r--  lib/Target/ARM/Disassembler/ARMDisassembler.cpp | 4455
-rw-r--r--  lib/Target/ARM/Disassembler/ARMDisassembler.h | 99
-rw-r--r--  lib/Target/ARM/Disassembler/ARMDisassemblerCore.cpp | 3818
-rw-r--r--  lib/Target/ARM/Disassembler/ARMDisassemblerCore.h | 336
-rw-r--r--  lib/Target/ARM/Disassembler/CMakeLists.txt | 11
-rw-r--r--  lib/Target/ARM/Disassembler/ThumbDisassemblerCore.h | 2459
-rw-r--r--  lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp | 399
-rw-r--r--  lib/Target/ARM/InstPrinter/ARMInstPrinter.h | 34
-rw-r--r--  lib/Target/ARM/InstPrinter/CMakeLists.txt | 8
-rw-r--r--  lib/Target/ARM/MCTargetDesc/ARMAddressingModes.h (renamed from lib/Target/ARM/ARMAddressingModes.h) | 104
-rw-r--r--  lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp (renamed from lib/Target/ARM/ARMAsmBackend.cpp) | 77
-rw-r--r--  lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h (renamed from lib/Target/ARM/ARMBaseInfo.h) | 163
-rw-r--r--  lib/Target/ARM/MCTargetDesc/ARMFixupKinds.h (renamed from lib/Target/ARM/ARMFixupKinds.h) | 0
-rw-r--r--  lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp | 11
-rw-r--r--  lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp (renamed from lib/Target/ARM/ARMMCCodeEmitter.cpp) | 386
-rw-r--r--  lib/Target/ARM/MCTargetDesc/ARMMCExpr.cpp (renamed from lib/Target/ARM/ARMMCExpr.cpp) | 0
-rw-r--r--  lib/Target/ARM/MCTargetDesc/ARMMCExpr.h (renamed from lib/Target/ARM/ARMMCExpr.h) | 0
-rw-r--r--  lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp | 179
-rw-r--r--  lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h | 21
-rw-r--r--  lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp (renamed from lib/Target/ARM/ARMMachObjectWriter.cpp) | 5
-rw-r--r--  lib/Target/ARM/MCTargetDesc/CMakeLists.txt | 14
-rw-r--r--  lib/Target/ARM/Makefile | 5
-rw-r--r--  lib/Target/ARM/NEONMoveFix.cpp | 149
-rw-r--r--  lib/Target/ARM/TargetInfo/ARMTargetInfo.cpp | 2
-rw-r--r--  lib/Target/ARM/TargetInfo/CMakeLists.txt | 8
-rw-r--r--  lib/Target/ARM/Thumb1FrameLowering.cpp | 11
-rw-r--r--  lib/Target/ARM/Thumb1RegisterInfo.cpp | 34
-rw-r--r--  lib/Target/ARM/Thumb2ITBlockPass.cpp | 21
-rw-r--r--  lib/Target/ARM/Thumb2InstrInfo.cpp | 13
-rw-r--r--  lib/Target/ARM/Thumb2SizeReduction.cpp | 21
-rw-r--r--  lib/Target/Alpha/AlphaAsmPrinter.cpp | 2
-rw-r--r--  lib/Target/Alpha/AlphaISelDAGToDAG.cpp | 7
-rw-r--r--  lib/Target/Alpha/AlphaISelLowering.cpp | 8
-rw-r--r--  lib/Target/Alpha/AlphaISelLowering.h | 2
-rw-r--r--  lib/Target/Alpha/AlphaInstrInfo.cpp | 1
-rw-r--r--  lib/Target/Alpha/AlphaInstrInfo.td | 2
-rw-r--r--  lib/Target/Alpha/AlphaRegisterInfo.cpp | 18
-rw-r--r--  lib/Target/Alpha/AlphaRegisterInfo.h | 4
-rw-r--r--  lib/Target/Alpha/AlphaSubtarget.cpp | 1
-rw-r--r--  lib/Target/Alpha/AlphaTargetMachine.cpp | 12
-rw-r--r--  lib/Target/Alpha/AlphaTargetMachine.h | 5
-rw-r--r--  lib/Target/Alpha/CMakeLists.txt | 25
-rw-r--r--  lib/Target/Alpha/MCTargetDesc/AlphaMCTargetDesc.cpp | 35
-rw-r--r--  lib/Target/Alpha/MCTargetDesc/CMakeLists.txt | 7
-rw-r--r--  lib/Target/Alpha/TargetInfo/AlphaTargetInfo.cpp | 2
-rw-r--r--  lib/Target/Alpha/TargetInfo/CMakeLists.txt | 8
-rw-r--r--  lib/Target/Blackfin/BlackfinAsmPrinter.cpp | 2
-rw-r--r--  lib/Target/Blackfin/BlackfinFrameLowering.h | 4
-rw-r--r--  lib/Target/Blackfin/BlackfinISelLowering.cpp | 4
-rw-r--r--  lib/Target/Blackfin/BlackfinISelLowering.h | 2
-rw-r--r--  lib/Target/Blackfin/BlackfinInstrInfo.cpp | 2
-rw-r--r--  lib/Target/Blackfin/BlackfinIntrinsicInfo.cpp | 8
-rw-r--r--  lib/Target/Blackfin/BlackfinIntrinsicInfo.h | 4
-rw-r--r--  lib/Target/Blackfin/BlackfinRegisterInfo.cpp | 18
-rw-r--r--  lib/Target/Blackfin/BlackfinRegisterInfo.h | 4
-rw-r--r--  lib/Target/Blackfin/BlackfinSubtarget.cpp | 2
-rw-r--r--  lib/Target/Blackfin/BlackfinTargetMachine.cpp | 12
-rw-r--r--  lib/Target/Blackfin/BlackfinTargetMachine.h | 5
-rw-r--r--  lib/Target/Blackfin/CMakeLists.txt | 27
-rw-r--r--  lib/Target/Blackfin/MCTargetDesc/BlackfinMCTargetDesc.cpp | 39
-rw-r--r--  lib/Target/Blackfin/MCTargetDesc/CMakeLists.txt | 7
-rw-r--r--  lib/Target/Blackfin/TargetInfo/BlackfinTargetInfo.cpp | 2
-rw-r--r--  lib/Target/Blackfin/TargetInfo/CMakeLists.txt | 8
-rw-r--r--  lib/Target/CBackend/CBackend.cpp | 180
-rw-r--r--  lib/Target/CBackend/CMakeLists.txt | 12
-rw-r--r--  lib/Target/CBackend/CTargetMachine.h | 5
-rw-r--r--  lib/Target/CBackend/TargetInfo/CBackendTargetInfo.cpp | 4
-rw-r--r--  lib/Target/CBackend/TargetInfo/CMakeLists.txt | 5
-rw-r--r--  lib/Target/CMakeLists.txt | 8
-rw-r--r--  lib/Target/CellSPU/CMakeLists.txt | 27
-rw-r--r--  lib/Target/CellSPU/MCTargetDesc/CMakeLists.txt | 7
-rw-r--r--  lib/Target/CellSPU/MCTargetDesc/SPUMCTargetDesc.cpp | 52
-rw-r--r--  lib/Target/CellSPU/MCTargetDesc/SPUMCTargetDesc.h | 4
-rw-r--r--  lib/Target/CellSPU/SPUAsmPrinter.cpp | 2
-rw-r--r--  lib/Target/CellSPU/SPUFrameLowering.cpp | 20
-rw-r--r--  lib/Target/CellSPU/SPUFrameLowering.h | 14
-rw-r--r--  lib/Target/CellSPU/SPUISelLowering.cpp | 16
-rw-r--r--  lib/Target/CellSPU/SPUISelLowering.h | 6
-rw-r--r--  lib/Target/CellSPU/SPUInstrInfo.cpp | 4
-rw-r--r--  lib/Target/CellSPU/SPUInstrInfo.td | 10
-rw-r--r--  lib/Target/CellSPU/SPURegisterInfo.cpp | 19
-rw-r--r--  lib/Target/CellSPU/SPURegisterInfo.h | 6
-rw-r--r--  lib/Target/CellSPU/SPUSubtarget.cpp | 2
-rw-r--r--  lib/Target/CellSPU/SPUTargetMachine.cpp | 25
-rw-r--r--  lib/Target/CellSPU/SPUTargetMachine.h | 5
-rw-r--r--  lib/Target/CellSPU/TargetInfo/CMakeLists.txt | 8
-rw-r--r--  lib/Target/CellSPU/TargetInfo/CellSPUTargetInfo.cpp | 2
-rw-r--r--  lib/Target/CppBackend/CMakeLists.txt | 7
-rw-r--r--  lib/Target/CppBackend/CPPBackend.cpp | 141
-rw-r--r--  lib/Target/CppBackend/CPPTargetMachine.h | 5
-rw-r--r--  lib/Target/CppBackend/TargetInfo/CMakeLists.txt | 4
-rw-r--r--  lib/Target/CppBackend/TargetInfo/CppBackendTargetInfo.cpp | 4
-rw-r--r--  lib/Target/MBlaze/AsmParser/CMakeLists.txt | 8
-rw-r--r--  lib/Target/MBlaze/AsmParser/MBlazeAsmLexer.cpp | 27
-rw-r--r--  lib/Target/MBlaze/AsmParser/MBlazeAsmParser.cpp | 24
-rw-r--r--  lib/Target/MBlaze/AsmParser/Makefile | 2
-rw-r--r--  lib/Target/MBlaze/CMakeLists.txt | 36
-rw-r--r--  lib/Target/MBlaze/Disassembler/CMakeLists.txt | 10
-rw-r--r--  lib/Target/MBlaze/Disassembler/MBlazeDisassembler.cpp | 76
-rw-r--r--  lib/Target/MBlaze/Disassembler/MBlazeDisassembler.h | 9
-rw-r--r--  lib/Target/MBlaze/InstPrinter/CMakeLists.txt | 9
-rw-r--r--  lib/Target/MBlaze/InstPrinter/MBlazeInstPrinter.cpp | 4
-rw-r--r--  lib/Target/MBlaze/InstPrinter/MBlazeInstPrinter.h | 2
-rw-r--r--  lib/Target/MBlaze/MBlaze.h | 12
-rw-r--r--  lib/Target/MBlaze/MBlazeAsmPrinter.cpp | 21
-rw-r--r--  lib/Target/MBlaze/MBlazeFrameLowering.cpp | 2
-rw-r--r--  lib/Target/MBlaze/MBlazeISelLowering.cpp | 13
-rw-r--r--  lib/Target/MBlaze/MBlazeISelLowering.h | 2
-rw-r--r--  lib/Target/MBlaze/MBlazeInstrInfo.cpp | 5
-rw-r--r--  lib/Target/MBlaze/MBlazeInstrInfo.h | 56
-rw-r--r--  lib/Target/MBlaze/MBlazeInstrInfo.td | 21
-rw-r--r--  lib/Target/MBlaze/MBlazeIntrinsicInfo.cpp | 8
-rw-r--r--  lib/Target/MBlaze/MBlazeIntrinsicInfo.h | 4
-rw-r--r--  lib/Target/MBlaze/MBlazeRegisterInfo.cpp | 172
-rw-r--r--  lib/Target/MBlaze/MBlazeRegisterInfo.h | 12
-rw-r--r--  lib/Target/MBlaze/MBlazeSubtarget.cpp | 2
-rw-r--r--  lib/Target/MBlaze/MBlazeTargetMachine.cpp | 50
-rw-r--r--  lib/Target/MBlaze/MBlazeTargetMachine.h | 5
-rw-r--r--  lib/Target/MBlaze/MBlazeTargetObjectFile.cpp | 2
-rw-r--r--  lib/Target/MBlaze/MCTargetDesc/CMakeLists.txt | 13
-rw-r--r--  lib/Target/MBlaze/MCTargetDesc/MBlazeAsmBackend.cpp (renamed from lib/Target/MBlaze/MBlazeAsmBackend.cpp) | 17
-rw-r--r--  lib/Target/MBlaze/MCTargetDesc/MBlazeBaseInfo.h | 240
-rw-r--r--  lib/Target/MBlaze/MCTargetDesc/MBlazeMCCodeEmitter.cpp (renamed from lib/Target/MBlaze/MBlazeMCCodeEmitter.cpp) | 8
-rw-r--r--  lib/Target/MBlaze/MCTargetDesc/MBlazeMCTargetDesc.cpp | 93
-rw-r--r--  lib/Target/MBlaze/MCTargetDesc/MBlazeMCTargetDesc.h | 11
-rw-r--r--  lib/Target/MBlaze/TargetInfo/CMakeLists.txt | 8
-rw-r--r--  lib/Target/MBlaze/TargetInfo/MBlazeTargetInfo.cpp | 2
-rw-r--r--  lib/Target/MSP430/CMakeLists.txt | 26
-rw-r--r--  lib/Target/MSP430/InstPrinter/CMakeLists.txt | 8
-rw-r--r--  lib/Target/MSP430/InstPrinter/MSP430InstPrinter.cpp | 4
-rw-r--r--  lib/Target/MSP430/InstPrinter/MSP430InstPrinter.h | 4
-rw-r--r--  lib/Target/MSP430/MCTargetDesc/CMakeLists.txt | 8
-rw-r--r--  lib/Target/MSP430/MCTargetDesc/MSP430MCTargetDesc.cpp | 51
-rw-r--r--  lib/Target/MSP430/MCTargetDesc/MSP430MCTargetDesc.h | 4
-rw-r--r--  lib/Target/MSP430/MSP430AsmPrinter.cpp | 14
-rw-r--r--  lib/Target/MSP430/MSP430ISelLowering.cpp | 7
-rw-r--r--  lib/Target/MSP430/MSP430ISelLowering.h | 4
-rw-r--r--  lib/Target/MSP430/MSP430InstrInfo.cpp | 2
-rw-r--r--  lib/Target/MSP430/MSP430RegisterInfo.cpp | 16
-rw-r--r--  lib/Target/MSP430/MSP430RegisterInfo.h | 5
-rw-r--r--  lib/Target/MSP430/MSP430Subtarget.cpp | 2
-rw-r--r--  lib/Target/MSP430/MSP430TargetMachine.cpp | 11
-rw-r--r--  lib/Target/MSP430/MSP430TargetMachine.h | 5
-rw-r--r--  lib/Target/MSP430/TargetInfo/CMakeLists.txt | 8
-rw-r--r--  lib/Target/MSP430/TargetInfo/MSP430TargetInfo.cpp | 2
-rw-r--r--  lib/Target/Mangler.cpp | 4
-rw-r--r--  lib/Target/Mips/CMakeLists.txt | 28
-rw-r--r--  lib/Target/Mips/InstPrinter/CMakeLists.txt | 8
-rw-r--r--  lib/Target/Mips/InstPrinter/Makefile | 2
-rw-r--r--  lib/Target/Mips/InstPrinter/MipsInstPrinter.cpp | 6
-rw-r--r--  lib/Target/Mips/InstPrinter/MipsInstPrinter.h | 4
-rw-r--r--  lib/Target/Mips/MCTargetDesc/CMakeLists.txt | 13
-rw-r--r--  lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp | 117
-rw-r--r--  lib/Target/Mips/MCTargetDesc/MipsBaseInfo.h | 113
-rw-r--r--  lib/Target/Mips/MCTargetDesc/MipsFixupKinds.h | 90
-rw-r--r--  lib/Target/Mips/MCTargetDesc/MipsMCAsmInfo.cpp | 3
-rw-r--r--  lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp | 52
-rw-r--r--  lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.cpp | 122
-rw-r--r--  lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.h | 21
-rw-r--r--  lib/Target/Mips/Mips.h | 3
-rw-r--r--  lib/Target/Mips/Mips.td | 32
-rw-r--r--  lib/Target/Mips/Mips64InstrInfo.td | 214
-rw-r--r--  lib/Target/Mips/MipsAsmPrinter.cpp | 42
-rw-r--r--  lib/Target/Mips/MipsCallingConv.td | 55
-rw-r--r--  lib/Target/Mips/MipsCodeEmitter.cpp | 245
-rw-r--r--  lib/Target/Mips/MipsDelaySlotFiller.cpp | 204
-rw-r--r--  lib/Target/Mips/MipsFrameLowering.cpp | 19
-rw-r--r--  lib/Target/Mips/MipsFrameLowering.h | 5
-rw-r--r--  lib/Target/Mips/MipsISelDAGToDAG.cpp | 185
-rw-r--r--  lib/Target/Mips/MipsISelLowering.cpp | 848
-rw-r--r--  lib/Target/Mips/MipsISelLowering.h | 16
-rw-r--r--  lib/Target/Mips/MipsInstrFPU.td | 236
-rw-r--r--  lib/Target/Mips/MipsInstrFormats.td | 44
-rw-r--r--  lib/Target/Mips/MipsInstrInfo.cpp | 210
-rw-r--r--  lib/Target/Mips/MipsInstrInfo.h | 37
-rw-r--r--  lib/Target/Mips/MipsInstrInfo.td | 832
-rw-r--r--  lib/Target/Mips/MipsJITInfo.cpp | 230
-rw-r--r--  lib/Target/Mips/MipsJITInfo.h | 70
-rw-r--r--  lib/Target/Mips/MipsMCInstLower.cpp | 64
-rw-r--r--  lib/Target/Mips/MipsMCInstLower.h | 5
-rw-r--r--  lib/Target/Mips/MipsMCSymbolRefExpr.cpp | 9
-rw-r--r--  lib/Target/Mips/MipsMCSymbolRefExpr.h | 7
-rw-r--r--  lib/Target/Mips/MipsMachineFunction.h | 9
-rw-r--r--  lib/Target/Mips/MipsRegisterInfo.cpp | 205
-rw-r--r--  lib/Target/Mips/MipsRegisterInfo.h | 4
-rw-r--r--  lib/Target/Mips/MipsRegisterInfo.td | 119
-rw-r--r--  lib/Target/Mips/MipsRelocations.h | 41
-rw-r--r--  lib/Target/Mips/MipsSubtarget.cpp | 38
-rw-r--r--  lib/Target/Mips/MipsSubtarget.h | 15
-rw-r--r--  lib/Target/Mips/MipsTargetMachine.cpp | 73
-rw-r--r--  lib/Target/Mips/MipsTargetMachine.h | 48
-rw-r--r--  lib/Target/Mips/MipsTargetObjectFile.cpp | 2
-rw-r--r--  lib/Target/Mips/TargetInfo/CMakeLists.txt | 8
-rw-r--r--  lib/Target/Mips/TargetInfo/MipsTargetInfo.cpp | 16
-rw-r--r--  lib/Target/PTX/CMakeLists.txt | 32
-rw-r--r--  lib/Target/PTX/InstPrinter/CMakeLists.txt | 13
-rw-r--r--  lib/Target/PTX/InstPrinter/Makefile | 16
-rw-r--r--  lib/Target/PTX/InstPrinter/PTXInstPrinter.cpp | 192
-rw-r--r--  lib/Target/PTX/InstPrinter/PTXInstPrinter.h | 47
-rw-r--r--  lib/Target/PTX/MCTargetDesc/CMakeLists.txt | 9
-rw-r--r--  lib/Target/PTX/MCTargetDesc/PTXBaseInfo.h | 63
-rw-r--r--  lib/Target/PTX/MCTargetDesc/PTXMCTargetDesc.cpp | 53
-rw-r--r--  lib/Target/PTX/Makefile | 3
-rw-r--r--  lib/Target/PTX/PTX.h | 28
-rw-r--r--  lib/Target/PTX/PTX.td | 28
-rw-r--r--  lib/Target/PTX/PTXAsmPrinter.cpp | 500
-rw-r--r--  lib/Target/PTX/PTXAsmPrinter.h | 57
-rw-r--r--  lib/Target/PTX/PTXCallingConv.td | 29
-rw-r--r--  lib/Target/PTX/PTXFPRoundingModePass.cpp | 179
-rw-r--r--  lib/Target/PTX/PTXISelDAGToDAG.cpp | 182
-rw-r--r--  lib/Target/PTX/PTXISelLowering.cpp | 273
-rw-r--r--  lib/Target/PTX/PTXISelLowering.h | 19
-rw-r--r--  lib/Target/PTX/PTXInstrFormats.td | 31
-rw-r--r--  lib/Target/PTX/PTXInstrInfo.cpp | 82
-rw-r--r--  lib/Target/PTX/PTXInstrInfo.td | 1241
-rw-r--r--  lib/Target/PTX/PTXInstrLoadStore.td | 278
-rw-r--r--  lib/Target/PTX/PTXIntrinsicInstrInfo.td | 78
-rw-r--r--  lib/Target/PTX/PTXMCAsmStreamer.cpp | 15
-rw-r--r--  lib/Target/PTX/PTXMCInstLower.cpp | 32
-rw-r--r--  lib/Target/PTX/PTXMFInfoExtract.cpp | 36
-rw-r--r--  lib/Target/PTX/PTXMachineFunctionInfo.h | 163
-rw-r--r--  lib/Target/PTX/PTXParamManager.cpp | 73
-rw-r--r--  lib/Target/PTX/PTXParamManager.h | 86
-rw-r--r--  lib/Target/PTX/PTXRegAlloc.cpp | 58
-rw-r--r--  lib/Target/PTX/PTXRegisterInfo.cpp | 29
-rw-r--r--  lib/Target/PTX/PTXRegisterInfo.h | 18
-rw-r--r--  lib/Target/PTX/PTXRegisterInfo.td | 540
-rw-r--r--  lib/Target/PTX/PTXSelectionDAGInfo.cpp | 149
-rw-r--r--  lib/Target/PTX/PTXSelectionDAGInfo.h | 53
-rw-r--r--  lib/Target/PTX/PTXSubtarget.cpp | 2
-rw-r--r--  lib/Target/PTX/PTXSubtarget.h | 11
-rw-r--r--  lib/Target/PTX/PTXTargetMachine.cpp | 318
-rw-r--r--  lib/Target/PTX/PTXTargetMachine.h | 60
-rw-r--r--  lib/Target/PTX/TargetInfo/CMakeLists.txt | 8
-rw-r--r--  lib/Target/PTX/TargetInfo/PTXTargetInfo.cpp | 2
-rwxr-xr-x  lib/Target/PTX/generate-register-td.py | 163
-rw-r--r--  lib/Target/PowerPC/CMakeLists.txt | 34
-rw-r--r--  lib/Target/PowerPC/InstPrinter/CMakeLists.txt | 8
-rw-r--r--  lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp | 11
-rw-r--r--  lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h | 2
-rw-r--r--  lib/Target/PowerPC/MCTargetDesc/CMakeLists.txt | 12
-rw-r--r--  lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp (renamed from lib/Target/PowerPC/PPCAsmBackend.cpp) | 88
-rw-r--r--  lib/Target/PowerPC/MCTargetDesc/PPCBaseInfo.h | 70
-rw-r--r--  lib/Target/PowerPC/MCTargetDesc/PPCFixupKinds.h (renamed from lib/Target/PowerPC/PPCFixupKinds.h) | 0
-rw-r--r--  lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp | 6
-rw-r--r--  lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp (renamed from lib/Target/PowerPC/PPCMCCodeEmitter.cpp) | 9
-rw-r--r--  lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp | 115
-rw-r--r--  lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h | 10
-rw-r--r--  lib/Target/PowerPC/MCTargetDesc/PPCPredicates.cpp (renamed from lib/Target/PowerPC/PPCPredicates.cpp) | 0
-rw-r--r--  lib/Target/PowerPC/MCTargetDesc/PPCPredicates.h (renamed from lib/Target/PowerPC/PPCPredicates.h) | 2
-rw-r--r--  lib/Target/PowerPC/PPC.h | 11
-rw-r--r--  lib/Target/PowerPC/PPC.td | 4
-rw-r--r--  lib/Target/PowerPC/PPCAsmPrinter.cpp | 14
-rw-r--r--  lib/Target/PowerPC/PPCBranchSelector.cpp | 2
-rw-r--r--  lib/Target/PowerPC/PPCCodeEmitter.cpp | 4
-rw-r--r--  lib/Target/PowerPC/PPCFrameLowering.cpp | 17
-rw-r--r--  lib/Target/PowerPC/PPCFrameLowering.h | 1
-rw-r--r--  lib/Target/PowerPC/PPCISelDAGToDAG.cpp | 2
-rw-r--r--  lib/Target/PowerPC/PPCISelLowering.cpp | 65
-rw-r--r--  lib/Target/PowerPC/PPCISelLowering.h | 17
-rw-r--r--  lib/Target/PowerPC/PPCInstrInfo.cpp | 36
-rw-r--r--  lib/Target/PowerPC/PPCInstrInfo.td | 6
-rw-r--r--  lib/Target/PowerPC/PPCRegisterInfo.cpp | 79
-rw-r--r--  lib/Target/PowerPC/PPCRegisterInfo.h | 8
-rw-r--r--  lib/Target/PowerPC/PPCSubtarget.cpp | 2
-rw-r--r--  lib/Target/PowerPC/PPCTargetMachine.cpp | 71
-rw-r--r--  lib/Target/PowerPC/PPCTargetMachine.h | 16
-rw-r--r--  lib/Target/PowerPC/TargetInfo/CMakeLists.txt | 8
-rw-r--r--  lib/Target/PowerPC/TargetInfo/PowerPCTargetInfo.cpp | 2
-rw-r--r--  lib/Target/README.txt | 10
-rw-r--r--  lib/Target/Sparc/CMakeLists.txt | 25
-rw-r--r--  lib/Target/Sparc/MCTargetDesc/CMakeLists.txt | 8
-rw-r--r--  lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.cpp | 36
-rw-r--r--  lib/Target/Sparc/SparcAsmPrinter.cpp | 2
-rw-r--r--  lib/Target/Sparc/SparcISelLowering.cpp | 8
-rw-r--r--  lib/Target/Sparc/SparcInstrInfo.cpp | 2
-rw-r--r--  lib/Target/Sparc/SparcRegisterInfo.cpp | 15
-rw-r--r--  lib/Target/Sparc/SparcRegisterInfo.h | 4
-rw-r--r--  lib/Target/Sparc/SparcSubtarget.cpp | 2
-rw-r--r--  lib/Target/Sparc/SparcTargetMachine.cpp | 27
-rw-r--r--  lib/Target/Sparc/SparcTargetMachine.h | 16
-rw-r--r--  lib/Target/Sparc/TargetInfo/CMakeLists.txt | 8
-rw-r--r--  lib/Target/Sparc/TargetInfo/SparcTargetInfo.cpp | 2
-rw-r--r--  lib/Target/SystemZ/CMakeLists.txt | 25
-rw-r--r--  lib/Target/SystemZ/MCTargetDesc/CMakeLists.txt | 7
-rw-r--r--  lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.cpp | 39
-rw-r--r--  lib/Target/SystemZ/SystemZAsmPrinter.cpp | 4
-rw-r--r--  lib/Target/SystemZ/SystemZISelLowering.cpp | 1
-rw-r--r--  lib/Target/SystemZ/SystemZInstrInfo.cpp | 2
-rw-r--r--  lib/Target/SystemZ/SystemZInstrInfo.td | 2
-rw-r--r--  lib/Target/SystemZ/SystemZRegisterInfo.cpp | 17
-rw-r--r--  lib/Target/SystemZ/SystemZRegisterInfo.h | 4
-rw-r--r--  lib/Target/SystemZ/SystemZSubtarget.cpp | 2
-rw-r--r--  lib/Target/SystemZ/SystemZTargetMachine.cpp | 13
-rw-r--r--  lib/Target/SystemZ/SystemZTargetMachine.h | 5
-rw-r--r--  lib/Target/SystemZ/TargetInfo/CMakeLists.txt | 8
-rw-r--r--  lib/Target/SystemZ/TargetInfo/SystemZTargetInfo.cpp | 2
-rw-r--r--  lib/Target/Target.cpp | 10
-rw-r--r--  lib/Target/TargetAsmInfo.cpp | 23
-rw-r--r--  lib/Target/TargetData.cpp | 53
-rw-r--r--  lib/Target/TargetFrameLowering.cpp | 8
-rw-r--r--  lib/Target/TargetLoweringObjectFile.cpp | 59
-rw-r--r--  lib/Target/TargetMachine.cpp | 67
-rw-r--r--  lib/Target/TargetRegisterInfo.cpp | 49
-rw-r--r--  lib/Target/X86/AsmParser/CMakeLists.txt | 11
-rw-r--r--  lib/Target/X86/AsmParser/X86AsmLexer.cpp | 20
-rw-r--r--  lib/Target/X86/AsmParser/X86AsmParser.cpp | 84
-rw-r--r--  lib/Target/X86/CMakeLists.txt | 41
-rw-r--r--  lib/Target/X86/Disassembler/CMakeLists.txt | 10
-rw-r--r--  lib/Target/X86/Disassembler/X86Disassembler.cpp | 77
-rw-r--r--  lib/Target/X86/Disassembler/X86Disassembler.h | 26
-rw-r--r--  lib/Target/X86/Disassembler/X86DisassemblerDecoder.c | 165
-rw-r--r--  lib/Target/X86/Disassembler/X86DisassemblerDecoderCommon.h | 8
-rw-r--r--  lib/Target/X86/InstPrinter/CMakeLists.txt | 9
-rw-r--r--  lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp | 10
-rw-r--r--  lib/Target/X86/InstPrinter/X86ATTInstPrinter.h | 2
-rw-r--r--  lib/Target/X86/InstPrinter/X86InstComments.cpp | 31
-rw-r--r--  lib/Target/X86/InstPrinter/X86IntelInstPrinter.cpp | 7
-rw-r--r--  lib/Target/X86/InstPrinter/X86IntelInstPrinter.h | 2
-rw-r--r--  lib/Target/X86/MCTargetDesc/CMakeLists.txt | 13
-rw-r--r--  lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp (renamed from lib/Target/X86/X86AsmBackend.cpp) | 29
-rw-r--r--  lib/Target/X86/MCTargetDesc/X86BaseInfo.h | 548
-rw-r--r--  lib/Target/X86/MCTargetDesc/X86FixupKinds.h (renamed from lib/Target/X86/X86FixupKinds.h) | 0
-rw-r--r--  lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp (renamed from lib/Target/X86/X86MCCodeEmitter.cpp) | 198
-rw-r--r--  lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp | 338
-rw-r--r--  lib/Target/X86/MCTargetDesc/X86MCTargetDesc.h | 45
-rw-r--r--  lib/Target/X86/MCTargetDesc/X86MachObjectWriter.cpp (renamed from lib/Target/X86/X86MachObjectWriter.cpp) | 6
-rw-r--r--  lib/Target/X86/README-SSE.txt | 2
-rw-r--r--  lib/Target/X86/README.txt | 38
-rw-r--r--  lib/Target/X86/TargetInfo/CMakeLists.txt | 8
-rw-r--r--  lib/Target/X86/TargetInfo/X86TargetInfo.cpp | 2
-rw-r--r--  lib/Target/X86/Utils/CMakeLists.txt | 8
-rw-r--r--  lib/Target/X86/Utils/X86ShuffleDecode.cpp | 75
-rw-r--r--  lib/Target/X86/Utils/X86ShuffleDecode.h | 20
-rw-r--r--  lib/Target/X86/X86.h | 30
-rw-r--r--  lib/Target/X86/X86.td | 78
-rw-r--r--  lib/Target/X86/X86AsmPrinter.cpp | 23
-rw-r--r--  lib/Target/X86/X86CodeEmitter.cpp | 36
-rw-r--r--  lib/Target/X86/X86ELFWriterInfo.cpp | 2
-rw-r--r--  lib/Target/X86/X86FastISel.cpp | 93
-rw-r--r--  lib/Target/X86/X86FloatingPoint.cpp | 34
-rw-r--r--  lib/Target/X86/X86FrameLowering.cpp | 625
-rw-r--r--  lib/Target/X86/X86FrameLowering.h | 7
-rw-r--r--  lib/Target/X86/X86ISelDAGToDAG.cpp | 16
-rw-r--r--  lib/Target/X86/X86ISelLowering.cpp | 2940
-rw-r--r--  lib/Target/X86/X86ISelLowering.h | 62
-rw-r--r--  lib/Target/X86/X86InstrArithmetic.td | 96
-rw-r--r--  lib/Target/X86/X86InstrCompiler.td | 102
-rw-r--r--  lib/Target/X86/X86InstrExtension.td | 4
-rw-r--r--  lib/Target/X86/X86InstrFormats.td | 8
-rw-r--r--  lib/Target/X86/X86InstrFragmentsSIMD.td | 84
-rw-r--r--  lib/Target/X86/X86InstrInfo.cpp | 1838
-rw-r--r--  lib/Target/X86/X86InstrInfo.h | 553
-rw-r--r--  lib/Target/X86/X86InstrInfo.td | 209
-rw-r--r--  lib/Target/X86/X86InstrSSE.td | 4104
-rw-r--r--  lib/Target/X86/X86InstrSystem.td | 114
-rw-r--r--  lib/Target/X86/X86InstrVMX.td | 10
-rw-r--r--  lib/Target/X86/X86MCInstLower.cpp | 23
-rw-r--r--  lib/Target/X86/X86MachineFunctionInfo.h | 20
-rw-r--r--  lib/Target/X86/X86RegisterInfo.cpp | 165
-rw-r--r--  lib/Target/X86/X86RegisterInfo.h | 25
-rw-r--r--  lib/Target/X86/X86RegisterInfo.td | 11
-rw-r--r--  lib/Target/X86/X86SelectionDAGInfo.cpp | 2
-rw-r--r--  lib/Target/X86/X86Subtarget.cpp | 78
-rw-r--r--  lib/Target/X86/X86Subtarget.h | 36
-rw-r--r--  lib/Target/X86/X86TargetMachine.cpp | 173
-rw-r--r--  lib/Target/X86/X86TargetMachine.h | 22
-rw-r--r--  lib/Target/X86/X86TargetObjectFile.cpp | 76
-rw-r--r--  lib/Target/X86/X86TargetObjectFile.h | 22
-rw-r--r--  lib/Target/X86/X86VZeroUpper.cpp | 105
-rw-r--r--  lib/Target/XCore/CMakeLists.txt | 25
-rw-r--r--  lib/Target/XCore/MCTargetDesc/CMakeLists.txt | 7
-rw-r--r--  lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.cpp | 48
-rw-r--r--  lib/Target/XCore/TargetInfo/CMakeLists.txt | 8
-rw-r--r--  lib/Target/XCore/TargetInfo/XCoreTargetInfo.cpp | 2
-rw-r--r--  lib/Target/XCore/XCoreAsmPrinter.cpp | 60
-rw-r--r--  lib/Target/XCore/XCoreFrameLowering.cpp | 11
-rw-r--r--  lib/Target/XCore/XCoreFrameLowering.h | 2
-rw-r--r--  lib/Target/XCore/XCoreISelDAGToDAG.cpp | 11
-rw-r--r--  lib/Target/XCore/XCoreISelLowering.cpp | 37
-rw-r--r--  lib/Target/XCore/XCoreISelLowering.h | 5
-rw-r--r--  lib/Target/XCore/XCoreInstrInfo.cpp | 12
-rw-r--r--  lib/Target/XCore/XCoreInstrInfo.h | 5
-rw-r--r--  lib/Target/XCore/XCoreInstrInfo.td | 81
-rw-r--r--  lib/Target/XCore/XCoreRegisterInfo.cpp | 15
-rw-r--r--  lib/Target/XCore/XCoreRegisterInfo.h | 5
-rw-r--r--  lib/Target/XCore/XCoreSubtarget.cpp | 2
-rw-r--r--  lib/Target/XCore/XCoreTargetMachine.cpp | 10
-rw-r--r--  lib/Target/XCore/XCoreTargetMachine.h | 5
-rw-r--r--  lib/Transforms/IPO/ArgumentPromotion.cpp | 43
-rw-r--r--  lib/Transforms/IPO/CMakeLists.txt | 12
-rw-r--r--  lib/Transforms/IPO/ConstantMerge.cpp | 49
-rw-r--r--  lib/Transforms/IPO/DeadArgumentElimination.cpp | 12
-rw-r--r--  lib/Transforms/IPO/FunctionAttrs.cpp | 4
-rw-r--r--  lib/Transforms/IPO/GlobalOpt.cpp | 125
-rw-r--r--  lib/Transforms/IPO/IPConstantPropagation.cpp | 2
-rw-r--r--  lib/Transforms/IPO/IPO.cpp | 15
-rw-r--r--  lib/Transforms/IPO/InlineAlways.cpp | 15
-rw-r--r--  lib/Transforms/IPO/InlineSimple.cpp | 26
-rw-r--r--  lib/Transforms/IPO/Inliner.cpp | 4
-rw-r--r--  lib/Transforms/IPO/LoopExtractor.cpp | 73
-rw-r--r--  lib/Transforms/IPO/LowerSetJmp.cpp | 547
-rw-r--r--  lib/Transforms/IPO/MergeFunctions.cpp | 70
-rw-r--r--  lib/Transforms/IPO/PassManagerBuilder.cpp | 343
-rw-r--r--  lib/Transforms/IPO/PruneEH.cpp | 5
-rw-r--r--  lib/Transforms/IPO/StripSymbols.cpp | 2
-rw-r--r--  lib/Transforms/InstCombine/CMakeLists.txt | 8
-rw-r--r--  lib/Transforms/InstCombine/InstCombine.h | 15
-rw-r--r--  lib/Transforms/InstCombine/InstCombineAddSub.cpp | 8
-rw-r--r--  lib/Transforms/InstCombine/InstCombineAndOrXor.cpp | 53
-rw-r--r--  lib/Transforms/InstCombine/InstCombineCalls.cpp | 192
-rw-r--r--  lib/Transforms/InstCombine/InstCombineCasts.cpp | 112
-rw-r--r--  lib/Transforms/InstCombine/InstCombineCompares.cpp | 495
-rw-r--r--  lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp | 99
-rw-r--r--  lib/Transforms/InstCombine/InstCombineMulDivRem.cpp | 13
-rw-r--r--  lib/Transforms/InstCombine/InstCombinePHI.cpp | 20
-rw-r--r--  lib/Transforms/InstCombine/InstCombineSelect.cpp | 20
-rw-r--r--  lib/Transforms/InstCombine/InstCombineShifts.cpp | 49
-rw-r--r--  lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp | 24
-rw-r--r--  lib/Transforms/InstCombine/InstCombineVectorOps.cpp | 8
-rw-r--r--  lib/Transforms/InstCombine/InstructionCombining.cpp | 630
-rw-r--r--  lib/Transforms/Instrumentation/CMakeLists.txt | 7
-rw-r--r--  lib/Transforms/Instrumentation/EdgeProfiling.cpp | 2
-rw-r--r--  lib/Transforms/Instrumentation/GCOVProfiling.cpp | 372
-rw-r--r--  lib/Transforms/Instrumentation/OptimalEdgeProfiling.cpp | 4
-rw-r--r--  lib/Transforms/Instrumentation/PathProfiling.cpp | 29
-rw-r--r--  lib/Transforms/Instrumentation/ProfilingUtils.cpp | 13
-rw-r--r--  lib/Transforms/Scalar/ADCE.cpp | 2
-rw-r--r--  lib/Transforms/Scalar/CMakeLists.txt | 10
-rw-r--r--  lib/Transforms/Scalar/CodeGenPrepare.cpp | 108
-rw-r--r--  lib/Transforms/Scalar/DeadStoreElimination.cpp | 182
-rw-r--r--  lib/Transforms/Scalar/EarlyCSE.cpp | 8
-rw-r--r--  lib/Transforms/Scalar/GVN.cpp | 274
-rw-r--r--  lib/Transforms/Scalar/IndVarSimplify.cpp | 1142
-rw-r--r--  lib/Transforms/Scalar/JumpThreading.cpp | 4
-rw-r--r--  lib/Transforms/Scalar/LICM.cpp | 64
-rw-r--r--  lib/Transforms/Scalar/LoopIdiomRecognize.cpp | 10
-rw-r--r--  lib/Transforms/Scalar/LoopStrengthReduce.cpp | 215
-rw-r--r--  lib/Transforms/Scalar/LoopUnrollPass.cpp | 61
-rw-r--r--  lib/Transforms/Scalar/LoopUnswitch.cpp | 28
-rw-r--r--  lib/Transforms/Scalar/LowerAtomic.cpp | 173
-rw-r--r--  lib/Transforms/Scalar/MemCpyOptimizer.cpp | 14
-rw-r--r--  lib/Transforms/Scalar/ObjCARC.cpp | 440
-rw-r--r--  lib/Transforms/Scalar/Reassociate.cpp | 2
-rw-r--r--  lib/Transforms/Scalar/SCCP.cpp | 250
-rw-r--r--  lib/Transforms/Scalar/Scalar.cpp | 5
-rw-r--r--  lib/Transforms/Scalar/ScalarReplAggregates.cpp | 598
-rw-r--r--  lib/Transforms/Scalar/SimplifyLibCalls.cpp | 111
-rw-r--r--  lib/Transforms/Scalar/Sink.cpp | 13
-rw-r--r--  lib/Transforms/Scalar/TailDuplication.cpp | 373
-rw-r--r--  lib/Transforms/Utils/AddrModeMatcher.cpp | 4
-rw-r--r--  lib/Transforms/Utils/BasicBlockUtils.cpp | 346
-rw-r--r--  lib/Transforms/Utils/BreakCriticalEdges.cpp | 56
-rw-r--r--  lib/Transforms/Utils/BuildLibCalls.cpp | 10
-rw-r--r--  lib/Transforms/Utils/CMakeLists.txt | 8
-rw-r--r--  lib/Transforms/Utils/CloneFunction.cpp | 13
-rw-r--r--  lib/Transforms/Utils/CloneModule.cpp | 27
-rw-r--r--  lib/Transforms/Utils/CodeExtractor.cpp | 45
-rw-r--r--  lib/Transforms/Utils/InlineFunction.cpp | 178
-rw-r--r--  lib/Transforms/Utils/Local.cpp | 25
-rw-r--r--  lib/Transforms/Utils/LoopSimplify.cpp | 47
-rw-r--r--  lib/Transforms/Utils/LoopUnroll.cpp | 186
-rw-r--r--  lib/Transforms/Utils/LowerExpectIntrinsic.cpp | 4
-rw-r--r--  lib/Transforms/Utils/LowerInvoke.cpp | 37
-rw-r--r--  lib/Transforms/Utils/LowerSwitch.cpp | 4
-rw-r--r--  lib/Transforms/Utils/PromoteMemoryToRegister.cpp | 4
-rw-r--r--  lib/Transforms/Utils/SSAUpdater.cpp | 8
-rw-r--r--  lib/Transforms/Utils/SimplifyCFG.cpp | 155
-rw-r--r--  lib/Transforms/Utils/SimplifyIndVar.cpp | 432
-rw-r--r--  lib/Transforms/Utils/ValueMapper.cpp | 5
-rw-r--r--  lib/VMCore/AsmWriter.cpp | 252
-rw-r--r--  lib/VMCore/Attributes.cpp | 6
-rw-r--r--  lib/VMCore/AutoUpgrade.cpp | 395
-rw-r--r--  lib/VMCore/BasicBlock.cpp | 34
-rw-r--r--  lib/VMCore/CMakeLists.txt | 5
-rw-r--r--  lib/VMCore/ConstantFold.cpp | 209
-rw-r--r--  lib/VMCore/ConstantFold.h | 6
-rw-r--r--  lib/VMCore/Constants.cpp | 265
-rw-r--r--  lib/VMCore/ConstantsContext.h | 32
-rw-r--r--  lib/VMCore/Core.cpp | 183
-rw-r--r--  lib/VMCore/DebugLoc.cpp | 4
-rw-r--r--  lib/VMCore/Function.cpp | 43
-rw-r--r--  lib/VMCore/GCOV.cpp | 281
-rw-r--r--  lib/VMCore/Globals.cpp | 8
-rw-r--r--  lib/VMCore/IRBuilder.cpp | 2
-rw-r--r--  lib/VMCore/InlineAsm.cpp | 8
-rw-r--r--  lib/VMCore/Instruction.cpp | 68
-rw-r--r--  lib/VMCore/Instructions.cpp | 674
-rw-r--r--  lib/VMCore/LLVMContextImpl.h | 4
-rw-r--r--  lib/VMCore/Makefile | 4
-rw-r--r--  lib/VMCore/Module.cpp | 48
-rw-r--r--  lib/VMCore/PassManager.cpp | 59
-rw-r--r--  lib/VMCore/PassRegistry.cpp | 1
-rw-r--r--  lib/VMCore/Type.cpp | 95
-rw-r--r--  lib/VMCore/Value.cpp | 8
-rw-r--r--  lib/VMCore/ValueTypes.cpp | 16
-rw-r--r--  lib/VMCore/Verifier.cpp | 284
849 files changed, 63160 insertions, 33915 deletions
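
A note before the diffs: this import is dominated by LLVM 3.0's atomics overhaul. The old llvm.atomic.* intrinsics are gone, replaced by first-class atomicrmw and cmpxchg instructions and by atomic forms of load and store, each carrying an explicit memory ordering. The alias-analysis diffs that follow teach the AA infrastructure about these instructions, and several hunks compare orderings with getOrdering() > Monotonic, which relies on the AtomicOrdering enumerators ascending in strength. A minimal self-contained sketch of that comparison follows; the enumerator order mirrors LLVM 3.0's Instructions.h, but this is an illustration, not LLVM code:

    #include <iostream>

    // Stand-in for LLVM 3.0's AtomicOrdering; only the ascending order matters.
    enum AtomicOrdering {
      NotAtomic,              // ordinary load/store
      Unordered,              // atomic, but with no ordering guarantees
      Monotonic,              // C++11 memory_order_relaxed
      Acquire,
      Release,
      AcquireRelease,
      SequentiallyConsistent
    };

    // Orderings stronger than Monotonic can synchronize with other threads,
    // so analyses must assume they affect memory beyond their own address.
    static bool ordersOtherMemory(AtomicOrdering O) { return O > Monotonic; }

    int main() {
      std::cout << ordersOtherMemory(Monotonic) << "\n";              // 0
      std::cout << ordersOtherMemory(SequentiallyConsistent) << "\n"; // 1
      return 0;
    }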
diff --git a/lib/Analysis/AliasAnalysis.cpp b/lib/Analysis/AliasAnalysis.cpp
index c189a00..bd132c0 100644
--- a/lib/Analysis/AliasAnalysis.cpp
+++ b/lib/Analysis/AliasAnalysis.cpp
@@ -237,6 +237,19 @@ AliasAnalysis::Location AliasAnalysis::getLocation(const VAArgInst *VI) {
VI->getMetadata(LLVMContext::MD_tbaa));
}
+AliasAnalysis::Location
+AliasAnalysis::getLocation(const AtomicCmpXchgInst *CXI) {
+ return Location(CXI->getPointerOperand(),
+ getTypeStoreSize(CXI->getCompareOperand()->getType()),
+ CXI->getMetadata(LLVMContext::MD_tbaa));
+}
+
+AliasAnalysis::Location
+AliasAnalysis::getLocation(const AtomicRMWInst *RMWI) {
+ return Location(RMWI->getPointerOperand(),
+ getTypeStoreSize(RMWI->getValOperand()->getType()),
+ RMWI->getMetadata(LLVMContext::MD_tbaa));
+}
AliasAnalysis::Location
AliasAnalysis::getLocationForSource(const MemTransferInst *MTI) {
@@ -268,8 +281,8 @@ AliasAnalysis::getLocationForDest(const MemIntrinsic *MTI) {
AliasAnalysis::ModRefResult
AliasAnalysis::getModRefInfo(const LoadInst *L, const Location &Loc) {
- // Be conservative in the face of volatile.
- if (L->isVolatile())
+ // Be conservative in the face of volatile/atomic.
+ if (!L->isUnordered())
return ModRef;
// If the load address doesn't alias the given address, it doesn't read
@@ -283,8 +296,8 @@ AliasAnalysis::getModRefInfo(const LoadInst *L, const Location &Loc) {
AliasAnalysis::ModRefResult
AliasAnalysis::getModRefInfo(const StoreInst *S, const Location &Loc) {
- // Be conservative in the face of volatile.
- if (S->isVolatile())
+ // Be conservative in the face of volatile/atomic.
+ if (!S->isUnordered())
return ModRef;
// If the store address cannot alias the pointer in question, then the
@@ -317,6 +330,33 @@ AliasAnalysis::getModRefInfo(const VAArgInst *V, const Location &Loc) {
return ModRef;
}
+AliasAnalysis::ModRefResult
+AliasAnalysis::getModRefInfo(const AtomicCmpXchgInst *CX, const Location &Loc) {
+ // Acquire/Release cmpxchg has properties that matter for arbitrary addresses.
+ if (CX->getOrdering() > Monotonic)
+ return ModRef;
+
+ // If the cmpxchg address does not alias the location, it does not access it.
+ if (!alias(getLocation(CX), Loc))
+ return NoModRef;
+
+ return ModRef;
+}
+
+AliasAnalysis::ModRefResult
+AliasAnalysis::getModRefInfo(const AtomicRMWInst *RMW, const Location &Loc) {
+ // Acquire/Release atomicrmw has properties that matter for arbitrary addresses.
+ if (RMW->getOrdering() > Monotonic)
+ return ModRef;
+
+ // If the atomicrmw address does not alias the location, it does not access it.
+ if (!alias(getLocation(RMW), Loc))
+ return NoModRef;
+
+ return ModRef;
+}
+
+
// AliasAnalysis destructor: DO NOT move this to the header file for
// AliasAnalysis or else clients of the AliasAnalysis class may not depend on
// the AliasAnalysis.o file in the current .a file, causing alias analysis
@@ -341,7 +381,7 @@ void AliasAnalysis::getAnalysisUsage(AnalysisUsage &AU) const {
/// getTypeStoreSize - Return the TargetData store size for the given type,
/// if known, or a conservative value otherwise.
///
-uint64_t AliasAnalysis::getTypeStoreSize(const Type *Ty) {
+uint64_t AliasAnalysis::getTypeStoreSize(Type *Ty) {
return TD ? TD->getTypeStoreSize(Ty) : UnknownSize;
}
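
The hunks above make getModRefInfo conservative for anything that is not a simple unordered access: volatile or ordered loads and stores return ModRef outright, and the new cmpxchg/atomicrmw overloads return ModRef unless the operation is Monotonic or weaker and its address provably does not alias the queried location. A compact, self-contained sketch of that decision (local stand-in enums, not the LLVM API; the ModRefResult values mirror LLVM's Mod/Ref bitmask):

    #include <cstdio>

    enum AtomicOrdering { NotAtomic, Unordered, Monotonic, Acquire,
                          Release, AcquireRelease, SequentiallyConsistent };
    enum ModRefResult { NoModRef = 0, Ref = 1, Mod = 2, ModRef = Mod | Ref };

    // Mirrors the new getModRefInfo(AtomicCmpXchgInst*/AtomicRMWInst*) logic.
    ModRefResult modRefForAtomicOp(AtomicOrdering Ord, bool MayAliasLoc) {
      if (Ord > Monotonic)      // acquire/release pairs with other accesses,
        return ModRef;          // so treat it as touching arbitrary memory
      return MayAliasLoc ? ModRef : NoModRef;
    }

    int main() {
      std::printf("%d\n", modRefForAtomicOp(Acquire, false));   // 3 (ModRef)
      std::printf("%d\n", modRefForAtomicOp(Monotonic, false)); // 0 (NoModRef)
      return 0;
    }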
diff --git a/lib/Analysis/AliasAnalysisEvaluator.cpp b/lib/Analysis/AliasAnalysisEvaluator.cpp
index 1afc1b7..37271b9 100644
--- a/lib/Analysis/AliasAnalysisEvaluator.cpp
+++ b/lib/Analysis/AliasAnalysisEvaluator.cpp
@@ -171,12 +171,12 @@ bool AAEval::runOnFunction(Function &F) {
for (SetVector<Value *>::iterator I1 = Pointers.begin(), E = Pointers.end();
I1 != E; ++I1) {
uint64_t I1Size = AliasAnalysis::UnknownSize;
- const Type *I1ElTy = cast<PointerType>((*I1)->getType())->getElementType();
+ Type *I1ElTy = cast<PointerType>((*I1)->getType())->getElementType();
if (I1ElTy->isSized()) I1Size = AA.getTypeStoreSize(I1ElTy);
for (SetVector<Value *>::iterator I2 = Pointers.begin(); I2 != I1; ++I2) {
uint64_t I2Size = AliasAnalysis::UnknownSize;
- const Type *I2ElTy =cast<PointerType>((*I2)->getType())->getElementType();
+ Type *I2ElTy =cast<PointerType>((*I2)->getType())->getElementType();
if (I2ElTy->isSized()) I2Size = AA.getTypeStoreSize(I2ElTy);
switch (AA.alias(*I1, I1Size, *I2, I2Size)) {
@@ -207,7 +207,7 @@ bool AAEval::runOnFunction(Function &F) {
for (SetVector<Value *>::iterator V = Pointers.begin(), Ve = Pointers.end();
V != Ve; ++V) {
uint64_t Size = AliasAnalysis::UnknownSize;
- const Type *ElTy = cast<PointerType>((*V)->getType())->getElementType();
+ Type *ElTy = cast<PointerType>((*V)->getType())->getElementType();
if (ElTy->isSized()) Size = AA.getTypeStoreSize(ElTy);
switch (AA.getModRefInfo(*C, *V, Size)) {
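
The evaluator's only change is dropping const from Type pointers. The 3.0 release rewrote the type system (opaque-type refinement is gone, so a Type never mutates after creation), and with const-ness no longer carrying information the qualifier was removed from these APIs wholesale; the same mechanical const Type * to Type * substitution appears throughout this import.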
diff --git a/lib/Analysis/AliasSetTracker.cpp b/lib/Analysis/AliasSetTracker.cpp
index 2ed6949..3fcd3b5 100644
--- a/lib/Analysis/AliasSetTracker.cpp
+++ b/lib/Analysis/AliasSetTracker.cpp
@@ -56,12 +56,12 @@ void AliasSet::mergeSetIn(AliasSet &AS, AliasSetTracker &AST) {
AliasTy = MayAlias;
}
- if (CallSites.empty()) { // Merge call sites...
- if (!AS.CallSites.empty())
- std::swap(CallSites, AS.CallSites);
- } else if (!AS.CallSites.empty()) {
- CallSites.insert(CallSites.end(), AS.CallSites.begin(), AS.CallSites.end());
- AS.CallSites.clear();
+ if (UnknownInsts.empty()) { // Merge call sites...
+ if (!AS.UnknownInsts.empty())
+ std::swap(UnknownInsts, AS.UnknownInsts);
+ } else if (!AS.UnknownInsts.empty()) {
+ UnknownInsts.insert(UnknownInsts.end(), AS.UnknownInsts.begin(), AS.UnknownInsts.end());
+ AS.UnknownInsts.clear();
}
AS.Forward = this; // Forward across AS now...
@@ -123,13 +123,10 @@ void AliasSet::addPointer(AliasSetTracker &AST, PointerRec &Entry,
addRef(); // Entry points to alias set.
}
-void AliasSet::addCallSite(CallSite CS, AliasAnalysis &AA) {
- CallSites.push_back(CS.getInstruction());
+void AliasSet::addUnknownInst(Instruction *I, AliasAnalysis &AA) {
+ UnknownInsts.push_back(I);
- AliasAnalysis::ModRefBehavior Behavior = AA.getModRefBehavior(CS);
- if (Behavior == AliasAnalysis::DoesNotAccessMemory)
- return;
- if (AliasAnalysis::onlyReadsMemory(Behavior)) {
+ if (!I->mayWriteToMemory()) {
AliasTy = MayAlias;
AccessTy |= Refs;
return;
@@ -147,7 +144,7 @@ bool AliasSet::aliasesPointer(const Value *Ptr, uint64_t Size,
const MDNode *TBAAInfo,
AliasAnalysis &AA) const {
if (AliasTy == MustAlias) {
- assert(CallSites.empty() && "Illegal must alias set!");
+ assert(UnknownInsts.empty() && "Illegal must alias set!");
// If this is a set of MustAliases, only check to see if the pointer aliases
// SOME value in the set.
@@ -167,10 +164,10 @@ bool AliasSet::aliasesPointer(const Value *Ptr, uint64_t Size,
I.getTBAAInfo())))
return true;
- // Check the call sites list and invoke list...
- if (!CallSites.empty()) {
- for (unsigned i = 0, e = CallSites.size(); i != e; ++i)
- if (AA.getModRefInfo(CallSites[i],
+ // Check the unknown instructions...
+ if (!UnknownInsts.empty()) {
+ for (unsigned i = 0, e = UnknownInsts.size(); i != e; ++i)
+ if (AA.getModRefInfo(UnknownInsts[i],
AliasAnalysis::Location(Ptr, Size, TBAAInfo)) !=
AliasAnalysis::NoModRef)
return true;
@@ -179,18 +176,20 @@ bool AliasSet::aliasesPointer(const Value *Ptr, uint64_t Size,
return false;
}
-bool AliasSet::aliasesCallSite(CallSite CS, AliasAnalysis &AA) const {
- if (AA.doesNotAccessMemory(CS))
+bool AliasSet::aliasesUnknownInst(Instruction *Inst, AliasAnalysis &AA) const {
+ if (!Inst->mayReadOrWriteMemory())
return false;
- for (unsigned i = 0, e = CallSites.size(); i != e; ++i) {
- if (AA.getModRefInfo(getCallSite(i), CS) != AliasAnalysis::NoModRef ||
- AA.getModRefInfo(CS, getCallSite(i)) != AliasAnalysis::NoModRef)
+ for (unsigned i = 0, e = UnknownInsts.size(); i != e; ++i) {
+ CallSite C1 = getUnknownInst(i), C2 = Inst;
+ if (!C1 || !C2 ||
+ AA.getModRefInfo(C1, C2) != AliasAnalysis::NoModRef ||
+ AA.getModRefInfo(C2, C1) != AliasAnalysis::NoModRef)
return true;
}
for (iterator I = begin(), E = end(); I != E; ++I)
- if (AA.getModRefInfo(CS, I.getPointer(), I.getSize()) !=
+ if (AA.getModRefInfo(Inst, I.getPointer(), I.getSize()) !=
AliasAnalysis::NoModRef)
return true;
@@ -244,10 +243,10 @@ bool AliasSetTracker::containsPointer(Value *Ptr, uint64_t Size,
-AliasSet *AliasSetTracker::findAliasSetForCallSite(CallSite CS) {
+AliasSet *AliasSetTracker::findAliasSetForUnknownInst(Instruction *Inst) {
AliasSet *FoundSet = 0;
for (iterator I = begin(), E = end(); I != E; ++I) {
- if (I->Forward || !I->aliasesCallSite(CS, AA))
+ if (I->Forward || !I->aliasesUnknownInst(Inst, AA))
continue;
if (FoundSet == 0) // If this is the first alias set ptr can go into.
@@ -296,22 +295,28 @@ bool AliasSetTracker::add(Value *Ptr, uint64_t Size, const MDNode *TBAAInfo) {
bool AliasSetTracker::add(LoadInst *LI) {
+ if (LI->getOrdering() > Monotonic) return addUnknown(LI);
+ AliasSet::AccessType ATy = AliasSet::Refs;
+ if (!LI->isUnordered()) ATy = AliasSet::ModRef;
bool NewPtr;
AliasSet &AS = addPointer(LI->getOperand(0),
AA.getTypeStoreSize(LI->getType()),
LI->getMetadata(LLVMContext::MD_tbaa),
- AliasSet::Refs, NewPtr);
+ ATy, NewPtr);
if (LI->isVolatile()) AS.setVolatile();
return NewPtr;
}
bool AliasSetTracker::add(StoreInst *SI) {
+ if (SI->getOrdering() > Monotonic) return addUnknown(SI);
+ AliasSet::AccessType ATy = AliasSet::Mods;
+ if (!SI->isUnordered()) ATy = AliasSet::ModRef;
bool NewPtr;
Value *Val = SI->getOperand(0);
AliasSet &AS = addPointer(SI->getOperand(1),
AA.getTypeStoreSize(Val->getType()),
SI->getMetadata(LLVMContext::MD_tbaa),
- AliasSet::Mods, NewPtr);
+ ATy, NewPtr);
if (SI->isVolatile()) AS.setVolatile();
return NewPtr;
}
@@ -325,20 +330,20 @@ bool AliasSetTracker::add(VAArgInst *VAAI) {
}
-bool AliasSetTracker::add(CallSite CS) {
- if (isa<DbgInfoIntrinsic>(CS.getInstruction()))
+bool AliasSetTracker::addUnknown(Instruction *Inst) {
+ if (isa<DbgInfoIntrinsic>(Inst))
return true; // Ignore DbgInfo Intrinsics.
- if (AA.doesNotAccessMemory(CS))
+ if (!Inst->mayReadOrWriteMemory())
return true; // doesn't alias anything
- AliasSet *AS = findAliasSetForCallSite(CS);
+ AliasSet *AS = findAliasSetForUnknownInst(Inst);
if (AS) {
- AS->addCallSite(CS, AA);
+ AS->addUnknownInst(Inst, AA);
return false;
}
AliasSets.push_back(new AliasSet());
AS = &AliasSets.back();
- AS->addCallSite(CS, AA);
+ AS->addUnknownInst(Inst, AA);
return true;
}
@@ -348,13 +353,9 @@ bool AliasSetTracker::add(Instruction *I) {
return add(LI);
if (StoreInst *SI = dyn_cast<StoreInst>(I))
return add(SI);
- if (CallInst *CI = dyn_cast<CallInst>(I))
- return add(CI);
- if (InvokeInst *II = dyn_cast<InvokeInst>(I))
- return add(II);
if (VAArgInst *VAAI = dyn_cast<VAArgInst>(I))
return add(VAAI);
- return true;
+ return addUnknown(I);
}
void AliasSetTracker::add(BasicBlock &BB) {
@@ -375,8 +376,8 @@ void AliasSetTracker::add(const AliasSetTracker &AST) {
AliasSet &AS = const_cast<AliasSet&>(*I);
// If there are any call sites in the alias set, add them to this AST.
- for (unsigned i = 0, e = AS.CallSites.size(); i != e; ++i)
- add(AS.CallSites[i]);
+ for (unsigned i = 0, e = AS.UnknownInsts.size(); i != e; ++i)
+ add(AS.UnknownInsts[i]);
// Loop over all of the pointers in this alias set.
bool X;
@@ -393,7 +394,7 @@ void AliasSetTracker::add(const AliasSetTracker &AST) {
/// tracker.
void AliasSetTracker::remove(AliasSet &AS) {
// Drop all call sites.
- AS.CallSites.clear();
+ AS.UnknownInsts.clear();
// Clear the alias set.
unsigned NumRefs = 0;
@@ -453,11 +454,11 @@ bool AliasSetTracker::remove(VAArgInst *VAAI) {
return true;
}
-bool AliasSetTracker::remove(CallSite CS) {
- if (AA.doesNotAccessMemory(CS))
+bool AliasSetTracker::removeUnknown(Instruction *I) {
+ if (!I->mayReadOrWriteMemory())
return false; // doesn't alias anything
- AliasSet *AS = findAliasSetForCallSite(CS);
+ AliasSet *AS = findAliasSetForUnknownInst(I);
if (!AS) return false;
remove(*AS);
return true;
@@ -469,11 +470,9 @@ bool AliasSetTracker::remove(Instruction *I) {
return remove(LI);
if (StoreInst *SI = dyn_cast<StoreInst>(I))
return remove(SI);
- if (CallInst *CI = dyn_cast<CallInst>(I))
- return remove(CI);
if (VAArgInst *VAAI = dyn_cast<VAArgInst>(I))
return remove(VAAI);
- return true;
+ return removeUnknown(I);
}
@@ -488,13 +487,13 @@ void AliasSetTracker::deleteValue(Value *PtrVal) {
// If this is a call instruction, remove the callsite from the appropriate
// AliasSet (if present).
- if (CallSite CS = PtrVal) {
- if (!AA.doesNotAccessMemory(CS)) {
+ if (Instruction *Inst = dyn_cast<Instruction>(PtrVal)) {
+ if (Inst->mayReadOrWriteMemory()) {
// Scan all the alias sets to see if this call site is contained.
for (iterator I = begin(), E = end(); I != E; ++I) {
if (I->Forward) continue;
- I->removeCallSite(CS);
+ I->removeUnknownInst(Inst);
}
}
}
@@ -571,11 +570,11 @@ void AliasSet::print(raw_ostream &OS) const {
OS << ", " << I.getSize() << ")";
}
}
- if (!CallSites.empty()) {
- OS << "\n " << CallSites.size() << " Call Sites: ";
- for (unsigned i = 0, e = CallSites.size(); i != e; ++i) {
+ if (!UnknownInsts.empty()) {
+ OS << "\n " << UnknownInsts.size() << " Unknown instructions: ";
+ for (unsigned i = 0, e = UnknownInsts.size(); i != e; ++i) {
if (i) OS << ", ";
- WriteAsOperand(OS, CallSites[i]);
+ WriteAsOperand(OS, UnknownInsts[i]);
}
}
OS << "\n";
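
AliasSetTracker's side of the change generalizes the old call-site list into an unknown-instructions list and routes the new atomics through it: a load or store ordered stronger than Monotonic is tracked as an unknown instruction, while monotonic, volatile, and atomic-unordered accesses keep pointer entries but with a widened access type. A self-contained sketch of the add(LoadInst) routing shown above (the names are stand-ins, not the LLVM API):

    #include <cstdio>

    enum AtomicOrdering { NotAtomic, Unordered, Monotonic, Acquire,
                          Release, AcquireRelease, SequentiallyConsistent };
    enum AccessType { Refs, Mods, ModRefAccess };  // AliasSet's access kinds

    struct LoadDesc { AtomicOrdering Ord; bool Volatile; };

    // Returns true when the load belongs on the unknown-instructions list;
    // otherwise sets ATy to the access type recorded for its pointer entry.
    bool routeLoad(const LoadDesc &L, AccessType &ATy) {
      if (L.Ord > Monotonic)
        return true;                          // addUnknown(LI)
      // LoadInst::isUnordered(): ordering <= Unordered and not volatile
      bool Simple = !L.Volatile && L.Ord <= Unordered;
      ATy = Simple ? Refs : ModRefAccess;     // monotonic/volatile widen it
      return false;
    }

    int main() {
      AccessType ATy = Refs;
      LoadDesc SC = { SequentiallyConsistent, false };
      LoadDesc Plain = { NotAtomic, false };
      std::printf("%d\n", routeLoad(SC, ATy));    // 1: unknown instruction
      std::printf("%d\n", routeLoad(Plain, ATy)); // 0: pointer set, ATy == Refs
      return 0;
    }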
diff --git a/lib/Analysis/Analysis.cpp b/lib/Analysis/Analysis.cpp
index 71e0a83..0ba6af9 100644
--- a/lib/Analysis/Analysis.cpp
+++ b/lib/Analysis/Analysis.cpp
@@ -8,6 +8,7 @@
//===----------------------------------------------------------------------===//
#include "llvm-c/Analysis.h"
+#include "llvm-c/Initialization.h"
#include "llvm/InitializePasses.h"
#include "llvm/Analysis/Verifier.h"
#include <cstring>
@@ -23,7 +24,7 @@ void llvm::initializeAnalysis(PassRegistry &Registry) {
initializeAliasSetPrinterPass(Registry);
initializeNoAAPass(Registry);
initializeBasicAliasAnalysisPass(Registry);
- initializeBlockFrequencyPass(Registry);
+ initializeBlockFrequencyInfoPass(Registry);
initializeBranchProbabilityInfoPass(Registry);
initializeCFGViewerPass(Registry);
initializeCFGPrinterPass(Registry);
diff --git a/lib/Analysis/BasicAliasAnalysis.cpp b/lib/Analysis/BasicAliasAnalysis.cpp
index 8330ea7..af400ba 100644
--- a/lib/Analysis/BasicAliasAnalysis.cpp
+++ b/lib/Analysis/BasicAliasAnalysis.cpp
@@ -30,6 +30,7 @@
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetLibraryInfo.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Support/ErrorHandling.h"
@@ -100,7 +101,7 @@ static bool isEscapeSource(const Value *V) {
/// getObjectSize - Return the size of the object specified by V, or
/// UnknownSize if unknown.
static uint64_t getObjectSize(const Value *V, const TargetData &TD) {
- const Type *AccessTy;
+ Type *AccessTy;
if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(V)) {
if (!GV->hasDefinitiveInitializer())
return AliasAnalysis::UnknownSize;
@@ -317,7 +318,7 @@ DecomposeGEPExpression(const Value *V, int64_t &BaseOffs,
E = GEPOp->op_end(); I != E; ++I) {
Value *Index = *I;
// Compute the (potentially symbolic) offset in bytes for this index.
- if (const StructType *STy = dyn_cast<StructType>(*GTI++)) {
+ if (StructType *STy = dyn_cast<StructType>(*GTI++)) {
// For a struct, add the member offset.
unsigned FieldNo = cast<ConstantInt>(Index)->getZExtValue();
if (FieldNo == 0) continue;
@@ -374,7 +375,8 @@ DecomposeGEPExpression(const Value *V, int64_t &BaseOffs,
}
if (Scale) {
- VariableGEPIndex Entry = {Index, Extension, Scale};
+ VariableGEPIndex Entry = {Index, Extension,
+ static_cast<int64_t>(Scale)};
VarIndices.push_back(Entry);
}
}
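The added static_cast is not cosmetic: the local Scale accumulator is unsigned while VariableGEPIndex::Scale is signed, so the braced initializer would otherwise perform an implicit sign-changing conversion that compilers warn about (and that C++11 rejects as narrowing). A standalone illustration, with a hypothetical struct standing in for VariableGEPIndex:

    #include <stdint.h>

    // Hypothetical stand-in for the signed field being initialized.
    struct Entry { int64_t Scale; };

    Entry makeEntry(uint64_t Scale) {
      // Without the cast, C++11 rejects the narrowing in a braced initializer
      // and older compilers warn about the implicit signedness change.
      Entry E = { static_cast<int64_t>(Scale) };
      return E;
    }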
@@ -467,6 +469,7 @@ namespace {
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
AU.addRequired<AliasAnalysis>();
+ AU.addRequired<TargetLibraryInfo>();
}
virtual AliasResult alias(const Location &LocA,
@@ -549,10 +552,15 @@ namespace {
// Register this pass...
char BasicAliasAnalysis::ID = 0;
-INITIALIZE_AG_PASS(BasicAliasAnalysis, AliasAnalysis, "basicaa",
+INITIALIZE_AG_PASS_BEGIN(BasicAliasAnalysis, AliasAnalysis, "basicaa",
+ "Basic Alias Analysis (stateless AA impl)",
+ false, true, false)
+INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfo)
+INITIALIZE_AG_PASS_END(BasicAliasAnalysis, AliasAnalysis, "basicaa",
"Basic Alias Analysis (stateless AA impl)",
false, true, false)
+
ImmutablePass *llvm::createBasicAliasAnalysisPass() {
return new BasicAliasAnalysis();
}
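The BEGIN/DEPENDENCY/END macro triple is the pass-registration counterpart of the new addRequired<TargetLibraryInfo>() above: it records that TargetLibraryInfo must be initialized before basicaa can be, so the later getAnalysis<TargetLibraryInfo>() call in getModRefInfo can rely on the analysis being available.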
@@ -706,7 +714,7 @@ BasicAliasAnalysis::getModRefInfo(ImmutableCallSite CS,
// is impossible to alias the pointer we're checking. If not, we have to
// assume that the call could touch the pointer, even though it doesn't
// escape.
- if (!isNoAlias(Location(cast<Value>(CI)), Loc)) {
+ if (!isNoAlias(Location(*CI), Location(Object))) {
PassedAsArg = true;
break;
}
@@ -716,6 +724,7 @@ BasicAliasAnalysis::getModRefInfo(ImmutableCallSite CS,
return NoModRef;
}
+ const TargetLibraryInfo &TLI = getAnalysis<TargetLibraryInfo>();
ModRefResult Min = ModRef;
// Finally, handle specific knowledge of intrinsics.
@@ -754,26 +763,6 @@ BasicAliasAnalysis::getModRefInfo(ImmutableCallSite CS,
// We know that memset doesn't load anything.
Min = Mod;
break;
- case Intrinsic::atomic_cmp_swap:
- case Intrinsic::atomic_swap:
- case Intrinsic::atomic_load_add:
- case Intrinsic::atomic_load_sub:
- case Intrinsic::atomic_load_and:
- case Intrinsic::atomic_load_nand:
- case Intrinsic::atomic_load_or:
- case Intrinsic::atomic_load_xor:
- case Intrinsic::atomic_load_max:
- case Intrinsic::atomic_load_min:
- case Intrinsic::atomic_load_umax:
- case Intrinsic::atomic_load_umin:
- if (TD) {
- Value *Op1 = II->getArgOperand(0);
- uint64_t Op1Size = TD->getTypeStoreSize(Op1->getType());
- MDNode *Tag = II->getMetadata(LLVMContext::MD_tbaa);
- if (isNoAlias(Location(Op1, Op1Size, Tag), Loc))
- return NoModRef;
- }
- break;
case Intrinsic::lifetime_start:
case Intrinsic::lifetime_end:
case Intrinsic::invariant_start: {
@@ -818,6 +807,39 @@ BasicAliasAnalysis::getModRefInfo(ImmutableCallSite CS,
}
}
+ // We can bound the aliasing properties of memset_pattern16 just as we can
+ // for memcpy/memset. This is particularly important because the
+ // LoopIdiomRecognizer likes to turn loops into calls to memset_pattern16
+ // whenever possible.
+ else if (TLI.has(LibFunc::memset_pattern16) &&
+ CS.getCalledFunction() &&
+ CS.getCalledFunction()->getName() == "memset_pattern16") {
+ const Function *MS = CS.getCalledFunction();
+ FunctionType *MemsetType = MS->getFunctionType();
+ if (!MemsetType->isVarArg() && MemsetType->getNumParams() == 3 &&
+ isa<PointerType>(MemsetType->getParamType(0)) &&
+ isa<PointerType>(MemsetType->getParamType(1)) &&
+ isa<IntegerType>(MemsetType->getParamType(2))) {
+ uint64_t Len = UnknownSize;
+ if (const ConstantInt *LenCI = dyn_cast<ConstantInt>(CS.getArgument(2)))
+ Len = LenCI->getZExtValue();
+ const Value *Dest = CS.getArgument(0);
+ const Value *Src = CS.getArgument(1);
+ // If it can't overlap either the source or the dest, then it doesn't modref the loc.
+ if (isNoAlias(Location(Dest, Len), Loc)) {
+ // Always reads 16 bytes of the source.
+ if (isNoAlias(Location(Src, 16), Loc))
+ return NoModRef;
+ // If it can't overlap the dest, then worst case it reads the loc.
+ Min = Ref;
+ // Always reads 16 bytes of the source.
+ } else if (isNoAlias(Location(Src, 16), Loc)) {
+ // If it can't overlap the source, then worst case it mutates the loc.
+ Min = Mod;
+ }
+ }
+ }
+
// The AliasAnalysis base class has some smarts, let's use them.
return ModRefResult(AliasAnalysis::getModRefInfo(CS, Loc) & Min);
}
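In prose: memset_pattern16(dst, pattern16, len) writes len bytes at dst and reads exactly 16 bytes at pattern16, so two isNoAlias queries suffice to bound its effect on any queried location. A hedged sketch of that case analysis (the helper name is assumed; it mirrors the hunk above):

    #include "llvm/Analysis/AliasAnalysis.h"

    using namespace llvm;

    // Sketch of the Mod/Ref bound for
    //   void memset_pattern16(void *dst, const void *pattern16, size_t len);
    // which writes Len bytes at Dst and reads exactly 16 bytes at Src.
    static AliasAnalysis::ModRefResult
    boundMemsetPattern16(AliasAnalysis &AA, const AliasAnalysis::Location &Loc,
                         const Value *Dst, const Value *Src, uint64_t Len) {
      bool MissesDst = AA.isNoAlias(AliasAnalysis::Location(Dst, Len), Loc);
      bool MissesSrc = AA.isNoAlias(AliasAnalysis::Location(Src, 16), Loc);
      if (MissesDst && MissesSrc)
        return AliasAnalysis::NoModRef; // touches neither part of Loc
      if (MissesDst)
        return AliasAnalysis::Ref;      // can only read from Loc
      if (MissesSrc)
        return AliasAnalysis::Mod;      // can only write to Loc
      return AliasAnalysis::ModRef;     // may do both
    }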
@@ -913,43 +935,43 @@ BasicAliasAnalysis::aliasGEP(const GEPOperator *GEP1, uint64_t V1Size,
if (GEP1BaseOffset == 0 && GEP1VariableIndices.empty())
return MustAlias;
- // If there is a difference between the pointers, but the difference is
- // less than the size of the associated memory object, then we know
- // that the objects are partially overlapping.
+ // If there is a constant difference between the pointers, but the difference
+ // is less than the size of the associated memory object, then we know
+ // that the objects are partially overlapping. If the difference is
+ // greater, we know they do not overlap.
if (GEP1BaseOffset != 0 && GEP1VariableIndices.empty()) {
- if (GEP1BaseOffset >= 0 ?
- (V2Size != UnknownSize && (uint64_t)GEP1BaseOffset < V2Size) :
- (V1Size != UnknownSize && -(uint64_t)GEP1BaseOffset < V1Size &&
- GEP1BaseOffset != INT64_MIN))
- return PartialAlias;
+ if (GEP1BaseOffset >= 0) {
+ if (V2Size != UnknownSize) {
+ if ((uint64_t)GEP1BaseOffset < V2Size)
+ return PartialAlias;
+ return NoAlias;
+ }
+ } else {
+ if (V1Size != UnknownSize) {
+ if (-(uint64_t)GEP1BaseOffset < V1Size)
+ return PartialAlias;
+ return NoAlias;
+ }
+ }
}
- // If we have a known constant offset, see if this offset is larger than the
- // access size being queried. If so, and if no variable indices can remove
- // pieces of this constant, then we know we have a no-alias. For example,
- // &A[100] != &A.
-
- // In order to handle cases like &A[100][i] where i is an out of range
- // subscript, we have to ignore all constant offset pieces that are a multiple
- // of a scaled index. Do this by removing constant offsets that are a
- // multiple of any of our variable indices. This allows us to transform
- // things like &A[i][1] because i has a stride of (e.g.) 8 bytes but the 1
- // provides an offset of 4 bytes (assuming a <= 4 byte access).
- for (unsigned i = 0, e = GEP1VariableIndices.size();
- i != e && GEP1BaseOffset;++i)
- if (int64_t RemovedOffset = GEP1BaseOffset/GEP1VariableIndices[i].Scale)
- GEP1BaseOffset -= RemovedOffset*GEP1VariableIndices[i].Scale;
-
- // If our known offset is bigger than the access size, we know we don't have
- // an alias.
- if (GEP1BaseOffset) {
- if (GEP1BaseOffset >= 0 ?
- (V2Size != UnknownSize && (uint64_t)GEP1BaseOffset >= V2Size) :
- (V1Size != UnknownSize && -(uint64_t)GEP1BaseOffset >= V1Size &&
- GEP1BaseOffset != INT64_MIN))
+ // Try to distinguish something like &A[i][1] against &A[42][0].
+ // Grab the least significant bit set in any of the scales.
+ if (!GEP1VariableIndices.empty()) {
+ uint64_t Modulo = 0;
+ for (unsigned i = 0, e = GEP1VariableIndices.size(); i != e; ++i)
+ Modulo |= (uint64_t)GEP1VariableIndices[i].Scale;
+ Modulo = Modulo ^ (Modulo & (Modulo - 1));
+
+ // We can compute the difference between the two addresses
+ // mod Modulo. Check whether that difference guarantees that the
+ // two locations do not alias.
+ uint64_t ModOffset = (uint64_t)GEP1BaseOffset & (Modulo - 1);
+ if (V1Size != UnknownSize && V2Size != UnknownSize &&
+ ModOffset >= V2Size && V1Size <= Modulo - ModOffset)
return NoAlias;
}
-
+
// Statically, we can see that the base objects are the same, but the
// pointers have dynamic offsets which we can't resolve. And none of our
// little tricks above worked.
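The `Modulo ^ (Modulo & (Modulo - 1))` expression isolates the lowest set bit of the OR of all scales, so every variable contribution to the address is a multiple of Modulo and the constant offset is therefore fully known mod Modulo. A worked instance with assumed sizes, matching the &A[i][1] vs. &A[42][0] comment:

    #include <assert.h>
    #include <stdint.h>

    // Worked example with assumed layout: rows of A are 8 bytes and elements
    // are 4 bytes, so &A[i][1] is (8*i + 4) while &A[42][0] is a multiple of 8.
    int main() {
      uint64_t Scales = 8;                                // only variable scale
      uint64_t Modulo = Scales ^ (Scales & (Scales - 1)); // lowest set bit: 8
      int64_t  GEP1BaseOffset = 4;                        // the constant "+1"
      uint64_t V1Size = 4, V2Size = 4;                    // 4-byte accesses
      uint64_t ModOffset = (uint64_t)GEP1BaseOffset & (Modulo - 1); // == 4
      // One access covers [0, V2Size) of every 8-byte stride, the other covers
      // [4, 4 + V1Size); since 4 >= 4 and 4 <= 8 - 4, they can never overlap.
      assert(ModOffset >= V2Size && V1Size <= Modulo - ModOffset);
      return 0;
    }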
diff --git a/lib/Analysis/BlockFrequency.cpp b/lib/Analysis/BlockFrequency.cpp
deleted file mode 100644
index 4b86d1d..0000000
--- a/lib/Analysis/BlockFrequency.cpp
+++ /dev/null
@@ -1,59 +0,0 @@
-//=======-------- BlockFrequency.cpp - Block Frequency Analysis -------=======//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// Loops should be simplified before this analysis.
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/InitializePasses.h"
-#include "llvm/Analysis/BlockFrequencyImpl.h"
-#include "llvm/Analysis/BlockFrequency.h"
-#include "llvm/Analysis/LoopInfo.h"
-#include "llvm/Analysis/Passes.h"
-#include "llvm/Analysis/BranchProbabilityInfo.h"
-
-using namespace llvm;
-
-INITIALIZE_PASS_BEGIN(BlockFrequency, "block-freq", "Block Frequency Analysis",
- true, true)
-INITIALIZE_PASS_DEPENDENCY(BranchProbabilityInfo)
-INITIALIZE_PASS_END(BlockFrequency, "block-freq", "Block Frequency Analysis",
- true, true)
-
-char BlockFrequency::ID = 0;
-
-
-BlockFrequency::BlockFrequency() : FunctionPass(ID) {
- initializeBlockFrequencyPass(*PassRegistry::getPassRegistry());
- BFI = new BlockFrequencyImpl<BasicBlock, Function, BranchProbabilityInfo>();
-}
-
-BlockFrequency::~BlockFrequency() {
- delete BFI;
-}
-
-void BlockFrequency::getAnalysisUsage(AnalysisUsage &AU) const {
- AU.addRequired<BranchProbabilityInfo>();
- AU.setPreservesAll();
-}
-
-bool BlockFrequency::runOnFunction(Function &F) {
- BranchProbabilityInfo &BPI = getAnalysis<BranchProbabilityInfo>();
- BFI->doFunction(&F, &BPI);
- return false;
-}
-
-/// getblockFreq - Return block frequency. Never return 0, value must be
-/// positive. Please note that initial frequency is equal to 1024. It means that
-/// we should not rely on the value itself, but only on the comparison to the
-/// other block frequencies. We do this to avoid using of floating points.
-///
-uint32_t BlockFrequency::getBlockFreq(BasicBlock *BB) {
- return BFI->getBlockFreq(BB);
-}
diff --git a/lib/Analysis/BlockFrequencyInfo.cpp b/lib/Analysis/BlockFrequencyInfo.cpp
new file mode 100644
index 0000000..d16665f
--- /dev/null
+++ b/lib/Analysis/BlockFrequencyInfo.cpp
@@ -0,0 +1,63 @@
+//===-- BlockFrequencyInfo.cpp - Block Frequency Analysis -----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Loops should be simplified before this analysis.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/InitializePasses.h"
+#include "llvm/Analysis/BlockFrequencyImpl.h"
+#include "llvm/Analysis/BlockFrequencyInfo.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/Passes.h"
+#include "llvm/Analysis/BranchProbabilityInfo.h"
+
+using namespace llvm;
+
+INITIALIZE_PASS_BEGIN(BlockFrequencyInfo, "block-freq", "Block Frequency Analysis",
+ true, true)
+INITIALIZE_PASS_DEPENDENCY(BranchProbabilityInfo)
+INITIALIZE_PASS_END(BlockFrequencyInfo, "block-freq", "Block Frequency Analysis",
+ true, true)
+
+char BlockFrequencyInfo::ID = 0;
+
+
+BlockFrequencyInfo::BlockFrequencyInfo() : FunctionPass(ID) {
+ initializeBlockFrequencyInfoPass(*PassRegistry::getPassRegistry());
+ BFI = new BlockFrequencyImpl<BasicBlock, Function, BranchProbabilityInfo>();
+}
+
+BlockFrequencyInfo::~BlockFrequencyInfo() {
+ delete BFI;
+}
+
+void BlockFrequencyInfo::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<BranchProbabilityInfo>();
+ AU.setPreservesAll();
+}
+
+bool BlockFrequencyInfo::runOnFunction(Function &F) {
+ BranchProbabilityInfo &BPI = getAnalysis<BranchProbabilityInfo>();
+ BFI->doFunction(&F, &BPI);
+ return false;
+}
+
+void BlockFrequencyInfo::print(raw_ostream &O, const Module *) const {
+ if (BFI) BFI->print(O);
+}
+
+/// getBlockFreq - Return block frequency. Return 0 if we don't have the
+/// information. Please note that the initial frequency is equal to 1024. This
+/// means we should not rely on the value itself, but only on comparisons to
+/// the other block frequencies. We do this to avoid using floating point.
+///
+BlockFrequency BlockFrequencyInfo::getBlockFreq(BasicBlock *BB) const {
+ return BFI->getBlockFreq(BB);
+}
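A hedged sketch of a downstream pass consuming the renamed analysis (boilerplate and names assumed; pass registration omitted). Since the entry frequency starts at 1024, only comparisons between frequencies are meaningful:

    #include "llvm/Analysis/BlockFrequencyInfo.h"
    #include "llvm/Function.h"
    #include "llvm/Pass.h"
    #include "llvm/Support/raw_ostream.h"

    using namespace llvm;

    namespace {
    // Illustrative pass: report blocks estimated hotter than the entry block.
    struct HotBlockPrinter : public FunctionPass {
      static char ID;
      HotBlockPrinter() : FunctionPass(ID) {}
      virtual void getAnalysisUsage(AnalysisUsage &AU) const {
        AU.addRequired<BlockFrequencyInfo>();
        AU.setPreservesAll();
      }
      virtual bool runOnFunction(Function &F) {
        BlockFrequencyInfo &BFI = getAnalysis<BlockFrequencyInfo>();
        uint64_t Entry = BFI.getBlockFreq(&F.getEntryBlock()).getFrequency();
        for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB)
          if (BFI.getBlockFreq(&*BB).getFrequency() > Entry)
            errs() << BB->getName() << " is hotter than entry\n";
        return false;
      }
    };
    char HotBlockPrinter::ID = 0;
    }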
diff --git a/lib/Analysis/BranchProbabilityInfo.cpp b/lib/Analysis/BranchProbabilityInfo.cpp
index e39cd22..bde3b76 100644
--- a/lib/Analysis/BranchProbabilityInfo.cpp
+++ b/lib/Analysis/BranchProbabilityInfo.cpp
@@ -11,7 +11,10 @@
//
//===----------------------------------------------------------------------===//
+#include "llvm/Constants.h"
#include "llvm/Instructions.h"
+#include "llvm/LLVMContext.h"
+#include "llvm/Metadata.h"
#include "llvm/Analysis/BranchProbabilityInfo.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Support/Debug.h"
@@ -33,7 +36,7 @@ namespace {
// private methods are hidden in the .cpp file.
class BranchProbabilityAnalysis {
- typedef std::pair<BasicBlock *, BasicBlock *> Edge;
+ typedef std::pair<const BasicBlock *, const BasicBlock *> Edge;
DenseMap<Edge, uint32_t> *Weights;
@@ -52,7 +55,7 @@ class BranchProbabilityAnalysis {
// V
// BB1<-+
// | |
- // | | (Weight = 128)
+ // | | (Weight = 124)
// V |
// BB2--+
// |
@@ -60,12 +63,21 @@ class BranchProbabilityAnalysis {
// V
// BB3
//
- // Probability of the edge BB2->BB1 = 128 / (128 + 4) = 0.9696..
- // Probability of the edge BB2->BB3 = 4 / (128 + 4) = 0.0303..
+ // Probability of the edge BB2->BB1 = 124 / (124 + 4) = 0.96875
+ // Probability of the edge BB2->BB3 = 4 / (124 + 4) = 0.03125
- static const uint32_t LBH_TAKEN_WEIGHT = 128;
+ static const uint32_t LBH_TAKEN_WEIGHT = 124;
static const uint32_t LBH_NONTAKEN_WEIGHT = 4;
+ static const uint32_t RH_TAKEN_WEIGHT = 24;
+ static const uint32_t RH_NONTAKEN_WEIGHT = 8;
+
+ static const uint32_t PH_TAKEN_WEIGHT = 20;
+ static const uint32_t PH_NONTAKEN_WEIGHT = 12;
+
+ static const uint32_t ZH_TAKEN_WEIGHT = 20;
+ static const uint32_t ZH_NONTAKEN_WEIGHT = 12;
+
// Standard weight value. Used when none of the heuristics set weight for
// the edge.
static const uint32_t NORMAL_WEIGHT = 16;
@@ -100,29 +112,6 @@ class BranchProbabilityAnalysis {
return false;
}
- // Multiply Edge Weight by two.
- void incEdgeWeight(BasicBlock *Src, BasicBlock *Dst) {
- uint32_t Weight = BP->getEdgeWeight(Src, Dst);
- uint32_t MaxWeight = getMaxWeightFor(Src);
-
- if (Weight * 2 > MaxWeight)
- BP->setEdgeWeight(Src, Dst, MaxWeight);
- else
- BP->setEdgeWeight(Src, Dst, Weight * 2);
- }
-
- // Divide Edge Weight by two.
- void decEdgeWeight(BasicBlock *Src, BasicBlock *Dst) {
- uint32_t Weight = BP->getEdgeWeight(Src, Dst);
-
- assert(Weight > 0);
- if (Weight / 2 < MIN_WEIGHT)
- BP->setEdgeWeight(Src, Dst, MIN_WEIGHT);
- else
- BP->setEdgeWeight(Src, Dst, Weight / 2);
- }
-
-
uint32_t getMaxWeightFor(BasicBlock *BB) const {
return UINT32_MAX / BB->getTerminator()->getNumSuccessors();
}
@@ -133,49 +122,119 @@ public:
: Weights(W), BP(BP), LI(LI) {
}
+ // Metadata Weights
+ bool calcMetadataWeights(BasicBlock *BB);
+
// Return Heuristics
- void calcReturnHeuristics(BasicBlock *BB);
+ bool calcReturnHeuristics(BasicBlock *BB);
// Pointer Heuristics
- void calcPointerHeuristics(BasicBlock *BB);
+ bool calcPointerHeuristics(BasicBlock *BB);
// Loop Branch Heuristics
- void calcLoopBranchHeuristics(BasicBlock *BB);
+ bool calcLoopBranchHeuristics(BasicBlock *BB);
+
+ // Zero Heuristics
+ bool calcZeroHeuristics(BasicBlock *BB);
bool runOnFunction(Function &F);
};
} // end anonymous namespace
+// Propagate existing explicit probabilities from either profile data or
+// 'expect' intrinsic processing.
+bool BranchProbabilityAnalysis::calcMetadataWeights(BasicBlock *BB) {
+ TerminatorInst *TI = BB->getTerminator();
+ if (TI->getNumSuccessors() == 1)
+ return false;
+ if (!isa<BranchInst>(TI) && !isa<SwitchInst>(TI))
+ return false;
+
+ MDNode *WeightsNode = TI->getMetadata(LLVMContext::MD_prof);
+ if (!WeightsNode)
+ return false;
+
+ // Ensure there are weights for all of the successors. Note that the first
+ // operand to the metadata node is a name, not a weight.
+ if (WeightsNode->getNumOperands() != TI->getNumSuccessors() + 1)
+ return false;
+
+ // Build up the final weights in a temporary buffer, but don't add them
+ // until all weights are present. Each weight value is clamped to
+ // [1, getMaxWeightFor(BB)].
+ uint32_t WeightLimit = getMaxWeightFor(BB);
+ SmallVector<uint32_t, 2> Weights;
+ Weights.reserve(TI->getNumSuccessors());
+ for (unsigned i = 1, e = WeightsNode->getNumOperands(); i != e; ++i) {
+ ConstantInt *Weight = dyn_cast<ConstantInt>(WeightsNode->getOperand(i));
+ if (!Weight)
+ return false;
+ Weights.push_back(
+ std::max<uint32_t>(1, Weight->getLimitedValue(WeightLimit)));
+ }
+ assert(Weights.size() == TI->getNumSuccessors() && "Checked above");
+ for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i)
+ BP->setEdgeWeight(BB, TI->getSuccessor(i), Weights[i]);
+
+ return true;
+}
+
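The weights consumed here are ordinary !prof metadata whose first operand is the name "branch_weights". A hedged sketch of producing such a node with the 3.0 C++ API (the helper name and the 64:4 weights are chosen arbitrarily for illustration):

    #include "llvm/Constants.h"
    #include "llvm/Instructions.h"
    #include "llvm/LLVMContext.h"
    #include "llvm/Metadata.h"

    using namespace llvm;

    // Illustrative helper: mark a conditional branch as 64:4 in favor of its
    // first successor, in the shape calcMetadataWeights() expects: one name
    // operand followed by one weight per successor, each clamped to >= 1.
    static void annotateLikelyTaken(BranchInst *BI) {
      LLVMContext &Ctx = BI->getContext();
      Value *Ops[] = {
        MDString::get(Ctx, "branch_weights"),        // operand 0 is the name
        ConstantInt::get(Type::getInt32Ty(Ctx), 64), // weight of successor 0
        ConstantInt::get(Type::getInt32Ty(Ctx), 4)   // weight of successor 1
      };
      BI->setMetadata(LLVMContext::MD_prof, MDNode::get(Ctx, Ops));
    }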
// Calculate Edge Weights using "Return Heuristics". Predict a successor which
// leads directly to Return Instruction will not be taken.
-void BranchProbabilityAnalysis::calcReturnHeuristics(BasicBlock *BB){
+bool BranchProbabilityAnalysis::calcReturnHeuristics(BasicBlock *BB){
if (BB->getTerminator()->getNumSuccessors() == 1)
- return;
+ return false;
+
+ SmallPtrSet<BasicBlock *, 4> ReturningEdges;
+ SmallPtrSet<BasicBlock *, 4> StayEdges;
for (succ_iterator I = succ_begin(BB), E = succ_end(BB); I != E; ++I) {
BasicBlock *Succ = *I;
- if (isReturningBlock(Succ)) {
- decEdgeWeight(BB, Succ);
+ if (isReturningBlock(Succ))
+ ReturningEdges.insert(Succ);
+ else
+ StayEdges.insert(Succ);
+ }
+
+ if (uint32_t numStayEdges = StayEdges.size()) {
+ uint32_t stayWeight = RH_TAKEN_WEIGHT / numStayEdges;
+ if (stayWeight < NORMAL_WEIGHT)
+ stayWeight = NORMAL_WEIGHT;
+
+ for (SmallPtrSet<BasicBlock *, 4>::iterator I = StayEdges.begin(),
+ E = StayEdges.end(); I != E; ++I)
+ BP->setEdgeWeight(BB, *I, stayWeight);
+ }
+
+ if (uint32_t numRetEdges = ReturningEdges.size()) {
+ uint32_t retWeight = RH_NONTAKEN_WEIGHT / numRetEdges;
+ if (retWeight < MIN_WEIGHT)
+ retWeight = MIN_WEIGHT;
+ for (SmallPtrSet<BasicBlock *, 4>::iterator I = ReturningEdges.begin(),
+ E = ReturningEdges.end(); I != E; ++I) {
+ BP->setEdgeWeight(BB, *I, retWeight);
}
}
+
+ return ReturningEdges.size() > 0;
}
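Worked instance of the new weighting: for a conditional branch with one non-returning and one returning successor, the stay edge gets max(RH_TAKEN_WEIGHT / 1, NORMAL_WEIGHT) = max(24, 16) = 24, and the return edge gets RH_NONTAKEN_WEIGHT / 1 = 8 (clamped below by MIN_WEIGHT), so the non-returning path is predicted taken with probability 24 / (24 + 8) = 75%.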
// Calculate Edge Weights using "Pointer Heuristics". Predict a comparsion
// between two pointer or pointer and NULL will fail.
-void BranchProbabilityAnalysis::calcPointerHeuristics(BasicBlock *BB) {
+bool BranchProbabilityAnalysis::calcPointerHeuristics(BasicBlock *BB) {
BranchInst * BI = dyn_cast<BranchInst>(BB->getTerminator());
if (!BI || !BI->isConditional())
- return;
+ return false;
Value *Cond = BI->getCondition();
ICmpInst *CI = dyn_cast<ICmpInst>(Cond);
if (!CI || !CI->isEquality())
- return;
+ return false;
Value *LHS = CI->getOperand(0);
if (!LHS->getType()->isPointerTy())
- return;
+ return false;
assert(CI->getOperand(1)->getType()->isPointerTy());
@@ -190,29 +249,35 @@ void BranchProbabilityAnalysis::calcPointerHeuristics(BasicBlock *BB) {
if (!isProb)
std::swap(Taken, NonTaken);
- incEdgeWeight(BB, Taken);
- decEdgeWeight(BB, NonTaken);
+ BP->setEdgeWeight(BB, Taken, PH_TAKEN_WEIGHT);
+ BP->setEdgeWeight(BB, NonTaken, PH_NONTAKEN_WEIGHT);
+ return true;
}
// Calculate Edge Weights using "Loop Branch Heuristics". Predict backedges
// as taken, exiting edges as not-taken.
-void BranchProbabilityAnalysis::calcLoopBranchHeuristics(BasicBlock *BB) {
+bool BranchProbabilityAnalysis::calcLoopBranchHeuristics(BasicBlock *BB) {
uint32_t numSuccs = BB->getTerminator()->getNumSuccessors();
Loop *L = LI->getLoopFor(BB);
if (!L)
- return;
+ return false;
+
+ SmallPtrSet<BasicBlock *, 8> BackEdges;
+ SmallPtrSet<BasicBlock *, 8> ExitingEdges;
+ SmallPtrSet<BasicBlock *, 8> InEdges; // Edges from the header into the loop.
- SmallVector<BasicBlock *, 8> BackEdges;
- SmallVector<BasicBlock *, 8> ExitingEdges;
+ bool isHeader = BB == L->getHeader();
for (succ_iterator I = succ_begin(BB), E = succ_end(BB); I != E; ++I) {
BasicBlock *Succ = *I;
Loop *SuccL = LI->getLoopFor(Succ);
if (SuccL != L)
- ExitingEdges.push_back(Succ);
+ ExitingEdges.insert(Succ);
else if (Succ == L->getHeader())
- BackEdges.push_back(Succ);
+ BackEdges.insert(Succ);
+ else if (isHeader)
+ InEdges.insert(Succ);
}
if (uint32_t numBackEdges = BackEdges.size()) {
@@ -220,39 +285,121 @@ void BranchProbabilityAnalysis::calcLoopBranchHeuristics(BasicBlock *BB) {
if (backWeight < NORMAL_WEIGHT)
backWeight = NORMAL_WEIGHT;
- for (SmallVector<BasicBlock *, 8>::iterator EI = BackEdges.begin(),
+ for (SmallPtrSet<BasicBlock *, 8>::iterator EI = BackEdges.begin(),
EE = BackEdges.end(); EI != EE; ++EI) {
BasicBlock *Back = *EI;
BP->setEdgeWeight(BB, Back, backWeight);
}
}
+ if (uint32_t numInEdges = InEdges.size()) {
+ uint32_t inWeight = LBH_TAKEN_WEIGHT / numInEdges;
+ if (inWeight < NORMAL_WEIGHT)
+ inWeight = NORMAL_WEIGHT;
+
+ for (SmallPtrSet<BasicBlock *, 8>::iterator EI = InEdges.begin(),
+ EE = InEdges.end(); EI != EE; ++EI) {
+ BasicBlock *Back = *EI;
+ BP->setEdgeWeight(BB, Back, inWeight);
+ }
+ }
+
uint32_t numExitingEdges = ExitingEdges.size();
if (uint32_t numNonExitingEdges = numSuccs - numExitingEdges) {
uint32_t exitWeight = LBH_NONTAKEN_WEIGHT / numNonExitingEdges;
if (exitWeight < MIN_WEIGHT)
exitWeight = MIN_WEIGHT;
- for (SmallVector<BasicBlock *, 8>::iterator EI = ExitingEdges.begin(),
+ for (SmallPtrSet<BasicBlock *, 8>::iterator EI = ExitingEdges.begin(),
EE = ExitingEdges.end(); EI != EE; ++EI) {
BasicBlock *Exiting = *EI;
BP->setEdgeWeight(BB, Exiting, exitWeight);
}
}
+
+ return true;
}
+bool BranchProbabilityAnalysis::calcZeroHeuristics(BasicBlock *BB) {
+ BranchInst * BI = dyn_cast<BranchInst>(BB->getTerminator());
+ if (!BI || !BI->isConditional())
+ return false;
+
+ Value *Cond = BI->getCondition();
+ ICmpInst *CI = dyn_cast<ICmpInst>(Cond);
+ if (!CI)
+ return false;
+
+ Value *RHS = CI->getOperand(1);
+ ConstantInt *CV = dyn_cast<ConstantInt>(RHS);
+ if (!CV)
+ return false;
+
+ bool isProb;
+ if (CV->isZero()) {
+ switch (CI->getPredicate()) {
+ case CmpInst::ICMP_EQ:
+ // X == 0 -> Unlikely
+ isProb = false;
+ break;
+ case CmpInst::ICMP_NE:
+ // X != 0 -> Likely
+ isProb = true;
+ break;
+ case CmpInst::ICMP_SLT:
+ // X < 0 -> Unlikely
+ isProb = false;
+ break;
+ case CmpInst::ICMP_SGT:
+ // X > 0 -> Likely
+ isProb = true;
+ break;
+ default:
+ return false;
+ }
+ } else if (CV->isOne() && CI->getPredicate() == CmpInst::ICMP_SLT) {
+ // InstCombine canonicalizes X <= 0 into X < 1.
+ // X <= 0 -> Unlikely
+ isProb = false;
+ } else if (CV->isAllOnesValue() && CI->getPredicate() == CmpInst::ICMP_SGT) {
+ // InstCombine canonicalizes X >= 0 into X > -1.
+ // X >= 0 -> Likely
+ isProb = true;
+ } else {
+ return false;
+ }
+
+ BasicBlock *Taken = BI->getSuccessor(0);
+ BasicBlock *NonTaken = BI->getSuccessor(1);
+
+ if (!isProb)
+ std::swap(Taken, NonTaken);
+
+ BP->setEdgeWeight(BB, Taken, ZH_TAKEN_WEIGHT);
+ BP->setEdgeWeight(BB, NonTaken, ZH_NONTAKEN_WEIGHT);
+
+ return true;
+}
+
+
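For example, given `if (x == 0)` the ICMP_EQ case sets isProb to false, so the successors swap: the else block becomes Taken with weight ZH_TAKEN_WEIGHT = 20 and the then block becomes NonTaken with weight ZH_NONTAKEN_WEIGHT = 12, i.e. the comparison is predicted to fail with probability 20 / (20 + 12) = 62.5%.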
bool BranchProbabilityAnalysis::runOnFunction(Function &F) {
for (Function::iterator I = F.begin(), E = F.end(); I != E; ) {
BasicBlock *BB = I++;
- // Only LBH uses setEdgeWeight method.
- calcLoopBranchHeuristics(BB);
+ if (calcMetadataWeights(BB))
+ continue;
+
+ if (calcLoopBranchHeuristics(BB))
+ continue;
- // PH and RH use only incEdgeWeight and decEwdgeWeight methods to
- // not efface LBH results.
- calcPointerHeuristics(BB);
- calcReturnHeuristics(BB);
+ if (calcReturnHeuristics(BB))
+ continue;
+
+ if (calcPointerHeuristics(BB))
+ continue;
+
+ calcZeroHeuristics(BB);
}
return false;
@@ -269,11 +416,11 @@ bool BranchProbabilityInfo::runOnFunction(Function &F) {
return BPA.runOnFunction(F);
}
-uint32_t BranchProbabilityInfo::getSumForBlock(BasicBlock *BB) const {
+uint32_t BranchProbabilityInfo::getSumForBlock(const BasicBlock *BB) const {
uint32_t Sum = 0;
- for (succ_iterator I = succ_begin(BB), E = succ_end(BB); I != E; ++I) {
- BasicBlock *Succ = *I;
+ for (succ_const_iterator I = succ_begin(BB), E = succ_end(BB); I != E; ++I) {
+ const BasicBlock *Succ = *I;
uint32_t Weight = getEdgeWeight(BB, Succ);
uint32_t PrevSum = Sum;
@@ -284,7 +431,8 @@ uint32_t BranchProbabilityInfo::getSumForBlock(BasicBlock *BB) const {
return Sum;
}
-bool BranchProbabilityInfo::isEdgeHot(BasicBlock *Src, BasicBlock *Dst) const {
+bool BranchProbabilityInfo::
+isEdgeHot(const BasicBlock *Src, const BasicBlock *Dst) const {
// Hot probability is at least 4/5 = 80%
uint32_t Weight = getEdgeWeight(Src, Dst);
uint32_t Sum = getSumForBlock(Src);
@@ -321,8 +469,8 @@ BasicBlock *BranchProbabilityInfo::getHotSucc(BasicBlock *BB) const {
}
// Return the edge's weight. If we can't find it, return the DEFAULT_WEIGHT value.
-uint32_t
-BranchProbabilityInfo::getEdgeWeight(BasicBlock *Src, BasicBlock *Dst) const {
+uint32_t BranchProbabilityInfo::
+getEdgeWeight(const BasicBlock *Src, const BasicBlock *Dst) const {
Edge E(Src, Dst);
DenseMap<Edge, uint32_t>::const_iterator I = Weights.find(E);
@@ -332,8 +480,8 @@ BranchProbabilityInfo::getEdgeWeight(BasicBlock *Src, BasicBlock *Dst) const {
return DEFAULT_WEIGHT;
}
-void BranchProbabilityInfo::setEdgeWeight(BasicBlock *Src, BasicBlock *Dst,
- uint32_t Weight) {
+void BranchProbabilityInfo::
+setEdgeWeight(const BasicBlock *Src, const BasicBlock *Dst, uint32_t Weight) {
Weights[std::make_pair(Src, Dst)] = Weight;
DEBUG(dbgs() << "set edge " << Src->getNameStr() << " -> "
<< Dst->getNameStr() << " weight to " << Weight
@@ -342,7 +490,7 @@ void BranchProbabilityInfo::setEdgeWeight(BasicBlock *Src, BasicBlock *Dst,
BranchProbability BranchProbabilityInfo::
-getEdgeProbability(BasicBlock *Src, BasicBlock *Dst) const {
+getEdgeProbability(const BasicBlock *Src, const BasicBlock *Dst) const {
uint32_t N = getEdgeWeight(Src, Dst);
uint32_t D = getSumForBlock(Src);
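With the interface const-qualified, read-only clients can now take const BasicBlock pointers end to end. A hedged sketch of such a client (names assumed; BranchProbability::print is the 3.0 pretty-printer):

    #include "llvm/Analysis/BranchProbabilityInfo.h"
    #include "llvm/Support/BranchProbability.h"
    #include "llvm/Support/CFG.h"
    #include "llvm/Support/raw_ostream.h"

    using namespace llvm;

    // Illustrative helper: print hot/cold and the probability of each edge.
    static void dumpSuccessorProbabilities(const BasicBlock *BB,
                                           const BranchProbabilityInfo &BPI) {
      for (succ_const_iterator I = succ_begin(BB), E = succ_end(BB);
           I != E; ++I) {
        errs() << (BPI.isEdgeHot(BB, *I) ? "hot  " : "cold ");
        BPI.getEdgeProbability(BB, *I).print(errs());
        errs() << "\n";
      }
    }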
diff --git a/lib/Analysis/CMakeLists.txt b/lib/Analysis/CMakeLists.txt
index ab846a2..e79459d 100644
--- a/lib/Analysis/CMakeLists.txt
+++ b/lib/Analysis/CMakeLists.txt
@@ -6,7 +6,7 @@ add_llvm_library(LLVMAnalysis
AliasSetTracker.cpp
Analysis.cpp
BasicAliasAnalysis.cpp
- BlockFrequency.cpp
+ BlockFrequencyInfo.cpp
BranchProbabilityInfo.cpp
CFGPrinter.cpp
CaptureTracking.cpp
@@ -58,4 +58,10 @@ add_llvm_library(LLVMAnalysis
ValueTracking.cpp
)
+add_llvm_library_dependencies(LLVMAnalysis
+ LLVMCore
+ LLVMSupport
+ LLVMTarget
+ )
+
add_subdirectory(IPA)
diff --git a/lib/Analysis/ConstantFolding.cpp b/lib/Analysis/ConstantFolding.cpp
index 7fca17e..df79849 100644
--- a/lib/Analysis/ConstantFolding.cpp
+++ b/lib/Analysis/ConstantFolding.cpp
@@ -43,11 +43,16 @@ using namespace llvm;
/// FoldBitCast - Constant fold bitcast, symbolically evaluating it with
/// TargetData. This always returns a non-null constant, but it may be a
/// ConstantExpr if unfoldable.
-static Constant *FoldBitCast(Constant *C, const Type *DestTy,
+static Constant *FoldBitCast(Constant *C, Type *DestTy,
const TargetData &TD) {
-
- // This only handles casts to vectors currently.
- const VectorType *DestVTy = dyn_cast<VectorType>(DestTy);
+ // Catch the obvious splat cases.
+ if (C->isNullValue() && !DestTy->isX86_MMXTy())
+ return Constant::getNullValue(DestTy);
+ if (C->isAllOnesValue() && !DestTy->isX86_MMXTy())
+ return Constant::getAllOnesValue(DestTy);
+
+ // The code below only handles casts to vectors currently.
+ VectorType *DestVTy = dyn_cast<VectorType>(DestTy);
if (DestVTy == 0)
return ConstantExpr::getBitCast(C, DestTy);
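A hedged illustration of the new early-outs, routed through the public ConstantFoldInstOperands entry point since FoldBitCast itself is file-local (the surrounding setup is assumed):

    #include "llvm/Analysis/ConstantFolding.h"
    #include "llvm/Constants.h"
    #include "llvm/DerivedTypes.h"
    #include "llvm/Instruction.h"

    using namespace llvm;

    // Illustrative helper: i32 0 bitcast to <4 x i8> now folds straight to the
    // <4 x i8> zero value via the isNullValue() shortcut, without running the
    // element-by-element vector path.
    Constant *foldZeroBitCast(LLVMContext &Ctx, const TargetData *TD) {
      Type *V4I8 = VectorType::get(Type::getInt8Ty(Ctx), 4);
      Constant *Ops[] = { Constant::getNullValue(Type::getInt32Ty(Ctx)) };
      return ConstantFoldInstOperands(Instruction::BitCast, V4I8, Ops, TD);
    }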
@@ -69,8 +74,8 @@ static Constant *FoldBitCast(Constant *C, const Type *DestTy,
if (NumDstElt == NumSrcElt)
return ConstantExpr::getBitCast(C, DestTy);
- const Type *SrcEltTy = CV->getType()->getElementType();
- const Type *DstEltTy = DestVTy->getElementType();
+ Type *SrcEltTy = CV->getType()->getElementType();
+ Type *DstEltTy = DestVTy->getElementType();
// Otherwise, we're changing the number of elements in a vector, which
// requires endianness information to do the right thing. For example,
@@ -85,7 +90,7 @@ static Constant *FoldBitCast(Constant *C, const Type *DestTy,
if (DstEltTy->isFloatingPointTy()) {
// Fold to a vector of integers with the same size as our FP type.
unsigned FPWidth = DstEltTy->getPrimitiveSizeInBits();
- const Type *DestIVTy =
+ Type *DestIVTy =
VectorType::get(IntegerType::get(C->getContext(), FPWidth), NumDstElt);
// Recursively handle this integer conversion, if possible.
C = FoldBitCast(C, DestIVTy, TD);
@@ -99,7 +104,7 @@ static Constant *FoldBitCast(Constant *C, const Type *DestTy,
// it to integer first.
if (SrcEltTy->isFloatingPointTy()) {
unsigned FPWidth = SrcEltTy->getPrimitiveSizeInBits();
- const Type *SrcIVTy =
+ Type *SrcIVTy =
VectorType::get(IntegerType::get(C->getContext(), FPWidth), NumSrcElt);
// Ask VMCore to do the conversion now that #elts line up.
C = ConstantExpr::getBitCast(C, SrcIVTy);
@@ -212,11 +217,11 @@ static bool IsConstantOffsetFromGlobal(Constant *C, GlobalValue *&GV,
if (!CI) return false; // Index isn't a simple constant?
if (CI->isZero()) continue; // Not adding anything.
- if (const StructType *ST = dyn_cast<StructType>(*GTI)) {
+ if (StructType *ST = dyn_cast<StructType>(*GTI)) {
// N = N + Offset
Offset += TD.getStructLayout(ST)->getElementOffset(CI->getZExtValue());
} else {
- const SequentialType *SQT = cast<SequentialType>(*GTI);
+ SequentialType *SQT = cast<SequentialType>(*GTI);
Offset += TD.getTypeAllocSize(SQT->getElementType())*CI->getSExtValue();
}
}
@@ -354,8 +359,8 @@ static bool ReadDataFromGlobal(Constant *C, uint64_t ByteOffset,
static Constant *FoldReinterpretLoadFromConstPtr(Constant *C,
const TargetData &TD) {
- const Type *LoadTy = cast<PointerType>(C->getType())->getElementType();
- const IntegerType *IntType = dyn_cast<IntegerType>(LoadTy);
+ Type *LoadTy = cast<PointerType>(C->getType())->getElementType();
+ IntegerType *IntType = dyn_cast<IntegerType>(LoadTy);
// If this isn't an integer load we can't fold it directly.
if (!IntType) {
@@ -363,7 +368,7 @@ static Constant *FoldReinterpretLoadFromConstPtr(Constant *C,
// and then bitcast the result. This can be useful for union cases. Note
// that address spaces don't matter here since this doesn't result in an
// actual new load.
- const Type *MapTy;
+ Type *MapTy;
if (LoadTy->isFloatTy())
MapTy = Type::getInt32PtrTy(C->getContext());
else if (LoadTy->isDoubleTy())
@@ -443,7 +448,7 @@ Constant *llvm::ConstantFoldLoadFromConstPtr(Constant *C,
std::string Str;
if (TD && GetConstantStringInfo(CE, Str) && !Str.empty()) {
unsigned StrLen = Str.length();
- const Type *Ty = cast<PointerType>(CE->getType())->getElementType();
+ Type *Ty = cast<PointerType>(CE->getType())->getElementType();
unsigned NumBits = Ty->getPrimitiveSizeInBits();
// Replace load with immediate integer if the result is an integer or fp
// value.
@@ -478,7 +483,7 @@ Constant *llvm::ConstantFoldLoadFromConstPtr(Constant *C,
if (GlobalVariable *GV =
dyn_cast<GlobalVariable>(GetUnderlyingObject(CE, TD))) {
if (GV->isConstant() && GV->hasDefinitiveInitializer()) {
- const Type *ResTy = cast<PointerType>(C->getType())->getElementType();
+ Type *ResTy = cast<PointerType>(C->getType())->getElementType();
if (GV->getInitializer()->isNullValue())
return Constant::getNullValue(ResTy);
if (isa<UndefValue>(GV->getInitializer()))
@@ -536,19 +541,18 @@ static Constant *SymbolicallyEvaluateBinop(unsigned Opc, Constant *Op0,
/// CastGEPIndices - If array indices are not pointer-sized integers,
/// explicitly cast them so that they aren't implicitly cast by the
/// getelementptr.
-static Constant *CastGEPIndices(Constant *const *Ops, unsigned NumOps,
- const Type *ResultTy,
+static Constant *CastGEPIndices(ArrayRef<Constant *> Ops,
+ Type *ResultTy,
const TargetData *TD) {
if (!TD) return 0;
- const Type *IntPtrTy = TD->getIntPtrType(ResultTy->getContext());
+ Type *IntPtrTy = TD->getIntPtrType(ResultTy->getContext());
bool Any = false;
SmallVector<Constant*, 32> NewIdxs;
- for (unsigned i = 1; i != NumOps; ++i) {
+ for (unsigned i = 1, e = Ops.size(); i != e; ++i) {
if ((i == 1 ||
!isa<StructType>(GetElementPtrInst::getIndexedType(Ops[0]->getType(),
- reinterpret_cast<Value *const *>(Ops+1),
- i-1))) &&
+ Ops.slice(1, i-1)))) &&
Ops[i]->getType() != IntPtrTy) {
Any = true;
NewIdxs.push_back(ConstantExpr::getCast(CastInst::getCastOpcode(Ops[i],
@@ -562,7 +566,7 @@ static Constant *CastGEPIndices(Constant *const *Ops, unsigned NumOps,
if (!Any) return 0;
Constant *C =
- ConstantExpr::getGetElementPtr(Ops[0], &NewIdxs[0], NewIdxs.size());
+ ConstantExpr::getGetElementPtr(Ops[0], NewIdxs);
if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C))
if (Constant *Folded = ConstantFoldConstantExpression(CE, TD))
C = Folded;
@@ -571,23 +575,23 @@ static Constant *CastGEPIndices(Constant *const *Ops, unsigned NumOps,
/// SymbolicallyEvaluateGEP - If we can symbolically evaluate the specified GEP
/// constant expression, do so.
-static Constant *SymbolicallyEvaluateGEP(Constant *const *Ops, unsigned NumOps,
- const Type *ResultTy,
+static Constant *SymbolicallyEvaluateGEP(ArrayRef<Constant *> Ops,
+ Type *ResultTy,
const TargetData *TD) {
Constant *Ptr = Ops[0];
if (!TD || !cast<PointerType>(Ptr->getType())->getElementType()->isSized())
return 0;
- const Type *IntPtrTy = TD->getIntPtrType(Ptr->getContext());
+ Type *IntPtrTy = TD->getIntPtrType(Ptr->getContext());
// If this is a constant expr gep that is effectively computing an
// "offsetof", fold it into 'cast int Size to T*' instead of 'gep 0, 0, 12'
- for (unsigned i = 1; i != NumOps; ++i)
+ for (unsigned i = 1, e = Ops.size(); i != e; ++i)
if (!isa<ConstantInt>(Ops[i])) {
// If this is "gep i8* Ptr, (sub 0, V)", fold this as:
// "inttoptr (sub (ptrtoint Ptr), V)"
- if (NumOps == 2 &&
+ if (Ops.size() == 2 &&
cast<PointerType>(ResultTy)->getElementType()->isIntegerTy(8)) {
ConstantExpr *CE = dyn_cast<ConstantExpr>(Ops[1]);
assert((CE == 0 || CE->getType() == IntPtrTy) &&
@@ -606,9 +610,10 @@ static Constant *SymbolicallyEvaluateGEP(Constant *const *Ops, unsigned NumOps,
}
unsigned BitWidth = TD->getTypeSizeInBits(IntPtrTy);
- APInt Offset = APInt(BitWidth,
- TD->getIndexedOffset(Ptr->getType(),
- (Value**)Ops+1, NumOps-1));
+ APInt Offset =
+ APInt(BitWidth, TD->getIndexedOffset(Ptr->getType(),
+ makeArrayRef((Value **)Ops.data() + 1,
+ Ops.size() - 1)));
Ptr = cast<Constant>(Ptr->stripPointerCasts());
// If this is a GEP of a GEP, fold it all into a single GEP.
@@ -627,9 +632,7 @@ static Constant *SymbolicallyEvaluateGEP(Constant *const *Ops, unsigned NumOps,
Ptr = cast<Constant>(GEP->getOperand(0));
Offset += APInt(BitWidth,
- TD->getIndexedOffset(Ptr->getType(),
- (Value**)NestedOps.data(),
- NestedOps.size()));
+ TD->getIndexedOffset(Ptr->getType(), NestedOps));
Ptr = cast<Constant>(Ptr->stripPointerCasts());
}
@@ -649,10 +652,10 @@ static Constant *SymbolicallyEvaluateGEP(Constant *const *Ops, unsigned NumOps,
// we eliminate over-indexing of the notional static type array bounds.
// This makes it easy to determine if the getelementptr is "inbounds".
// Also, this helps GlobalOpt do SROA on GlobalVariables.
- const Type *Ty = Ptr->getType();
+ Type *Ty = Ptr->getType();
SmallVector<Constant*, 32> NewIdxs;
do {
- if (const SequentialType *ATy = dyn_cast<SequentialType>(Ty)) {
+ if (SequentialType *ATy = dyn_cast<SequentialType>(Ty)) {
if (ATy->isPointerTy()) {
// The only pointer indexing we'll do is on the first index of the GEP.
if (!NewIdxs.empty())
@@ -665,7 +668,7 @@ static Constant *SymbolicallyEvaluateGEP(Constant *const *Ops, unsigned NumOps,
// Determine which element of the array the offset points into.
APInt ElemSize(BitWidth, TD->getTypeAllocSize(ATy->getElementType()));
- const IntegerType *IntPtrTy = TD->getIntPtrType(Ty->getContext());
+ IntegerType *IntPtrTy = TD->getIntPtrType(Ty->getContext());
if (ElemSize == 0)
// The element size is 0. This may be [0 x Ty]*, so just use a zero
// index for this level and proceed to the next level to see if it can
@@ -679,7 +682,7 @@ static Constant *SymbolicallyEvaluateGEP(Constant *const *Ops, unsigned NumOps,
NewIdxs.push_back(ConstantInt::get(IntPtrTy, NewIdx));
}
Ty = ATy->getElementType();
- } else if (const StructType *STy = dyn_cast<StructType>(Ty)) {
+ } else if (StructType *STy = dyn_cast<StructType>(Ty)) {
// Determine which field of the struct the offset points into. The
// getZExtValue is at least as safe as the StructLayout API because we
// know the offset is within the struct at this point.
@@ -703,7 +706,7 @@ static Constant *SymbolicallyEvaluateGEP(Constant *const *Ops, unsigned NumOps,
// Create a GEP.
Constant *C =
- ConstantExpr::getGetElementPtr(Ptr, &NewIdxs[0], NewIdxs.size());
+ ConstantExpr::getGetElementPtr(Ptr, NewIdxs);
assert(cast<PointerType>(C->getType())->getElementType() == Ty &&
"Computed GetElementPtr has unexpected type!");
@@ -778,8 +781,7 @@ Constant *llvm::ConstantFoldInstruction(Instruction *I, const TargetData *TD) {
cast<Constant>(EVI->getAggregateOperand()),
EVI->getIndices());
- return ConstantFoldInstOperands(I->getOpcode(), I->getType(),
- Ops.data(), Ops.size(), TD);
+ return ConstantFoldInstOperands(I->getOpcode(), I->getType(), Ops, TD);
}
/// ConstantFoldConstantExpression - Attempt to fold the constant expression
@@ -800,8 +802,7 @@ Constant *llvm::ConstantFoldConstantExpression(const ConstantExpr *CE,
if (CE->isCompare())
return ConstantFoldCompareInstOperands(CE->getPredicate(), Ops[0], Ops[1],
TD);
- return ConstantFoldInstOperands(CE->getOpcode(), CE->getType(),
- Ops.data(), Ops.size(), TD);
+ return ConstantFoldInstOperands(CE->getOpcode(), CE->getType(), Ops, TD);
}
/// ConstantFoldInstOperands - Attempt to constant fold an instruction with the
@@ -814,8 +815,8 @@ Constant *llvm::ConstantFoldConstantExpression(const ConstantExpr *CE,
/// information, due to only being passed an opcode and operands. Constant
/// folding using this function strips this information.
///
-Constant *llvm::ConstantFoldInstOperands(unsigned Opcode, const Type *DestTy,
- Constant* const* Ops, unsigned NumOps,
+Constant *llvm::ConstantFoldInstOperands(unsigned Opcode, Type *DestTy,
+ ArrayRef<Constant *> Ops,
const TargetData *TD) {
// Handle easy binops first.
if (Instruction::isBinaryOp(Opcode)) {
@@ -831,9 +832,9 @@ Constant *llvm::ConstantFoldInstOperands(unsigned Opcode, const Type *DestTy,
case Instruction::ICmp:
case Instruction::FCmp: assert(0 && "Invalid for compares");
case Instruction::Call:
- if (Function *F = dyn_cast<Function>(Ops[NumOps - 1]))
+ if (Function *F = dyn_cast<Function>(Ops.back()))
if (canConstantFoldCallTo(F))
- return ConstantFoldCall(F, Ops, NumOps - 1);
+ return ConstantFoldCall(F, Ops.slice(0, Ops.size() - 1));
return 0;
case Instruction::PtrToInt:
// If the input is a inttoptr, eliminate the pair. This requires knowing
@@ -887,12 +888,12 @@ Constant *llvm::ConstantFoldInstOperands(unsigned Opcode, const Type *DestTy,
case Instruction::ShuffleVector:
return ConstantExpr::getShuffleVector(Ops[0], Ops[1], Ops[2]);
case Instruction::GetElementPtr:
- if (Constant *C = CastGEPIndices(Ops, NumOps, DestTy, TD))
+ if (Constant *C = CastGEPIndices(Ops, DestTy, TD))
return C;
- if (Constant *C = SymbolicallyEvaluateGEP(Ops, NumOps, DestTy, TD))
+ if (Constant *C = SymbolicallyEvaluateGEP(Ops, DestTy, TD))
return C;
- return ConstantExpr::getGetElementPtr(Ops[0], Ops+1, NumOps-1);
+ return ConstantExpr::getGetElementPtr(Ops[0], Ops.slice(1));
}
}
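Several entry points in this file move from (pointer, length) pairs to ArrayRef<Constant *>, which binds implicitly to C arrays, SmallVectors, and sub-ranges. A minimal illustration of the call-site ergonomics (helper names assumed):

    #include "llvm/ADT/ArrayRef.h"
    #include "llvm/ADT/SmallVector.h"
    #include "llvm/Constant.h"

    using namespace llvm;

    static unsigned countOps(ArrayRef<Constant *> Ops) { return Ops.size(); }

    static void demo(Constant *A, Constant *B) {
      Constant *Arr[] = { A, B };
      SmallVector<Constant *, 4> Vec(Arr, Arr + 2);
      countOps(Arr);                                // binds to a C array
      countOps(Vec);                                // binds to a SmallVector
      countOps(ArrayRef<Constant *>(Arr).slice(1)); // a sub-range, as the
    }                                               // patch's .slice() calls do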
@@ -912,7 +913,7 @@ Constant *llvm::ConstantFoldCompareInstOperands(unsigned Predicate,
// around to know if bit truncation is happening.
if (ConstantExpr *CE0 = dyn_cast<ConstantExpr>(Ops0)) {
if (TD && Ops1->isNullValue()) {
- const Type *IntPtrTy = TD->getIntPtrType(CE0->getContext());
+ Type *IntPtrTy = TD->getIntPtrType(CE0->getContext());
if (CE0->getOpcode() == Instruction::IntToPtr) {
// Convert the integer value to the right size to ensure we get the
// proper extension or truncation.
@@ -934,7 +935,7 @@ Constant *llvm::ConstantFoldCompareInstOperands(unsigned Predicate,
if (ConstantExpr *CE1 = dyn_cast<ConstantExpr>(Ops1)) {
if (TD && CE0->getOpcode() == CE1->getOpcode()) {
- const Type *IntPtrTy = TD->getIntPtrType(CE0->getContext());
+ Type *IntPtrTy = TD->getIntPtrType(CE0->getContext());
if (CE0->getOpcode() == Instruction::IntToPtr) {
// Convert the integer value to the right size to ensure we get the
@@ -967,7 +968,7 @@ Constant *llvm::ConstantFoldCompareInstOperands(unsigned Predicate,
unsigned OpC =
Predicate == ICmpInst::ICMP_EQ ? Instruction::And : Instruction::Or;
Constant *Ops[] = { LHS, RHS };
- return ConstantFoldInstOperands(OpC, LHS->getType(), Ops, 2, TD);
+ return ConstantFoldInstOperands(OpC, LHS->getType(), Ops, TD);
}
}
@@ -987,7 +988,7 @@ Constant *llvm::ConstantFoldLoadThroughGEPConstantExpr(Constant *C,
// addressing...
gep_type_iterator I = gep_type_begin(CE), E = gep_type_end(CE);
for (++I; I != E; ++I)
- if (const StructType *STy = dyn_cast<StructType>(*I)) {
+ if (StructType *STy = dyn_cast<StructType>(*I)) {
ConstantInt *CU = cast<ConstantInt>(I.getOperand());
assert(CU->getZExtValue() < STy->getNumElements() &&
"Struct index out of range!");
@@ -1002,7 +1003,7 @@ Constant *llvm::ConstantFoldLoadThroughGEPConstantExpr(Constant *C,
return 0;
}
} else if (ConstantInt *CI = dyn_cast<ConstantInt>(I.getOperand())) {
- if (const ArrayType *ATy = dyn_cast<ArrayType>(*I)) {
+ if (ArrayType *ATy = dyn_cast<ArrayType>(*I)) {
if (CI->getZExtValue() >= ATy->getNumElements())
return 0;
if (ConstantArray *CA = dyn_cast<ConstantArray>(C))
@@ -1013,7 +1014,7 @@ Constant *llvm::ConstantFoldLoadThroughGEPConstantExpr(Constant *C,
C = UndefValue::get(ATy->getElementType());
else
return 0;
- } else if (const VectorType *VTy = dyn_cast<VectorType>(*I)) {
+ } else if (VectorType *VTy = dyn_cast<VectorType>(*I)) {
if (CI->getZExtValue() >= VTy->getNumElements())
return 0;
if (ConstantVector *CP = dyn_cast<ConstantVector>(C))
@@ -1101,7 +1102,7 @@ llvm::canConstantFoldCallTo(const Function *F) {
}
static Constant *ConstantFoldFP(double (*NativeFP)(double), double V,
- const Type *Ty) {
+ Type *Ty) {
sys::llvm_fenv_clearexcept();
V = NativeFP(V);
if (sys::llvm_fenv_testexcept()) {
@@ -1118,7 +1119,7 @@ static Constant *ConstantFoldFP(double (*NativeFP)(double), double V,
}
static Constant *ConstantFoldBinaryFP(double (*NativeFP)(double, double),
- double V, double W, const Type *Ty) {
+ double V, double W, Type *Ty) {
sys::llvm_fenv_clearexcept();
V = NativeFP(V, W);
if (sys::llvm_fenv_testexcept()) {
@@ -1143,7 +1144,7 @@ static Constant *ConstantFoldBinaryFP(double (*NativeFP)(double, double),
/// performed, otherwise returns the Constant value resulting from the
/// conversion.
static Constant *ConstantFoldConvertToInt(ConstantFP *Op, bool roundTowardZero,
- const Type *Ty) {
+ Type *Ty) {
assert(Op && "Called with NULL operand");
APFloat Val(Op->getValueAPF());
@@ -1167,13 +1168,12 @@ static Constant *ConstantFoldConvertToInt(ConstantFP *Op, bool roundTowardZero,
/// ConstantFoldCall - Attempt to constant fold a call to the specified function
/// with the specified arguments, returning null if unsuccessful.
Constant *
-llvm::ConstantFoldCall(Function *F,
- Constant *const *Operands, unsigned NumOperands) {
+llvm::ConstantFoldCall(Function *F, ArrayRef<Constant *> Operands) {
if (!F->hasName()) return 0;
StringRef Name = F->getName();
- const Type *Ty = F->getReturnType();
- if (NumOperands == 1) {
+ Type *Ty = F->getReturnType();
+ if (Operands.size() == 1) {
if (ConstantFP *Op = dyn_cast<ConstantFP>(Operands[0])) {
if (F->getIntrinsicID() == Intrinsic::convert_to_fp16) {
APFloat Val(Op->getValueAPF());
@@ -1327,7 +1327,7 @@ llvm::ConstantFoldCall(Function *F,
return 0;
}
- if (NumOperands == 2) {
+ if (Operands.size() == 2) {
if (ConstantFP *Op1 = dyn_cast<ConstantFP>(Operands[0])) {
if (!Ty->isFloatTy() && !Ty->isDoubleTy())
return 0;
diff --git a/lib/Analysis/DIBuilder.cpp b/lib/Analysis/DIBuilder.cpp
index ac5eeeb..bfa429d 100644
--- a/lib/Analysis/DIBuilder.cpp
+++ b/lib/Analysis/DIBuilder.cpp
@@ -29,14 +29,74 @@ static Constant *GetTagConstant(LLVMContext &VMContext, unsigned Tag) {
}
DIBuilder::DIBuilder(Module &m)
- : M(m), VMContext(M.getContext()), TheCU(0), DeclareFn(0), ValueFn(0) {}
+ : M(m), VMContext(M.getContext()), TheCU(0), TempEnumTypes(0),
+ TempRetainTypes(0), TempSubprograms(0), TempGVs(0), DeclareFn(0),
+ ValueFn(0)
+{}
+
+/// finalize - Construct any deferred debug info descriptors.
+void DIBuilder::finalize() {
+ DIArray Enums = getOrCreateArray(AllEnumTypes);
+ DIType(TempEnumTypes).replaceAllUsesWith(Enums);
+
+ DIArray RetainTypes = getOrCreateArray(AllRetainTypes);
+ DIType(TempRetainTypes).replaceAllUsesWith(RetainTypes);
+
+ DIArray SPs = getOrCreateArray(AllSubprograms);
+ DIType(TempSubprograms).replaceAllUsesWith(SPs);
+ for (unsigned i = 0, e = SPs.getNumElements(); i != e; ++i) {
+ DISubprogram SP(SPs.getElement(i));
+ if (NamedMDNode *NMD = getFnSpecificMDNode(M, SP)) {
+ SmallVector<Value *, 4> Variables;
+ for (unsigned ii = 0, ee = NMD->getNumOperands(); ii != ee; ++ii)
+ Variables.push_back(NMD->getOperand(ii));
+ if (MDNode *Temp = SP.getVariablesNodes()) {
+ DIArray AV = getOrCreateArray(Variables);
+ DIType(Temp).replaceAllUsesWith(AV);
+ }
+ NMD->eraseFromParent();
+ }
+ }
+
+ DIArray GVs = getOrCreateArray(AllGVs);
+ DIType(TempGVs).replaceAllUsesWith(GVs);
+}
+
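finalize() depends on the temporary-MDNode idiom: a placeholder node is created up front, referenced from the compile unit's holder nodes, and replaced once the real list is known. A reduced, hedged sketch of that idiom in isolation (function name assumed):

    #include "llvm/ADT/ArrayRef.h"
    #include "llvm/LLVMContext.h"
    #include "llvm/Metadata.h"

    using namespace llvm;

    // Illustrative helper: publish a metadata list whose contents are only
    // known later. Temporary nodes are not uniqued, so they may be RAUW'd.
    MDNode *buildListLater(LLVMContext &Ctx, ArrayRef<Value *> FinalElts) {
      MDNode *Temp = MDNode::getTemporary(Ctx, ArrayRef<Value *>());
      Value *HolderElts[] = { Temp };
      MDNode *Holder = MDNode::get(Ctx, HolderElts); // e.g. hangs off the CU
      (void)Holder;
      MDNode *Real = MDNode::get(Ctx, FinalElts);
      Temp->replaceAllUsesWith(Real);   // patch every reference to the stub
      MDNode::deleteTemporary(Temp);    // temporaries need manual deletion
      return Real;
    }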
+/// getNonCompileUnitScope - If N is a compile unit, return NULL; otherwise
+/// return N.
+static MDNode *getNonCompileUnitScope(MDNode *N) {
+ if (DIDescriptor(N).isCompileUnit())
+ return NULL;
+ return N;
+}
/// createCompileUnit - A CompileUnit provides an anchor for all debugging
/// information generated during this instance of compilation.
-void DIBuilder::createCompileUnit(unsigned Lang, StringRef Filename,
- StringRef Directory, StringRef Producer,
- bool isOptimized, StringRef Flags,
+void DIBuilder::createCompileUnit(unsigned Lang, StringRef Filename,
+ StringRef Directory, StringRef Producer,
+ bool isOptimized, StringRef Flags,
unsigned RunTimeVer) {
+ assert(Lang <= dwarf::DW_LANG_D && Lang >= dwarf::DW_LANG_C89 &&
+        "Invalid Language tag");
+ assert(!Filename.empty() &&
+        "Unable to create compile unit without filename");
+ Value *TElts[] = { GetTagConstant(VMContext, DW_TAG_base_type) };
+ TempEnumTypes = MDNode::getTemporary(VMContext, TElts);
+ Value *THElts[] = { TempEnumTypes };
+ MDNode *EnumHolder = MDNode::get(VMContext, THElts);
+
+ TempRetainTypes = MDNode::getTemporary(VMContext, TElts);
+ Value *TRElts[] = { TempRetainTypes };
+ MDNode *RetainHolder = MDNode::get(VMContext, TRElts);
+
+ TempSubprograms = MDNode::getTemporary(VMContext, TElts);
+ Value *TSElts[] = { TempSubprograms };
+ MDNode *SPHolder = MDNode::get(VMContext, TSElts);
+
+ TempGVs = MDNode::getTemporary(VMContext, TElts);
+ Value *TVElts[] = { TempGVs };
+ MDNode *GVHolder = MDNode::get(VMContext, TVElts);
+
Value *Elts[] = {
GetTagConstant(VMContext, dwarf::DW_TAG_compile_unit),
llvm::Constant::getNullValue(Type::getInt32Ty(VMContext)),
@@ -48,7 +108,11 @@ void DIBuilder::createCompileUnit(unsigned Lang, StringRef Filename,
ConstantInt::get(Type::getInt1Ty(VMContext), true), // isMain
ConstantInt::get(Type::getInt1Ty(VMContext), isOptimized),
MDString::get(VMContext, Flags),
- ConstantInt::get(Type::getInt32Ty(VMContext), RunTimeVer)
+ ConstantInt::get(Type::getInt32Ty(VMContext), RunTimeVer),
+ EnumHolder,
+ RetainHolder,
+ SPHolder,
+ GVHolder
};
TheCU = DICompileUnit(MDNode::get(VMContext, Elts));
@@ -61,17 +125,19 @@ void DIBuilder::createCompileUnit(unsigned Lang, StringRef Filename,
/// for a file.
DIFile DIBuilder::createFile(StringRef Filename, StringRef Directory) {
assert(TheCU && "Unable to create DW_TAG_file_type without CompileUnit");
+ assert(!Filename.empty() && "Unable to create file without name");
Value *Elts[] = {
GetTagConstant(VMContext, dwarf::DW_TAG_file_type),
MDString::get(VMContext, Filename),
MDString::get(VMContext, Directory),
- TheCU
+ NULL // TheCU
};
return DIFile(MDNode::get(VMContext, Elts));
}
/// createEnumerator - Create a single enumerator value.
DIEnumerator DIBuilder::createEnumerator(StringRef Name, uint64_t Val) {
+ assert(!Name.empty() && "Unable to create enumerator without name");
Value *Elts[] = {
GetTagConstant(VMContext, dwarf::DW_TAG_enumerator),
MDString::get(VMContext, Name),
@@ -80,16 +146,37 @@ DIEnumerator DIBuilder::createEnumerator(StringRef Name, uint64_t Val) {
return DIEnumerator(MDNode::get(VMContext, Elts));
}
-/// createBasicType - Create debugging information entry for a basic
+/// createNullPtrType - Create C++0x nullptr type.
+DIType DIBuilder::createNullPtrType(StringRef Name) {
+ assert(!Name.empty() && "Unable to create type without name");
+ // nullptr is encoded in DIBasicType format. Line number, filename,
+ // size, alignment, offset and flags are always empty here.
+ Value *Elts[] = {
+ GetTagConstant(VMContext, dwarf::DW_TAG_unspecified_type),
+ NULL, //TheCU,
+ MDString::get(VMContext, Name),
+ NULL, // Filename
+ ConstantInt::get(Type::getInt32Ty(VMContext), 0), // Line
+ ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Size
+ ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Align
+ ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Offset
+ ConstantInt::get(Type::getInt32Ty(VMContext), 0), // Flags;
+ ConstantInt::get(Type::getInt32Ty(VMContext), 0), // Encoding
+ };
+ return DIType(MDNode::get(VMContext, Elts));
+}
+
+/// createBasicType - Create debugging information entry for a basic
/// type, e.g. 'char'.
-DIType DIBuilder::createBasicType(StringRef Name, uint64_t SizeInBits,
+DIType DIBuilder::createBasicType(StringRef Name, uint64_t SizeInBits,
uint64_t AlignInBits,
unsigned Encoding) {
+ assert(!Name.empty() && "Unable to create type without name");
// Basic types are encoded in DIBasicType format. Line number, filename,
// offset and flags are always empty here.
Value *Elts[] = {
GetTagConstant(VMContext, dwarf::DW_TAG_base_type),
- TheCU,
+ NULL, //TheCU,
MDString::get(VMContext, Name),
NULL, // Filename
ConstantInt::get(Type::getInt32Ty(VMContext), 0), // Line
@@ -108,7 +195,7 @@ DIType DIBuilder::createQualifiedType(unsigned Tag, DIType FromTy) {
// Qualified types are encoded in DIDerivedType format.
Value *Elts[] = {
GetTagConstant(VMContext, Tag),
- TheCU,
+ NULL, //TheCU,
MDString::get(VMContext, StringRef()), // Empty name.
NULL, // Filename
ConstantInt::get(Type::getInt32Ty(VMContext), 0), // Line
@@ -127,7 +214,7 @@ DIType DIBuilder::createPointerType(DIType PointeeTy, uint64_t SizeInBits,
// Pointer types are encoded in DIDerivedType format.
Value *Elts[] = {
GetTagConstant(VMContext, dwarf::DW_TAG_pointer_type),
- TheCU,
+ NULL, //TheCU,
MDString::get(VMContext, Name),
NULL, // Filename
ConstantInt::get(Type::getInt32Ty(VMContext), 0), // Line
@@ -142,10 +229,11 @@ DIType DIBuilder::createPointerType(DIType PointeeTy, uint64_t SizeInBits,
/// createReferenceType - Create debugging information entry for a reference.
DIType DIBuilder::createReferenceType(DIType RTy) {
+ assert(RTy.Verify() && "Unable to create reference type");
// References are encoded in DIDerivedType format.
Value *Elts[] = {
GetTagConstant(VMContext, dwarf::DW_TAG_reference_type),
- TheCU,
+ NULL, // TheCU,
NULL, // Name
NULL, // Filename
ConstantInt::get(Type::getInt32Ty(VMContext), 0), // Line
@@ -165,7 +253,7 @@ DIType DIBuilder::createTypedef(DIType Ty, StringRef Name, DIFile File,
assert(Ty.Verify() && "Invalid typedef type!");
Value *Elts[] = {
GetTagConstant(VMContext, dwarf::DW_TAG_typedef),
- Context,
+ getNonCompileUnitScope(Context),
MDString::get(VMContext, Name),
File,
ConstantInt::get(Type::getInt32Ty(VMContext), LineNo),
@@ -199,9 +287,10 @@ DIType DIBuilder::createFriend(DIType Ty, DIType FriendTy) {
}
/// createInheritance - Create debugging information entry to establish
-/// inheritnace relationship between two types.
-DIType DIBuilder::createInheritance(DIType Ty, DIType BaseTy,
+/// inheritance relationship between two types.
+DIType DIBuilder::createInheritance(DIType Ty, DIType BaseTy,
uint64_t BaseOffset, unsigned Flags) {
+ assert(Ty.Verify() && "Unable to create inheritance");
// TAG_inheritance is encoded in DIDerivedType format.
Value *Elts[] = {
GetTagConstant(VMContext, dwarf::DW_TAG_inheritance),
@@ -219,15 +308,15 @@ DIType DIBuilder::createInheritance(DIType Ty, DIType BaseTy,
}
/// createMemberType - Create debugging information entry for a member.
-DIType DIBuilder::createMemberType(DIDescriptor Scope, StringRef Name,
- DIFile File, unsigned LineNumber,
+DIType DIBuilder::createMemberType(DIDescriptor Scope, StringRef Name,
+ DIFile File, unsigned LineNumber,
uint64_t SizeInBits, uint64_t AlignInBits,
- uint64_t OffsetInBits, unsigned Flags,
+ uint64_t OffsetInBits, unsigned Flags,
DIType Ty) {
// TAG_member is encoded in DIDerivedType format.
Value *Elts[] = {
GetTagConstant(VMContext, dwarf::DW_TAG_member),
- Scope,
+ getNonCompileUnitScope(Scope),
MDString::get(VMContext, Name),
File,
ConstantInt::get(Type::getInt32Ty(VMContext), LineNumber),
@@ -242,17 +331,17 @@ DIType DIBuilder::createMemberType(DIDescriptor Scope, StringRef Name,
/// createObjCIVar - Create debugging information entry for Objective-C
/// instance variable.
-DIType DIBuilder::createObjCIVar(StringRef Name,
- DIFile File, unsigned LineNumber,
+DIType DIBuilder::createObjCIVar(StringRef Name,
+ DIFile File, unsigned LineNumber,
uint64_t SizeInBits, uint64_t AlignInBits,
- uint64_t OffsetInBits, unsigned Flags,
+ uint64_t OffsetInBits, unsigned Flags,
DIType Ty, StringRef PropertyName,
StringRef GetterName, StringRef SetterName,
unsigned PropertyAttributes) {
// TAG_member is encoded in DIDerivedType format.
Value *Elts[] = {
GetTagConstant(VMContext, dwarf::DW_TAG_member),
- File, // Or TheCU ? Ty ?
+ getNonCompileUnitScope(File),
MDString::get(VMContext, Name),
File,
ConstantInt::get(Type::getInt32Ty(VMContext), LineNumber),
@@ -270,8 +359,8 @@ DIType DIBuilder::createObjCIVar(StringRef Name,
}
/// createClassType - Create debugging information entry for a class.
-DIType DIBuilder::createClassType(DIDescriptor Context, StringRef Name,
- DIFile File, unsigned LineNumber,
+DIType DIBuilder::createClassType(DIDescriptor Context, StringRef Name,
+ DIFile File, unsigned LineNumber,
uint64_t SizeInBits, uint64_t AlignInBits,
uint64_t OffsetInBits, unsigned Flags,
DIType DerivedFrom, DIArray Elements,
@@ -279,7 +368,7 @@ DIType DIBuilder::createClassType(DIDescriptor Context, StringRef Name,
// TAG_class_type is encoded in DICompositeType format.
Value *Elts[] = {
GetTagConstant(VMContext, dwarf::DW_TAG_class_type),
- Context,
+ getNonCompileUnitScope(Context),
MDString::get(VMContext, Name),
File,
ConstantInt::get(Type::getInt32Ty(VMContext), LineNumber),
@@ -298,13 +387,13 @@ DIType DIBuilder::createClassType(DIDescriptor Context, StringRef Name,
/// createTemplateTypeParameter - Create debugging information for template
/// type parameter.
-DITemplateTypeParameter
+DITemplateTypeParameter
DIBuilder::createTemplateTypeParameter(DIDescriptor Context, StringRef Name,
DIType Ty, MDNode *File, unsigned LineNo,
unsigned ColumnNo) {
Value *Elts[] = {
GetTagConstant(VMContext, dwarf::DW_TAG_template_type_parameter),
- Context,
+ getNonCompileUnitScope(Context),
MDString::get(VMContext, Name),
Ty,
File,
@@ -316,14 +405,14 @@ DIBuilder::createTemplateTypeParameter(DIDescriptor Context, StringRef Name,
/// createTemplateValueParameter - Create debugging information for template
/// value parameter.
-DITemplateValueParameter
+DITemplateValueParameter
DIBuilder::createTemplateValueParameter(DIDescriptor Context, StringRef Name,
DIType Ty, uint64_t Val,
MDNode *File, unsigned LineNo,
unsigned ColumnNo) {
Value *Elts[] = {
GetTagConstant(VMContext, dwarf::DW_TAG_template_value_parameter),
- Context,
+ getNonCompileUnitScope(Context),
MDString::get(VMContext, Name),
Ty,
ConstantInt::get(Type::getInt64Ty(VMContext), Val),
@@ -335,15 +424,15 @@ DIBuilder::createTemplateValueParameter(DIDescriptor Context, StringRef Name,
}
/// createStructType - Create debugging information entry for a struct.
-DIType DIBuilder::createStructType(DIDescriptor Context, StringRef Name,
- DIFile File, unsigned LineNumber,
+DIType DIBuilder::createStructType(DIDescriptor Context, StringRef Name,
+ DIFile File, unsigned LineNumber,
uint64_t SizeInBits, uint64_t AlignInBits,
- unsigned Flags, DIArray Elements,
+ unsigned Flags, DIArray Elements,
unsigned RunTimeLang) {
// TAG_structure_type is encoded in DICompositeType format.
Value *Elts[] = {
GetTagConstant(VMContext, dwarf::DW_TAG_structure_type),
- Context,
+ getNonCompileUnitScope(Context),
MDString::get(VMContext, Name),
File,
ConstantInt::get(Type::getInt32Ty(VMContext), LineNumber),
@@ -360,7 +449,7 @@ DIType DIBuilder::createStructType(DIDescriptor Context, StringRef Name,
}
/// createUnionType - Create debugging information entry for a union.
-DIType DIBuilder::createUnionType(DIDescriptor Scope, StringRef Name,
+DIType DIBuilder::createUnionType(DIDescriptor Scope, StringRef Name,
DIFile File,
unsigned LineNumber, uint64_t SizeInBits,
uint64_t AlignInBits, unsigned Flags,
@@ -368,7 +457,7 @@ DIType DIBuilder::createUnionType(DIDescriptor Scope, StringRef Name,
// TAG_union_type is encoded in DICompositeType format.
Value *Elts[] = {
GetTagConstant(VMContext, dwarf::DW_TAG_union_type),
- Scope,
+ getNonCompileUnitScope(Scope),
MDString::get(VMContext, Name),
File,
ConstantInt::get(Type::getInt32Ty(VMContext), LineNumber),
@@ -389,9 +478,9 @@ DIType DIBuilder::createSubroutineType(DIFile File, DIArray ParameterTypes) {
// TAG_subroutine_type is encoded in DICompositeType format.
Value *Elts[] = {
GetTagConstant(VMContext, dwarf::DW_TAG_subroutine_type),
- File,
+ llvm::Constant::getNullValue(Type::getInt32Ty(VMContext)),
MDString::get(VMContext, ""),
- File,
+ llvm::Constant::getNullValue(Type::getInt32Ty(VMContext)),
ConstantInt::get(Type::getInt32Ty(VMContext), 0),
ConstantInt::get(Type::getInt64Ty(VMContext), 0),
ConstantInt::get(Type::getInt64Ty(VMContext), 0),
@@ -405,16 +494,17 @@ DIType DIBuilder::createSubroutineType(DIFile File, DIArray ParameterTypes) {
return DIType(MDNode::get(VMContext, Elts));
}
-/// createEnumerationType - Create debugging information entry for an
+/// createEnumerationType - Create debugging information entry for an
/// enumeration.
-DIType DIBuilder::createEnumerationType(DIDescriptor Scope, StringRef Name,
- DIFile File, unsigned LineNumber,
- uint64_t SizeInBits,
- uint64_t AlignInBits, DIArray Elements) {
+DIType DIBuilder::createEnumerationType(DIDescriptor Scope, StringRef Name,
+ DIFile File, unsigned LineNumber,
+ uint64_t SizeInBits,
+ uint64_t AlignInBits,
+ DIArray Elements) {
// TAG_enumeration_type is encoded in DICompositeType format.
Value *Elts[] = {
GetTagConstant(VMContext, dwarf::DW_TAG_enumeration_type),
- Scope,
+ getNonCompileUnitScope(Scope),
MDString::get(VMContext, Name),
File,
ConstantInt::get(Type::getInt32Ty(VMContext), LineNumber),
@@ -428,20 +518,19 @@ DIType DIBuilder::createEnumerationType(DIDescriptor Scope, StringRef Name,
llvm::Constant::getNullValue(Type::getInt32Ty(VMContext)),
};
MDNode *Node = MDNode::get(VMContext, Elts);
- NamedMDNode *NMD = M.getOrInsertNamedMetadata("llvm.dbg.enum");
- NMD->addOperand(Node);
+ AllEnumTypes.push_back(Node);
return DIType(Node);
}
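A minimal usage sketch of the path above; illustrative only, with hypothetical names and values, and DIB assumed to be a DIBuilder whose createCompileUnit has already run:

  DIFile File = DIB.createFile("color.c", "/src");
  Value *Enums[] = {
    DIB.createEnumerator("Red", 0),    // DIEnumerator converts to Value*
    DIB.createEnumerator("Green", 1)
  };
  DIType ColorTy = DIB.createEnumerationType(
      File, "Color", File, /*LineNumber=*/1,
      /*SizeInBits=*/32, /*AlignInBits=*/32, DIB.getOrCreateArray(Enums));
  // With this patch the node lands in AllEnumTypes, presumably to be
  // attached to the compile unit by finalize() instead of llvm.dbg.enum.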
/// createArrayType - Create debugging information entry for an array.
-DIType DIBuilder::createArrayType(uint64_t Size, uint64_t AlignInBits,
+DIType DIBuilder::createArrayType(uint64_t Size, uint64_t AlignInBits,
DIType Ty, DIArray Subscripts) {
// TAG_array_type is encoded in DICompositeType format.
Value *Elts[] = {
GetTagConstant(VMContext, dwarf::DW_TAG_array_type),
- TheCU,
+ NULL, //TheCU,
MDString::get(VMContext, ""),
- TheCU,
+ NULL, //TheCU,
ConstantInt::get(Type::getInt32Ty(VMContext), 0),
ConstantInt::get(Type::getInt64Ty(VMContext), Size),
ConstantInt::get(Type::getInt64Ty(VMContext), AlignInBits),
@@ -456,14 +545,14 @@ DIType DIBuilder::createArrayType(uint64_t Size, uint64_t AlignInBits,
}
/// createVectorType - Create debugging information entry for a vector.
-DIType DIBuilder::createVectorType(uint64_t Size, uint64_t AlignInBits,
+DIType DIBuilder::createVectorType(uint64_t Size, uint64_t AlignInBits,
DIType Ty, DIArray Subscripts) {
// TAG_vector_type is encoded in DICompositeType format.
Value *Elts[] = {
GetTagConstant(VMContext, dwarf::DW_TAG_vector_type),
- TheCU,
+ NULL, //TheCU,
MDString::get(VMContext, ""),
- TheCU,
+ NULL, //TheCU,
ConstantInt::get(Type::getInt32Ty(VMContext), 0),
ConstantInt::get(Type::getInt64Ty(VMContext), Size),
ConstantInt::get(Type::getInt64Ty(VMContext), AlignInBits),
@@ -501,18 +590,17 @@ DIType DIBuilder::createArtificialType(DIType Ty) {
return DIType(MDNode::get(VMContext, Elts));
}
-/// retainType - Retain DIType in a module even if it is not referenced
+/// retainType - Retain DIType in a module even if it is not referenced
/// through debug info anchors.
void DIBuilder::retainType(DIType T) {
- NamedMDNode *NMD = M.getOrInsertNamedMetadata("llvm.dbg.ty");
- NMD->addOperand(T);
+ AllRetainTypes.push_back(T);
}
/// createUnspecifiedParameter - Create an unspecified parameter descriptor
/// for the subroutine type.
DIDescriptor DIBuilder::createUnspecifiedParameter() {
- Value *Elts[] = {
- GetTagConstant(VMContext, dwarf::DW_TAG_unspecified_parameters)
+ Value *Elts[] = {
+ GetTagConstant(VMContext, dwarf::DW_TAG_unspecified_parameters)
};
return DIDescriptor(MDNode::get(VMContext, Elts));
}
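As a hedged sketch, the node pairs with createSubroutineType to model varargs; IntTy, CharPtrTy, File and DIB are assumed to exist from earlier calls:

  Value *Params[] = {
    IntTy,                              // first slot is the return type
    CharPtrTy,                          // const char* argument
    DIB.createUnspecifiedParameter()    // the trailing "..."
  };
  DIType PrintfTy =
      DIB.createSubroutineType(File, DIB.getOrCreateArray(Params));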
@@ -532,7 +620,7 @@ DIType DIBuilder::createTemporaryType(DIFile F) {
// use here as long as DIType accepts it.
Value *Elts[] = {
GetTagConstant(VMContext, DW_TAG_base_type),
- F.getCompileUnit(),
+ TheCU,
NULL,
F
};
@@ -563,12 +651,12 @@ DISubrange DIBuilder::getOrCreateSubrange(int64_t Lo, int64_t Hi) {
/// createGlobalVariable - Create a new descriptor for the specified global.
DIGlobalVariable DIBuilder::
-createGlobalVariable(StringRef Name, DIFile F, unsigned LineNumber,
+createGlobalVariable(StringRef Name, DIFile F, unsigned LineNumber,
DIType Ty, bool isLocalToUnit, llvm::Value *Val) {
Value *Elts[] = {
GetTagConstant(VMContext, dwarf::DW_TAG_variable),
llvm::Constant::getNullValue(Type::getInt32Ty(VMContext)),
- TheCU,
+ NULL, // TheCU,
MDString::get(VMContext, Name),
MDString::get(VMContext, Name),
MDString::get(VMContext, Name),
@@ -580,22 +668,20 @@ createGlobalVariable(StringRef Name, DIFile F, unsigned LineNumber,
Val
};
MDNode *Node = MDNode::get(VMContext, Elts);
- // Create a named metadata so that we do not lose this mdnode.
- NamedMDNode *NMD = M.getOrInsertNamedMetadata("llvm.dbg.gv");
- NMD->addOperand(Node);
+ AllGVs.push_back(Node);
return DIGlobalVariable(Node);
}
/// createStaticVariable - Create a new descriptor for the specified static
/// variable.
DIGlobalVariable DIBuilder::
-createStaticVariable(DIDescriptor Context, StringRef Name,
- StringRef LinkageName, DIFile F, unsigned LineNumber,
+createStaticVariable(DIDescriptor Context, StringRef Name,
+ StringRef LinkageName, DIFile F, unsigned LineNumber,
DIType Ty, bool isLocalToUnit, llvm::Value *Val) {
Value *Elts[] = {
GetTagConstant(VMContext, dwarf::DW_TAG_variable),
llvm::Constant::getNullValue(Type::getInt32Ty(VMContext)),
- Context,
+ getNonCompileUnitScope(Context),
MDString::get(VMContext, Name),
MDString::get(VMContext, Name),
MDString::get(VMContext, LinkageName),
@@ -607,26 +693,25 @@ createStaticVariable(DIDescriptor Context, StringRef Name,
Val
};
MDNode *Node = MDNode::get(VMContext, Elts);
- // Create a named metadata so that we do not lose this mdnode.
- NamedMDNode *NMD = M.getOrInsertNamedMetadata("llvm.dbg.gv");
- NMD->addOperand(Node);
+ AllGVs.push_back(Node);
return DIGlobalVariable(Node);
}
/// createVariable - Create a new descriptor for the specified variable.
DIVariable DIBuilder::createLocalVariable(unsigned Tag, DIDescriptor Scope,
StringRef Name, DIFile File,
- unsigned LineNo, DIType Ty,
+ unsigned LineNo, DIType Ty,
bool AlwaysPreserve, unsigned Flags,
unsigned ArgNo) {
Value *Elts[] = {
GetTagConstant(VMContext, Tag),
- Scope,
+ getNonCompileUnitScope(Scope),
MDString::get(VMContext, Name),
File,
ConstantInt::get(Type::getInt32Ty(VMContext), (LineNo | (ArgNo << 24))),
Ty,
- ConstantInt::get(Type::getInt32Ty(VMContext), Flags)
+ ConstantInt::get(Type::getInt32Ty(VMContext), Flags),
+ Constant::getNullValue(Type::getInt32Ty(VMContext)),
};
MDNode *Node = MDNode::get(VMContext, Elts);
if (AlwaysPreserve) {
@@ -634,13 +719,7 @@ DIVariable DIBuilder::createLocalVariable(unsigned Tag, DIDescriptor Scope,
// to preserve variable info in such a situation, then stash it in a
// named mdnode.
DISubprogram Fn(getDISubprogram(Scope));
- StringRef FName = "fn";
- if (Fn.getFunction())
- FName = Fn.getFunction()->getName();
- char One = '\1';
- if (FName.startswith(StringRef(&One, 1)))
- FName = FName.substr(1);
- NamedMDNode *FnLocals = getOrInsertFnSpecificMDNode(M, FName);
+ NamedMDNode *FnLocals = getOrInsertFnSpecificMDNode(M, Fn);
FnLocals->addOperand(Node);
}
return DIVariable(Node);
@@ -655,12 +734,14 @@ DIVariable DIBuilder::createComplexVariable(unsigned Tag, DIDescriptor Scope,
unsigned ArgNo) {
SmallVector<Value *, 15> Elts;
Elts.push_back(GetTagConstant(VMContext, Tag));
- Elts.push_back(Scope);
+ Elts.push_back(getNonCompileUnitScope(Scope));
Elts.push_back(MDString::get(VMContext, Name));
Elts.push_back(F);
- Elts.push_back(ConstantInt::get(Type::getInt32Ty(VMContext), (LineNo | (ArgNo << 24))));
+ Elts.push_back(ConstantInt::get(Type::getInt32Ty(VMContext),
+ (LineNo | (ArgNo << 24))));
Elts.push_back(Ty);
Elts.push_back(llvm::Constant::getNullValue(Type::getInt32Ty(VMContext)));
+ Elts.push_back(llvm::Constant::getNullValue(Type::getInt32Ty(VMContext)));
Elts.append(Addr.begin(), Addr.end());
return DIVariable(MDNode::get(VMContext, Elts));
@@ -677,10 +758,15 @@ DISubprogram DIBuilder::createFunction(DIDescriptor Context,
Function *Fn,
MDNode *TParams,
MDNode *Decl) {
+ Value *TElts[] = { GetTagConstant(VMContext, DW_TAG_base_type) };
+ MDNode *Temp = MDNode::getTemporary(VMContext, TElts);
+ Value *TVElts[] = { Temp };
+ MDNode *THolder = MDNode::get(VMContext, TVElts);
+
Value *Elts[] = {
GetTagConstant(VMContext, dwarf::DW_TAG_subprogram),
llvm::Constant::getNullValue(Type::getInt32Ty(VMContext)),
- Context,
+ getNonCompileUnitScope(Context),
MDString::get(VMContext, Name),
MDString::get(VMContext, Name),
MDString::get(VMContext, LinkageName),
@@ -696,13 +782,13 @@ DISubprogram DIBuilder::createFunction(DIDescriptor Context,
ConstantInt::get(Type::getInt1Ty(VMContext), isOptimized),
Fn,
TParams,
- Decl
+ Decl,
+ THolder
};
MDNode *Node = MDNode::get(VMContext, Elts);
// Stash the subprogram so that we do not lose this mdnode.
- NamedMDNode *NMD = M.getOrInsertNamedMetadata("llvm.dbg.sp");
- NMD->addOperand(Node);
+ AllSubprograms.push_back(Node);
return DISubprogram(Node);
}
@@ -720,10 +806,15 @@ DISubprogram DIBuilder::createMethod(DIDescriptor Context,
bool isOptimized,
Function *Fn,
MDNode *TParam) {
+ Value *TElts[] = { GetTagConstant(VMContext, DW_TAG_base_type) };
+ MDNode *Temp = MDNode::getTemporary(VMContext, TElts);
+ Value *TVElts[] = { Temp };
+ MDNode *THolder = MDNode::get(VMContext, TVElts);
+
Value *Elts[] = {
GetTagConstant(VMContext, dwarf::DW_TAG_subprogram),
llvm::Constant::getNullValue(Type::getInt32Ty(VMContext)),
- Context,
+ getNonCompileUnitScope(Context),
MDString::get(VMContext, Name),
MDString::get(VMContext, Name),
MDString::get(VMContext, LinkageName),
@@ -739,12 +830,10 @@ DISubprogram DIBuilder::createMethod(DIDescriptor Context,
ConstantInt::get(Type::getInt1Ty(VMContext), isOptimized),
Fn,
TParam,
+ llvm::Constant::getNullValue(Type::getInt32Ty(VMContext)),
+ THolder
};
MDNode *Node = MDNode::get(VMContext, Elts);
-
- // Create a named metadata so that we do not lose this mdnode.
- NamedMDNode *NMD = M.getOrInsertNamedMetadata("llvm.dbg.sp");
- NMD->addOperand(Node);
return DISubprogram(Node);
}
@@ -754,7 +843,7 @@ DINameSpace DIBuilder::createNameSpace(DIDescriptor Scope, StringRef Name,
DIFile File, unsigned LineNo) {
Value *Elts[] = {
GetTagConstant(VMContext, dwarf::DW_TAG_namespace),
- Scope,
+ getNonCompileUnitScope(Scope),
MDString::get(VMContext, Name),
File,
ConstantInt::get(Type::getInt32Ty(VMContext), LineNo)
@@ -762,13 +851,25 @@ DINameSpace DIBuilder::createNameSpace(DIDescriptor Scope, StringRef Name,
return DINameSpace(MDNode::get(VMContext, Elts));
}
+/// createLexicalBlockFile - This creates a new MDNode that encapsulates
+/// an existing scope with a new filename.
+DILexicalBlockFile DIBuilder::createLexicalBlockFile(DIDescriptor Scope,
+ DIFile File) {
+ Value *Elts[] = {
+ GetTagConstant(VMContext, dwarf::DW_TAG_lexical_block),
+ Scope,
+ File
+ };
+ return DILexicalBlockFile(MDNode::get(VMContext, Elts));
+}
+
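A sketch of the intended use, with hypothetical names; CurrentScope is assumed to be a DIDescriptor for the enclosing block or subprogram:

  // Code textually pulled in from another file keeps its own file name.
  DIFile IncludedFile = DIB.createFile("impl.inc", "/src");
  DILexicalBlockFile FileScope =
      DIB.createLexicalBlockFile(CurrentScope, IncludedFile);
  // The node reuses DW_TAG_lexical_block but carries exactly 3 operands,
  // the shape DIDescriptor::isLexicalBlockFile() tests for further down.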
DILexicalBlock DIBuilder::createLexicalBlock(DIDescriptor Scope, DIFile File,
unsigned Line, unsigned Col) {
// Defeat MDNode uniquing for lexical blocks by using a unique id.
static unsigned int unique_id = 0;
Value *Elts[] = {
GetTagConstant(VMContext, dwarf::DW_TAG_lexical_block),
- Scope,
+ getNonCompileUnitScope(Scope),
ConstantInt::get(Type::getInt32Ty(VMContext), Line),
ConstantInt::get(Type::getInt32Ty(VMContext), Col),
File,
@@ -836,4 +937,3 @@ Instruction *DIBuilder::insertDbgValueIntrinsic(Value *V, uint64_t Offset,
VarInfo };
return CallInst::Create(ValueFn, Args, "", InsertAtEnd);
}
-
diff --git a/lib/Analysis/DbgInfoPrinter.cpp b/lib/Analysis/DbgInfoPrinter.cpp
index b23c351..cd832ab 100644
--- a/lib/Analysis/DbgInfoPrinter.cpp
+++ b/lib/Analysis/DbgInfoPrinter.cpp
@@ -171,7 +171,7 @@ static bool getLocationInfo(const Value *V, std::string &DisplayName,
void PrintDbgInfo::printVariableDeclaration(const Value *V) {
std::string DisplayName, File, Directory, Type;
- unsigned LineNo;
+ unsigned LineNo = 0;
if (!getLocationInfo(V, DisplayName, Type, LineNo, File, Directory))
return;
diff --git a/lib/Analysis/DebugInfo.cpp b/lib/Analysis/DebugInfo.cpp
index b42e946..44457d3 100644
--- a/lib/Analysis/DebugInfo.cpp
+++ b/lib/Analysis/DebugInfo.cpp
@@ -39,6 +39,9 @@ DIDescriptor::DIDescriptor(const DIFile F) : DbgNode(F.DbgNode) {
DIDescriptor::DIDescriptor(const DISubprogram F) : DbgNode(F.DbgNode) {
}
+DIDescriptor::DIDescriptor(const DILexicalBlockFile F) : DbgNode(F.DbgNode) {
+}
+
DIDescriptor::DIDescriptor(const DILexicalBlock F) : DbgNode(F.DbgNode) {
}
@@ -111,9 +114,17 @@ Function *DIDescriptor::getFunctionField(unsigned Elt) const {
unsigned DIVariable::getNumAddrElements() const {
if (getVersion() <= llvm::LLVMDebugVersion8)
return DbgNode->getNumOperands()-6;
- return DbgNode->getNumOperands()-7;
+ if (getVersion() == llvm::LLVMDebugVersion9)
+ return DbgNode->getNumOperands()-7;
+ return DbgNode->getNumOperands()-8;
}
+/// getInlinedAt - If this variable is inlined then return inline location.
+MDNode *DIVariable::getInlinedAt() const {
+ if (getVersion() <= llvm::LLVMDebugVersion9)
+ return NULL;
+ return dyn_cast_or_null<MDNode>(DbgNode->getOperand(7));
+}
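For orientation, the version-10 DIVariable operand layout implied by these two accessors appears to be (field names descriptive, not from the source):

  // !{ tag, scope, name, file, line|(arg<<24), type, flags,
  //    inlinedAt, addr-element 0, addr-element 1, ... }
  // so getNumAddrElements() subtracts 8 and getInlinedAt() reads operand 7.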
//===----------------------------------------------------------------------===//
// Predicates
@@ -122,7 +133,14 @@ unsigned DIVariable::getNumAddrElements() const {
/// isBasicType - Return true if the specified tag is legal for
/// DIBasicType.
bool DIDescriptor::isBasicType() const {
- return DbgNode && getTag() == dwarf::DW_TAG_base_type;
+ if (!DbgNode) return false;
+ switch (getTag()) {
+ case dwarf::DW_TAG_base_type:
+ case dwarf::DW_TAG_unspecified_type:
+ return true;
+ default:
+ return false;
+ }
}
/// isDerivedType - Return true if the specified tag is legal for DIDerivedType.
@@ -248,9 +266,17 @@ bool DIDescriptor::isNameSpace() const {
return DbgNode && getTag() == dwarf::DW_TAG_namespace;
}
+/// isLexicalBlockFile - Return true if the specified descriptor is a
+/// lexical block with an extra file.
+bool DIDescriptor::isLexicalBlockFile() const {
+ return DbgNode && getTag() == dwarf::DW_TAG_lexical_block &&
+ (DbgNode->getNumOperands() == 3);
+}
+
/// isLexicalBlock - Return true if the specified tag is DW_TAG_lexical_block.
bool DIDescriptor::isLexicalBlock() const {
- return DbgNode && getTag() == dwarf::DW_TAG_lexical_block;
+ return DbgNode && getTag() == dwarf::DW_TAG_lexical_block &&
+ (DbgNode->getNumOperands() > 3);
}
/// isSubrange - Return true if the specified tag is DW_TAG_subrange_type.
@@ -320,6 +346,22 @@ void DIType::replaceAllUsesWith(MDNode *D) {
}
}
+/// isUnsignedDIType - Return true if type encoding is unsigned.
+bool DIType::isUnsignedDIType() {
+ DIDerivedType DTy(DbgNode);
+ if (DTy.Verify())
+ return DTy.getTypeDerivedFrom().isUnsignedDIType();
+
+ DIBasicType BTy(DbgNode);
+ if (BTy.Verify()) {
+ unsigned Encoding = BTy.getEncoding();
+ if (Encoding == dwarf::DW_ATE_unsigned ||
+ Encoding == dwarf::DW_ATE_unsigned_char)
+ return true;
+ }
+ return false;
+}
+
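A small sketch of the predicate on basic types; DIB and the bit sizes are hypothetical, and derived types simply recurse until a basic type is reached:

  DIType UIntTy = DIB.createBasicType("unsigned int", 32, 32,
                                      dwarf::DW_ATE_unsigned);
  DIType IntTy  = DIB.createBasicType("int", 32, 32,
                                      dwarf::DW_ATE_signed);
  assert(UIntTy.isUnsignedDIType());   // DW_ATE_unsigned -> true
  assert(!IntTy.isUnsignedDIType());   // DW_ATE_signed   -> false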
/// Verify - Verify that a compile unit is well formed.
bool DICompileUnit::Verify() const {
if (!DbgNode)
@@ -335,7 +377,7 @@ bool DICompileUnit::Verify() const {
bool DIType::Verify() const {
if (!DbgNode)
return false;
- if (!getContext().Verify())
+ if (getContext() && !getContext().Verify())
return false;
unsigned Tag = getTag();
if (!isBasicType() && Tag != dwarf::DW_TAG_const_type &&
@@ -343,6 +385,7 @@ bool DIType::Verify() const {
Tag != dwarf::DW_TAG_reference_type && Tag != dwarf::DW_TAG_restrict_type
&& Tag != dwarf::DW_TAG_vector_type && Tag != dwarf::DW_TAG_array_type
&& Tag != dwarf::DW_TAG_enumeration_type
+ && Tag != dwarf::DW_TAG_subroutine_type
&& getFilename().empty())
return false;
return true;
@@ -362,12 +405,9 @@ bool DIDerivedType::Verify() const {
bool DICompositeType::Verify() const {
if (!DbgNode)
return false;
- if (!getContext().Verify())
+ if (getContext() && !getContext().Verify())
return false;
- DICompileUnit CU = getCompileUnit();
- if (!CU.Verify())
- return false;
return true;
}
@@ -376,11 +416,7 @@ bool DISubprogram::Verify() const {
if (!DbgNode)
return false;
- if (!getContext().Verify())
- return false;
-
- DICompileUnit CU = getCompileUnit();
- if (!CU.Verify())
+ if (getContext() && !getContext().Verify())
return false;
DICompositeType Ty = getType();
@@ -397,11 +433,7 @@ bool DIGlobalVariable::Verify() const {
if (getDisplayName().empty())
return false;
- if (!getContext().Verify())
- return false;
-
- DICompileUnit CU = getCompileUnit();
- if (!CU.Verify())
+ if (getContext() && !getContext().Verify())
return false;
DIType Ty = getType();
@@ -419,10 +451,7 @@ bool DIVariable::Verify() const {
if (!DbgNode)
return false;
- if (!getContext().Verify())
- return false;
-
- if (!getCompileUnit().Verify())
+ if (getContext() && !getContext().Verify())
return false;
DIType Ty = getType();
@@ -446,8 +475,6 @@ bool DINameSpace::Verify() const {
return false;
if (getName().empty())
return false;
- if (!getCompileUnit().Verify())
- return false;
return true;
}
@@ -504,9 +531,28 @@ unsigned DISubprogram::isOptimized() const {
return 0;
}
+MDNode *DISubprogram::getVariablesNodes() const {
+ if (!DbgNode || DbgNode->getNumOperands() <= 19)
+ return NULL;
+ if (MDNode *Temp = dyn_cast_or_null<MDNode>(DbgNode->getOperand(19)))
+ return dyn_cast_or_null<MDNode>(Temp->getOperand(0));
+ return NULL;
+}
+
+DIArray DISubprogram::getVariables() const {
+ if (!DbgNode || DbgNode->getNumOperands() <= 19)
+ return DIArray();
+ if (MDNode *T = dyn_cast_or_null<MDNode>(DbgNode->getOperand(19)))
+ if (MDNode *A = dyn_cast_or_null<MDNode>(T->getOperand(0)))
+ return DIArray(A);
+ return DIArray();
+}
+
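A sketch of a consumer loop; SP is assumed to be a DISubprogram built by a current-version DIBuilder, and dbgs() to be in scope via llvm/Support/Debug.h:

  DIArray Vars = SP.getVariables();
  for (unsigned i = 0, e = Vars.getNumElements(); i != e; ++i) {
    DIVariable DV(Vars.getElement(i));
    if (DV.Verify())
      dbgs() << DV.getName() << '\n';   // each local or argument variable
  }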
StringRef DIScope::getFilename() const {
if (!DbgNode)
return StringRef();
+ if (isLexicalBlockFile())
+ return DILexicalBlockFile(DbgNode).getFilename();
if (isLexicalBlock())
return DILexicalBlock(DbgNode).getFilename();
if (isSubprogram())
@@ -526,6 +572,8 @@ StringRef DIScope::getFilename() const {
StringRef DIScope::getDirectory() const {
if (!DbgNode)
return StringRef();
+ if (isLexicalBlockFile())
+ return DILexicalBlockFile(DbgNode).getDirectory();
if (isLexicalBlock())
return DILexicalBlock(DbgNode).getDirectory();
if (isSubprogram())
@@ -542,6 +590,47 @@ StringRef DIScope::getDirectory() const {
return StringRef();
}
+DIArray DICompileUnit::getEnumTypes() const {
+ if (!DbgNode || DbgNode->getNumOperands() < 14)
+ return DIArray();
+
+ if (MDNode *N = dyn_cast_or_null<MDNode>(DbgNode->getOperand(10)))
+ if (MDNode *A = dyn_cast_or_null<MDNode>(N->getOperand(0)))
+ return DIArray(A);
+ return DIArray();
+}
+
+DIArray DICompileUnit::getRetainedTypes() const {
+ if (!DbgNode || DbgNode->getNumOperands() < 14)
+ return DIArray();
+
+ if (MDNode *N = dyn_cast_or_null<MDNode>(DbgNode->getOperand(11)))
+ if (MDNode *A = dyn_cast_or_null<MDNode>(N->getOperand(0)))
+ return DIArray(A);
+ return DIArray();
+}
+
+DIArray DICompileUnit::getSubprograms() const {
+ if (!DbgNode || DbgNode->getNumOperands() < 14)
+ return DIArray();
+
+ if (MDNode *N = dyn_cast_or_null<MDNode>(DbgNode->getOperand(12)))
+ if (MDNode *A = dyn_cast_or_null<MDNode>(N->getOperand(0)))
+ return DIArray(A);
+ return DIArray();
+}
+
+
+DIArray DICompileUnit::getGlobalVariables() const {
+ if (!DbgNode || DbgNode->getNumOperands() < 14)
+ return DIArray();
+
+ if (MDNode *N = dyn_cast_or_null<MDNode>(DbgNode->getOperand(13)))
+ if (MDNode *A = dyn_cast_or_null<MDNode>(N->getOperand(0)))
+ return DIArray(A);
+ return DIArray();
+}
+
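Taken together, these accessors let a pass start from the single llvm.dbg.cu anchor rather than the old llvm.dbg.sp/gv/enum nodes; a hedged sketch, with M an llvm::Module:

  if (NamedMDNode *CUs = M.getNamedMetadata("llvm.dbg.cu"))
    for (unsigned i = 0, e = CUs->getNumOperands(); i != e; ++i) {
      DICompileUnit CU(CUs->getOperand(i));
      dbgs() << CU.getFilename() << ": "
             << CU.getSubprograms().getNumElements() << " subprograms, "
             << CU.getGlobalVariables().getNumElements() << " globals\n";
    }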
//===----------------------------------------------------------------------===//
// DIDescriptor: dump routines for all descriptors.
//===----------------------------------------------------------------------===//
@@ -573,7 +662,6 @@ void DIType::print(raw_ostream &OS) const {
OS << " [" << dwarf::TagString(Tag) << "] ";
// TODO : Print context
- getCompileUnit().print(OS);
OS << " ["
<< "line " << getLineNumber() << ", "
<< getSizeInBits() << " bits, "
@@ -629,7 +717,6 @@ void DISubprogram::print(raw_ostream &OS) const {
OS << " [" << dwarf::TagString(Tag) << "] ";
// TODO : Print context
- getCompileUnit().print(OS);
OS << " [" << getLineNumber() << "] ";
if (isLocalToUnit())
@@ -652,7 +739,6 @@ void DIGlobalVariable::print(raw_ostream &OS) const {
OS << " [" << dwarf::TagString(Tag) << "] ";
// TODO : Print context
- getCompileUnit().print(OS);
OS << " [" << getLineNumber() << "] ";
if (isLocalToUnit())
@@ -666,13 +752,48 @@ void DIGlobalVariable::print(raw_ostream &OS) const {
OS << "]\n";
}
+static void printDebugLoc(DebugLoc DL, raw_ostream &CommentOS,
+ const LLVMContext &Ctx) {
+ if (!DL.isUnknown()) { // Print source line info.
+ DIScope Scope(DL.getScope(Ctx));
+ // Omit the directory, because it's likely to be long and uninteresting.
+ if (Scope.Verify())
+ CommentOS << Scope.getFilename();
+ else
+ CommentOS << "<unknown>";
+ CommentOS << ':' << DL.getLine();
+ if (DL.getCol() != 0)
+ CommentOS << ':' << DL.getCol();
+ DebugLoc InlinedAtDL = DebugLoc::getFromDILocation(DL.getInlinedAt(Ctx));
+ if (!InlinedAtDL.isUnknown()) {
+ CommentOS << " @[ ";
+ printDebugLoc(InlinedAtDL, CommentOS, Ctx);
+ CommentOS << " ]";
+ }
+ }
+}
+
+void DIVariable::printExtendedName(raw_ostream &OS) const {
+ const LLVMContext &Ctx = DbgNode->getContext();
+ StringRef Res = getName();
+ if (!Res.empty())
+ OS << Res << "," << getLineNumber();
+ if (MDNode *InlinedAt = getInlinedAt()) {
+ DebugLoc InlinedAtDL = DebugLoc::getFromDILocation(InlinedAt);
+ if (!InlinedAtDL.isUnknown()) {
+ OS << " @[";
+ printDebugLoc(InlinedAtDL, OS, Ctx);
+ OS << "]";
+ }
+ }
+}
+
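Sketch of the emitted form; the variable and locations are hypothetical, and DV is assumed to be a DIVariable with an inlined-at location:

  std::string Buf;
  raw_string_ostream OS(Buf);
  DV.printExtendedName(OS);            // e.g. "x,4 @[ bar.c:10:3 ]"
  dbgs() << OS.str() << '\n';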
/// print - Print variable.
void DIVariable::print(raw_ostream &OS) const {
StringRef Res = getName();
if (!Res.empty())
OS << " [" << Res << "] ";
- getCompileUnit().print(OS);
OS << " [" << getLineNumber() << "] ";
getType().print(OS);
OS << "\n";
@@ -744,22 +865,61 @@ static void fixupObjcLikeName(StringRef Str, SmallVectorImpl<char> &Out) {
/// getFnSpecificMDNode - Return a NamedMDNode, if available, that is
/// suitable to hold function specific information.
-NamedMDNode *llvm::getFnSpecificMDNode(const Module &M, StringRef FuncName) {
+NamedMDNode *llvm::getFnSpecificMDNode(const Module &M, DISubprogram Fn) {
SmallString<32> Name = StringRef("llvm.dbg.lv.");
- fixupObjcLikeName(FuncName, Name);
-
+ StringRef FName = "fn";
+ if (Fn.getFunction())
+ FName = Fn.getFunction()->getName();
+ else
+ FName = Fn.getName();
+ char One = '\1';
+ if (FName.startswith(StringRef(&One, 1)))
+ FName = FName.substr(1);
+ fixupObjcLikeName(FName, Name);
return M.getNamedMetadata(Name.str());
}
/// getOrInsertFnSpecificMDNode - Return a NamedMDNode that is suitable
/// to hold function specific information.
-NamedMDNode *llvm::getOrInsertFnSpecificMDNode(Module &M, StringRef FuncName) {
+NamedMDNode *llvm::getOrInsertFnSpecificMDNode(Module &M, DISubprogram Fn) {
SmallString<32> Name = StringRef("llvm.dbg.lv.");
- fixupObjcLikeName(FuncName, Name);
-
+ StringRef FName = "fn";
+ if (Fn.getFunction())
+ FName = Fn.getFunction()->getName();
+ else
+ FName = Fn.getName();
+ char One = '\1';
+ if (FName.startswith(StringRef(&One, 1)))
+ FName = FName.substr(1);
+ fixupObjcLikeName(FName, Name);
+
return M.getOrInsertNamedMetadata(Name.str());
}
+/// createInlinedVariable - Create a new inlined variable based on the
+/// current variable.
+/// @param DV           Current variable.
+/// @param InlinedScope Location at which the current variable is inlined.
+DIVariable llvm::createInlinedVariable(MDNode *DV, MDNode *InlinedScope,
+ LLVMContext &VMContext) {
+ SmallVector<Value *, 16> Elts;
+ // Replace operand 7 with the inlined scope.
+ for (unsigned i = 0, e = DV->getNumOperands(); i != e; ++i)
+ i == 7 ? Elts.push_back(InlinedScope) :
+ Elts.push_back(DV->getOperand(i));
+ return DIVariable(MDNode::get(VMContext, Elts));
+}
+
+/// cleanseInlinedVariable - Remove inlined scope from the variable.
+DIVariable llvm::cleanseInlinedVariable(MDNode *DV, LLVMContext &VMContext) {
+ SmallVector<Value *, 16> Elts;
+ // Replace operand 7 (the inlined scope) with a null value.
+ for (unsigned i = 0, e = DV->getNumOperands(); i != e; ++i)
+ i == 7 ?
+ Elts.push_back(llvm::Constant::getNullValue(Type::getInt32Ty(VMContext))):
+ Elts.push_back(DV->getOperand(i));
+ return DIVariable(MDNode::get(VMContext, Elts));
+}
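A round-trip sketch; DV is assumed to be the MDNode of an existing current-version DIVariable, InlinedAt an inline-location MDNode, and Ctx the owning LLVMContext:

  DIVariable Inlined = createInlinedVariable(DV, InlinedAt, Ctx);
  assert(Inlined.getInlinedAt() == InlinedAt);   // operand 7 now set
  DIVariable Plain = cleanseInlinedVariable(Inlined, Ctx);
  assert(Plain.getInlinedAt() == 0);             // operand 7 nulled again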
//===----------------------------------------------------------------------===//
// DebugInfoFinder implementations.
@@ -767,6 +927,10 @@ NamedMDNode *llvm::getOrInsertFnSpecificMDNode(Module &M, StringRef FuncName) {
/// processModule - Process entire module and collect debug info.
void DebugInfoFinder::processModule(Module &M) {
+ if (NamedMDNode *CU_Nodes = M.getNamedMetadata("llvm.dbg.cu"))
+ for (unsigned i = 0, e = CU_Nodes->getNumOperands(); i != e; ++i)
+ addCompileUnit(DICompileUnit(CU_Nodes->getOperand(i)));
+
for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I)
for (Function::iterator FI = (*I).begin(), FE = (*I).end(); FI != FE; ++FI)
for (BasicBlock::iterator BI = (*FI).begin(), BE = (*FI).end(); BI != BE;
@@ -785,6 +949,10 @@ void DebugInfoFinder::processModule(Module &M) {
addCompileUnit(DICompileUnit(Scope));
else if (Scope.isSubprogram())
processSubprogram(DISubprogram(Scope));
+ else if (Scope.isLexicalBlockFile()) {
+ DILexicalBlockFile DBF = DILexicalBlockFile(Scope);
+ processLexicalBlock(DILexicalBlock(DBF.getScope()));
+ }
else if (Scope.isLexicalBlock())
processLexicalBlock(DILexicalBlock(Scope));
@@ -796,7 +964,8 @@ void DebugInfoFinder::processModule(Module &M) {
for (unsigned i = 0, e = NMD->getNumOperands(); i != e; ++i) {
DIGlobalVariable DIG(cast<MDNode>(NMD->getOperand(i)));
if (addGlobalVariable(DIG)) {
- addCompileUnit(DIG.getCompileUnit());
+ if (DIG.getVersion() <= LLVMDebugVersion10)
+ addCompileUnit(DIG.getCompileUnit());
processType(DIG.getType());
}
}
@@ -817,6 +986,10 @@ void DebugInfoFinder::processLocation(DILocation Loc) {
processSubprogram(DISubprogram(S));
else if (S.isLexicalBlock())
processLexicalBlock(DILexicalBlock(S));
+ else if (S.isLexicalBlockFile()) {
+ DILexicalBlockFile DBF = DILexicalBlockFile(S);
+ processLexicalBlock(DILexicalBlock(DBF.getScope()));
+ }
processLocation(Loc.getOrigLocation());
}
@@ -824,8 +997,8 @@ void DebugInfoFinder::processLocation(DILocation Loc) {
void DebugInfoFinder::processType(DIType DT) {
if (!addType(DT))
return;
-
- addCompileUnit(DT.getCompileUnit());
+ if (DT.getVersion() <= LLVMDebugVersion10)
+ addCompileUnit(DT.getCompileUnit());
if (DT.isCompositeType()) {
DICompositeType DCT(DT);
processType(DCT.getTypeDerivedFrom());
@@ -848,6 +1021,10 @@ void DebugInfoFinder::processLexicalBlock(DILexicalBlock LB) {
DIScope Context = LB.getContext();
if (Context.isLexicalBlock())
return processLexicalBlock(DILexicalBlock(Context));
+ else if (Context.isLexicalBlockFile()) {
+ DILexicalBlockFile DBF = DILexicalBlockFile(Context);
+ return processLexicalBlock(DILexicalBlock(DBF.getScope()));
+ }
else
return processSubprogram(DISubprogram(Context));
}
@@ -856,7 +1033,8 @@ void DebugInfoFinder::processLexicalBlock(DILexicalBlock LB) {
void DebugInfoFinder::processSubprogram(DISubprogram SP) {
if (!addSubprogram(SP))
return;
- addCompileUnit(SP.getCompileUnit());
+ if (SP.getVersion() <= LLVMDebugVersion10)
+ addCompileUnit(SP.getCompileUnit());
processType(SP.getType());
}
@@ -871,8 +1049,8 @@ void DebugInfoFinder::processDeclare(DbgDeclareInst *DDI) {
if (!NodesSeen.insert(DV))
return;
-
- addCompileUnit(DIVariable(N).getCompileUnit());
+ if (DIVariable(N).getVersion() <= LLVMDebugVersion10)
+ addCompileUnit(DIVariable(N).getCompileUnit());
processType(DIVariable(N).getType());
}
@@ -930,6 +1108,9 @@ DISubprogram llvm::getDISubprogram(const MDNode *Scope) {
if (D.isSubprogram())
return DISubprogram(Scope);
+ if (D.isLexicalBlockFile())
+ return getDISubprogram(DILexicalBlockFile(Scope).getContext());
+
if (D.isLexicalBlock())
return getDISubprogram(DILexicalBlock(Scope).getContext());
@@ -946,3 +1127,17 @@ DICompositeType llvm::getDICompositeType(DIType T) {
return DICompositeType();
}
+
+/// isSubprogramContext - Return true if Context is either a subprogram
+/// or another context nested inside a subprogram.
+bool llvm::isSubprogramContext(const MDNode *Context) {
+ if (!Context)
+ return false;
+ DIDescriptor D(Context);
+ if (D.isSubprogram())
+ return true;
+ if (D.isType())
+ return isSubprogramContext(DIType(Context).getContext());
+ return false;
+}
+
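A tiny sketch of the distinction this draws; LocalStructTy is a hypothetical MDNode for a struct defined inside a function:

  bool InFn = isSubprogramContext(DIType(LocalStructTy).getContext());
  // InFn is true: the context chain walks struct -> enclosing subprogram.
  // For a file-scope type the chain ends at the compile unit, giving false.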
diff --git a/lib/Analysis/IPA/CMakeLists.txt b/lib/Analysis/IPA/CMakeLists.txt
index 8ffef29..eae83fd 100644
--- a/lib/Analysis/IPA/CMakeLists.txt
+++ b/lib/Analysis/IPA/CMakeLists.txt
@@ -5,3 +5,9 @@ add_llvm_library(LLVMipa
GlobalsModRef.cpp
IPA.cpp
)
+
+add_llvm_library_dependencies(LLVMipa
+ LLVMAnalysis
+ LLVMCore
+ LLVMSupport
+ )
diff --git a/lib/Analysis/IPA/CallGraphSCCPass.cpp b/lib/Analysis/IPA/CallGraphSCCPass.cpp
index 659ffab..963da75 100644
--- a/lib/Analysis/IPA/CallGraphSCCPass.cpp
+++ b/lib/Analysis/IPA/CallGraphSCCPass.cpp
@@ -44,8 +44,8 @@ namespace {
class CGPassManager : public ModulePass, public PMDataManager {
public:
static char ID;
- explicit CGPassManager(int Depth)
- : ModulePass(ID), PMDataManager(Depth) { }
+ explicit CGPassManager()
+ : ModulePass(ID), PMDataManager() { }
/// run - Execute all of the passes scheduled for execution. Keep track of
/// whether any of the passes modifies the module, and if so, return true.
@@ -350,6 +350,7 @@ bool CGPassManager::RefreshCallGraph(CallGraphSCC &CurSCC,
dbgs() << "CGSCCPASSMGR: SCC Refresh didn't change call graph.\n";
}
);
+ (void)MadeChange;
return DevirtualizedCall;
}
@@ -542,7 +543,7 @@ void CallGraphSCCPass::assignPassManager(PMStack &PMS,
PMDataManager *PMD = PMS.top();
// [1] Create new Call Graph Pass Manager
- CGP = new CGPassManager(PMD->getDepth() + 1);
+ CGP = new CGPassManager();
// [2] Set up new manager's top level manager
PMTopLevelManager *TPM = PMD->getTopLevelManager();
diff --git a/lib/Analysis/IPA/FindUsedTypes.cpp b/lib/Analysis/IPA/FindUsedTypes.cpp
index 6535786..e9df3ca 100644
--- a/lib/Analysis/IPA/FindUsedTypes.cpp
+++ b/lib/Analysis/IPA/FindUsedTypes.cpp
@@ -29,7 +29,7 @@ INITIALIZE_PASS(FindUsedTypes, "print-used-types",
// IncorporateType - Incorporate one type and all of its subtypes into the
// collection of used types.
//
-void FindUsedTypes::IncorporateType(const Type *Ty) {
+void FindUsedTypes::IncorporateType(Type *Ty) {
// If ty doesn't already exist in the used types map, add it now, otherwise
// return.
if (!UsedTypes.insert(Ty)) return; // Already contains Ty.
@@ -94,7 +94,7 @@ bool FindUsedTypes::runOnModule(Module &m) {
//
void FindUsedTypes::print(raw_ostream &OS, const Module *M) const {
OS << "Types in use by this module:\n";
- for (SetVector<const Type *>::const_iterator I = UsedTypes.begin(),
+ for (SetVector<Type *>::const_iterator I = UsedTypes.begin(),
E = UsedTypes.end(); I != E; ++I) {
OS << " " << **I << '\n';
}
diff --git a/lib/Analysis/IVUsers.cpp b/lib/Analysis/IVUsers.cpp
index e5f0a77..d0ca892 100644
--- a/lib/Analysis/IVUsers.cpp
+++ b/lib/Analysis/IVUsers.cpp
@@ -146,7 +146,8 @@ bool IVUsers::AddUsersIfInteresting(Instruction *I) {
ISE, User, I,
NewUse.PostIncLoops,
*SE, *DT);
- DEBUG(dbgs() << " NORMALIZED TO: " << *ISE << '\n');
+ DEBUG(if (SE->getSCEV(I) != ISE)
+ dbgs() << " NORMALIZED TO: " << *ISE << '\n');
}
}
return true;
diff --git a/lib/Analysis/InlineCost.cpp b/lib/Analysis/InlineCost.cpp
index efde598..e12e322 100644
--- a/lib/Analysis/InlineCost.cpp
+++ b/lib/Analysis/InlineCost.cpp
@@ -15,6 +15,7 @@
#include "llvm/Support/CallSite.h"
#include "llvm/CallingConv.h"
#include "llvm/IntrinsicInst.h"
+#include "llvm/Target/TargetData.h"
#include "llvm/ADT/SmallPtrSet.h"
using namespace llvm;
@@ -24,13 +25,13 @@ using namespace llvm;
/// TODO: Perhaps calls like memcpy, strcpy, etc?
bool llvm::callIsSmall(const Function *F) {
if (!F) return false;
-
+
if (F->hasLocalLinkage()) return false;
-
+
if (!F->hasName()) return false;
-
+
StringRef Name = F->getName();
-
+
// These will all likely lower to a single selection DAG node.
if (Name == "copysign" || Name == "copysignf" || Name == "copysignl" ||
Name == "fabs" || Name == "fabsf" || Name == "fabsl" ||
@@ -38,7 +39,7 @@ bool llvm::callIsSmall(const Function *F) {
Name == "cos" || Name == "cosf" || Name == "cosl" ||
Name == "sqrt" || Name == "sqrtf" || Name == "sqrtl" )
return true;
-
+
// These are all likely to be optimized into something smaller.
if (Name == "pow" || Name == "powf" || Name == "powl" ||
Name == "exp2" || Name == "exp2l" || Name == "exp2f" ||
@@ -46,13 +47,14 @@ bool llvm::callIsSmall(const Function *F) {
Name == "round" || Name == "ffs" || Name == "ffsl" ||
Name == "abs" || Name == "labs" || Name == "llabs")
return true;
-
+
return false;
}
/// analyzeBasicBlock - Fill in the current structure with information gleaned
/// from the specified block.
-void CodeMetrics::analyzeBasicBlock(const BasicBlock *BB) {
+void CodeMetrics::analyzeBasicBlock(const BasicBlock *BB,
+ const TargetData *TD) {
++NumBlocks;
unsigned NumInstsBeforeThisBB = NumInsts;
for (BasicBlock::const_iterator II = BB->begin(), E = BB->end();
@@ -67,8 +69,8 @@ void CodeMetrics::analyzeBasicBlock(const BasicBlock *BB) {
ImmutableCallSite CS(cast<Instruction>(II));
if (const Function *F = CS.getCalledFunction()) {
- // If a function is both internal and has a single use, then it is
- // extremely likely to get inlined in the future (it was probably
+ // If a function is both internal and has a single use, then it is
+ // extremely likely to get inlined in the future (it was probably
// exposed by an interleaved devirtualization pass).
if (F->hasInternalLinkage() && F->hasOneUse())
++NumInlineCandidates;
@@ -91,20 +93,25 @@ void CodeMetrics::analyzeBasicBlock(const BasicBlock *BB) {
++NumCalls;
}
}
-
+
if (const AllocaInst *AI = dyn_cast<AllocaInst>(II)) {
if (!AI->isStaticAlloca())
this->usesDynamicAlloca = true;
}
if (isa<ExtractElementInst>(II) || II->getType()->isVectorTy())
- ++NumVectorInsts;
-
+ ++NumVectorInsts;
+
if (const CastInst *CI = dyn_cast<CastInst>(II)) {
// Noop casts, including ptr <-> int, don't count.
- if (CI->isLosslessCast() || isa<IntToPtrInst>(CI) ||
+ if (CI->isLosslessCast() || isa<IntToPtrInst>(CI) ||
isa<PtrToIntInst>(CI))
continue;
+ // trunc to a native type is free (assuming the target has compare and
+ // shift-right of the same width).
+ if (isa<TruncInst>(CI) && TD &&
+ TD->isLegalInteger(TD->getTypeSizeInBits(CI->getType())))
+ continue;
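Worked example of the new heuristic, with a hypothetical data-layout string:

  // With TargetData TD("e-n32:64"): TD.isLegalInteger(32) is true, so a
  // `trunc i64 %x to i32` is skipped as free; TD.isLegalInteger(17) is
  // false, so `trunc i64 %x to i17` still counts toward NumInsts.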
// Result of a cmp instruction is often extended (to be used by other
// cmp instructions, logical or return instructions). These are usually
// nop on most sane targets.
@@ -119,10 +126,10 @@ void CodeMetrics::analyzeBasicBlock(const BasicBlock *BB) {
++NumInsts;
}
-
+
if (isa<ReturnInst>(BB->getTerminator()))
++NumRets;
-
+
// We never want to inline functions that contain an indirectbr. This is
// incorrect because all the blockaddress's (in static global initializers
// for example) would be referring to the original function, and this indirect
@@ -217,7 +224,7 @@ unsigned CodeMetrics::CountCodeReductionForAlloca(Value *V) {
/// analyzeFunction - Fill in the current structure with information gleaned
/// from the specified function.
-void CodeMetrics::analyzeFunction(Function *F) {
+void CodeMetrics::analyzeFunction(Function *F, const TargetData *TD) {
// If this function contains a call to setjmp or _setjmp, never inline
// it. This is a hack because we depend on the user marking their local
// variables as volatile if they are live across a setjmp call, and they
@@ -227,13 +234,14 @@ void CodeMetrics::analyzeFunction(Function *F) {
// Look at the size of the callee.
for (Function::const_iterator BB = F->begin(), E = F->end(); BB != E; ++BB)
- analyzeBasicBlock(&*BB);
+ analyzeBasicBlock(&*BB, TD);
}
/// analyzeFunction - Fill in the current structure with information gleaned
/// from the specified function.
-void InlineCostAnalyzer::FunctionInfo::analyzeFunction(Function *F) {
- Metrics.analyzeFunction(F);
+void InlineCostAnalyzer::FunctionInfo::analyzeFunction(Function *F,
+ const TargetData *TD) {
+ Metrics.analyzeFunction(F, TD);
// A function with exactly one return has it removed during the inlining
// process (see InlineFunction), so don't count it.
@@ -252,7 +260,7 @@ void InlineCostAnalyzer::FunctionInfo::analyzeFunction(Function *F) {
/// NeverInline - returns true if the function should never be inlined into
/// any caller
bool InlineCostAnalyzer::FunctionInfo::NeverInline() {
- return (Metrics.callsSetJmp || Metrics.isRecursive ||
+ return (Metrics.callsSetJmp || Metrics.isRecursive ||
Metrics.containsIndirectBr);
}
// getSpecializationBonus - The heuristic used to determine the per-call
@@ -263,19 +271,19 @@ int InlineCostAnalyzer::getSpecializationBonus(Function *Callee,
{
if (Callee->mayBeOverridden())
return 0;
-
+
int Bonus = 0;
// If this function uses the coldcc calling convention, prefer not to
// specialize it.
if (Callee->getCallingConv() == CallingConv::Cold)
Bonus -= InlineConstants::ColdccPenalty;
-
+
// Get information about the callee.
FunctionInfo *CalleeFI = &CachedFunctionInfo[Callee];
-
+
// If we haven't calculated this information yet, do so now.
if (CalleeFI->Metrics.NumBlocks == 0)
- CalleeFI->analyzeFunction(Callee);
+ CalleeFI->analyzeFunction(Callee, TD);
unsigned ArgNo = 0;
unsigned i = 0;
@@ -286,7 +294,7 @@ int InlineCostAnalyzer::getSpecializationBonus(Function *Callee,
Bonus += CountBonusForConstant(I);
}
- // Calls usually take a long time, so they make the specialization gain
+ // Calls usually take a long time, so they make the specialization gain
// smaller.
Bonus -= CalleeFI->Metrics.NumCalls * InlineConstants::CallPenalty;
@@ -300,13 +308,13 @@ int InlineCostAnalyzer::getSpecializationBonus(Function *Callee,
// inlining because we decide we don't want to give a bonus for
// devirtualizing.
int InlineCostAnalyzer::ConstantFunctionBonus(CallSite CS, Constant *C) {
-
+
// This could just be NULL.
if (!C) return 0;
-
+
Function *F = dyn_cast<Function>(C);
if (!F) return 0;
-
+
int Bonus = InlineConstants::IndirectCallBonus + getInlineSize(CS, F);
return (Bonus > 0) ? 0 : Bonus;
}
@@ -355,18 +363,18 @@ int InlineCostAnalyzer::CountBonusForConstant(Value *V, Constant *C) {
Bonus += CountBonusForConstant(&Inst);
}
}
-
+
return Bonus;
}
int InlineCostAnalyzer::getInlineSize(CallSite CS, Function *Callee) {
// Get information about the callee.
FunctionInfo *CalleeFI = &CachedFunctionInfo[Callee];
-
+
// If we haven't calculated this information yet, do so now.
if (CalleeFI->Metrics.NumBlocks == 0)
- CalleeFI->analyzeFunction(Callee);
-
+ CalleeFI->analyzeFunction(Callee, TD);
+
// InlineCost - This value measures how good of an inline candidate this call
// site is to inline. A lower inline cost makes it more likely for the call to
// be inlined. This value may go negative.
@@ -392,9 +400,9 @@ int InlineCostAnalyzer::getInlineSize(CallSite CS, Function *Callee) {
// weights calculated for the callee to determine how much will be folded
// away with this information.
else if (isa<Constant>(I))
- InlineCost -= CalleeFI->ArgumentWeights[ArgNo].ConstantWeight;
+ InlineCost -= CalleeFI->ArgumentWeights[ArgNo].ConstantWeight;
}
-
+
// Each argument passed in has a cost at both the caller and the callee
// sides. Measurements show that each argument costs about the same as an
// instruction.
@@ -408,28 +416,28 @@ int InlineCostAnalyzer::getInlineSize(CallSite CS, Function *Callee) {
// Look at the size of the callee. Each instruction counts as 5.
InlineCost += CalleeFI->Metrics.NumInsts*InlineConstants::InstrCost;
-
+
return InlineCost;
}
int InlineCostAnalyzer::getInlineBonuses(CallSite CS, Function *Callee) {
// Get information about the callee.
FunctionInfo *CalleeFI = &CachedFunctionInfo[Callee];
-
+
// If we haven't calculated this information yet, do so now.
if (CalleeFI->Metrics.NumBlocks == 0)
- CalleeFI->analyzeFunction(Callee);
-
+ CalleeFI->analyzeFunction(Callee, TD);
+
bool isDirectCall = CS.getCalledFunction() == Callee;
Instruction *TheCall = CS.getInstruction();
int Bonus = 0;
-
+
// If there is only one call of the function, and it has internal linkage,
// make it almost guaranteed to be inlined.
//
if (Callee->hasLocalLinkage() && Callee->hasOneUse() && isDirectCall)
Bonus += InlineConstants::LastCallToStaticBonus;
-
+
// If the instruction after the call, or if the normal destination of the
// invoke is an unreachable instruction, the function is noreturn. As such,
// there is little point in inlining this.
@@ -438,12 +446,12 @@ int InlineCostAnalyzer::getInlineBonuses(CallSite CS, Function *Callee) {
Bonus += InlineConstants::NoreturnPenalty;
} else if (isa<UnreachableInst>(++BasicBlock::iterator(TheCall)))
Bonus += InlineConstants::NoreturnPenalty;
-
+
// If this function uses the coldcc calling convention, prefer not to inline
// it.
if (Callee->getCallingConv() == CallingConv::Cold)
Bonus += InlineConstants::ColdccPenalty;
-
+
// Add to the inline quality for properties that make the call valuable to
// inline. This includes factors that indicate that the result of inlining
// the function will be optimizable. Currently this just looks at arguments
@@ -455,7 +463,7 @@ int InlineCostAnalyzer::getInlineBonuses(CallSite CS, Function *Callee) {
// Compute any constant bonus due to inlining we want to give here.
if (isa<Constant>(I))
Bonus += CountBonusForConstant(FI, cast<Constant>(I));
-
+
return Bonus;
}
@@ -483,10 +491,10 @@ InlineCost InlineCostAnalyzer::getInlineCost(CallSite CS,
// Get information about the callee.
FunctionInfo *CalleeFI = &CachedFunctionInfo[Callee];
-
+
// If we haven't calculated this information yet, do so now.
if (CalleeFI->Metrics.NumBlocks == 0)
- CalleeFI->analyzeFunction(Callee);
+ CalleeFI->analyzeFunction(Callee, TD);
// If we should never inline this, return a huge cost.
if (CalleeFI->NeverInline())
@@ -498,15 +506,15 @@ InlineCost InlineCostAnalyzer::getInlineCost(CallSite CS,
// requires handling setjmp somewhere else, however.
if (!Callee->isDeclaration() && Callee->hasFnAttr(Attribute::AlwaysInline))
return InlineCost::getAlways();
-
+
if (CalleeFI->Metrics.usesDynamicAlloca) {
// Get information about the caller.
FunctionInfo &CallerFI = CachedFunctionInfo[Caller];
// If we haven't calculated this information yet, do so now.
if (CallerFI.Metrics.NumBlocks == 0) {
- CallerFI.analyzeFunction(Caller);
-
+ CallerFI.analyzeFunction(Caller, TD);
+
// Recompute the CalleeFI pointer, getting Caller could have invalidated
// it.
CalleeFI = &CachedFunctionInfo[Callee];
@@ -538,16 +546,16 @@ InlineCost InlineCostAnalyzer::getSpecializationCost(Function *Callee,
// something else.
if (Callee->mayBeOverridden())
return llvm::InlineCost::getNever();
-
+
// Get information about the callee.
FunctionInfo *CalleeFI = &CachedFunctionInfo[Callee];
-
+
// If we haven't calculated this information yet, do so now.
if (CalleeFI->Metrics.NumBlocks == 0)
- CalleeFI->analyzeFunction(Callee);
+ CalleeFI->analyzeFunction(Callee, TD);
int Cost = 0;
-
+
// Look at the original size of the callee. Each instruction counts as 5.
Cost += CalleeFI->Metrics.NumInsts * InlineConstants::InstrCost;
@@ -564,13 +572,13 @@ InlineCost InlineCostAnalyzer::getSpecializationCost(Function *Callee,
// higher threshold to determine if the function call should be inlined.
float InlineCostAnalyzer::getInlineFudgeFactor(CallSite CS) {
Function *Callee = CS.getCalledFunction();
-
+
// Get information about the callee.
FunctionInfo &CalleeFI = CachedFunctionInfo[Callee];
-
+
// If we haven't calculated this information yet, do so now.
if (CalleeFI.Metrics.NumBlocks == 0)
- CalleeFI.analyzeFunction(Callee);
+ CalleeFI.analyzeFunction(Callee, TD);
float Factor = 1.0f;
// Single BB functions are often written to be inlined.
@@ -604,7 +612,7 @@ InlineCostAnalyzer::growCachedCostInfo(Function *Caller, Function *Callee) {
--CallerMetrics.NumCalls;
if (Callee == 0) return;
-
+
CodeMetrics &CalleeMetrics = CachedFunctionInfo[Callee].Metrics;
// If we don't have metrics for the callee, don't recalculate them just to
@@ -614,7 +622,7 @@ InlineCostAnalyzer::growCachedCostInfo(Function *Caller, Function *Callee) {
resetCachedCostInfo(Caller);
return;
}
-
+
// Since CalleeMetrics were already calculated, we know that the CallerMetrics
// reference isn't invalidated: both were in the DenseMap.
CallerMetrics.usesDynamicAlloca |= CalleeMetrics.usesDynamicAlloca;
@@ -636,7 +644,7 @@ InlineCostAnalyzer::growCachedCostInfo(Function *Caller, Function *Callee) {
CallerMetrics.NumInsts -= Callee->arg_size();
else
CallerMetrics.NumInsts = 0;
-
+
// We are not updating the argument weights. We have already determined that
// Caller is a fairly large function, so we accept the loss of precision.
}
diff --git a/lib/Analysis/InstructionSimplify.cpp b/lib/Analysis/InstructionSimplify.cpp
index 8709f6b..131cc97 100644
--- a/lib/Analysis/InstructionSimplify.cpp
+++ b/lib/Analysis/InstructionSimplify.cpp
@@ -48,6 +48,26 @@ static Value *SimplifyOrInst(Value *, Value *, const TargetData *,
static Value *SimplifyXorInst(Value *, Value *, const TargetData *,
const DominatorTree *, unsigned);
+/// getFalse - For a boolean type, or a vector of boolean type, return false, or
+/// a vector with every element false, as appropriate for the type.
+static Constant *getFalse(Type *Ty) {
+ assert((Ty->isIntegerTy(1) ||
+ (Ty->isVectorTy() &&
+ cast<VectorType>(Ty)->getElementType()->isIntegerTy(1))) &&
+ "Expected i1 type or a vector of i1!");
+ return Constant::getNullValue(Ty);
+}
+
+/// getTrue - For a boolean type, or a vector of boolean type, return true, or
+/// a vector with every element true, as appropriate for the type.
+static Constant *getTrue(Type *Ty) {
+ assert((Ty->isIntegerTy(1) ||
+ (Ty->isVectorTy() &&
+ cast<VectorType>(Ty)->getElementType()->isIntegerTy(1))) &&
+ "Expected i1 type or a vector of i1!");
+ return Constant::getAllOnesValue(Ty);
+}
+
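A sketch of why the helpers exist (Ctx is an assumed LLVMContext): they extend true/false to vector-of-i1 result types, the case the removed "also works for vectors" comments below were working around:

  Type *V4I1 = VectorType::get(Type::getInt1Ty(Ctx), 4);
  Constant *AllTrue  = getTrue(V4I1);   // <4 x i1> all-ones
  Constant *AllFalse = getFalse(V4I1);  // <4 x i1> zeroinitializer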
/// ValueDominatesPHI - Does the given value dominate the specified phi node?
static bool ValueDominatesPHI(Value *V, PHINode *P, const DominatorTree *DT) {
Instruction *I = dyn_cast<Instruction>(V);
@@ -526,7 +546,7 @@ static Value *SimplifyAddInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW,
if (Constant *CRHS = dyn_cast<Constant>(Op1)) {
Constant *Ops[] = { CLHS, CRHS };
return ConstantFoldInstOperands(Instruction::Add, CLHS->getType(),
- Ops, 2, TD);
+ Ops, TD);
}
// Canonicalize the constant to the RHS.
@@ -595,7 +615,7 @@ static Value *SimplifySubInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW,
if (Constant *CRHS = dyn_cast<Constant>(Op1)) {
Constant *Ops[] = { CLHS, CRHS };
return ConstantFoldInstOperands(Instruction::Sub, CLHS->getType(),
- Ops, 2, TD);
+ Ops, TD);
}
// X - undef -> undef
@@ -715,7 +735,7 @@ static Value *SimplifyMulInst(Value *Op0, Value *Op1, const TargetData *TD,
if (Constant *CRHS = dyn_cast<Constant>(Op1)) {
Constant *Ops[] = { CLHS, CRHS };
return ConstantFoldInstOperands(Instruction::Mul, CLHS->getType(),
- Ops, 2, TD);
+ Ops, TD);
}
// Canonicalize the constant to the RHS.
@@ -788,7 +808,7 @@ static Value *SimplifyDiv(Instruction::BinaryOps Opcode, Value *Op0, Value *Op1,
if (Constant *C0 = dyn_cast<Constant>(Op0)) {
if (Constant *C1 = dyn_cast<Constant>(Op1)) {
Constant *Ops[] = { C0, C1 };
- return ConstantFoldInstOperands(Opcode, C0->getType(), Ops, 2, TD);
+ return ConstantFoldInstOperands(Opcode, C0->getType(), Ops, TD);
}
}
@@ -909,7 +929,7 @@ static Value *SimplifyRem(Instruction::BinaryOps Opcode, Value *Op0, Value *Op1,
if (Constant *C0 = dyn_cast<Constant>(Op0)) {
if (Constant *C1 = dyn_cast<Constant>(Op1)) {
Constant *Ops[] = { C0, C1 };
- return ConstantFoldInstOperands(Opcode, C0->getType(), Ops, 2, TD);
+ return ConstantFoldInstOperands(Opcode, C0->getType(), Ops, TD);
}
}
@@ -1012,7 +1032,7 @@ static Value *SimplifyShift(unsigned Opcode, Value *Op0, Value *Op1,
if (Constant *C0 = dyn_cast<Constant>(Op0)) {
if (Constant *C1 = dyn_cast<Constant>(Op1)) {
Constant *Ops[] = { C0, C1 };
- return ConstantFoldInstOperands(Opcode, C0->getType(), Ops, 2, TD);
+ return ConstantFoldInstOperands(Opcode, C0->getType(), Ops, TD);
}
}
@@ -1138,7 +1158,7 @@ static Value *SimplifyAndInst(Value *Op0, Value *Op1, const TargetData *TD,
if (Constant *CRHS = dyn_cast<Constant>(Op1)) {
Constant *Ops[] = { CLHS, CRHS };
return ConstantFoldInstOperands(Instruction::And, CLHS->getType(),
- Ops, 2, TD);
+ Ops, TD);
}
// Canonicalize the constant to the RHS.
@@ -1227,7 +1247,7 @@ static Value *SimplifyOrInst(Value *Op0, Value *Op1, const TargetData *TD,
if (Constant *CRHS = dyn_cast<Constant>(Op1)) {
Constant *Ops[] = { CLHS, CRHS };
return ConstantFoldInstOperands(Instruction::Or, CLHS->getType(),
- Ops, 2, TD);
+ Ops, TD);
}
// Canonicalize the constant to the RHS.
@@ -1321,7 +1341,7 @@ static Value *SimplifyXorInst(Value *Op0, Value *Op1, const TargetData *TD,
if (Constant *CRHS = dyn_cast<Constant>(Op1)) {
Constant *Ops[] = { CLHS, CRHS };
return ConstantFoldInstOperands(Instruction::Xor, CLHS->getType(),
- Ops, 2, TD);
+ Ops, TD);
}
// Canonicalize the constant to the RHS.
@@ -1372,7 +1392,7 @@ Value *llvm::SimplifyXorInst(Value *Op0, Value *Op1, const TargetData *TD,
return ::SimplifyXorInst(Op0, Op1, TD, DT, RecursionLimit);
}
-static const Type *GetCompareTy(Value *Op) {
+static Type *GetCompareTy(Value *Op) {
return CmpInst::makeCmpResultType(Op->getType());
}
@@ -1413,8 +1433,8 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
Pred = CmpInst::getSwappedPredicate(Pred);
}
- const Type *ITy = GetCompareTy(LHS); // The return type.
- const Type *OpTy = LHS->getType(); // The operand type.
+ Type *ITy = GetCompareTy(LHS); // The return type.
+ Type *OpTy = LHS->getType(); // The operand type.
// icmp X, X -> true/false
// X icmp undef -> true/false. For example, icmp ugt %X, undef -> false
@@ -1478,48 +1498,46 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
default:
assert(false && "Unknown ICmp predicate!");
case ICmpInst::ICMP_ULT:
- // getNullValue also works for vectors, unlike getFalse.
- return Constant::getNullValue(ITy);
+ return getFalse(ITy);
case ICmpInst::ICMP_UGE:
- // getAllOnesValue also works for vectors, unlike getTrue.
- return ConstantInt::getAllOnesValue(ITy);
+ return getTrue(ITy);
case ICmpInst::ICMP_EQ:
case ICmpInst::ICMP_ULE:
if (isKnownNonZero(LHS, TD))
- return Constant::getNullValue(ITy);
+ return getFalse(ITy);
break;
case ICmpInst::ICMP_NE:
case ICmpInst::ICMP_UGT:
if (isKnownNonZero(LHS, TD))
- return ConstantInt::getAllOnesValue(ITy);
+ return getTrue(ITy);
break;
case ICmpInst::ICMP_SLT:
ComputeSignBit(LHS, LHSKnownNonNegative, LHSKnownNegative, TD);
if (LHSKnownNegative)
- return ConstantInt::getAllOnesValue(ITy);
+ return getTrue(ITy);
if (LHSKnownNonNegative)
- return Constant::getNullValue(ITy);
+ return getFalse(ITy);
break;
case ICmpInst::ICMP_SLE:
ComputeSignBit(LHS, LHSKnownNonNegative, LHSKnownNegative, TD);
if (LHSKnownNegative)
- return ConstantInt::getAllOnesValue(ITy);
+ return getTrue(ITy);
if (LHSKnownNonNegative && isKnownNonZero(LHS, TD))
- return Constant::getNullValue(ITy);
+ return getFalse(ITy);
break;
case ICmpInst::ICMP_SGE:
ComputeSignBit(LHS, LHSKnownNonNegative, LHSKnownNegative, TD);
if (LHSKnownNegative)
- return Constant::getNullValue(ITy);
+ return getFalse(ITy);
if (LHSKnownNonNegative)
- return ConstantInt::getAllOnesValue(ITy);
+ return getTrue(ITy);
break;
case ICmpInst::ICMP_SGT:
ComputeSignBit(LHS, LHSKnownNonNegative, LHSKnownNegative, TD);
if (LHSKnownNegative)
- return Constant::getNullValue(ITy);
+ return getFalse(ITy);
if (LHSKnownNonNegative && isKnownNonZero(LHS, TD))
- return ConstantInt::getAllOnesValue(ITy);
+ return getTrue(ITy);
break;
}
}
@@ -1593,8 +1611,8 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
if (isa<CastInst>(LHS) && (isa<Constant>(RHS) || isa<CastInst>(RHS))) {
Instruction *LI = cast<CastInst>(LHS);
Value *SrcOp = LI->getOperand(0);
- const Type *SrcTy = SrcOp->getType();
- const Type *DstTy = LI->getType();
+ Type *SrcTy = SrcOp->getType();
+ Type *DstTy = LI->getType();
// Turn icmp (ptrtoint x), (ptrtoint/constant) into a compare of the input
// if the integer type is the same size as the pointer type.
@@ -1811,8 +1829,7 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
case ICmpInst::ICMP_EQ:
case ICmpInst::ICMP_UGT:
case ICmpInst::ICMP_UGE:
- // getNullValue also works for vectors, unlike getFalse.
- return Constant::getNullValue(ITy);
+ return getFalse(ITy);
case ICmpInst::ICMP_SLT:
case ICmpInst::ICMP_SLE:
ComputeSignBit(LHS, KnownNonNegative, KnownNegative, TD);
@@ -1822,8 +1839,7 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
case ICmpInst::ICMP_NE:
case ICmpInst::ICMP_ULT:
case ICmpInst::ICMP_ULE:
- // getAllOnesValue also works for vectors, unlike getTrue.
- return Constant::getAllOnesValue(ITy);
+ return getTrue(ITy);
}
}
if (RBO && match(RBO, m_URem(m_Value(), m_Specific(LHS)))) {
@@ -1840,8 +1856,7 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
case ICmpInst::ICMP_NE:
case ICmpInst::ICMP_UGT:
case ICmpInst::ICMP_UGE:
- // getAllOnesValue also works for vectors, unlike getTrue.
- return Constant::getAllOnesValue(ITy);
+ return getTrue(ITy);
case ICmpInst::ICMP_SLT:
case ICmpInst::ICMP_SLE:
ComputeSignBit(RHS, KnownNonNegative, KnownNegative, TD);
@@ -1851,8 +1866,7 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
case ICmpInst::ICMP_EQ:
case ICmpInst::ICMP_ULT:
case ICmpInst::ICMP_ULE:
- // getNullValue also works for vectors, unlike getFalse.
- return Constant::getNullValue(ITy);
+ return getFalse(ITy);
}
}
@@ -1874,7 +1888,7 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
return V;
break;
case Instruction::Shl: {
- bool NUW = LBO->hasNoUnsignedWrap() && LBO->hasNoUnsignedWrap();
+ bool NUW = LBO->hasNoUnsignedWrap() && RBO->hasNoUnsignedWrap();
bool NSW = LBO->hasNoSignedWrap() && RBO->hasNoSignedWrap();
if (!NUW && !NSW)
break;
@@ -1955,10 +1969,10 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
}
case CmpInst::ICMP_SGE:
// Always true.
- return Constant::getAllOnesValue(ITy);
+ return getTrue(ITy);
case CmpInst::ICMP_SLT:
// Always false.
- return Constant::getNullValue(ITy);
+ return getFalse(ITy);
}
}
@@ -2025,10 +2039,10 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
}
case CmpInst::ICMP_UGE:
// Always true.
- return Constant::getAllOnesValue(ITy);
+ return getTrue(ITy);
case CmpInst::ICMP_ULT:
// Always false.
- return Constant::getNullValue(ITy);
+ return getFalse(ITy);
}
}
@@ -2040,40 +2054,40 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
// max(x, ?) pred min(x, ?).
if (Pred == CmpInst::ICMP_SGE)
// Always true.
- return Constant::getAllOnesValue(ITy);
+ return getTrue(ITy);
if (Pred == CmpInst::ICMP_SLT)
// Always false.
- return Constant::getNullValue(ITy);
+ return getFalse(ITy);
} else if (match(LHS, m_SMin(m_Value(A), m_Value(B))) &&
match(RHS, m_SMax(m_Value(C), m_Value(D))) &&
(A == C || A == D || B == C || B == D)) {
// min(x, ?) pred max(x, ?).
if (Pred == CmpInst::ICMP_SLE)
// Always true.
- return Constant::getAllOnesValue(ITy);
+ return getTrue(ITy);
if (Pred == CmpInst::ICMP_SGT)
// Always false.
- return Constant::getNullValue(ITy);
+ return getFalse(ITy);
} else if (match(LHS, m_UMax(m_Value(A), m_Value(B))) &&
match(RHS, m_UMin(m_Value(C), m_Value(D))) &&
(A == C || A == D || B == C || B == D)) {
// max(x, ?) pred min(x, ?).
if (Pred == CmpInst::ICMP_UGE)
// Always true.
- return Constant::getAllOnesValue(ITy);
+ return getTrue(ITy);
if (Pred == CmpInst::ICMP_ULT)
// Always false.
- return Constant::getNullValue(ITy);
+ return getFalse(ITy);
} else if (match(LHS, m_UMin(m_Value(A), m_Value(B))) &&
match(RHS, m_UMax(m_Value(C), m_Value(D))) &&
(A == C || A == D || B == C || B == D)) {
// min(x, ?) pred max(x, ?).
if (Pred == CmpInst::ICMP_ULE)
// Always true.
- return Constant::getAllOnesValue(ITy);
+ return getTrue(ITy);
if (Pred == CmpInst::ICMP_UGT)
// Always false.
- return Constant::getNullValue(ITy);
+ return getFalse(ITy);
}
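
A hedged illustration of the fold above, written against the 3.0-era headers;
the IRBuilder reference and the values X, A and B are hypothetical, and the
max/min operands must be spelled as icmp+select so the m_SMax/m_SMin matchers
recognize them:

    #include "llvm/Analysis/InstructionSimplify.h"
    #include "llvm/Support/IRBuilder.h"
    using namespace llvm;

    Value *foldMaxGeMin(IRBuilder<> &IRB, Value *X, Value *A, Value *B) {
      // smax(x, a) and smin(x, b) in the icmp+select form the matchers expect.
      Value *Max = IRB.CreateSelect(IRB.CreateICmpSGT(X, A), X, A);
      Value *Min = IRB.CreateSelect(IRB.CreateICmpSLT(X, B), X, B);
      // Both sides share X, so "max >=s min" holds for any A and B; the
      // simplifier now answers with getTrue(i1) without creating an icmp.
      return SimplifyICmpInst(CmpInst::ICMP_SGE, Max, Min, 0, 0);
    }
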
// If the comparison is with the result of a select instruction, check whether
@@ -2219,43 +2233,71 @@ Value *llvm::SimplifySelectInst(Value *CondVal, Value *TrueVal, Value *FalseVal,
/// SimplifyGEPInst - Given operands for an GetElementPtrInst, see if we can
/// fold the result. If not, this returns null.
-Value *llvm::SimplifyGEPInst(Value *const *Ops, unsigned NumOps,
+Value *llvm::SimplifyGEPInst(ArrayRef<Value *> Ops,
const TargetData *TD, const DominatorTree *) {
// The type of the GEP pointer operand.
- const PointerType *PtrTy = cast<PointerType>(Ops[0]->getType());
+ PointerType *PtrTy = cast<PointerType>(Ops[0]->getType());
// getelementptr P -> P.
- if (NumOps == 1)
+ if (Ops.size() == 1)
return Ops[0];
if (isa<UndefValue>(Ops[0])) {
// Compute the (pointer) type returned by the GEP instruction.
- const Type *LastType = GetElementPtrInst::getIndexedType(PtrTy, &Ops[1],
- NumOps-1);
- const Type *GEPTy = PointerType::get(LastType, PtrTy->getAddressSpace());
+ Type *LastType = GetElementPtrInst::getIndexedType(PtrTy, Ops.slice(1));
+ Type *GEPTy = PointerType::get(LastType, PtrTy->getAddressSpace());
return UndefValue::get(GEPTy);
}
- if (NumOps == 2) {
+ if (Ops.size() == 2) {
// getelementptr P, 0 -> P.
if (ConstantInt *C = dyn_cast<ConstantInt>(Ops[1]))
if (C->isZero())
return Ops[0];
// getelementptr P, N -> P if P points to a type of zero size.
if (TD) {
- const Type *Ty = PtrTy->getElementType();
+ Type *Ty = PtrTy->getElementType();
if (Ty->isSized() && TD->getTypeAllocSize(Ty) == 0)
return Ops[0];
}
}
// Check to see if this is constant foldable.
- for (unsigned i = 0; i != NumOps; ++i)
+ for (unsigned i = 0, e = Ops.size(); i != e; ++i)
if (!isa<Constant>(Ops[i]))
return 0;
- return ConstantExpr::getGetElementPtr(cast<Constant>(Ops[0]),
- (Constant *const*)Ops+1, NumOps-1);
+ return ConstantExpr::getGetElementPtr(cast<Constant>(Ops[0]), Ops.slice(1));
+}
+
+/// SimplifyInsertValueInst - Given operands for an InsertValueInst, see if we
+/// can fold the result. If not, this returns null.
+Value *llvm::SimplifyInsertValueInst(Value *Agg, Value *Val,
+ ArrayRef<unsigned> Idxs,
+ const TargetData *,
+ const DominatorTree *) {
+ if (Constant *CAgg = dyn_cast<Constant>(Agg))
+ if (Constant *CVal = dyn_cast<Constant>(Val))
+ return ConstantFoldInsertValueInstruction(CAgg, CVal, Idxs);
+
+ // insertvalue x, undef, n -> x
+ if (match(Val, m_Undef()))
+ return Agg;
+
+ // insertvalue x, (extractvalue y, n), n
+ if (ExtractValueInst *EV = dyn_cast<ExtractValueInst>(Val))
+ if (EV->getAggregateOperand()->getType() == Agg->getType() &&
+ EV->getIndices() == Idxs) {
+ // insertvalue undef, (extractvalue y, n), n -> y
+ if (match(Agg, m_Undef()))
+ return EV->getAggregateOperand();
+
+ // insertvalue y, (extractvalue y, n), n -> y
+ if (Agg == EV->getAggregateOperand())
+ return Agg;
+ }
+
+ return 0;
}
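
A hedged usage sketch for the new entry point, relying only on the signature
introduced above; Agg and EV are hypothetical values, and the fold fires only
when EV extracts index 0 from Agg itself:

    #include "llvm/Analysis/InstructionSimplify.h"
    #include "llvm/Instructions.h"
    using namespace llvm;

    Value *foldRoundTrip(Value *Agg, ExtractValueInst *EV) {
      // insertvalue %agg, (extractvalue %agg, 0), 0  -->  %agg
      unsigned Idxs[] = { 0 };
      if (Value *V = SimplifyInsertValueInst(Agg, EV, Idxs, 0, 0))
        return V;  // folded without materializing an insertvalue
      return 0;    // no fold; the caller builds the instruction itself
    }
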
/// SimplifyPHINode - See if we can fold the given phi. If not, returns null.
@@ -2328,7 +2370,7 @@ static Value *SimplifyBinOp(unsigned Opcode, Value *LHS, Value *RHS,
if (Constant *CLHS = dyn_cast<Constant>(LHS))
if (Constant *CRHS = dyn_cast<Constant>(RHS)) {
Constant *COps[] = {CLHS, CRHS};
- return ConstantFoldInstOperands(Opcode, LHS->getType(), COps, 2, TD);
+ return ConstantFoldInstOperands(Opcode, LHS->getType(), COps, TD);
}
// If the operation is associative, try some generic simplifications.
@@ -2456,7 +2498,14 @@ Value *llvm::SimplifyInstruction(Instruction *I, const TargetData *TD,
break;
case Instruction::GetElementPtr: {
SmallVector<Value*, 8> Ops(I->op_begin(), I->op_end());
- Result = SimplifyGEPInst(&Ops[0], Ops.size(), TD, DT);
+ Result = SimplifyGEPInst(Ops, TD, DT);
+ break;
+ }
+ case Instruction::InsertValue: {
+ InsertValueInst *IV = cast<InsertValueInst>(I);
+ Result = SimplifyInsertValueInst(IV->getAggregateOperand(),
+ IV->getInsertedValueOperand(),
+ IV->getIndices(), TD, DT);
break;
}
case Instruction::PHI:
diff --git a/lib/Analysis/LazyValueInfo.cpp b/lib/Analysis/LazyValueInfo.cpp
index 6e27597..f80595c 100644
--- a/lib/Analysis/LazyValueInfo.cpp
+++ b/lib/Analysis/LazyValueInfo.cpp
@@ -630,7 +630,7 @@ bool LazyValueInfoCache::solveBlockValueNonLocal(LVILatticeVal &BBLV,
if (BB == &BB->getParent()->getEntryBlock()) {
assert(isa<Argument>(Val) && "Unknown live-in to the entry block");
if (NotNull) {
- const PointerType *PTy = cast<PointerType>(Val->getType());
+ PointerType *PTy = cast<PointerType>(Val->getType());
Result = LVILatticeVal::getNot(ConstantPointerNull::get(PTy));
} else {
Result.markOverdefined();
@@ -658,7 +658,7 @@ bool LazyValueInfoCache::solveBlockValueNonLocal(LVILatticeVal &BBLV,
// If we previously determined that this is a pointer that can't be null
// then return that rather than giving up entirely.
if (NotNull) {
- const PointerType *PTy = cast<PointerType>(Val->getType());
+ PointerType *PTy = cast<PointerType>(Val->getType());
Result = LVILatticeVal::getNot(ConstantPointerNull::get(PTy));
}
@@ -728,7 +728,7 @@ bool LazyValueInfoCache::solveBlockValueConstantRange(LVILatticeVal &BBLV,
ConstantRange LHSRange = LHSVal.getConstantRange();
ConstantRange RHSRange(1);
- const IntegerType *ResultTy = cast<IntegerType>(BBI->getType());
+ IntegerType *ResultTy = cast<IntegerType>(BBI->getType());
if (isa<BinaryOperator>(BBI)) {
if (ConstantInt *RHS = dyn_cast<ConstantInt>(BBI->getOperand(1))) {
RHSRange = ConstantRange(RHS->getValue());
diff --git a/lib/Analysis/Lint.cpp b/lib/Analysis/Lint.cpp
index 89755da..38d677d 100644
--- a/lib/Analysis/Lint.cpp
+++ b/lib/Analysis/Lint.cpp
@@ -71,7 +71,7 @@ namespace {
void visitCallSite(CallSite CS);
void visitMemoryReference(Instruction &I, Value *Ptr,
uint64_t Size, unsigned Align,
- const Type *Ty, unsigned Flags);
+ Type *Ty, unsigned Flags);
void visitCallInst(CallInst &I);
void visitInvokeInst(InvokeInst &I);
@@ -201,7 +201,7 @@ void Lint::visitCallSite(CallSite CS) {
"Undefined behavior: Caller and callee calling convention differ",
&I);
- const FunctionType *FT = F->getFunctionType();
+ FunctionType *FT = F->getFunctionType();
unsigned NumActualArgs = unsigned(CS.arg_end()-CS.arg_begin());
Assert1(FT->isVarArg() ?
@@ -240,7 +240,7 @@ void Lint::visitCallSite(CallSite CS) {
// Check that an sret argument points to valid memory.
if (Formal->hasStructRetAttr() && Actual->getType()->isPointerTy()) {
- const Type *Ty =
+ Type *Ty =
cast<PointerType>(Formal->getType())->getElementType();
visitMemoryReference(I, Actual, AA->getTypeStoreSize(Ty),
TD ? TD->getABITypeAlignment(Ty) : 0,
@@ -364,7 +364,7 @@ void Lint::visitReturnInst(ReturnInst &I) {
// TODO: Check readnone/readonly function attributes.
void Lint::visitMemoryReference(Instruction &I,
Value *Ptr, uint64_t Size, unsigned Align,
- const Type *Ty, unsigned Flags) {
+ Type *Ty, unsigned Flags) {
// If no memory is being referenced, it doesn't matter if the pointer
// is valid.
if (Size == 0)
diff --git a/lib/Analysis/Loads.cpp b/lib/Analysis/Loads.cpp
index c5c676b..0e6bcbf 100644
--- a/lib/Analysis/Loads.cpp
+++ b/lib/Analysis/Loads.cpp
@@ -63,7 +63,7 @@ static Value *getUnderlyingObjectWithOffset(Value *V, const TargetData *TD,
return V;
SmallVector<Value*, 8> Indices(GEP->op_begin() + 1, GEP->op_end());
ByteOffset += TD->getIndexedOffset(GEP->getPointerOperandType(),
- &Indices[0], Indices.size());
+ Indices);
V = GEP->getPointerOperand();
} else if (Operator::getOpcode(V) == Instruction::BitCast) {
V = cast<Operator>(V)->getOperand(0);
@@ -90,7 +90,7 @@ bool llvm::isSafeToLoadUnconditionally(Value *V, Instruction *ScanFrom,
if (TD)
Base = getUnderlyingObjectWithOffset(V, TD, ByteOffset);
- const Type *BaseType = 0;
+ Type *BaseType = 0;
unsigned BaseAlign = 0;
if (const AllocaInst *AI = dyn_cast<AllocaInst>(Base)) {
// An alloca is safe to load from as load as it is suitably aligned.
@@ -114,7 +114,7 @@ bool llvm::isSafeToLoadUnconditionally(Value *V, Instruction *ScanFrom,
return true; // Loading directly from an alloca or global is OK.
// Check if the load is within the bounds of the underlying object.
- const PointerType *AddrTy = cast<PointerType>(V->getType());
+ PointerType *AddrTy = cast<PointerType>(V->getType());
uint64_t LoadSize = TD->getTypeStoreSize(AddrTy->getElementType());
if (ByteOffset + LoadSize <= TD->getTypeAllocSize(BaseType) &&
(Align == 0 || (ByteOffset % Align) == 0))
@@ -169,7 +169,7 @@ Value *llvm::FindAvailableLoadedValue(Value *Ptr, BasicBlock *ScanBB,
// If we're using alias analysis to disambiguate get the size of *Ptr.
uint64_t AccessSize = 0;
if (AA) {
- const Type *AccessTy = cast<PointerType>(Ptr->getType())->getElementType();
+ Type *AccessTy = cast<PointerType>(Ptr->getType())->getElementType();
AccessSize = AA->getTypeStoreSize(AccessTy);
}
@@ -188,12 +188,16 @@ Value *llvm::FindAvailableLoadedValue(Value *Ptr, BasicBlock *ScanBB,
--ScanFrom;
// If this is a load of Ptr, the loaded value is available.
+ // (This is true even if the load is volatile or atomic, although
+ // those cases are unlikely.)
if (LoadInst *LI = dyn_cast<LoadInst>(Inst))
if (AreEquivalentAddressValues(LI->getOperand(0), Ptr))
return LI;
if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
// If this is a store through Ptr, the value is available!
+ // (This is true even if the store is volatile or atomic, although
+ // those cases are unlikely.)
if (AreEquivalentAddressValues(SI->getOperand(1), Ptr))
return SI->getOperand(0);
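
For context, a hedged sketch of how this scan is usually driven; the signature
is the 3.0-era one from Loads.h, and the scan limit of 6 is an assumption:

    #include "llvm/Analysis/AliasAnalysis.h"
    #include "llvm/Analysis/Loads.h"
    using namespace llvm;

    Value *lookBack(LoadInst *LI, AliasAnalysis *AA) {
      // Scan backwards from LI within its own block. With the change above,
      // an earlier volatile or atomic access to the same address can now
      // satisfy the query as well.
      BasicBlock::iterator ScanFrom(LI);
      return FindAvailableLoadedValue(LI->getPointerOperand(), LI->getParent(),
                                      ScanFrom, 6, AA);
    }
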
diff --git a/lib/Analysis/LoopDependenceAnalysis.cpp b/lib/Analysis/LoopDependenceAnalysis.cpp
index c1afe8f..3997ac4 100644
--- a/lib/Analysis/LoopDependenceAnalysis.cpp
+++ b/lib/Analysis/LoopDependenceAnalysis.cpp
@@ -76,7 +76,13 @@ static void GetMemRefInstrs(const Loop *L,
}
static bool IsLoadOrStoreInst(Value *I) {
- return isa<LoadInst>(I) || isa<StoreInst>(I);
+ // Returns true if the load or store can be analyzed. Atomic and volatile
+ // operations have properties which this analysis does not understand.
+ if (LoadInst *LI = dyn_cast<LoadInst>(I))
+ return LI->isUnordered();
+ else if (StoreInst *SI = dyn_cast<StoreInst>(I))
+ return SI->isUnordered();
+ return false;
}
static Value *GetPointerOperand(Value *I) {
diff --git a/lib/Analysis/LoopInfo.cpp b/lib/Analysis/LoopInfo.cpp
index 0583140..85aacca 100644
--- a/lib/Analysis/LoopInfo.cpp
+++ b/lib/Analysis/LoopInfo.cpp
@@ -18,6 +18,7 @@
#include "llvm/Constants.h"
#include "llvm/Instructions.h"
#include "llvm/Analysis/Dominators.h"
+#include "llvm/Analysis/LoopIterator.h"
#include "llvm/Assembly/Writer.h"
#include "llvm/Support/CFG.h"
#include "llvm/Support/CommandLine.h"
@@ -55,12 +56,12 @@ bool Loop::isLoopInvariant(Value *V) const {
}
/// hasLoopInvariantOperands - Return true if all the operands of the
-/// specified instruction are loop invariant.
+/// specified instruction are loop invariant.
bool Loop::hasLoopInvariantOperands(Instruction *I) const {
for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i)
if (!isLoopInvariant(I->getOperand(i)))
return false;
-
+
return true;
}
@@ -98,6 +99,9 @@ bool Loop::makeLoopInvariant(Instruction *I, bool &Changed,
return false;
if (I->mayReadFromMemory())
return false;
+ // The landingpad instruction is immobile.
+ if (isa<LandingPadInst>(I))
+ return false;
// Determine the insertion point, unless one was given.
if (!InsertPt) {
BasicBlock *Preheader = getLoopPreheader();
@@ -110,7 +114,7 @@ bool Loop::makeLoopInvariant(Instruction *I, bool &Changed,
for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i)
if (!makeLoopInvariant(I->getOperand(i), Changed, InsertPt))
return false;
-
+
// Hoist.
I->moveBefore(InsertPt);
Changed = true;
@@ -383,6 +387,205 @@ void Loop::dump() const {
}
//===----------------------------------------------------------------------===//
+// UnloopUpdater implementation
+//
+
+namespace {
+/// Find the new parent loop for all blocks within the "unloop" whose last
+/// backedge has just been removed.
+class UnloopUpdater {
+ Loop *Unloop;
+ LoopInfo *LI;
+
+ LoopBlocksDFS DFS;
+
+ // Map unloop's immediate subloops to their nearest reachable parents. Nested
+ // loops within these subloops will not change parents. However, an immediate
+ // subloop's new parent will be the nearest loop reachable from either its own
+ // exits *or* any of its nested loops' exits.
+ DenseMap<Loop*, Loop*> SubloopParents;
+
+ // Flag the presence of an irreducible backedge whose destination is a block
+ // directly contained by the original unloop.
+ bool FoundIB;
+
+public:
+ UnloopUpdater(Loop *UL, LoopInfo *LInfo) :
+ Unloop(UL), LI(LInfo), DFS(UL), FoundIB(false) {}
+
+ void updateBlockParents();
+
+ void removeBlocksFromAncestors();
+
+ void updateSubloopParents();
+
+protected:
+ Loop *getNearestLoop(BasicBlock *BB, Loop *BBLoop);
+};
+} // end anonymous namespace
+
+/// updateBlockParents - Update the parent loop for all blocks that are directly
+/// contained within the original "unloop".
+void UnloopUpdater::updateBlockParents() {
+ if (Unloop->getNumBlocks()) {
+ // Perform a post order CFG traversal of all blocks within this loop,
+ // propagating the nearest loop from successors to predecessors.
+ LoopBlocksTraversal Traversal(DFS, LI);
+ for (LoopBlocksTraversal::POTIterator POI = Traversal.begin(),
+ POE = Traversal.end(); POI != POE; ++POI) {
+
+ Loop *L = LI->getLoopFor(*POI);
+ Loop *NL = getNearestLoop(*POI, L);
+
+ if (NL != L) {
+ // For reducible loops, NL is now an ancestor of Unloop.
+ assert((NL != Unloop && (!NL || NL->contains(Unloop))) &&
+ "uninitialized successor");
+ LI->changeLoopFor(*POI, NL);
+ }
+ else {
+ // Or the current block is part of a subloop, in which case its parent
+ // is unchanged.
+ assert((FoundIB || Unloop->contains(L)) && "uninitialized successor");
+ }
+ }
+ }
+ // Each irreducible loop within the unloop induces a round of iteration using
+ // the DFS result cached by Traversal.
+ bool Changed = FoundIB;
+ for (unsigned NIters = 0; Changed; ++NIters) {
+ assert(NIters < Unloop->getNumBlocks() && "runaway iterative algorithm");
+
+ // Iterate over the postorder list of blocks, propagating the nearest loop
+ // from successors to predecessors as before.
+ Changed = false;
+ for (LoopBlocksDFS::POIterator POI = DFS.beginPostorder(),
+ POE = DFS.endPostorder(); POI != POE; ++POI) {
+
+ Loop *L = LI->getLoopFor(*POI);
+ Loop *NL = getNearestLoop(*POI, L);
+ if (NL != L) {
+ assert(NL != Unloop && (!NL || NL->contains(Unloop)) &&
+ "uninitialized successor");
+ LI->changeLoopFor(*POI, NL);
+ Changed = true;
+ }
+ }
+ }
+}
+
+/// removeBlocksFromAncestors - Remove unloop's blocks from all ancestors below
+/// their new parents.
+void UnloopUpdater::removeBlocksFromAncestors() {
+ // Remove unloop's blocks from all ancestors below their new parents.
+ for (Loop::block_iterator BI = Unloop->block_begin(),
+ BE = Unloop->block_end(); BI != BE; ++BI) {
+ Loop *NewParent = LI->getLoopFor(*BI);
+ // If this block is an immediate subloop, remove all blocks (including
+ // nested subloops) from ancestors below the new parent loop.
+ // Otherwise, if this block is in a nested subloop, skip it.
+ if (SubloopParents.count(NewParent))
+ NewParent = SubloopParents[NewParent];
+ else if (Unloop->contains(NewParent))
+ continue;
+
+ // Remove blocks from former Ancestors except Unloop itself which will be
+ // deleted.
+ for (Loop *OldParent = Unloop->getParentLoop(); OldParent != NewParent;
+ OldParent = OldParent->getParentLoop()) {
+ assert(OldParent && "new loop is not an ancestor of the original");
+ OldParent->removeBlockFromLoop(*BI);
+ }
+ }
+}
+
+/// updateSubloopParents - Update the parent loop for all subloops directly
+/// nested within unloop.
+void UnloopUpdater::updateSubloopParents() {
+ while (!Unloop->empty()) {
+ Loop *Subloop = *llvm::prior(Unloop->end());
+ Unloop->removeChildLoop(llvm::prior(Unloop->end()));
+
+ assert(SubloopParents.count(Subloop) && "DFS failed to visit subloop");
+ if (SubloopParents[Subloop])
+ SubloopParents[Subloop]->addChildLoop(Subloop);
+ else
+ LI->addTopLevelLoop(Subloop);
+ }
+}
+
+/// getNearestLoop - Return the nearest parent loop among this block's
+/// successors. If a successor is a subloop header, consider its parent to be
+/// the nearest parent of the subloop's exits.
+///
+/// For subloop blocks, simply update SubloopParents and return NULL.
+Loop *UnloopUpdater::getNearestLoop(BasicBlock *BB, Loop *BBLoop) {
+
+ // Initially for blocks directly contained by Unloop, NearLoop == Unloop and
+ // is considered uninitialized.
+ Loop *NearLoop = BBLoop;
+
+ Loop *Subloop = 0;
+ if (NearLoop != Unloop && Unloop->contains(NearLoop)) {
+ Subloop = NearLoop;
+ // Find the subloop ancestor that is directly contained within Unloop.
+ while (Subloop->getParentLoop() != Unloop) {
+ Subloop = Subloop->getParentLoop();
+ assert(Subloop && "subloop is not an ancestor of the original loop");
+ }
+ // Get the current nearest parent of the Subloop exits, initially Unloop.
+ if (!SubloopParents.count(Subloop))
+ SubloopParents[Subloop] = Unloop;
+ NearLoop = SubloopParents[Subloop];
+ }
+
+ succ_iterator I = succ_begin(BB), E = succ_end(BB);
+ if (I == E) {
+ assert(!Subloop && "subloop blocks must have a successor");
+ NearLoop = 0; // unloop blocks may now exit the function.
+ }
+ for (; I != E; ++I) {
+ if (*I == BB)
+ continue; // self loops are uninteresting
+
+ Loop *L = LI->getLoopFor(*I);
+ if (L == Unloop) {
+ // This successor has not been processed. This path must lead to an
+ // irreducible backedge.
+ assert((FoundIB || !DFS.hasPostorder(*I)) && "should have seen IB");
+ FoundIB = true;
+ }
+ if (L != Unloop && Unloop->contains(L)) {
+ // Successor is in a subloop.
+ if (Subloop)
+ continue; // Branching within subloops. Ignore it.
+
+ // BB branches from the original into a subloop header.
+ assert(L->getParentLoop() == Unloop && "cannot skip into nested loops");
+
+ // Get the current nearest parent of the Subloop's exits.
+ L = SubloopParents[L];
+ // L could be Unloop if the only exit was an irreducible backedge.
+ }
+ if (L == Unloop) {
+ continue;
+ }
+ // Handle critical edges from Unloop into a sibling loop.
+ if (L && !L->contains(Unloop)) {
+ L = L->getParentLoop();
+ }
+ // Remember the nearest parent loop among successors or subloop exits.
+ if (NearLoop == Unloop || !NearLoop || NearLoop->contains(L))
+ NearLoop = L;
+ }
+ if (Subloop) {
+ SubloopParents[Subloop] = NearLoop;
+ return BBLoop;
+ }
+ return NearLoop;
+}
+
+//===----------------------------------------------------------------------===//
// LoopInfo implementation
//
bool LoopInfo::runOnFunction(Function &) {
@@ -391,6 +594,68 @@ bool LoopInfo::runOnFunction(Function &) {
return false;
}
+/// updateUnloop - The last backedge has been removed from a loop--now the
+/// "unloop". Find a new parent for the blocks contained within unloop and
+/// update the loop tree. We don't necessarily have valid dominators at this
+/// point, but LoopInfo is still valid except for the removal of this loop.
+///
+/// Note that Unloop may now be an empty loop. Calling Loop::getHeader without
+/// checking first is illegal.
+void LoopInfo::updateUnloop(Loop *Unloop) {
+
+ // First handle the special case of no parent loop to simplify the algorithm.
+ if (!Unloop->getParentLoop()) {
+ // Since Unloop had no parent, its blocks are no longer in any loop.
+ for (Loop::block_iterator I = Unloop->block_begin(),
+ E = Unloop->block_end(); I != E; ++I) {
+
+ // Don't reparent blocks in subloops.
+ if (getLoopFor(*I) != Unloop)
+ continue;
+
+ // Blocks no longer have a parent but are still referenced by Unloop until
+ // the Unloop object is deleted.
+ LI.changeLoopFor(*I, 0);
+ }
+
+ // Remove the loop from the top-level LoopInfo object.
+ for (LoopInfo::iterator I = LI.begin();; ++I) {
+ assert(I != LI.end() && "Couldn't find loop");
+ if (*I == Unloop) {
+ LI.removeLoop(I);
+ break;
+ }
+ }
+
+ // Move all of the subloops to the top-level.
+ while (!Unloop->empty())
+ LI.addTopLevelLoop(Unloop->removeChildLoop(llvm::prior(Unloop->end())));
+
+ return;
+ }
+
+ // Update the parent loop for all blocks within the loop. Blocks within
+ // subloops will not change parents.
+ UnloopUpdater Updater(Unloop, this);
+ Updater.updateBlockParents();
+
+ // Remove blocks from former ancestor loops.
+ Updater.removeBlocksFromAncestors();
+
+ // Add direct subloops as children in their new parent loop.
+ Updater.updateSubloopParents();
+
+ // Remove unloop from its parent loop.
+ Loop *ParentLoop = Unloop->getParentLoop();
+ for (Loop::iterator I = ParentLoop->begin();; ++I) {
+ assert(I != ParentLoop->end() && "Couldn't find loop");
+ if (*I == Unloop) {
+ ParentLoop->removeChildLoop(I);
+ break;
+ }
+ }
+}
+
void LoopInfo::verifyAnalysis() const {
// LoopInfo is a FunctionPass, but verifying every loop in the function
// each time verifyAnalysis is called is very expensive. The
@@ -400,12 +665,21 @@ void LoopInfo::verifyAnalysis() const {
if (!VerifyLoopInfo) return;
+ DenseSet<const Loop*> Loops;
for (iterator I = begin(), E = end(); I != E; ++I) {
assert(!(*I)->getParentLoop() && "Top-level loop has a parent!");
- (*I)->verifyLoopNest();
+ (*I)->verifyLoopNest(&Loops);
}
- // TODO: check BBMap consistency.
+ // Verify that blocks are mapped to valid loops.
+ //
+ // FIXME: With an up-to-date DFS (see LoopIterator.h) and DominatorTree, we
+ // could also verify that the blocks are still in the correct loops.
+ for (DenseMap<BasicBlock*, Loop*>::const_iterator I = LI.BBMap.begin(),
+ E = LI.BBMap.end(); I != E; ++I) {
+ assert(Loops.count(I->second) && "orphaned loop");
+ assert(I->second->contains(I->first) && "orphaned block");
+ }
}
void LoopInfo::getAnalysisUsage(AnalysisUsage &AU) const {
@@ -417,3 +691,15 @@ void LoopInfo::print(raw_ostream &OS, const Module*) const {
LI.print(OS);
}
+//===----------------------------------------------------------------------===//
+// LoopBlocksDFS implementation
+//
+
+/// Traverse the loop blocks and store the DFS result.
+/// Useful for clients that just want the final DFS result and don't need to
+/// visit blocks during the initial traversal.
+void LoopBlocksDFS::perform(LoopInfo *LI) {
+ LoopBlocksTraversal Traversal(*this, LI);
+ for (LoopBlocksTraversal::POTIterator POI = Traversal.begin(),
+ POE = Traversal.end(); POI != POE; ++POI) ;
+}
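
A hedged sketch of the intended client pattern, using only the members that
appear in this patch (the LoopBlocksDFS constructor, perform, and the
postorder iterators):

    #include "llvm/Analysis/LoopIterator.h"
    using namespace llvm;

    void visitLoopPostorder(Loop *L, LoopInfo *LI) {
      LoopBlocksDFS DFS(L);
      DFS.perform(LI);  // run the traversal once; the order is cached
      for (LoopBlocksDFS::POIterator I = DFS.beginPostorder(),
           E = DFS.endPostorder(); I != E; ++I) {
        BasicBlock *BB = *I;
        (void)BB;  // process the loop's blocks in CFG post order
      }
    }
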
diff --git a/lib/Analysis/LoopPass.cpp b/lib/Analysis/LoopPass.cpp
index 10e3f29..5ba1f40 100644
--- a/lib/Analysis/LoopPass.cpp
+++ b/lib/Analysis/LoopPass.cpp
@@ -59,9 +59,9 @@ char PrintLoopPass::ID = 0;
static DebugInfoProbeInfo *TheDebugProbe;
static void createDebugInfoProbe() {
if (TheDebugProbe) return;
-
- // Constructed the first time this is called. This guarantees that the
- // object will be constructed, if -enable-debug-info-probe is set,
+
+ // Constructed the first time this is called. This guarantees that the
+ // object will be constructed, if -enable-debug-info-probe is set,
// before static globals, thus it will be destroyed before them.
static ManagedStatic<DebugInfoProbeInfo> DIP;
TheDebugProbe = &*DIP;
@@ -73,73 +73,29 @@ static void createDebugInfoProbe() {
char LPPassManager::ID = 0;
-LPPassManager::LPPassManager(int Depth)
- : FunctionPass(ID), PMDataManager(Depth) {
+LPPassManager::LPPassManager()
+ : FunctionPass(ID), PMDataManager() {
skipThisLoop = false;
redoThisLoop = false;
LI = NULL;
CurrentLoop = NULL;
}
-/// Delete loop from the loop queue and loop hierarchy (LoopInfo).
+/// Delete loop from the loop queue and loop hierarchy (LoopInfo).
void LPPassManager::deleteLoopFromQueue(Loop *L) {
- if (Loop *ParentLoop = L->getParentLoop()) { // Not a top-level loop.
- // Reparent all of the blocks in this loop. Since BBLoop had a parent,
- // they are now all in it.
- for (Loop::block_iterator I = L->block_begin(), E = L->block_end();
- I != E; ++I)
- if (LI->getLoopFor(*I) == L) // Don't change blocks in subloops.
- LI->changeLoopFor(*I, ParentLoop);
-
- // Remove the loop from its parent loop.
- for (Loop::iterator I = ParentLoop->begin(), E = ParentLoop->end();;
- ++I) {
- assert(I != E && "Couldn't find loop");
- if (*I == L) {
- ParentLoop->removeChildLoop(I);
- break;
- }
- }
-
- // Move all subloops into the parent loop.
- while (!L->empty())
- ParentLoop->addChildLoop(L->removeChildLoop(L->end()-1));
- } else {
- // Reparent all of the blocks in this loop. Since BBLoop had no parent,
- // they no longer in a loop at all.
-
- for (unsigned i = 0; i != L->getBlocks().size(); ++i) {
- // Don't change blocks in subloops.
- if (LI->getLoopFor(L->getBlocks()[i]) == L) {
- LI->removeBlock(L->getBlocks()[i]);
- --i;
- }
- }
-
- // Remove the loop from the top-level LoopInfo object.
- for (LoopInfo::iterator I = LI->begin(), E = LI->end();; ++I) {
- assert(I != E && "Couldn't find loop");
- if (*I == L) {
- LI->removeLoop(I);
- break;
- }
- }
-
- // Move all of the subloops to the top-level.
- while (!L->empty())
- LI->addTopLevelLoop(L->removeChildLoop(L->end()-1));
- }
-
- delete L;
+ LI->updateUnloop(L);
// If L is current loop then skip rest of the passes and let
// runOnFunction remove L from LQ. Otherwise, remove L from LQ now
// and continue applying other passes on CurrentLoop.
- if (CurrentLoop == L) {
+ if (CurrentLoop == L)
skipThisLoop = true;
+
+ delete L;
+
+ if (skipThisLoop)
return;
- }
for (std::deque<Loop *>::iterator I = LQ.begin(),
E = LQ.end(); I != E; ++I) {
@@ -166,10 +122,10 @@ void LPPassManager::insertLoop(Loop *L, Loop *ParentLoop) {
void LPPassManager::insertLoopIntoQueue(Loop *L) {
// Insert L into loop queue
- if (L == CurrentLoop)
+ if (L == CurrentLoop)
redoLoop(L);
else if (!L->getParentLoop())
- // This is top level loop.
+ // This is top level loop.
LQ.push_front(L);
else {
// Insert L after the parent loop.
@@ -195,9 +151,9 @@ void LPPassManager::redoLoop(Loop *L) {
/// cloneBasicBlockSimpleAnalysis - Invoke cloneBasicBlockAnalysis hook for
/// all loop passes.
-void LPPassManager::cloneBasicBlockSimpleAnalysis(BasicBlock *From,
+void LPPassManager::cloneBasicBlockSimpleAnalysis(BasicBlock *From,
BasicBlock *To, Loop *L) {
- for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) {
+ for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) {
LoopPass *LP = getContainedPass(Index);
LP->cloneBasicBlockAnalysis(From, To, L);
}
@@ -206,13 +162,13 @@ void LPPassManager::cloneBasicBlockSimpleAnalysis(BasicBlock *From,
/// deleteSimpleAnalysisValue - Invoke deleteAnalysisValue hook for all passes.
void LPPassManager::deleteSimpleAnalysisValue(Value *V, Loop *L) {
if (BasicBlock *BB = dyn_cast<BasicBlock>(V)) {
- for (BasicBlock::iterator BI = BB->begin(), BE = BB->end(); BI != BE;
+ for (BasicBlock::iterator BI = BB->begin(), BE = BB->end(); BI != BE;
++BI) {
Instruction &I = *BI;
deleteSimpleAnalysisValue(&I, L);
}
}
- for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) {
+ for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) {
LoopPass *LP = getContainedPass(Index);
LP->deleteAnalysisValue(V, L);
}
@@ -228,7 +184,7 @@ static void addLoopIntoQueue(Loop *L, std::deque<Loop *> &LQ) {
/// Pass Manager itself does not invalidate any analysis info.
void LPPassManager::getAnalysisUsage(AnalysisUsage &Info) const {
- // LPPassManager needs LoopInfo. In the long term LoopInfo class will
+ // LPPassManager needs LoopInfo. In the long term LoopInfo class will
// become part of LPPassManager.
Info.addRequired<LoopInfo>();
Info.setPreservesAll();
@@ -255,7 +211,7 @@ bool LPPassManager::runOnFunction(Function &F) {
for (std::deque<Loop *>::const_iterator I = LQ.begin(), E = LQ.end();
I != E; ++I) {
Loop *L = *I;
- for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) {
+ for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) {
LoopPass *P = getContainedPass(Index);
Changed |= P->doInitialization(L, *this);
}
@@ -263,13 +219,13 @@ bool LPPassManager::runOnFunction(Function &F) {
// Walk Loops
while (!LQ.empty()) {
-
+
CurrentLoop = LQ.back();
skipThisLoop = false;
redoThisLoop = false;
// Run all passes on the current Loop.
- for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) {
+ for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) {
LoopPass *P = getContainedPass(Index);
dumpPassInfo(P, EXECUTION_MSG, ON_LOOP_MSG,
CurrentLoop->getHeader()->getName());
@@ -319,23 +275,23 @@ bool LPPassManager::runOnFunction(Function &F) {
// Do not run other passes on this loop.
break;
}
-
+
// If the loop was deleted, release all the loop passes. This frees up
// some memory, and avoids trouble with the pass manager trying to call
// verifyAnalysis on them.
if (skipThisLoop)
- for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) {
+ for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) {
Pass *P = getContainedPass(Index);
freePass(P, "<deleted>", ON_LOOP_MSG);
}
// Pop the loop from queue after running all passes.
LQ.pop_back();
-
+
if (redoThisLoop)
LQ.push_back(CurrentLoop);
}
-
+
// Finalization
for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) {
LoopPass *P = getContainedPass(Index);
@@ -372,7 +328,7 @@ Pass *LoopPass::createPrinterPass(raw_ostream &O,
// LPPassManger as expected.
void LoopPass::preparePassManager(PMStack &PMS) {
- // Find LPPassManager
+ // Find LPPassManager
while (!PMS.empty() &&
PMS.top()->getPassManagerType() > PMT_LoopPassManager)
PMS.pop();
@@ -381,14 +337,14 @@ void LoopPass::preparePassManager(PMStack &PMS) {
// by other passes that are managed by LPM then do not insert
// this pass in current LPM. Use new LPPassManager.
if (PMS.top()->getPassManagerType() == PMT_LoopPassManager &&
- !PMS.top()->preserveHigherLevelAnalysis(this))
+ !PMS.top()->preserveHigherLevelAnalysis(this))
PMS.pop();
}
/// Assign pass manager to manage this pass.
void LoopPass::assignPassManager(PMStack &PMS,
PassManagerType PreferredType) {
- // Find LPPassManager
+ // Find LPPassManager
while (!PMS.empty() &&
PMS.top()->getPassManagerType() > PMT_LoopPassManager)
PMS.pop();
@@ -397,12 +353,12 @@ void LoopPass::assignPassManager(PMStack &PMS,
if (PMS.top()->getPassManagerType() == PMT_LoopPassManager)
LPPM = (LPPassManager*)PMS.top();
else {
- // Create new Loop Pass Manager if it does not exist.
+ // Create new Loop Pass Manager if it does not exist.
assert (!PMS.empty() && "Unable to create Loop Pass Manager");
PMDataManager *PMD = PMS.top();
- // [1] Create new Call Graph Pass Manager
- LPPM = new LPPassManager(PMD->getDepth() + 1);
+ // [1] Create new Loop Pass Manager
+ LPPM = new LPPassManager();
LPPM->populateInheritedAnalysis(PMS);
// [2] Set up new manager's top level manager
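
The ordering that deleteLoopFromQueue now follows (update LoopInfo first, free
the Loop object afterwards) generalizes to any transform that removes a loop's
last backedge. A hedged sketch of that contract, with a hypothetical helper
name:

    #include "llvm/Analysis/LoopInfo.h"
    using namespace llvm;

    void eraseDeadLoop(Loop *L, LoopInfo *LI) {
      // Reparent L's blocks and subloops while the Loop object is intact...
      LI->updateUnloop(L);
      // ...and only then delete it, exactly as LPPassManager does above.
      delete L;
    }
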
diff --git a/lib/Analysis/MemDepPrinter.cpp b/lib/Analysis/MemDepPrinter.cpp
index 2283db0..fde07ea 100644
--- a/lib/Analysis/MemDepPrinter.cpp
+++ b/lib/Analysis/MemDepPrinter.cpp
@@ -25,8 +25,17 @@ namespace {
struct MemDepPrinter : public FunctionPass {
const Function *F;
- typedef PointerIntPair<const Instruction *, 1> InstAndClobberFlag;
- typedef std::pair<InstAndClobberFlag, const BasicBlock *> Dep;
+ enum DepType {
+ Clobber = 0,
+ Def,
+ NonFuncLocal,
+ Unknown
+ };
+
+ static const char* DepTypeStr[];
+
+ typedef PointerIntPair<const Instruction *, 2, DepType> InstTypePair;
+ typedef std::pair<InstTypePair, const BasicBlock *> Dep;
typedef SmallSetVector<Dep, 4> DepSet;
typedef DenseMap<const Instruction *, DepSet> DepSetMap;
DepSetMap Deps;
@@ -50,6 +59,21 @@ namespace {
Deps.clear();
F = 0;
}
+
+ private:
+ static InstTypePair getInstTypePair(MemDepResult dep) {
+ if (dep.isClobber())
+ return InstTypePair(dep.getInst(), Clobber);
+ if (dep.isDef())
+ return InstTypePair(dep.getInst(), Def);
+ if (dep.isNonFuncLocal())
+ return InstTypePair(dep.getInst(), NonFuncLocal);
+ assert(dep.isUnknown() && "unexpected dependence type");
+ return InstTypePair(dep.getInst(), Unknown);
+ }
+ static InstTypePair getInstTypePair(const Instruction* inst, DepType type) {
+ return InstTypePair(inst, type);
+ }
};
}
@@ -64,6 +88,9 @@ FunctionPass *llvm::createMemDepPrinter() {
return new MemDepPrinter();
}
+const char* MemDepPrinter::DepTypeStr[]
+ = {"Clobber", "Def", "NonFuncLocal", "Unknown"};
+
bool MemDepPrinter::runOnFunction(Function &F) {
this->F = &F;
AliasAnalysis &AA = getAnalysis<AliasAnalysis>();
@@ -79,10 +106,7 @@ bool MemDepPrinter::runOnFunction(Function &F) {
MemDepResult Res = MDA.getDependency(Inst);
if (!Res.isNonLocal()) {
- assert((Res.isUnknown() || Res.isClobber() || Res.isDef()) &&
- "Local dep should be unknown, def or clobber!");
- Deps[Inst].insert(std::make_pair(InstAndClobberFlag(Res.getInst(),
- Res.isClobber()),
+ Deps[Inst].insert(std::make_pair(getInstTypePair(Res),
static_cast<BasicBlock *>(0)));
} else if (CallSite CS = cast<Value>(Inst)) {
const MemoryDependenceAnalysis::NonLocalDepInfo &NLDI =
@@ -92,22 +116,26 @@ bool MemDepPrinter::runOnFunction(Function &F) {
for (MemoryDependenceAnalysis::NonLocalDepInfo::const_iterator
I = NLDI.begin(), E = NLDI.end(); I != E; ++I) {
const MemDepResult &Res = I->getResult();
- assert((Res.isUnknown() || Res.isClobber() || Res.isDef()) &&
- "Resolved non-local call dep should be unknown, def or "
- "clobber!");
- InstDeps.insert(std::make_pair(InstAndClobberFlag(Res.getInst(),
- Res.isClobber()),
- I->getBB()));
+ InstDeps.insert(std::make_pair(getInstTypePair(Res), I->getBB()));
}
} else {
SmallVector<NonLocalDepResult, 4> NLDI;
if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) {
- // FIXME: Volatile is not handled properly here.
+ if (!LI->isUnordered()) {
+ // FIXME: Handle atomic/volatile loads.
+ Deps[Inst].insert(std::make_pair(getInstTypePair(0, Unknown),
+ static_cast<BasicBlock *>(0)));
+ continue;
+ }
AliasAnalysis::Location Loc = AA.getLocation(LI);
- MDA.getNonLocalPointerDependency(Loc, !LI->isVolatile(),
- LI->getParent(), NLDI);
+ MDA.getNonLocalPointerDependency(Loc, true, LI->getParent(), NLDI);
} else if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
- // FIXME: Volatile is not handled properly here.
+ if (!SI->isUnordered()) {
+ // FIXME: Handle atomic/volatile stores.
+ Deps[Inst].insert(std::make_pair(getInstTypePair(0, Unknown),
+ static_cast<BasicBlock *>(0)));
+ continue;
+ }
AliasAnalysis::Location Loc = AA.getLocation(SI);
MDA.getNonLocalPointerDependency(Loc, false, SI->getParent(), NLDI);
} else if (VAArgInst *VI = dyn_cast<VAArgInst>(Inst)) {
@@ -121,11 +149,7 @@ bool MemDepPrinter::runOnFunction(Function &F) {
for (SmallVectorImpl<NonLocalDepResult>::const_iterator
I = NLDI.begin(), E = NLDI.end(); I != E; ++I) {
const MemDepResult &Res = I->getResult();
- assert(Res.isClobber() != Res.isDef() &&
- "Resolved non-local pointer dep should be def or clobber!");
- InstDeps.insert(std::make_pair(InstAndClobberFlag(Res.getInst(),
- Res.isClobber()),
- I->getBB()));
+ InstDeps.insert(std::make_pair(getInstTypePair(Res), I->getBB()));
}
}
}
@@ -146,26 +170,18 @@ void MemDepPrinter::print(raw_ostream &OS, const Module *M) const {
for (DepSet::const_iterator I = InstDeps.begin(), E = InstDeps.end();
I != E; ++I) {
const Instruction *DepInst = I->first.getPointer();
- bool isClobber = I->first.getInt();
+ DepType type = I->first.getInt();
const BasicBlock *DepBB = I->second;
OS << " ";
- if (!DepInst)
- OS << "Unknown";
- else if (isClobber)
- OS << "Clobber";
- else
- OS << " Def";
+ OS << DepTypeStr[type];
if (DepBB) {
OS << " in block ";
WriteAsOperand(OS, DepBB, /*PrintType=*/false, M);
}
if (DepInst) {
OS << " from: ";
- if (DepInst == Inst)
- OS << "<unspecified>";
- else
- DepInst->print(OS);
+ DepInst->print(OS);
}
OS << "\n";
}
diff --git a/lib/Analysis/MemoryBuiltins.cpp b/lib/Analysis/MemoryBuiltins.cpp
index 53d4304..8d451c4 100644
--- a/lib/Analysis/MemoryBuiltins.cpp
+++ b/lib/Analysis/MemoryBuiltins.cpp
@@ -47,7 +47,7 @@ static bool isMallocCall(const CallInst *CI) {
// Check malloc prototype.
// FIXME: workaround for PR5130, this will be obsolete when a nobuiltin
// attribute will exist.
- const FunctionType *FTy = Callee->getFunctionType();
+ FunctionType *FTy = Callee->getFunctionType();
if (FTy->getNumParams() != 1)
return false;
return FTy->getParamType(0)->isIntegerTy(32) ||
@@ -94,12 +94,12 @@ static Value *computeArraySize(const CallInst *CI, const TargetData *TD,
return NULL;
// The size of the malloc's result type must be known to determine array size.
- const Type *T = getMallocAllocatedType(CI);
+ Type *T = getMallocAllocatedType(CI);
if (!T || !T->isSized() || !TD)
return NULL;
unsigned ElementSize = TD->getTypeAllocSize(T);
- if (const StructType *ST = dyn_cast<StructType>(T))
+ if (StructType *ST = dyn_cast<StructType>(T))
ElementSize = TD->getStructLayout(ST)->getSizeInBytes();
// If malloc call's arg can be determined to be a multiple of ElementSize,
@@ -133,10 +133,10 @@ const CallInst *llvm::isArrayMalloc(const Value *I, const TargetData *TD) {
/// 0: PointerType is the calls' return type.
/// 1: PointerType is the bitcast's result type.
/// >1: Unique PointerType cannot be determined, return NULL.
-const PointerType *llvm::getMallocType(const CallInst *CI) {
+PointerType *llvm::getMallocType(const CallInst *CI) {
assert(isMalloc(CI) && "getMallocType and not malloc call");
- const PointerType *MallocType = NULL;
+ PointerType *MallocType = NULL;
unsigned NumOfBitCastUses = 0;
// Determine if CallInst has a bitcast use.
@@ -164,8 +164,8 @@ const PointerType *llvm::getMallocType(const CallInst *CI) {
/// 0: PointerType is the malloc calls' return type.
/// 1: PointerType is the bitcast's result type.
/// >1: Unique PointerType cannot be determined, return NULL.
-const Type *llvm::getMallocAllocatedType(const CallInst *CI) {
- const PointerType *PT = getMallocType(CI);
+Type *llvm::getMallocAllocatedType(const CallInst *CI) {
+ PointerType *PT = getMallocType(CI);
return PT ? PT->getElementType() : NULL;
}
@@ -201,7 +201,7 @@ const CallInst *llvm::isFreeCall(const Value *I) {
// Check free prototype.
// FIXME: workaround for PR5130, this will be obsolete when a nobuiltin
// attribute will exist.
- const FunctionType *FTy = Callee->getFunctionType();
+ FunctionType *FTy = Callee->getFunctionType();
if (!FTy->getReturnType()->isVoidTy())
return 0;
if (FTy->getNumParams() != 1)
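
A hedged sketch of the de-constified helpers in use; isMalloc and both getters
live in MemoryBuiltins.h, and the point of dropping const is that callers can
now mutate the returned types without a cast:

    #include "llvm/Analysis/MemoryBuiltins.h"
    using namespace llvm;

    Type *allocatedTypeOf(const CallInst *CI) {
      if (!isMalloc(CI))
        return 0;
      // Null if no unique bitcast pins down the malloc'd type.
      return getMallocAllocatedType(CI);
    }
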
diff --git a/lib/Analysis/MemoryDependenceAnalysis.cpp b/lib/Analysis/MemoryDependenceAnalysis.cpp
index bba4482..92967c0 100644
--- a/lib/Analysis/MemoryDependenceAnalysis.cpp
+++ b/lib/Analysis/MemoryDependenceAnalysis.cpp
@@ -120,21 +120,27 @@ AliasAnalysis::ModRefResult GetLocation(const Instruction *Inst,
AliasAnalysis::Location &Loc,
AliasAnalysis *AA) {
if (const LoadInst *LI = dyn_cast<LoadInst>(Inst)) {
- if (LI->isVolatile()) {
- Loc = AliasAnalysis::Location();
+ if (LI->isUnordered()) {
+ Loc = AA->getLocation(LI);
+ return AliasAnalysis::Ref;
+ } else if (LI->getOrdering() == Monotonic) {
+ Loc = AA->getLocation(LI);
return AliasAnalysis::ModRef;
}
- Loc = AA->getLocation(LI);
- return AliasAnalysis::Ref;
+ Loc = AliasAnalysis::Location();
+ return AliasAnalysis::ModRef;
}
if (const StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
- if (SI->isVolatile()) {
- Loc = AliasAnalysis::Location();
+ if (SI->isUnordered()) {
+ Loc = AA->getLocation(SI);
+ return AliasAnalysis::Mod;
+ } else if (SI->getOrdering() == Monotonic) {
+ Loc = AA->getLocation(SI);
return AliasAnalysis::ModRef;
}
- Loc = AA->getLocation(SI);
- return AliasAnalysis::Mod;
+ Loc = AliasAnalysis::Location();
+ return AliasAnalysis::ModRef;
}
if (const VAArgInst *V = dyn_cast<VAArgInst>(Inst)) {
@@ -232,7 +238,7 @@ getCallSiteDependencyFrom(CallSite CS, bool isReadOnlyCall,
// unknown, otherwise it is non-local.
if (BB != &BB->getParent()->getEntryBlock())
return MemDepResult::getNonLocal();
- return MemDepResult::getUnknown();
+ return MemDepResult::getNonFuncLocal();
}
/// isLoadLoadClobberIfExtendedToFullWidth - Return true if LI is a load that
@@ -270,8 +276,8 @@ unsigned MemoryDependenceAnalysis::
getLoadLoadClobberFullWidthSize(const Value *MemLocBase, int64_t MemLocOffs,
unsigned MemLocSize, const LoadInst *LI,
const TargetData &TD) {
- // We can only extend non-volatile integer loads.
- if (!isa<IntegerType>(LI->getType()) || LI->isVolatile()) return 0;
+ // We can only extend simple integer loads.
+ if (!isa<IntegerType>(LI->getType()) || !LI->isSimple()) return 0;
// Get the base of this load.
int64_t LIOffs = 0;
@@ -369,6 +375,11 @@ getPointerDependencyFrom(const AliasAnalysis::Location &MemLoc, bool isLoad,
// Values depend on loads if the pointers are must aliased. This means that
// a load depends on another must aliased load from the same value.
if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) {
+ // Atomic loads have complications this walk does not try to model.
+ // FIXME: This is overly conservative.
+ if (!LI->isUnordered())
+ return MemDepResult::getClobber(LI);
+
AliasAnalysis::Location LoadLoc = AA->getLocation(LI);
// If we found a pointer, check if it could be the same as our pointer.
@@ -382,7 +393,7 @@ getPointerDependencyFrom(const AliasAnalysis::Location &MemLoc, bool isLoad,
// location is 1 byte at P+1). If so, return it as a load/load
// clobber result, allowing the client to decide to widen the load if
// it wants to.
- if (const IntegerType *ITy = dyn_cast<IntegerType>(LI->getType()))
+ if (IntegerType *ITy = dyn_cast<IntegerType>(LI->getType()))
if (LI->getAlignment()*8 > ITy->getPrimitiveSizeInBits() &&
isLoadLoadClobberIfExtendedToFullWidth(MemLoc, MemLocBase,
MemLocOffset, LI, TD))
@@ -424,6 +435,11 @@ getPointerDependencyFrom(const AliasAnalysis::Location &MemLoc, bool isLoad,
}
if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
+ // Atomic stores have complications this walk does not try to model.
+ // FIXME: This is overly conservative.
+ if (!SI->isUnordered())
+ return MemDepResult::getClobber(SI);
+
// If alias analysis can tell that this store is guaranteed to not modify
// the query pointer, ignore it. Use getModRefInfo to handle cases where
// the query pointer points to constant memory etc.
@@ -483,7 +499,7 @@ getPointerDependencyFrom(const AliasAnalysis::Location &MemLoc, bool isLoad,
// unknown, otherwise it is non-local.
if (BB != &BB->getParent()->getEntryBlock())
return MemDepResult::getNonLocal();
- return MemDepResult::getUnknown();
+ return MemDepResult::getNonFuncLocal();
}
/// getDependency - Return the instruction on which a memory operation
@@ -516,7 +532,7 @@ MemDepResult MemoryDependenceAnalysis::getDependency(Instruction *QueryInst) {
if (QueryParent != &QueryParent->getParent()->getEntryBlock())
LocalCache = MemDepResult::getNonLocal();
else
- LocalCache = MemDepResult::getUnknown();
+ LocalCache = MemDepResult::getNonFuncLocal();
} else {
AliasAnalysis::Location MemLoc;
AliasAnalysis::ModRefResult MR = GetLocation(QueryInst, MemLoc, AA);
@@ -672,7 +688,7 @@ MemoryDependenceAnalysis::getNonLocalCallDependency(CallSite QueryCS) {
// a clobber, otherwise it is unknown.
Dep = MemDepResult::getNonLocal();
} else {
- Dep = MemDepResult::getUnknown();
+ Dep = MemDepResult::getNonFuncLocal();
}
// If we had a dirty entry for the block, update it. Otherwise, just add
@@ -790,7 +806,7 @@ GetNonLocalInfoForBlock(const AliasAnalysis::Location &Loc,
// If the block has a dependency (i.e. it isn't completely transparent to
// the value), remember the reverse association because we just added it
// to Cache!
- if (Dep.isNonLocal() || Dep.isUnknown())
+ if (!Dep.isDef() && !Dep.isClobber())
return Dep;
// Keep the ReverseNonLocalPtrDeps map up to date so we can efficiently
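
With getNonFuncLocal split out of getUnknown, a client now sees four terminal
answers plus the non-local marker. A hedged sketch using only predicates that
appear in this patch:

    #include "llvm/Analysis/MemoryDependenceAnalysis.h"
    using namespace llvm;

    const char *classify(const MemDepResult &R) {
      if (R.isClobber())      return "clobbered by R.getInst()";
      if (R.isDef())          return "defined by R.getInst()";
      if (R.isNonLocal())     return "keep searching in predecessor blocks";
      if (R.isNonFuncLocal()) return "no dependence anywhere in this function";
      return "unknown: the analysis gave up";
    }
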
diff --git a/lib/Analysis/PHITransAddr.cpp b/lib/Analysis/PHITransAddr.cpp
index 70dcd0d..7e22ddc 100644
--- a/lib/Analysis/PHITransAddr.cpp
+++ b/lib/Analysis/PHITransAddr.cpp
@@ -228,7 +228,7 @@ Value *PHITransAddr::PHITranslateSubExpr(Value *V, BasicBlock *CurBB,
return GEP;
// Simplify the GEP to handle 'gep x, 0' -> x etc.
- if (Value *V = SimplifyGEPInst(&GEPOps[0], GEPOps.size(), TD, DT)) {
+ if (Value *V = SimplifyGEPInst(GEPOps, TD, DT)) {
for (unsigned i = 0, e = GEPOps.size(); i != e; ++i)
RemoveInstInputs(GEPOps[i], InstInputs);
@@ -407,9 +407,9 @@ InsertPHITranslatedSubExpr(Value *InVal, BasicBlock *CurBB,
}
GetElementPtrInst *Result =
- GetElementPtrInst::Create(GEPOps[0], GEPOps.begin()+1, GEPOps.end(),
- InVal->getName()+".phi.trans.insert",
- PredBB->getTerminator());
+ GetElementPtrInst::Create(GEPOps[0], makeArrayRef(GEPOps).slice(1),
+ InVal->getName()+".phi.trans.insert",
+ PredBB->getTerminator());
Result->setIsInBounds(GEP->isInBounds());
NewInsts.push_back(Result);
return Result;
diff --git a/lib/Analysis/PathNumbering.cpp b/lib/Analysis/PathNumbering.cpp
index 7c584da..0e3b6e6 100644
--- a/lib/Analysis/PathNumbering.cpp
+++ b/lib/Analysis/PathNumbering.cpp
@@ -387,7 +387,7 @@ void BallLarusDag::buildNode(BLBlockNodeMap& inDag, BLNodeStack& dfsStack) {
TerminatorInst* terminator = currentNode->getBlock()->getTerminator();
if(isa<ReturnInst>(terminator) || isa<UnreachableInst>(terminator)
- || isa<UnwindInst>(terminator))
+ || isa<ResumeInst>(terminator) || isa<UnwindInst>(terminator))
addEdge(currentNode, getExit(),0);
currentNode->setColor(BallLarusNode::GRAY);
diff --git a/lib/Analysis/RegionPass.cpp b/lib/Analysis/RegionPass.cpp
index 80eda79..3a3529b 100644
--- a/lib/Analysis/RegionPass.cpp
+++ b/lib/Analysis/RegionPass.cpp
@@ -27,8 +27,8 @@ using namespace llvm;
char RGPassManager::ID = 0;
-RGPassManager::RGPassManager(int Depth)
- : FunctionPass(ID), PMDataManager(Depth) {
+RGPassManager::RGPassManager()
+ : FunctionPass(ID), PMDataManager() {
skipThisRegion = false;
redoThisRegion = false;
RI = NULL;
@@ -250,7 +250,7 @@ void RegionPass::assignPassManager(PMStack &PMS,
PMDataManager *PMD = PMS.top();
// [1] Create new Region Pass Manager
- RGPM = new RGPassManager(PMD->getDepth() + 1);
+ RGPM = new RGPassManager();
RGPM->populateInheritedAnalysis(PMS);
// [2] Set up new manager's top level manager
diff --git a/lib/Analysis/ScalarEvolution.cpp b/lib/Analysis/ScalarEvolution.cpp
index 025718e..e0ac56c 100644
--- a/lib/Analysis/ScalarEvolution.cpp
+++ b/lib/Analysis/ScalarEvolution.cpp
@@ -197,7 +197,7 @@ void SCEV::print(raw_ostream &OS) const {
}
case scUnknown: {
const SCEVUnknown *U = cast<SCEVUnknown>(this);
- const Type *AllocTy;
+ Type *AllocTy;
if (U->isSizeOf(AllocTy)) {
OS << "sizeof(" << *AllocTy << ")";
return;
@@ -207,7 +207,7 @@ void SCEV::print(raw_ostream &OS) const {
return;
}
- const Type *CTy;
+ Type *CTy;
Constant *FieldNo;
if (U->isOffsetOf(CTy, FieldNo)) {
OS << "offsetof(" << *CTy << ", ";
@@ -228,7 +228,7 @@ void SCEV::print(raw_ostream &OS) const {
llvm_unreachable("Unknown SCEV kind!");
}
-const Type *SCEV::getType() const {
+Type *SCEV::getType() const {
switch (getSCEVType()) {
case scConstant:
return cast<SCEVConstant>(this)->getType();
@@ -297,17 +297,17 @@ const SCEV *ScalarEvolution::getConstant(const APInt& Val) {
}
const SCEV *
-ScalarEvolution::getConstant(const Type *Ty, uint64_t V, bool isSigned) {
- const IntegerType *ITy = cast<IntegerType>(getEffectiveSCEVType(Ty));
+ScalarEvolution::getConstant(Type *Ty, uint64_t V, bool isSigned) {
+ IntegerType *ITy = cast<IntegerType>(getEffectiveSCEVType(Ty));
return getConstant(ConstantInt::get(ITy, V, isSigned));
}
SCEVCastExpr::SCEVCastExpr(const FoldingSetNodeIDRef ID,
- unsigned SCEVTy, const SCEV *op, const Type *ty)
+ unsigned SCEVTy, const SCEV *op, Type *ty)
: SCEV(ID, SCEVTy), Op(op), Ty(ty) {}
SCEVTruncateExpr::SCEVTruncateExpr(const FoldingSetNodeIDRef ID,
- const SCEV *op, const Type *ty)
+ const SCEV *op, Type *ty)
: SCEVCastExpr(ID, scTruncate, op, ty) {
assert((Op->getType()->isIntegerTy() || Op->getType()->isPointerTy()) &&
(Ty->isIntegerTy() || Ty->isPointerTy()) &&
@@ -315,7 +315,7 @@ SCEVTruncateExpr::SCEVTruncateExpr(const FoldingSetNodeIDRef ID,
}
SCEVZeroExtendExpr::SCEVZeroExtendExpr(const FoldingSetNodeIDRef ID,
- const SCEV *op, const Type *ty)
+ const SCEV *op, Type *ty)
: SCEVCastExpr(ID, scZeroExtend, op, ty) {
assert((Op->getType()->isIntegerTy() || Op->getType()->isPointerTy()) &&
(Ty->isIntegerTy() || Ty->isPointerTy()) &&
@@ -323,7 +323,7 @@ SCEVZeroExtendExpr::SCEVZeroExtendExpr(const FoldingSetNodeIDRef ID,
}
SCEVSignExtendExpr::SCEVSignExtendExpr(const FoldingSetNodeIDRef ID,
- const SCEV *op, const Type *ty)
+ const SCEV *op, Type *ty)
: SCEVCastExpr(ID, scSignExtend, op, ty) {
assert((Op->getType()->isIntegerTy() || Op->getType()->isPointerTy()) &&
(Ty->isIntegerTy() || Ty->isPointerTy()) &&
@@ -354,7 +354,7 @@ void SCEVUnknown::allUsesReplacedWith(Value *New) {
setValPtr(New);
}
-bool SCEVUnknown::isSizeOf(const Type *&AllocTy) const {
+bool SCEVUnknown::isSizeOf(Type *&AllocTy) const {
if (ConstantExpr *VCE = dyn_cast<ConstantExpr>(getValue()))
if (VCE->getOpcode() == Instruction::PtrToInt)
if (ConstantExpr *CE = dyn_cast<ConstantExpr>(VCE->getOperand(0)))
@@ -371,15 +371,15 @@ bool SCEVUnknown::isSizeOf(const Type *&AllocTy) const {
return false;
}
-bool SCEVUnknown::isAlignOf(const Type *&AllocTy) const {
+bool SCEVUnknown::isAlignOf(Type *&AllocTy) const {
if (ConstantExpr *VCE = dyn_cast<ConstantExpr>(getValue()))
if (VCE->getOpcode() == Instruction::PtrToInt)
if (ConstantExpr *CE = dyn_cast<ConstantExpr>(VCE->getOperand(0)))
if (CE->getOpcode() == Instruction::GetElementPtr &&
CE->getOperand(0)->isNullValue()) {
- const Type *Ty =
+ Type *Ty =
cast<PointerType>(CE->getOperand(0)->getType())->getElementType();
- if (const StructType *STy = dyn_cast<StructType>(Ty))
+ if (StructType *STy = dyn_cast<StructType>(Ty))
if (!STy->isPacked() &&
CE->getNumOperands() == 3 &&
CE->getOperand(1)->isNullValue()) {
@@ -396,7 +396,7 @@ bool SCEVUnknown::isAlignOf(const Type *&AllocTy) const {
return false;
}
-bool SCEVUnknown::isOffsetOf(const Type *&CTy, Constant *&FieldNo) const {
+bool SCEVUnknown::isOffsetOf(Type *&CTy, Constant *&FieldNo) const {
if (ConstantExpr *VCE = dyn_cast<ConstantExpr>(getValue()))
if (VCE->getOpcode() == Instruction::PtrToInt)
if (ConstantExpr *CE = dyn_cast<ConstantExpr>(VCE->getOperand(0)))
@@ -404,7 +404,7 @@ bool SCEVUnknown::isOffsetOf(const Type *&CTy, Constant *&FieldNo) const {
CE->getNumOperands() == 3 &&
CE->getOperand(0)->isNullValue() &&
CE->getOperand(1)->isNullValue()) {
- const Type *Ty =
+ Type *Ty =
cast<PointerType>(CE->getOperand(0)->getType())->getElementType();
// Ignore vector types here so that ScalarEvolutionExpander doesn't
// emit getelementptrs that index into vectors.
@@ -652,7 +652,7 @@ static void GroupByComplexity(SmallVectorImpl<const SCEV *> &Ops,
/// Assume, K > 0.
static const SCEV *BinomialCoefficient(const SCEV *It, unsigned K,
ScalarEvolution &SE,
- const Type* ResultTy) {
+ Type *ResultTy) {
// Handle the simplest case efficiently.
if (K == 1)
return SE.getTruncateOrZeroExtend(It, ResultTy);
@@ -742,7 +742,7 @@ static const SCEV *BinomialCoefficient(const SCEV *It, unsigned K,
MultiplyFactor = MultiplyFactor.trunc(W);
// Calculate the product, at width T+W
- const IntegerType *CalculationTy = IntegerType::get(SE.getContext(),
+ IntegerType *CalculationTy = IntegerType::get(SE.getContext(),
CalculationBits);
const SCEV *Dividend = SE.getTruncateOrZeroExtend(It, CalculationTy);
for (unsigned i = 1; i != K; ++i) {
@@ -790,7 +790,7 @@ const SCEV *SCEVAddRecExpr::evaluateAtIteration(const SCEV *It,
//===----------------------------------------------------------------------===//
const SCEV *ScalarEvolution::getTruncateExpr(const SCEV *Op,
- const Type *Ty) {
+ Type *Ty) {
assert(getTypeSizeInBits(Op->getType()) > getTypeSizeInBits(Ty) &&
"This is not a truncating conversion!");
assert(isSCEVable(Ty) &&
@@ -877,7 +877,7 @@ const SCEV *ScalarEvolution::getTruncateExpr(const SCEV *Op,
}
const SCEV *ScalarEvolution::getZeroExtendExpr(const SCEV *Op,
- const Type *Ty) {
+ Type *Ty) {
assert(getTypeSizeInBits(Op->getType()) < getTypeSizeInBits(Ty) &&
"This is not an extending conversion!");
assert(isSCEVable(Ty) &&
@@ -954,7 +954,7 @@ const SCEV *ScalarEvolution::getZeroExtendExpr(const SCEV *Op,
const SCEV *RecastedMaxBECount =
getTruncateOrZeroExtend(CastedMaxBECount, MaxBECount->getType());
if (MaxBECount == RecastedMaxBECount) {
- const Type *WideTy = IntegerType::get(getContext(), BitWidth * 2);
+ Type *WideTy = IntegerType::get(getContext(), BitWidth * 2);
// Check whether Start+Step*MaxBECount has no unsigned overflow.
const SCEV *ZMul = getMulExpr(CastedMaxBECount, Step);
const SCEV *Add = getAddExpr(Start, ZMul);
@@ -1062,7 +1062,7 @@ static const SCEV *getOverflowLimitForStep(const SCEV *Step,
// result, the expression "Step + sext(PreIncAR)" is congruent with
// "sext(PostIncAR)"
static const SCEV *getPreStartForSignExtend(const SCEVAddRecExpr *AR,
- const Type *Ty,
+ Type *Ty,
ScalarEvolution *SE) {
const Loop *L = AR->getLoop();
const SCEV *Start = AR->getStart();
@@ -1070,14 +1070,26 @@ static const SCEV *getPreStartForSignExtend(const SCEVAddRecExpr *AR,
// Check for a simple looking step prior to loop entry.
const SCEVAddExpr *SA = dyn_cast<SCEVAddExpr>(Start);
- if (!SA || SA->getNumOperands() != 2 || SA->getOperand(0) != Step)
+ if (!SA)
+ return 0;
+
+ // Create an AddExpr for "PreStart" after subtracting Step. Full SCEV
+ // subtraction is expensive. For this purpose, perform a quick and dirty
+ // difference, by checking for Step in the operand list.
+ SmallVector<const SCEV *, 4> DiffOps;
+ for (SCEVAddExpr::op_iterator I = SA->op_begin(), E = SA->op_end();
+ I != E; ++I) {
+ if (*I != Step)
+ DiffOps.push_back(*I);
+ }
+ if (DiffOps.size() == SA->getNumOperands())
return 0;
// This is a postinc AR. Check for overflow on the preinc recurrence using the
// same three conditions that getSignExtendedExpr checks.
// 1. NSW flags on the step increment.
- const SCEV *PreStart = SA->getOperand(1);
+ const SCEV *PreStart = SE->getAddExpr(DiffOps, SA->getNoWrapFlags());
const SCEVAddRecExpr *PreAR = dyn_cast<SCEVAddRecExpr>(
SE->getAddRecExpr(PreStart, Step, L, SCEV::FlagAnyWrap));
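
As a worked instance of the quick difference: if Start is (%x + %y + Step), the
loop collects DiffOps = {%x, %y}, so PreStart becomes (%x + %y) and Start stays
congruent to (PreStart + Step); if Step never occurs among Start's operands,
DiffOps keeps every operand, the size check fails, and the function returns 0.
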
@@ -1086,7 +1098,7 @@ static const SCEV *getPreStartForSignExtend(const SCEVAddRecExpr *AR,
// 2. Direct overflow check on the step operation's expression.
unsigned BitWidth = SE->getTypeSizeInBits(AR->getType());
- const Type *WideTy = IntegerType::get(SE->getContext(), BitWidth * 2);
+ Type *WideTy = IntegerType::get(SE->getContext(), BitWidth * 2);
const SCEV *OperandExtendedStart =
SE->getAddExpr(SE->getSignExtendExpr(PreStart, WideTy),
SE->getSignExtendExpr(Step, WideTy));
@@ -1112,7 +1124,7 @@ static const SCEV *getPreStartForSignExtend(const SCEVAddRecExpr *AR,
// Get the normalized sign-extended expression for this AddRec's Start.
static const SCEV *getSignExtendAddRecStart(const SCEVAddRecExpr *AR,
- const Type *Ty,
+ Type *Ty,
ScalarEvolution *SE) {
const SCEV *PreStart = getPreStartForSignExtend(AR, Ty, SE);
if (!PreStart)
@@ -1123,7 +1135,7 @@ static const SCEV *getSignExtendAddRecStart(const SCEVAddRecExpr *AR,
}
const SCEV *ScalarEvolution::getSignExtendExpr(const SCEV *Op,
- const Type *Ty) {
+ Type *Ty) {
assert(getTypeSizeInBits(Op->getType()) < getTypeSizeInBits(Ty) &&
"This is not an extending conversion!");
assert(isSCEVable(Ty) &&
@@ -1208,7 +1220,7 @@ const SCEV *ScalarEvolution::getSignExtendExpr(const SCEV *Op,
const SCEV *RecastedMaxBECount =
getTruncateOrZeroExtend(CastedMaxBECount, MaxBECount->getType());
if (MaxBECount == RecastedMaxBECount) {
- const Type *WideTy = IntegerType::get(getContext(), BitWidth * 2);
+ Type *WideTy = IntegerType::get(getContext(), BitWidth * 2);
// Check whether Start+Step*MaxBECount has no signed overflow.
const SCEV *SMul = getMulExpr(CastedMaxBECount, Step);
const SCEV *Add = getAddExpr(Start, SMul);
@@ -1275,7 +1287,7 @@ const SCEV *ScalarEvolution::getSignExtendExpr(const SCEV *Op,
/// unspecified bits out to the given type.
///
const SCEV *ScalarEvolution::getAnyExtendExpr(const SCEV *Op,
- const Type *Ty) {
+ Type *Ty) {
assert(getTypeSizeInBits(Op->getType()) < getTypeSizeInBits(Ty) &&
"This is not an extending conversion!");
assert(isSCEVable(Ty) &&
@@ -1438,7 +1450,7 @@ const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl<const SCEV *> &Ops,
assert(!Ops.empty() && "Cannot get empty add!");
if (Ops.size() == 1) return Ops[0];
#ifndef NDEBUG
- const Type *ETy = getEffectiveSCEVType(Ops[0]->getType());
+ Type *ETy = getEffectiveSCEVType(Ops[0]->getType());
for (unsigned i = 1, e = Ops.size(); i != e; ++i)
assert(getEffectiveSCEVType(Ops[i]->getType()) == ETy &&
"SCEVAddExpr operand types don't match!");
@@ -1488,7 +1500,7 @@ const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl<const SCEV *> &Ops,
// Okay, check to see if the same value occurs in the operand list more than
// once. If so, merge them together into an multiply expression. Since we
// sorted the list, these values are required to be adjacent.
- const Type *Ty = Ops[0]->getType();
+ Type *Ty = Ops[0]->getType();
bool FoundMatch = false;
for (unsigned i = 0, e = Ops.size(); i != e-1; ++i)
if (Ops[i] == Ops[i+1]) { // X + Y + Y --> X + Y*2
@@ -1515,8 +1527,8 @@ const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl<const SCEV *> &Ops,
// if the contents of the resulting outer trunc fold to something simple.
for (; Idx < Ops.size() && isa<SCEVTruncateExpr>(Ops[Idx]); ++Idx) {
const SCEVTruncateExpr *Trunc = cast<SCEVTruncateExpr>(Ops[Idx]);
- const Type *DstType = Trunc->getType();
- const Type *SrcType = Trunc->getOperand()->getType();
+ Type *DstType = Trunc->getType();
+ Type *SrcType = Trunc->getOperand()->getType();
SmallVector<const SCEV *, 8> LargeOps;
bool Ok = true;
// Check all the operands to see if they can be represented in the
@@ -1735,7 +1747,7 @@ const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl<const SCEV *> &Ops,
// If all of the other operands were loop invariant, we are done.
if (Ops.size() == 1) return NewRec;
- // Otherwise, add the folded AddRec by the non-liv parts.
+  // Otherwise, add the folded AddRec and the non-invariant parts.
for (unsigned i = 0;; ++i)
if (Ops[i] == AddRec) {
Ops[i] = NewRec;
@@ -1800,6 +1812,38 @@ const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl<const SCEV *> &Ops,
return S;
}
+static uint64_t umul_ov(uint64_t i, uint64_t j, bool &Overflow) {
+ uint64_t k = i*j;
+ if (j > 1 && k / j != i) Overflow = true;
+ return k;
+}
+
+/// Compute the result of "n choose k", the binomial coefficient. If an
+/// intermediate computation overflows, Overflow will be set and the return will
+/// be garbage. Overflow is not cleared on absence of overflow.
+static uint64_t Choose(uint64_t n, uint64_t k, bool &Overflow) {
+ // We use the multiplicative formula:
+ // n(n-1)(n-2)...(n-(k-1)) / k(k-1)(k-2)...1 .
+  // At each iteration, we multiply by the next term of the numerator and then
+  // divide by the next term of the denominator. This division will produce an
+ // integral result, and helps reduce the chance of overflow in the
+ // intermediate computations. However, we can still overflow even when the
+ // final result would fit.
+
+ if (n == 0 || n == k) return 1;
+ if (k > n) return 0;
+
+ if (k > n/2)
+ k = n-k;
+
+ uint64_t r = 1;
+ for (uint64_t i = 1; i <= k; ++i) {
+ r = umul_ov(r, n-(i-1), Overflow);
+ r /= i;
+ }
+ return r;
+}
+
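The two helpers above are self-contained; a standalone sketch (plain C++, separate from the patch) that reproduces them verbatim and exercises the sticky Overflow flag:

#include <cstdint>
#include <cstdio>

static uint64_t umul_ov(uint64_t i, uint64_t j, bool &Overflow) {
  uint64_t k = i*j;
  if (j > 1 && k / j != i) Overflow = true;
  return k;
}

static uint64_t Choose(uint64_t n, uint64_t k, bool &Overflow) {
  if (n == 0 || n == k) return 1;
  if (k > n) return 0;
  if (k > n/2)
    k = n-k;
  uint64_t r = 1;
  for (uint64_t i = 1; i <= k; ++i) {
    r = umul_ov(r, n-(i-1), Overflow);
    r /= i; // exact: r holds Choose(n, i) after this step (absent overflow)
  }
  return r;
}

int main() {
  bool Overflow = false;
  // C(52,5) = 2598960 fits comfortably; the flag stays clear.
  printf("C(52,5) = %llu, overflow = %d\n",
         (unsigned long long)Choose(52, 5, Overflow), Overflow);
  // C(100,50) ~ 1.0e29: an intermediate product exceeds 2^64, so the
  // sticky flag is set and the returned value is garbage by contract.
  Choose(100, 50, Overflow);
  printf("overflow after C(100,50) = %d\n", Overflow);
  return 0;
}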
/// getMulExpr - Get a canonical multiply expression, or something simpler if
/// possible.
const SCEV *ScalarEvolution::getMulExpr(SmallVectorImpl<const SCEV *> &Ops,
@@ -1809,7 +1853,7 @@ const SCEV *ScalarEvolution::getMulExpr(SmallVectorImpl<const SCEV *> &Ops,
assert(!Ops.empty() && "Cannot get empty mul!");
if (Ops.size() == 1) return Ops[0];
#ifndef NDEBUG
- const Type *ETy = getEffectiveSCEVType(Ops[0]->getType());
+ Type *ETy = getEffectiveSCEVType(Ops[0]->getType());
for (unsigned i = 1, e = Ops.size(); i != e; ++i)
assert(getEffectiveSCEVType(Ops[i]->getType()) == ETy &&
"SCEVMulExpr operand types don't match!");
@@ -1960,7 +2004,7 @@ const SCEV *ScalarEvolution::getMulExpr(SmallVectorImpl<const SCEV *> &Ops,
// If all of the other operands were loop invariant, we are done.
if (Ops.size() == 1) return NewRec;
- // Otherwise, multiply the folded AddRec by the non-liv parts.
+ // Otherwise, multiply the folded AddRec by the non-invariant parts.
for (unsigned i = 0;; ++i)
if (Ops[i] == AddRec) {
Ops[i] = NewRec;
@@ -1974,31 +2018,65 @@ const SCEV *ScalarEvolution::getMulExpr(SmallVectorImpl<const SCEV *> &Ops,
// multiplied together. If so, we can fold them.
for (unsigned OtherIdx = Idx+1;
OtherIdx < Ops.size() && isa<SCEVAddRecExpr>(Ops[OtherIdx]);
- ++OtherIdx)
+ ++OtherIdx) {
if (AddRecLoop == cast<SCEVAddRecExpr>(Ops[OtherIdx])->getLoop()) {
- // F * G, where F = {A,+,B}<L> and G = {C,+,D}<L> -->
- // {A*C,+,F*D + G*B + B*D}<L>
+ // {A1,+,A2,+,...,+,An}<L> * {B1,+,B2,+,...,+,Bn}<L>
+ // = {x=1 in [ sum y=x..2x [ sum z=max(y-x, y-n)..min(x,n) [
+        //       choose(x, 2x-y)*choose(2x-y, x-z)*A_{y-z}*B_z
+ // ]]],+,...up to x=2n}.
+ // Note that the arguments to choose() are always integers with values
+ // known at compile time, never SCEV objects.
+ //
+ // The implementation avoids pointless extra computations when the two
+        // addrecs are of different length (mathematically, it's equivalent to
+ // an infinite stream of zeros on the right).
+ bool OpsModified = false;
for (; OtherIdx != Ops.size() && isa<SCEVAddRecExpr>(Ops[OtherIdx]);
++OtherIdx)
if (const SCEVAddRecExpr *OtherAddRec =
dyn_cast<SCEVAddRecExpr>(Ops[OtherIdx]))
if (OtherAddRec->getLoop() == AddRecLoop) {
- const SCEVAddRecExpr *F = AddRec, *G = OtherAddRec;
- const SCEV *NewStart = getMulExpr(F->getStart(), G->getStart());
- const SCEV *B = F->getStepRecurrence(*this);
- const SCEV *D = G->getStepRecurrence(*this);
- const SCEV *NewStep = getAddExpr(getMulExpr(F, D),
- getMulExpr(G, B),
- getMulExpr(B, D));
- const SCEV *NewAddRec = getAddRecExpr(NewStart, NewStep,
- F->getLoop(),
- SCEV::FlagAnyWrap);
- if (Ops.size() == 2) return NewAddRec;
- Ops[Idx] = AddRec = cast<SCEVAddRecExpr>(NewAddRec);
- Ops.erase(Ops.begin() + OtherIdx); --OtherIdx;
+ bool Overflow = false;
+ Type *Ty = AddRec->getType();
+ bool LargerThan64Bits = getTypeSizeInBits(Ty) > 64;
+ SmallVector<const SCEV*, 7> AddRecOps;
+ for (int x = 0, xe = AddRec->getNumOperands() +
+ OtherAddRec->getNumOperands() - 1;
+ x != xe && !Overflow; ++x) {
+ const SCEV *Term = getConstant(Ty, 0);
+ for (int y = x, ye = 2*x+1; y != ye && !Overflow; ++y) {
+ uint64_t Coeff1 = Choose(x, 2*x - y, Overflow);
+ for (int z = std::max(y-x, y-(int)AddRec->getNumOperands()+1),
+ ze = std::min(x+1, (int)OtherAddRec->getNumOperands());
+ z < ze && !Overflow; ++z) {
+ uint64_t Coeff2 = Choose(2*x - y, x-z, Overflow);
+ uint64_t Coeff;
+ if (LargerThan64Bits)
+ Coeff = umul_ov(Coeff1, Coeff2, Overflow);
+ else
+ Coeff = Coeff1*Coeff2;
+ const SCEV *CoeffTerm = getConstant(Ty, Coeff);
+ const SCEV *Term1 = AddRec->getOperand(y-z);
+ const SCEV *Term2 = OtherAddRec->getOperand(z);
+ Term = getAddExpr(Term, getMulExpr(CoeffTerm, Term1,Term2));
+ }
+ }
+ AddRecOps.push_back(Term);
+ }
+ if (!Overflow) {
+ const SCEV *NewAddRec = getAddRecExpr(AddRecOps,
+ AddRec->getLoop(),
+ SCEV::FlagAnyWrap);
+ if (Ops.size() == 2) return NewAddRec;
+ Ops[Idx] = AddRec = cast<SCEVAddRecExpr>(NewAddRec);
+ Ops.erase(Ops.begin() + OtherIdx); --OtherIdx;
+ OpsModified = true;
+ }
}
- return getMulExpr(Ops);
+ if (OpsModified)
+ return getMulExpr(Ops);
}
+ }
// Otherwise couldn't fold anything into this recurrence. Move onto the
// next one.
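As a standalone check of the closed form this hunk implements (plain C++11, separate from the patch; mulAddRec and the use of int64_t coefficients in place of SCEV operands are illustrative): squaring the canonical induction variable {0,+,1} should give {0,+,1,+,2}, since n^2 has first differences 1,3,5,... and constant second difference 2.

#include <algorithm>
#include <cstdint>
#include <cstdio>
#include <vector>

static uint64_t Choose(uint64_t n, uint64_t k) { // overflow handling elided
  if (k > n) return 0;
  if (k > n/2) k = n-k;
  uint64_t r = 1;
  for (uint64_t i = 1; i <= k; ++i) { r = r * (n-(i-1)); r /= i; }
  return r;
}

// The same triple loop as the patch, specialized to constant operands.
static std::vector<int64_t> mulAddRec(const std::vector<int64_t> &A,
                                      const std::vector<int64_t> &B) {
  std::vector<int64_t> Ops;
  for (int x = 0, xe = (int)A.size() + (int)B.size() - 1; x != xe; ++x) {
    int64_t Term = 0;
    for (int y = x, ye = 2*x+1; y != ye; ++y) {
      uint64_t Coeff1 = Choose(x, 2*x - y);
      for (int z = std::max(y-x, y-(int)A.size()+1),
               ze = std::min(x+1, (int)B.size()); z < ze; ++z) {
        uint64_t Coeff2 = Choose(2*x - y, x-z);
        Term += (int64_t)(Coeff1 * Coeff2) * A[y-z] * B[z];
      }
    }
    Ops.push_back(Term);
  }
  return Ops;
}

int main() {
  std::vector<int64_t> IV = {0, 1};            // {0,+,1}, i.e. the IV "n"
  std::vector<int64_t> Sq = mulAddRec(IV, IV); // n * n
  for (size_t i = 0; i != Sq.size(); ++i)
    printf("%s%lld", i ? ",+," : "{", (long long)Sq[i]);
  printf("}\n"); // prints {0,+,1,+,2}, which evaluates to n^2
  return 0;
}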
@@ -2042,21 +2120,22 @@ const SCEV *ScalarEvolution::getUDivExpr(const SCEV *LHS,
// Determine if the division can be folded into the operands of
// its operands.
// TODO: Generalize this to non-constants by using known-bits information.
- const Type *Ty = LHS->getType();
+ Type *Ty = LHS->getType();
unsigned LZ = RHSC->getValue()->getValue().countLeadingZeros();
unsigned MaxShiftAmt = getTypeSizeInBits(Ty) - LZ - 1;
// For non-power-of-two values, effectively round the value up to the
// nearest power of two.
if (!RHSC->getValue()->getValue().isPowerOf2())
++MaxShiftAmt;
- const IntegerType *ExtTy =
+ IntegerType *ExtTy =
IntegerType::get(getContext(), getTypeSizeInBits(Ty) + MaxShiftAmt);
- // {X,+,N}/C --> {X/C,+,N/C} if safe and N/C can be folded.
if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(LHS))
if (const SCEVConstant *Step =
- dyn_cast<SCEVConstant>(AR->getStepRecurrence(*this)))
- if (!Step->getValue()->getValue()
- .urem(RHSC->getValue()->getValue()) &&
+ dyn_cast<SCEVConstant>(AR->getStepRecurrence(*this))) {
+ // {X,+,N}/C --> {X/C,+,N/C} if safe and N/C can be folded.
+ const APInt &StepInt = Step->getValue()->getValue();
+ const APInt &DivInt = RHSC->getValue()->getValue();
+ if (!StepInt.urem(DivInt) &&
getZeroExtendExpr(AR, ExtTy) ==
getAddRecExpr(getZeroExtendExpr(AR->getStart(), ExtTy),
getZeroExtendExpr(Step, ExtTy),
@@ -2067,6 +2146,22 @@ const SCEV *ScalarEvolution::getUDivExpr(const SCEV *LHS,
return getAddRecExpr(Operands, AR->getLoop(),
SCEV::FlagNW);
}
+      // Get a canonical UDivExpr for a recurrence.
+      // {X,+,N}/C => {Y,+,N}/C where Y=X-(X%N). Safe when C%N=0.
+ // We can currently only fold X%N if X is constant.
+ const SCEVConstant *StartC = dyn_cast<SCEVConstant>(AR->getStart());
+ if (StartC && !DivInt.urem(StepInt) &&
+ getZeroExtendExpr(AR, ExtTy) ==
+ getAddRecExpr(getZeroExtendExpr(AR->getStart(), ExtTy),
+ getZeroExtendExpr(Step, ExtTy),
+ AR->getLoop(), SCEV::FlagAnyWrap)) {
+ const APInt &StartInt = StartC->getValue()->getValue();
+ const APInt &StartRem = StartInt.urem(StepInt);
+ if (StartRem != 0)
+ LHS = getAddRecExpr(getConstant(StartInt - StartRem), Step,
+ AR->getLoop(), SCEV::FlagNW);
+ }
+ }
// (A*B)/C --> A*(B/C) if safe and B/C can be folded.
if (const SCEVMulExpr *M = dyn_cast<SCEVMulExpr>(LHS)) {
SmallVector<const SCEV *, 4> Operands;
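A quick numeric check of the rewrite introduced above (plain C++, separate from the patch): {X,+,N}/C == {Y,+,N}/C with Y = X - (X % N) whenever C % N == 0 and the recurrence does not wrap; here X=7, N=4, C=8, so the start canonicalizes from 7 down to 4.

#include <cassert>
#include <cstdint>
#include <cstdio>

int main() {
  const uint64_t X = 7, N = 4, C = 8; // C % N == 0
  const uint64_t Y = X - (X % N);     // Y == 4
  // No wraparound in this range, matching the zero-extension precondition.
  for (uint64_t i = 0; i != 1000; ++i)
    assert((X + i*N) / C == (Y + i*N) / C);
  printf("{7,+,4}/8 == {4,+,4}/8 for the first 1000 iterations\n");
  return 0;
}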
@@ -2151,7 +2246,7 @@ ScalarEvolution::getAddRecExpr(SmallVectorImpl<const SCEV *> &Operands,
const Loop *L, SCEV::NoWrapFlags Flags) {
if (Operands.size() == 1) return Operands[0];
#ifndef NDEBUG
- const Type *ETy = getEffectiveSCEVType(Operands[0]->getType());
+ Type *ETy = getEffectiveSCEVType(Operands[0]->getType());
for (unsigned i = 1, e = Operands.size(); i != e; ++i)
assert(getEffectiveSCEVType(Operands[i]->getType()) == ETy &&
"SCEVAddRecExpr operand types don't match!");
@@ -2269,7 +2364,7 @@ ScalarEvolution::getSMaxExpr(SmallVectorImpl<const SCEV *> &Ops) {
assert(!Ops.empty() && "Cannot get empty smax!");
if (Ops.size() == 1) return Ops[0];
#ifndef NDEBUG
- const Type *ETy = getEffectiveSCEVType(Ops[0]->getType());
+ Type *ETy = getEffectiveSCEVType(Ops[0]->getType());
for (unsigned i = 1, e = Ops.size(); i != e; ++i)
assert(getEffectiveSCEVType(Ops[i]->getType()) == ETy &&
"SCEVSMaxExpr operand types don't match!");
@@ -2373,7 +2468,7 @@ ScalarEvolution::getUMaxExpr(SmallVectorImpl<const SCEV *> &Ops) {
assert(!Ops.empty() && "Cannot get empty umax!");
if (Ops.size() == 1) return Ops[0];
#ifndef NDEBUG
- const Type *ETy = getEffectiveSCEVType(Ops[0]->getType());
+ Type *ETy = getEffectiveSCEVType(Ops[0]->getType());
for (unsigned i = 1, e = Ops.size(); i != e; ++i)
assert(getEffectiveSCEVType(Ops[i]->getType()) == ETy &&
"SCEVUMaxExpr operand types don't match!");
@@ -2476,7 +2571,7 @@ const SCEV *ScalarEvolution::getUMinExpr(const SCEV *LHS,
return getNotSCEV(getUMaxExpr(getNotSCEV(LHS), getNotSCEV(RHS)));
}
-const SCEV *ScalarEvolution::getSizeOfExpr(const Type *AllocTy) {
+const SCEV *ScalarEvolution::getSizeOfExpr(Type *AllocTy) {
// If we have TargetData, we can bypass creating a target-independent
// constant expression and then folding it back into a ConstantInt.
// This is just a compile-time optimization.
@@ -2488,20 +2583,20 @@ const SCEV *ScalarEvolution::getSizeOfExpr(const Type *AllocTy) {
if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C))
if (Constant *Folded = ConstantFoldConstantExpression(CE, TD))
C = Folded;
- const Type *Ty = getEffectiveSCEVType(PointerType::getUnqual(AllocTy));
+ Type *Ty = getEffectiveSCEVType(PointerType::getUnqual(AllocTy));
return getTruncateOrZeroExtend(getSCEV(C), Ty);
}
-const SCEV *ScalarEvolution::getAlignOfExpr(const Type *AllocTy) {
+const SCEV *ScalarEvolution::getAlignOfExpr(Type *AllocTy) {
Constant *C = ConstantExpr::getAlignOf(AllocTy);
if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C))
if (Constant *Folded = ConstantFoldConstantExpression(CE, TD))
C = Folded;
- const Type *Ty = getEffectiveSCEVType(PointerType::getUnqual(AllocTy));
+ Type *Ty = getEffectiveSCEVType(PointerType::getUnqual(AllocTy));
return getTruncateOrZeroExtend(getSCEV(C), Ty);
}
-const SCEV *ScalarEvolution::getOffsetOfExpr(const StructType *STy,
+const SCEV *ScalarEvolution::getOffsetOfExpr(StructType *STy,
unsigned FieldNo) {
// If we have TargetData, we can bypass creating a target-independent
// constant expression and then folding it back into a ConstantInt.
@@ -2514,17 +2609,17 @@ const SCEV *ScalarEvolution::getOffsetOfExpr(const StructType *STy,
if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C))
if (Constant *Folded = ConstantFoldConstantExpression(CE, TD))
C = Folded;
- const Type *Ty = getEffectiveSCEVType(PointerType::getUnqual(STy));
+ Type *Ty = getEffectiveSCEVType(PointerType::getUnqual(STy));
return getTruncateOrZeroExtend(getSCEV(C), Ty);
}
-const SCEV *ScalarEvolution::getOffsetOfExpr(const Type *CTy,
+const SCEV *ScalarEvolution::getOffsetOfExpr(Type *CTy,
Constant *FieldNo) {
Constant *C = ConstantExpr::getOffsetOf(CTy, FieldNo);
if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C))
if (Constant *Folded = ConstantFoldConstantExpression(CE, TD))
C = Folded;
- const Type *Ty = getEffectiveSCEVType(PointerType::getUnqual(CTy));
+ Type *Ty = getEffectiveSCEVType(PointerType::getUnqual(CTy));
return getTruncateOrZeroExtend(getSCEV(C), Ty);
}
@@ -2558,14 +2653,14 @@ const SCEV *ScalarEvolution::getUnknown(Value *V) {
/// the SCEV framework. This primarily includes integer types, and it
/// can optionally include pointer types if the ScalarEvolution class
/// has access to target-specific information.
-bool ScalarEvolution::isSCEVable(const Type *Ty) const {
+bool ScalarEvolution::isSCEVable(Type *Ty) const {
// Integers and pointers are always SCEVable.
return Ty->isIntegerTy() || Ty->isPointerTy();
}
/// getTypeSizeInBits - Return the size in bits of the specified type,
/// for which isSCEVable must return true.
-uint64_t ScalarEvolution::getTypeSizeInBits(const Type *Ty) const {
+uint64_t ScalarEvolution::getTypeSizeInBits(Type *Ty) const {
assert(isSCEVable(Ty) && "Type is not SCEVable!");
// If we have a TargetData, use it!
@@ -2586,7 +2681,7 @@ uint64_t ScalarEvolution::getTypeSizeInBits(const Type *Ty) const {
/// the given type and which represents how SCEV will treat the given
/// type, for which isSCEVable must return true. For pointer types,
/// this is the pointer-sized integer type.
-const Type *ScalarEvolution::getEffectiveSCEVType(const Type *Ty) const {
+Type *ScalarEvolution::getEffectiveSCEVType(Type *Ty) const {
assert(isSCEVable(Ty) && "Type is not SCEVable!");
if (Ty->isIntegerTy())
@@ -2628,7 +2723,7 @@ const SCEV *ScalarEvolution::getNegativeSCEV(const SCEV *V) {
return getConstant(
cast<ConstantInt>(ConstantExpr::getNeg(VC->getValue())));
- const Type *Ty = V->getType();
+ Type *Ty = V->getType();
Ty = getEffectiveSCEVType(Ty);
return getMulExpr(V,
getConstant(cast<ConstantInt>(Constant::getAllOnesValue(Ty))));
@@ -2640,7 +2735,7 @@ const SCEV *ScalarEvolution::getNotSCEV(const SCEV *V) {
return getConstant(
cast<ConstantInt>(ConstantExpr::getNot(VC->getValue())));
- const Type *Ty = V->getType();
+ Type *Ty = V->getType();
Ty = getEffectiveSCEVType(Ty);
const SCEV *AllOnes =
getConstant(cast<ConstantInt>(Constant::getAllOnesValue(Ty)));
@@ -2664,8 +2759,8 @@ const SCEV *ScalarEvolution::getMinusSCEV(const SCEV *LHS, const SCEV *RHS,
/// input value to the specified type. If the type must be extended, it is zero
/// extended.
const SCEV *
-ScalarEvolution::getTruncateOrZeroExtend(const SCEV *V, const Type *Ty) {
- const Type *SrcTy = V->getType();
+ScalarEvolution::getTruncateOrZeroExtend(const SCEV *V, Type *Ty) {
+ Type *SrcTy = V->getType();
assert((SrcTy->isIntegerTy() || SrcTy->isPointerTy()) &&
(Ty->isIntegerTy() || Ty->isPointerTy()) &&
"Cannot truncate or zero extend with non-integer arguments!");
@@ -2681,8 +2776,8 @@ ScalarEvolution::getTruncateOrZeroExtend(const SCEV *V, const Type *Ty) {
/// extended.
const SCEV *
ScalarEvolution::getTruncateOrSignExtend(const SCEV *V,
- const Type *Ty) {
- const Type *SrcTy = V->getType();
+ Type *Ty) {
+ Type *SrcTy = V->getType();
assert((SrcTy->isIntegerTy() || SrcTy->isPointerTy()) &&
(Ty->isIntegerTy() || Ty->isPointerTy()) &&
"Cannot truncate or zero extend with non-integer arguments!");
@@ -2697,8 +2792,8 @@ ScalarEvolution::getTruncateOrSignExtend(const SCEV *V,
/// input value to the specified type. If the type must be extended, it is zero
/// extended. The conversion must not be narrowing.
const SCEV *
-ScalarEvolution::getNoopOrZeroExtend(const SCEV *V, const Type *Ty) {
- const Type *SrcTy = V->getType();
+ScalarEvolution::getNoopOrZeroExtend(const SCEV *V, Type *Ty) {
+ Type *SrcTy = V->getType();
assert((SrcTy->isIntegerTy() || SrcTy->isPointerTy()) &&
(Ty->isIntegerTy() || Ty->isPointerTy()) &&
"Cannot noop or zero extend with non-integer arguments!");
@@ -2713,8 +2808,8 @@ ScalarEvolution::getNoopOrZeroExtend(const SCEV *V, const Type *Ty) {
/// input value to the specified type. If the type must be extended, it is sign
/// extended. The conversion must not be narrowing.
const SCEV *
-ScalarEvolution::getNoopOrSignExtend(const SCEV *V, const Type *Ty) {
- const Type *SrcTy = V->getType();
+ScalarEvolution::getNoopOrSignExtend(const SCEV *V, Type *Ty) {
+ Type *SrcTy = V->getType();
assert((SrcTy->isIntegerTy() || SrcTy->isPointerTy()) &&
(Ty->isIntegerTy() || Ty->isPointerTy()) &&
"Cannot noop or sign extend with non-integer arguments!");
@@ -2730,8 +2825,8 @@ ScalarEvolution::getNoopOrSignExtend(const SCEV *V, const Type *Ty) {
/// it is extended with unspecified bits. The conversion must not be
/// narrowing.
const SCEV *
-ScalarEvolution::getNoopOrAnyExtend(const SCEV *V, const Type *Ty) {
- const Type *SrcTy = V->getType();
+ScalarEvolution::getNoopOrAnyExtend(const SCEV *V, Type *Ty) {
+ Type *SrcTy = V->getType();
assert((SrcTy->isIntegerTy() || SrcTy->isPointerTy()) &&
(Ty->isIntegerTy() || Ty->isPointerTy()) &&
"Cannot noop or any extend with non-integer arguments!");
@@ -2745,8 +2840,8 @@ ScalarEvolution::getNoopOrAnyExtend(const SCEV *V, const Type *Ty) {
/// getTruncateOrNoop - Return a SCEV corresponding to a conversion of the
/// input value to the specified type. The conversion must not be widening.
const SCEV *
-ScalarEvolution::getTruncateOrNoop(const SCEV *V, const Type *Ty) {
- const Type *SrcTy = V->getType();
+ScalarEvolution::getTruncateOrNoop(const SCEV *V, Type *Ty) {
+ Type *SrcTy = V->getType();
assert((SrcTy->isIntegerTy() || SrcTy->isPointerTy()) &&
(Ty->isIntegerTy() || Ty->isPointerTy()) &&
"Cannot truncate or noop with non-integer arguments!");
@@ -3032,7 +3127,7 @@ const SCEV *ScalarEvolution::createNodeForGEP(GEPOperator *GEP) {
// context.
bool isInBounds = GEP->isInBounds();
- const Type *IntPtrTy = getEffectiveSCEVType(GEP->getType());
+ Type *IntPtrTy = getEffectiveSCEVType(GEP->getType());
Value *Base = GEP->getOperand(0);
// Don't attempt to analyze GEPs over unsized objects.
if (!cast<PointerType>(Base->getType())->getElementType()->isSized())
@@ -3044,7 +3139,7 @@ const SCEV *ScalarEvolution::createNodeForGEP(GEPOperator *GEP) {
I != E; ++I) {
Value *Index = *I;
// Compute the (potentially symbolic) offset in bytes for this index.
- if (const StructType *STy = dyn_cast<StructType>(*GTI++)) {
+ if (StructType *STy = dyn_cast<StructType>(*GTI++)) {
// For a struct, add the member offset.
unsigned FieldNo = cast<ConstantInt>(Index)->getZExtValue();
const SCEV *FieldOffset = getOffsetOfExpr(STy, FieldNo);
@@ -3244,7 +3339,7 @@ ScalarEvolution::getUnsignedRange(const SCEV *S) {
// TODO: non-affine addrec
if (AddRec->isAffine()) {
- const Type *Ty = AddRec->getType();
+ Type *Ty = AddRec->getType();
const SCEV *MaxBECount = getMaxBackedgeTakenCount(AddRec->getLoop());
if (!isa<SCEVCouldNotCompute>(MaxBECount) &&
getTypeSizeInBits(MaxBECount->getType()) <= BitWidth) {
@@ -3396,7 +3491,7 @@ ScalarEvolution::getSignedRange(const SCEV *S) {
// TODO: non-affine addrec
if (AddRec->isAffine()) {
- const Type *Ty = AddRec->getType();
+ Type *Ty = AddRec->getType();
const SCEV *MaxBECount = getMaxBackedgeTakenCount(AddRec->getLoop());
if (!isa<SCEVCouldNotCompute>(MaxBECount) &&
getTypeSizeInBits(MaxBECount->getType()) <= BitWidth) {
@@ -3503,7 +3598,13 @@ const SCEV *ScalarEvolution::createSCEV(Value *V) {
AddOps.push_back(Op1);
}
AddOps.push_back(getSCEV(U->getOperand(0)));
- return getAddExpr(AddOps);
+ SCEV::NoWrapFlags Flags = SCEV::FlagAnyWrap;
+ OverflowingBinaryOperator *OBO = cast<OverflowingBinaryOperator>(V);
+ if (OBO->hasNoSignedWrap())
+        Flags = setFlags(Flags, SCEV::FlagNSW);
+      if (OBO->hasNoUnsignedWrap())
+        Flags = setFlags(Flags, SCEV::FlagNUW);
+ return getAddExpr(AddOps, Flags);
}
case Instruction::Mul: {
// See the Add code above.
@@ -3601,9 +3702,9 @@ const SCEV *ScalarEvolution::createSCEV(Value *V) {
LCI->getValue() == CI->getValue())
if (const SCEVZeroExtendExpr *Z =
dyn_cast<SCEVZeroExtendExpr>(getSCEV(U->getOperand(0)))) {
- const Type *UTy = U->getType();
+ Type *UTy = U->getType();
const SCEV *Z0 = Z->getOperand();
- const Type *Z0Ty = Z0->getType();
+ Type *Z0Ty = Z0->getType();
unsigned Z0TySize = getTypeSizeInBits(Z0Ty);
// If C is a low-bits mask, the zero extend is serving to
@@ -3813,6 +3914,70 @@ const SCEV *ScalarEvolution::createSCEV(Value *V) {
// Iteration Count Computation Code
//
+/// getSmallConstantTripCount - Returns the maximum trip count of this loop as a
+/// normal unsigned value, if possible. Returns 0 if the trip count is unknown
+/// or not constant. Will also return 0 if the maximum trip count is very large
+/// (>= 2^32).
+unsigned ScalarEvolution::getSmallConstantTripCount(Loop *L,
+ BasicBlock *ExitBlock) {
+ const SCEVConstant *ExitCount =
+ dyn_cast<SCEVConstant>(getExitCount(L, ExitBlock));
+ if (!ExitCount)
+ return 0;
+
+ ConstantInt *ExitConst = ExitCount->getValue();
+
+ // Guard against huge trip counts.
+ if (ExitConst->getValue().getActiveBits() > 32)
+ return 0;
+
+ // In case of integer overflow, this returns 0, which is correct.
+ return ((unsigned)ExitConst->getZExtValue()) + 1;
+}
+
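The wrap behavior the comment above relies on can be seen in isolation (plain C++, separate from the patch; tripCountFromBECount is an illustrative stand-in): a backedge-taken count of 2^32-1 passes the 32-bit guard, and the +1 wraps the returned trip count to 0, which is exactly the documented "unknown" result.

#include <cstdint>
#include <cstdio>

static unsigned tripCountFromBECount(uint64_t BECount) {
  if (BECount >> 32)              // same effect as getActiveBits() > 32
    return 0;                     // too large to report
  return (unsigned)BECount + 1;   // wraps to 0 iff BECount == 2^32 - 1
}

int main() {
  printf("%u\n", tripCountFromBECount(9));              // 10
  printf("%u\n", tripCountFromBECount(0xFFFFFFFFull));  // 0 (wrapped +1)
  printf("%u\n", tripCountFromBECount(1ull << 32));     // 0 (guard)
  return 0;
}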
+/// getSmallConstantTripMultiple - Returns the largest constant divisor of the
+/// trip count of this loop as a normal unsigned value, if possible. This
+/// means that the actual trip count is always a multiple of the returned
+/// value (don't forget the trip count could very well be zero as well!).
+///
+/// Returns 1 if the trip count is unknown or not guaranteed to be a
+/// multiple of a constant (which is also the case if the trip count is simply
+/// constant; use getSmallConstantTripCount for that case). It will also
+/// return 1 if the trip count is very large (>= 2^32).
+unsigned ScalarEvolution::getSmallConstantTripMultiple(Loop *L,
+ BasicBlock *ExitBlock) {
+ const SCEV *ExitCount = getExitCount(L, ExitBlock);
+ if (ExitCount == getCouldNotCompute())
+ return 1;
+
+ // Get the trip count from the BE count by adding 1.
+ const SCEV *TCMul = getAddExpr(ExitCount,
+ getConstant(ExitCount->getType(), 1));
+ // FIXME: SCEV distributes multiplication as V1*C1 + V2*C1. We could attempt
+ // to factor simple cases.
+ if (const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(TCMul))
+ TCMul = Mul->getOperand(0);
+
+ const SCEVConstant *MulC = dyn_cast<SCEVConstant>(TCMul);
+ if (!MulC)
+ return 1;
+
+ ConstantInt *Result = MulC->getValue();
+
+ // Guard against huge trip counts.
+ if (!Result || Result->getValue().getActiveBits() > 32)
+ return 1;
+
+ return (unsigned)Result->getZExtValue();
+}
+
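A standalone sketch of the guarantee documented above (plain C++, separate from the patch): a loop that runs 2*n iterations has backedge-taken count 2*n - 1, so count+1 can fold to the product (2 * n) and the routine can report the constant factor 2; the actual trip count is then always a multiple of that value.

#include <cassert>
#include <cstdio>

int main() {
  for (unsigned n = 1; n != 100; ++n) {
    unsigned Trips = 0;
    for (unsigned i = 0; i != 2*n; ++i) // trip count 2*n, BE count 2*n - 1
      ++Trips;
    assert(Trips % 2 == 0); // the reported multiple divides the trip count
  }
  printf("trip count is a multiple of 2 for every n\n");
  return 0;
}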
+/// getExitCount - Get the expression for the number of loop iterations for
+/// which this loop is guaranteed not to exit via ExitingBlock. Otherwise
+/// return SCEVCouldNotCompute.
+const SCEV *ScalarEvolution::getExitCount(Loop *L, BasicBlock *ExitingBlock) {
+ return getBackedgeTakenInfo(L).getExact(ExitingBlock, this);
+}
+
/// getBackedgeTakenCount - If the specified loop has a predictable
/// backedge-taken count, return it, otherwise return a SCEVCouldNotCompute
/// object. The backedge-taken count is the number of times the loop header
@@ -3825,14 +3990,14 @@ const SCEV *ScalarEvolution::createSCEV(Value *V) {
/// hasLoopInvariantBackedgeTakenCount).
///
const SCEV *ScalarEvolution::getBackedgeTakenCount(const Loop *L) {
- return getBackedgeTakenInfo(L).Exact;
+ return getBackedgeTakenInfo(L).getExact(this);
}
/// getMaxBackedgeTakenCount - Similar to getBackedgeTakenCount, except
/// return the least SCEV value that is known never to be less than the
/// actual backedge taken count.
const SCEV *ScalarEvolution::getMaxBackedgeTakenCount(const Loop *L) {
- return getBackedgeTakenInfo(L).Max;
+ return getBackedgeTakenInfo(L).getMax(this);
}
/// PushLoopPHIs - Push PHI nodes in the header of the given loop
@@ -3849,33 +4014,31 @@ PushLoopPHIs(const Loop *L, SmallVectorImpl<Instruction *> &Worklist) {
const ScalarEvolution::BackedgeTakenInfo &
ScalarEvolution::getBackedgeTakenInfo(const Loop *L) {
- // Initially insert a CouldNotCompute for this loop. If the insertion
+ // Initially insert an invalid entry for this loop. If the insertion
// succeeds, proceed to actually compute a backedge-taken count and
// update the value. The temporary CouldNotCompute value tells SCEV
// code elsewhere that it shouldn't attempt to request a new
// backedge-taken count, which could result in infinite recursion.
std::pair<DenseMap<const Loop *, BackedgeTakenInfo>::iterator, bool> Pair =
- BackedgeTakenCounts.insert(std::make_pair(L, getCouldNotCompute()));
+ BackedgeTakenCounts.insert(std::make_pair(L, BackedgeTakenInfo()));
if (!Pair.second)
return Pair.first->second;
- BackedgeTakenInfo Result = getCouldNotCompute();
- BackedgeTakenInfo Computed = ComputeBackedgeTakenCount(L);
- if (Computed.Exact != getCouldNotCompute()) {
- assert(isLoopInvariant(Computed.Exact, L) &&
- isLoopInvariant(Computed.Max, L) &&
+ // ComputeBackedgeTakenCount may allocate memory for its result. Inserting it
+ // into the BackedgeTakenCounts map transfers ownership. Otherwise, the result
+ // must be cleared in this scope.
+ BackedgeTakenInfo Result = ComputeBackedgeTakenCount(L);
+
+ if (Result.getExact(this) != getCouldNotCompute()) {
+ assert(isLoopInvariant(Result.getExact(this), L) &&
+ isLoopInvariant(Result.getMax(this), L) &&
"Computed backedge-taken count isn't loop invariant for loop!");
++NumTripCountsComputed;
-
- // Update the value in the map.
- Result = Computed;
- } else {
- if (Computed.Max != getCouldNotCompute())
- // Update the value in the map.
- Result = Computed;
- if (isa<PHINode>(L->getHeader()->begin()))
- // Only count loops that have phi nodes as not being computable.
- ++NumTripCountsNotComputed;
+ }
+ else if (Result.getMax(this) == getCouldNotCompute() &&
+ isa<PHINode>(L->getHeader()->begin())) {
+ // Only count loops that have phi nodes as not being computable.
+ ++NumTripCountsNotComputed;
}
// Now that we know more about the trip count for this loop, forget any
@@ -3883,7 +4046,7 @@ ScalarEvolution::getBackedgeTakenInfo(const Loop *L) {
// conservative estimates made without the benefit of trip count
// information. This is similar to the code in forgetLoop, except that
// it handles SCEVUnknown PHI nodes specially.
- if (Computed.hasAnyInfo()) {
+ if (Result.hasAnyInfo()) {
SmallVector<Instruction *, 16> Worklist;
PushLoopPHIs(L, Worklist);
@@ -3928,7 +4091,12 @@ ScalarEvolution::getBackedgeTakenInfo(const Loop *L) {
/// compute a trip count, or if the loop is deleted.
void ScalarEvolution::forgetLoop(const Loop *L) {
// Drop any stored trip count value.
- BackedgeTakenCounts.erase(L);
+ DenseMap<const Loop*, BackedgeTakenInfo>::iterator BTCPos =
+ BackedgeTakenCounts.find(L);
+ if (BTCPos != BackedgeTakenCounts.end()) {
+ BTCPos->second.clear();
+ BackedgeTakenCounts.erase(BTCPos);
+ }
// Drop information about expressions based on loop-header PHIs.
SmallVector<Instruction *, 16> Worklist;
@@ -3984,6 +4152,85 @@ void ScalarEvolution::forgetValue(Value *V) {
}
}
+/// getExact - Get the exact loop backedge taken count considering all loop
+/// exits. If all exits are computable, this is the minimum computed count.
+const SCEV *
+ScalarEvolution::BackedgeTakenInfo::getExact(ScalarEvolution *SE) const {
+ // If any exits were not computable, the loop is not computable.
+ if (!ExitNotTaken.isCompleteList()) return SE->getCouldNotCompute();
+
+ // We need at least one computable exit.
+ if (!ExitNotTaken.ExitingBlock) return SE->getCouldNotCompute();
+ assert(ExitNotTaken.ExactNotTaken && "uninitialized not-taken info");
+
+ const SCEV *BECount = 0;
+ for (const ExitNotTakenInfo *ENT = &ExitNotTaken;
+ ENT != 0; ENT = ENT->getNextExit()) {
+
+ assert(ENT->ExactNotTaken != SE->getCouldNotCompute() && "bad exit SCEV");
+
+ if (!BECount)
+ BECount = ENT->ExactNotTaken;
+ else
+ BECount = SE->getUMinFromMismatchedTypes(BECount, ENT->ExactNotTaken);
+ }
+ assert(BECount && "Invalid not taken count for loop exit");
+ return BECount;
+}
+
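Why the unsigned minimum is the right combination (a sketch in plain C++, separate from the patch): the loop leaves through whichever exit fires first, so with one exit allowing 5 backedges and another allowing 7, the backedge-taken count is 5.

#include <cstdio>

int main() {
  unsigned Backedges = 0;
  for (unsigned i = 0;; ++i) {
    if (i == 5) break; // exit A: fires after 5 backedges
    if (i == 7) break; // exit B: would allow 7, but A fires first
    ++Backedges;
  }
  printf("backedge-taken count = %u\n", Backedges); // 5 == umin(5, 7)
  return 0;
}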
+/// getExact - Get the exact not taken count for this loop exit.
+const SCEV *
+ScalarEvolution::BackedgeTakenInfo::getExact(BasicBlock *ExitingBlock,
+ ScalarEvolution *SE) const {
+ for (const ExitNotTakenInfo *ENT = &ExitNotTaken;
+ ENT != 0; ENT = ENT->getNextExit()) {
+
+ if (ENT->ExitingBlock == ExitingBlock)
+ return ENT->ExactNotTaken;
+ }
+ return SE->getCouldNotCompute();
+}
+
+/// getMax - Get the max backedge taken count for the loop.
+const SCEV *
+ScalarEvolution::BackedgeTakenInfo::getMax(ScalarEvolution *SE) const {
+ return Max ? Max : SE->getCouldNotCompute();
+}
+
+/// Allocate memory for BackedgeTakenInfo and copy the not-taken count of each
+/// computable exit into a persistent ExitNotTakenInfo array.
+ScalarEvolution::BackedgeTakenInfo::BackedgeTakenInfo(
+ SmallVectorImpl< std::pair<BasicBlock *, const SCEV *> > &ExitCounts,
+ bool Complete, const SCEV *MaxCount) : Max(MaxCount) {
+
+ if (!Complete)
+ ExitNotTaken.setIncomplete();
+
+ unsigned NumExits = ExitCounts.size();
+ if (NumExits == 0) return;
+
+ ExitNotTaken.ExitingBlock = ExitCounts[0].first;
+ ExitNotTaken.ExactNotTaken = ExitCounts[0].second;
+ if (NumExits == 1) return;
+
+ // Handle the rare case of multiple computable exits.
+ ExitNotTakenInfo *ENT = new ExitNotTakenInfo[NumExits-1];
+
+ ExitNotTakenInfo *PrevENT = &ExitNotTaken;
+ for (unsigned i = 1; i < NumExits; ++i, PrevENT = ENT, ++ENT) {
+ PrevENT->setNextExit(ENT);
+ ENT->ExitingBlock = ExitCounts[i].first;
+ ENT->ExactNotTaken = ExitCounts[i].second;
+ }
+}
+
+/// clear - Invalidate this result and free the ExitNotTakenInfo array.
+void ScalarEvolution::BackedgeTakenInfo::clear() {
+ ExitNotTaken.ExitingBlock = 0;
+ ExitNotTaken.ExactNotTaken = 0;
+ delete[] ExitNotTaken.getNextExit();
+}
+
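The storage scheme above, reduced to its essentials (plain C++11, separate from the patch; ExitInfo and TakenInfo are illustrative names): the first exit lives inline, the rare extra exits share one heap array threaded through the inline head, and clear() frees everything with a single delete[].

#include <cstdio>

struct ExitInfo {
  int ExitId = 0;          // stands in for ExitingBlock/ExactNotTaken
  ExitInfo *Next = nullptr;
};

struct TakenInfo {
  ExitInfo Head;           // first computable exit, stored inline
  void init(const int *Ids, unsigned N) {
    if (N == 0) return;
    Head.ExitId = Ids[0];
    if (N == 1) return;
    ExitInfo *ENT = new ExitInfo[N-1]; // rare multi-exit case
    ExitInfo *Prev = &Head;
    for (unsigned i = 1; i != N; ++i, Prev = ENT, ++ENT) {
      Prev->Next = ENT;
      ENT->ExitId = Ids[i];
    }
  }
  void clear() { delete[] Head.Next; Head.Next = nullptr; }
};

int main() {
  const int Ids[] = {10, 20, 30};
  TakenInfo TI;
  TI.init(Ids, 3);
  for (const ExitInfo *E = &TI.Head; E; E = E->Next)
    printf("exit %d\n", E->ExitId); // 10, 20, 30
  TI.clear(); // one deallocation covers the whole chain
  return 0;
}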
/// ComputeBackedgeTakenCount - Compute the number of times the backedge
/// of the specified loop will execute.
ScalarEvolution::BackedgeTakenInfo
@@ -3992,38 +4239,31 @@ ScalarEvolution::ComputeBackedgeTakenCount(const Loop *L) {
L->getExitingBlocks(ExitingBlocks);
// Examine all exits and pick the most conservative values.
- const SCEV *BECount = getCouldNotCompute();
const SCEV *MaxBECount = getCouldNotCompute();
- bool CouldNotComputeBECount = false;
+ bool CouldComputeBECount = true;
+ SmallVector<std::pair<BasicBlock *, const SCEV *>, 4> ExitCounts;
for (unsigned i = 0, e = ExitingBlocks.size(); i != e; ++i) {
- BackedgeTakenInfo NewBTI =
- ComputeBackedgeTakenCountFromExit(L, ExitingBlocks[i]);
-
- if (NewBTI.Exact == getCouldNotCompute()) {
+ ExitLimit EL = ComputeExitLimit(L, ExitingBlocks[i]);
+ if (EL.Exact == getCouldNotCompute())
// We couldn't compute an exact value for this exit, so
// we won't be able to compute an exact value for the loop.
- CouldNotComputeBECount = true;
- BECount = getCouldNotCompute();
- } else if (!CouldNotComputeBECount) {
- if (BECount == getCouldNotCompute())
- BECount = NewBTI.Exact;
- else
- BECount = getUMinFromMismatchedTypes(BECount, NewBTI.Exact);
- }
+ CouldComputeBECount = false;
+ else
+ ExitCounts.push_back(std::make_pair(ExitingBlocks[i], EL.Exact));
+
if (MaxBECount == getCouldNotCompute())
- MaxBECount = NewBTI.Max;
- else if (NewBTI.Max != getCouldNotCompute())
- MaxBECount = getUMinFromMismatchedTypes(MaxBECount, NewBTI.Max);
+ MaxBECount = EL.Max;
+ else if (EL.Max != getCouldNotCompute())
+ MaxBECount = getUMinFromMismatchedTypes(MaxBECount, EL.Max);
}
- return BackedgeTakenInfo(BECount, MaxBECount);
+ return BackedgeTakenInfo(ExitCounts, CouldComputeBECount, MaxBECount);
}
-/// ComputeBackedgeTakenCountFromExit - Compute the number of times the backedge
-/// of the specified loop will execute if it exits via the specified block.
-ScalarEvolution::BackedgeTakenInfo
-ScalarEvolution::ComputeBackedgeTakenCountFromExit(const Loop *L,
- BasicBlock *ExitingBlock) {
+/// ComputeExitLimit - Compute the number of times the backedge of the specified
+/// loop will execute if it exits via the specified block.
+ScalarEvolution::ExitLimit
+ScalarEvolution::ComputeExitLimit(const Loop *L, BasicBlock *ExitingBlock) {
// Okay, we've chosen an exiting block. See what condition causes us to
// exit at this block.
@@ -4081,95 +4321,91 @@ ScalarEvolution::ComputeBackedgeTakenCountFromExit(const Loop *L,
}
// Proceed to the next level to examine the exit condition expression.
- return ComputeBackedgeTakenCountFromExitCond(L, ExitBr->getCondition(),
- ExitBr->getSuccessor(0),
- ExitBr->getSuccessor(1));
+ return ComputeExitLimitFromCond(L, ExitBr->getCondition(),
+ ExitBr->getSuccessor(0),
+ ExitBr->getSuccessor(1));
}
-/// ComputeBackedgeTakenCountFromExitCond - Compute the number of times the
+/// ComputeExitLimitFromCond - Compute the number of times the
/// backedge of the specified loop will execute if its exit condition
/// were a conditional branch of ExitCond, TBB, and FBB.
-ScalarEvolution::BackedgeTakenInfo
-ScalarEvolution::ComputeBackedgeTakenCountFromExitCond(const Loop *L,
- Value *ExitCond,
- BasicBlock *TBB,
- BasicBlock *FBB) {
+ScalarEvolution::ExitLimit
+ScalarEvolution::ComputeExitLimitFromCond(const Loop *L,
+ Value *ExitCond,
+ BasicBlock *TBB,
+ BasicBlock *FBB) {
// Check if the controlling expression for this loop is an And or Or.
if (BinaryOperator *BO = dyn_cast<BinaryOperator>(ExitCond)) {
if (BO->getOpcode() == Instruction::And) {
// Recurse on the operands of the and.
- BackedgeTakenInfo BTI0 =
- ComputeBackedgeTakenCountFromExitCond(L, BO->getOperand(0), TBB, FBB);
- BackedgeTakenInfo BTI1 =
- ComputeBackedgeTakenCountFromExitCond(L, BO->getOperand(1), TBB, FBB);
+ ExitLimit EL0 = ComputeExitLimitFromCond(L, BO->getOperand(0), TBB, FBB);
+ ExitLimit EL1 = ComputeExitLimitFromCond(L, BO->getOperand(1), TBB, FBB);
const SCEV *BECount = getCouldNotCompute();
const SCEV *MaxBECount = getCouldNotCompute();
if (L->contains(TBB)) {
// Both conditions must be true for the loop to continue executing.
// Choose the less conservative count.
- if (BTI0.Exact == getCouldNotCompute() ||
- BTI1.Exact == getCouldNotCompute())
+ if (EL0.Exact == getCouldNotCompute() ||
+ EL1.Exact == getCouldNotCompute())
BECount = getCouldNotCompute();
else
- BECount = getUMinFromMismatchedTypes(BTI0.Exact, BTI1.Exact);
- if (BTI0.Max == getCouldNotCompute())
- MaxBECount = BTI1.Max;
- else if (BTI1.Max == getCouldNotCompute())
- MaxBECount = BTI0.Max;
+ BECount = getUMinFromMismatchedTypes(EL0.Exact, EL1.Exact);
+ if (EL0.Max == getCouldNotCompute())
+ MaxBECount = EL1.Max;
+ else if (EL1.Max == getCouldNotCompute())
+ MaxBECount = EL0.Max;
else
- MaxBECount = getUMinFromMismatchedTypes(BTI0.Max, BTI1.Max);
+ MaxBECount = getUMinFromMismatchedTypes(EL0.Max, EL1.Max);
} else {
// Both conditions must be true at the same time for the loop to exit.
// For now, be conservative.
assert(L->contains(FBB) && "Loop block has no successor in loop!");
- if (BTI0.Max == BTI1.Max)
- MaxBECount = BTI0.Max;
- if (BTI0.Exact == BTI1.Exact)
- BECount = BTI0.Exact;
+ if (EL0.Max == EL1.Max)
+ MaxBECount = EL0.Max;
+ if (EL0.Exact == EL1.Exact)
+ BECount = EL0.Exact;
}
- return BackedgeTakenInfo(BECount, MaxBECount);
+ return ExitLimit(BECount, MaxBECount);
}
if (BO->getOpcode() == Instruction::Or) {
// Recurse on the operands of the or.
- BackedgeTakenInfo BTI0 =
- ComputeBackedgeTakenCountFromExitCond(L, BO->getOperand(0), TBB, FBB);
- BackedgeTakenInfo BTI1 =
- ComputeBackedgeTakenCountFromExitCond(L, BO->getOperand(1), TBB, FBB);
+ ExitLimit EL0 = ComputeExitLimitFromCond(L, BO->getOperand(0), TBB, FBB);
+ ExitLimit EL1 = ComputeExitLimitFromCond(L, BO->getOperand(1), TBB, FBB);
const SCEV *BECount = getCouldNotCompute();
const SCEV *MaxBECount = getCouldNotCompute();
if (L->contains(FBB)) {
// Both conditions must be false for the loop to continue executing.
// Choose the less conservative count.
- if (BTI0.Exact == getCouldNotCompute() ||
- BTI1.Exact == getCouldNotCompute())
+ if (EL0.Exact == getCouldNotCompute() ||
+ EL1.Exact == getCouldNotCompute())
BECount = getCouldNotCompute();
else
- BECount = getUMinFromMismatchedTypes(BTI0.Exact, BTI1.Exact);
- if (BTI0.Max == getCouldNotCompute())
- MaxBECount = BTI1.Max;
- else if (BTI1.Max == getCouldNotCompute())
- MaxBECount = BTI0.Max;
+ BECount = getUMinFromMismatchedTypes(EL0.Exact, EL1.Exact);
+ if (EL0.Max == getCouldNotCompute())
+ MaxBECount = EL1.Max;
+ else if (EL1.Max == getCouldNotCompute())
+ MaxBECount = EL0.Max;
else
- MaxBECount = getUMinFromMismatchedTypes(BTI0.Max, BTI1.Max);
+ MaxBECount = getUMinFromMismatchedTypes(EL0.Max, EL1.Max);
} else {
// Both conditions must be false at the same time for the loop to exit.
// For now, be conservative.
assert(L->contains(TBB) && "Loop block has no successor in loop!");
- if (BTI0.Max == BTI1.Max)
- MaxBECount = BTI0.Max;
- if (BTI0.Exact == BTI1.Exact)
- BECount = BTI0.Exact;
+ if (EL0.Max == EL1.Max)
+ MaxBECount = EL0.Max;
+ if (EL0.Exact == EL1.Exact)
+ BECount = EL0.Exact;
}
- return BackedgeTakenInfo(BECount, MaxBECount);
+ return ExitLimit(BECount, MaxBECount);
}
}
// With an icmp, it may be feasible to compute an exact backedge-taken count.
// Proceed to the next level to examine the icmp.
if (ICmpInst *ExitCondICmp = dyn_cast<ICmpInst>(ExitCond))
- return ComputeBackedgeTakenCountFromExitCondICmp(L, ExitCondICmp, TBB, FBB);
+ return ComputeExitLimitFromICmp(L, ExitCondICmp, TBB, FBB);
// Check for a constant condition. These are normally stripped out by
// SimplifyCFG, but ScalarEvolution may be used by a pass which wishes to
@@ -4185,17 +4421,17 @@ ScalarEvolution::ComputeBackedgeTakenCountFromExitCond(const Loop *L,
}
// If it's not an integer or pointer comparison then compute it the hard way.
- return ComputeBackedgeTakenCountExhaustively(L, ExitCond, !L->contains(TBB));
+ return ComputeExitCountExhaustively(L, ExitCond, !L->contains(TBB));
}
-/// ComputeBackedgeTakenCountFromExitCondICmp - Compute the number of times the
+/// ComputeExitLimitFromICmp - Compute the number of times the
/// backedge of the specified loop will execute if its exit condition
/// were a conditional branch of the ICmpInst ExitCond, TBB, and FBB.
-ScalarEvolution::BackedgeTakenInfo
-ScalarEvolution::ComputeBackedgeTakenCountFromExitCondICmp(const Loop *L,
- ICmpInst *ExitCond,
- BasicBlock *TBB,
- BasicBlock *FBB) {
+ScalarEvolution::ExitLimit
+ScalarEvolution::ComputeExitLimitFromICmp(const Loop *L,
+ ICmpInst *ExitCond,
+ BasicBlock *TBB,
+ BasicBlock *FBB) {
// If the condition was exit on true, convert the condition to exit on false
ICmpInst::Predicate Cond;
@@ -4207,8 +4443,8 @@ ScalarEvolution::ComputeBackedgeTakenCountFromExitCondICmp(const Loop *L,
// Handle common loops like: for (X = "string"; *X; ++X)
if (LoadInst *LI = dyn_cast<LoadInst>(ExitCond->getOperand(0)))
if (Constant *RHS = dyn_cast<Constant>(ExitCond->getOperand(1))) {
- BackedgeTakenInfo ItCnt =
- ComputeLoadConstantCompareBackedgeTakenCount(LI, RHS, L, Cond);
+ ExitLimit ItCnt =
+ ComputeLoadConstantCompareExitLimit(LI, RHS, L, Cond);
if (ItCnt.hasAnyInfo())
return ItCnt;
}
@@ -4247,36 +4483,36 @@ ScalarEvolution::ComputeBackedgeTakenCountFromExitCondICmp(const Loop *L,
switch (Cond) {
case ICmpInst::ICMP_NE: { // while (X != Y)
// Convert to: while (X-Y != 0)
- BackedgeTakenInfo BTI = HowFarToZero(getMinusSCEV(LHS, RHS), L);
- if (BTI.hasAnyInfo()) return BTI;
+ ExitLimit EL = HowFarToZero(getMinusSCEV(LHS, RHS), L);
+ if (EL.hasAnyInfo()) return EL;
break;
}
case ICmpInst::ICMP_EQ: { // while (X == Y)
// Convert to: while (X-Y == 0)
- BackedgeTakenInfo BTI = HowFarToNonZero(getMinusSCEV(LHS, RHS), L);
- if (BTI.hasAnyInfo()) return BTI;
+ ExitLimit EL = HowFarToNonZero(getMinusSCEV(LHS, RHS), L);
+ if (EL.hasAnyInfo()) return EL;
break;
}
case ICmpInst::ICMP_SLT: {
- BackedgeTakenInfo BTI = HowManyLessThans(LHS, RHS, L, true);
- if (BTI.hasAnyInfo()) return BTI;
+ ExitLimit EL = HowManyLessThans(LHS, RHS, L, true);
+ if (EL.hasAnyInfo()) return EL;
break;
}
case ICmpInst::ICMP_SGT: {
- BackedgeTakenInfo BTI = HowManyLessThans(getNotSCEV(LHS),
+ ExitLimit EL = HowManyLessThans(getNotSCEV(LHS),
getNotSCEV(RHS), L, true);
- if (BTI.hasAnyInfo()) return BTI;
+ if (EL.hasAnyInfo()) return EL;
break;
}
case ICmpInst::ICMP_ULT: {
- BackedgeTakenInfo BTI = HowManyLessThans(LHS, RHS, L, false);
- if (BTI.hasAnyInfo()) return BTI;
+ ExitLimit EL = HowManyLessThans(LHS, RHS, L, false);
+ if (EL.hasAnyInfo()) return EL;
break;
}
case ICmpInst::ICMP_UGT: {
- BackedgeTakenInfo BTI = HowManyLessThans(getNotSCEV(LHS),
+ ExitLimit EL = HowManyLessThans(getNotSCEV(LHS),
getNotSCEV(RHS), L, false);
- if (BTI.hasAnyInfo()) return BTI;
+ if (EL.hasAnyInfo()) return EL;
break;
}
default:
@@ -4290,8 +4526,7 @@ ScalarEvolution::ComputeBackedgeTakenCountFromExitCondICmp(const Loop *L,
#endif
break;
}
- return
- ComputeBackedgeTakenCountExhaustively(L, ExitCond, !L->contains(TBB));
+ return ComputeExitCountExhaustively(L, ExitCond, !L->contains(TBB));
}
static ConstantInt *
@@ -4321,10 +4556,10 @@ GetAddressedElementFromGlobal(GlobalVariable *GV,
if (Idx >= CA->getNumOperands()) return 0; // Bogus program
Init = cast<Constant>(CA->getOperand(Idx));
} else if (isa<ConstantAggregateZero>(Init)) {
- if (const StructType *STy = dyn_cast<StructType>(Init->getType())) {
+ if (StructType *STy = dyn_cast<StructType>(Init->getType())) {
assert(Idx < STy->getNumElements() && "Bad struct index!");
Init = Constant::getNullValue(STy->getElementType(Idx));
- } else if (const ArrayType *ATy = dyn_cast<ArrayType>(Init->getType())) {
+ } else if (ArrayType *ATy = dyn_cast<ArrayType>(Init->getType())) {
if (Idx >= ATy->getNumElements()) return 0; // Bogus program
Init = Constant::getNullValue(ATy->getElementType());
} else {
@@ -4338,15 +4573,16 @@ GetAddressedElementFromGlobal(GlobalVariable *GV,
return Init;
}
-/// ComputeLoadConstantCompareBackedgeTakenCount - Given an exit condition of
+/// ComputeLoadConstantCompareExitLimit - Given an exit condition of
/// 'icmp op load X, cst', try to see if we can compute the backedge
/// execution count.
-ScalarEvolution::BackedgeTakenInfo
-ScalarEvolution::ComputeLoadConstantCompareBackedgeTakenCount(
- LoadInst *LI,
- Constant *RHS,
- const Loop *L,
- ICmpInst::Predicate predicate) {
+ScalarEvolution::ExitLimit
+ScalarEvolution::ComputeLoadConstantCompareExitLimit(
+ LoadInst *LI,
+ Constant *RHS,
+ const Loop *L,
+ ICmpInst::Predicate predicate) {
+
if (LI->isVolatile()) return getCouldNotCompute();
// Check to see if the loaded pointer is a getelementptr of a global.
@@ -4431,69 +4667,117 @@ static bool CanConstantFold(const Instruction *I) {
return false;
}
-/// getConstantEvolvingPHI - Given an LLVM value and a loop, return a PHI node
-/// in the loop that V is derived from. We allow arbitrary operations along the
-/// way, but the operands of an operation must either be constants or a value
-/// derived from a constant PHI. If this expression does not fit with these
-/// constraints, return null.
-static PHINode *getConstantEvolvingPHI(Value *V, const Loop *L) {
- // If this is not an instruction, or if this is an instruction outside of the
- // loop, it can't be derived from a loop PHI.
- Instruction *I = dyn_cast<Instruction>(V);
- if (I == 0 || !L->contains(I)) return 0;
+/// Determine whether this instruction can constant evolve within this loop
+/// assuming its operands can all constant evolve.
+static bool canConstantEvolve(Instruction *I, const Loop *L) {
+ // An instruction outside of the loop can't be derived from a loop PHI.
+ if (!L->contains(I)) return false;
- if (PHINode *PN = dyn_cast<PHINode>(I)) {
+ if (isa<PHINode>(I)) {
if (L->getHeader() == I->getParent())
- return PN;
+ return true;
else
// We don't currently keep track of the control flow needed to evaluate
// PHIs, so we cannot handle PHIs inside of loops.
- return 0;
+ return false;
}
// If we won't be able to constant fold this expression even if the operands
- // are constants, return early.
- if (!CanConstantFold(I)) return 0;
+ // are constants, bail early.
+ return CanConstantFold(I);
+}
+
+/// getConstantEvolvingPHIOperands - Implement getConstantEvolvingPHI by
+/// recursing through each instruction operand until reaching a loop header phi.
+static PHINode *
+getConstantEvolvingPHIOperands(Instruction *UseInst, const Loop *L,
+ DenseMap<Instruction *, PHINode *> &PHIMap) {
// Otherwise, we can evaluate this instruction if all of its operands are
// constant or derived from a PHI node themselves.
PHINode *PHI = 0;
- for (unsigned Op = 0, e = I->getNumOperands(); Op != e; ++Op)
- if (!isa<Constant>(I->getOperand(Op))) {
- PHINode *P = getConstantEvolvingPHI(I->getOperand(Op), L);
- if (P == 0) return 0; // Not evolving from PHI
- if (PHI == 0)
- PHI = P;
- else if (PHI != P)
- return 0; // Evolving from multiple different PHIs.
+ for (Instruction::op_iterator OpI = UseInst->op_begin(),
+ OpE = UseInst->op_end(); OpI != OpE; ++OpI) {
+
+ if (isa<Constant>(*OpI)) continue;
+
+ Instruction *OpInst = dyn_cast<Instruction>(*OpI);
+ if (!OpInst || !canConstantEvolve(OpInst, L)) return 0;
+
+ PHINode *P = dyn_cast<PHINode>(OpInst);
+ if (!P)
+ // If this operand is already visited, reuse the prior result.
+ // We may have P != PHI if this is the deepest point at which the
+ // inconsistent paths meet.
+ P = PHIMap.lookup(OpInst);
+ if (!P) {
+ // Recurse and memoize the results, whether a phi is found or not.
+ // This recursive call invalidates pointers into PHIMap.
+ P = getConstantEvolvingPHIOperands(OpInst, L, PHIMap);
+ PHIMap[OpInst] = P;
}
-
+ if (P == 0) return 0; // Not evolving from PHI
+ if (PHI && PHI != P) return 0; // Evolving from multiple different PHIs.
+ PHI = P;
+ }
  // This is an expression evolving from a constant PHI!
return PHI;
}
+/// getConstantEvolvingPHI - Given an LLVM value and a loop, return a PHI node
+/// in the loop that V is derived from. We allow arbitrary operations along the
+/// way, but the operands of an operation must be either constants or values
+/// derived from a constant PHI. If this expression does not fit with these
+/// constraints, return null.
+static PHINode *getConstantEvolvingPHI(Value *V, const Loop *L) {
+ Instruction *I = dyn_cast<Instruction>(V);
+ if (I == 0 || !canConstantEvolve(I, L)) return 0;
+
+ if (PHINode *PN = dyn_cast<PHINode>(I)) {
+ return PN;
+ }
+
+ // Record non-constant instructions contained by the loop.
+ DenseMap<Instruction *, PHINode *> PHIMap;
+ return getConstantEvolvingPHIOperands(I, L, PHIMap);
+}
+
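The memoized walk above, reduced to a toy expression DAG (plain C++11, separate from the patch; Node and findEvolvingPhi are illustrative names): each visited node is mapped to the single "phi" leaf it evolves from, or null, so shared subexpressions are visited only once.

#include <cstdio>
#include <map>
#include <vector>

struct Node {
  bool IsPhi = false, IsConst = false;
  std::vector<Node *> Ops;
};

static Node *findEvolvingPhi(Node *N, std::map<Node *, Node *> &Memo) {
  if (N->IsPhi) return N;
  Node *Phi = nullptr;
  for (Node *Op : N->Ops) {
    if (Op->IsConst) continue;
    Node *P;
    std::map<Node *, Node *>::iterator It = Memo.find(Op);
    if (It != Memo.end()) {
      P = It->second;             // reuse the prior result
    } else {
      P = findEvolvingPhi(Op, Memo);
      Memo[Op] = P;               // memoize even a null result
    }
    if (!P) return nullptr;       // operand not evolving from a phi
    if (Phi && Phi != P) return nullptr; // two different phis: give up
    Phi = P;
  }
  return Phi;
}

int main() {
  Node Phi; Phi.IsPhi = true;
  Node C;   C.IsConst = true;
  Node Mul; Mul.Ops = {&Phi, &C};   // phi * c
  Node Add; Add.Ops = {&Mul, &Mul}; // shares Mul: second visit hits the memo
  std::map<Node *, Node *> Memo;
  printf("%s\n", findEvolvingPhi(&Add, Memo) == &Phi
                     ? "evolves from a single phi" : "no single phi");
  return 0;
}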
/// EvaluateExpression - Given an expression that passes the
/// getConstantEvolvingPHI predicate, evaluate its value assuming the PHI
/// nodes in the loop have the constant values given in Vals. If we can't
/// fold this expression for some
/// reason, return null.
-static Constant *EvaluateExpression(Value *V, Constant *PHIVal,
+static Constant *EvaluateExpression(Value *V, const Loop *L,
+ DenseMap<Instruction *, Constant *> &Vals,
const TargetData *TD) {
- if (isa<PHINode>(V)) return PHIVal;
+ // Convenient constant check, but redundant for recursive calls.
if (Constant *C = dyn_cast<Constant>(V)) return C;
+
Instruction *I = cast<Instruction>(V);
+ if (Constant *C = Vals.lookup(I)) return C;
+
+ assert(!isa<PHINode>(I) && "loop header phis should be mapped to constant");
+ assert(canConstantEvolve(I, L) && "cannot evaluate expression in this loop");
+ (void)L;
std::vector<Constant*> Operands(I->getNumOperands());
for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) {
- Operands[i] = EvaluateExpression(I->getOperand(i), PHIVal, TD);
- if (Operands[i] == 0) return 0;
+ Instruction *Operand = dyn_cast<Instruction>(I->getOperand(i));
+ if (!Operand) {
+ Operands[i] = dyn_cast<Constant>(I->getOperand(i));
+ if (!Operands[i]) return 0;
+ continue;
+ }
+ Constant *C = EvaluateExpression(Operand, L, Vals, TD);
+ Vals[Operand] = C;
+ if (!C) return 0;
+ Operands[i] = C;
}
if (const CmpInst *CI = dyn_cast<CmpInst>(I))
return ConstantFoldCompareInstOperands(CI->getPredicate(), Operands[0],
Operands[1], TD);
- return ConstantFoldInstOperands(I->getOpcode(), I->getType(),
- &Operands[0], Operands.size(), TD);
+ return ConstantFoldInstOperands(I->getOpcode(), I->getType(), Operands, TD);
}
/// getConstantEvolutionLoopExitValue - If we know that the specified Phi is
@@ -4514,6 +4798,9 @@ ScalarEvolution::getConstantEvolutionLoopExitValue(PHINode *PN,
Constant *&RetVal = ConstantEvolutionLoopExitValue[PN];
+ // FIXME: Nick's fix for PR11034 will seed constants for multiple header phis.
+ DenseMap<Instruction *, Constant *> CurrentIterVals;
+
// Since the loop is canonicalized, the PHI node must have two entries. One
// entry must be a constant (coming in from outside of the loop), and the
// second must be derived from the same PHI.
@@ -4522,6 +4809,7 @@ ScalarEvolution::getConstantEvolutionLoopExitValue(PHINode *PN,
dyn_cast<Constant>(PN->getIncomingValue(!SecondIsBackedge));
if (StartCST == 0)
return RetVal = 0; // Must be a constant.
+ CurrentIterVals[PN] = StartCST;
Value *BEValue = PN->getIncomingValue(SecondIsBackedge);
if (getConstantEvolvingPHI(BEValue, L) != PN &&
@@ -4534,29 +4822,31 @@ ScalarEvolution::getConstantEvolutionLoopExitValue(PHINode *PN,
unsigned NumIterations = BEs.getZExtValue(); // must be in range
unsigned IterationNum = 0;
- for (Constant *PHIVal = StartCST; ; ++IterationNum) {
+ for (; ; ++IterationNum) {
if (IterationNum == NumIterations)
- return RetVal = PHIVal; // Got exit value!
+ return RetVal = CurrentIterVals[PN]; // Got exit value!
// Compute the value of the PHI node for the next iteration.
- Constant *NextPHI = EvaluateExpression(BEValue, PHIVal, TD);
- if (NextPHI == PHIVal)
+ // EvaluateExpression adds non-phi values to the CurrentIterVals map.
+ Constant *NextPHI = EvaluateExpression(BEValue, L, CurrentIterVals, TD);
+ if (NextPHI == CurrentIterVals[PN])
return RetVal = NextPHI; // Stopped evolving!
if (NextPHI == 0)
return 0; // Couldn't evaluate!
- PHIVal = NextPHI;
+ DenseMap<Instruction *, Constant *> NextIterVals;
+ NextIterVals[PN] = NextPHI;
+ CurrentIterVals.swap(NextIterVals);
}
}
-/// ComputeBackedgeTakenCountExhaustively - If the loop is known to execute a
+/// ComputeExitCountExhaustively - If the loop is known to execute a
/// constant number of times (the condition evolves only from constants),
/// try to evaluate a few iterations of the loop until the exit condition
/// gets a value of ExitWhen (true or false). If we cannot
/// evaluate the trip count of the loop, return getCouldNotCompute().
-const SCEV *
-ScalarEvolution::ComputeBackedgeTakenCountExhaustively(const Loop *L,
- Value *Cond,
- bool ExitWhen) {
+const SCEV *ScalarEvolution::ComputeExitCountExhaustively(const Loop *L,
+                                                          Value *Cond,
+                                                          bool ExitWhen) {
PHINode *PN = getConstantEvolvingPHI(Cond, L);
if (PN == 0) return getCouldNotCompute();
@@ -4583,8 +4873,10 @@ ScalarEvolution::ComputeBackedgeTakenCountExhaustively(const Loop *L,
unsigned MaxIterations = MaxBruteForceIterations; // Limit analysis.
for (Constant *PHIVal = StartCST;
IterationNum != MaxIterations; ++IterationNum) {
+ DenseMap<Instruction *, Constant *> PHIValMap;
+ PHIValMap[PN] = PHIVal;
ConstantInt *CondVal =
- dyn_cast_or_null<ConstantInt>(EvaluateExpression(Cond, PHIVal, TD));
+ dyn_cast_or_null<ConstantInt>(EvaluateExpression(Cond, L, PHIValMap, TD));
// Couldn't symbolically evaluate.
if (!CondVal) return getCouldNotCompute();
@@ -4595,7 +4887,7 @@ ScalarEvolution::ComputeBackedgeTakenCountExhaustively(const Loop *L,
}
// Compute the value of the PHI node for the next iteration.
- Constant *NextPHI = EvaluateExpression(BEValue, PHIVal, TD);
+ Constant *NextPHI = EvaluateExpression(BEValue, L, PHIValMap, TD);
if (NextPHI == 0 || NextPHI == PHIVal)
      return getCouldNotCompute(); // Couldn't evaluate or not making progress...
PHIVal = NextPHI;
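The brute-force loop above, reduced to a concrete case (plain C++, separate from the patch): for the recurrence phi -> phi + 3 with exit test phi == 12 and ExitWhen = true, the condition first evaluates true at iteration number 4, which is what the routine reports.

#include <cstdio>

int main() {
  const unsigned MaxIterations = 100; // budget, like MaxBruteForceIterations
  unsigned PHIVal = 0;                // constant start value of the header phi
  for (unsigned IterationNum = 0; IterationNum != MaxIterations;
       ++IterationNum) {
    bool CondVal = (PHIVal == 12);    // evaluate the exit condition
    if (CondVal) {                    // CondVal == ExitWhen
      printf("count = %u\n", IterationNum); // 4
      return 0;
    }
    PHIVal += 3;                      // evaluate the backedge value
  }
  printf("could not compute\n");
  return 0;
}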
@@ -4703,7 +4995,7 @@ const SCEV *ScalarEvolution::computeSCEVAtScope(const SCEV *V, const Loop *L) {
Operands[0], Operands[1], TD);
else
C = ConstantFoldInstOperands(I->getOpcode(), I->getType(),
- &Operands[0], Operands.size(), TD);
+ Operands, TD);
if (!C) return V;
return getSCEV(C);
}
@@ -4925,7 +5217,7 @@ SolveQuadraticEquation(const SCEVAddRecExpr *AddRec, ScalarEvolution &SE) {
// Compute the two solutions for the quadratic formula.
// The divisions must be performed as signed divisions.
APInt NegB(-B);
- APInt TwoA( A << 1 );
+ APInt TwoA(A << 1);
if (TwoA.isMinValue()) {
const SCEV *CNC = SE.getCouldNotCompute();
return std::make_pair(CNC, CNC);
@@ -4940,7 +5232,7 @@ SolveQuadraticEquation(const SCEVAddRecExpr *AddRec, ScalarEvolution &SE) {
return std::make_pair(SE.getConstant(Solution1),
SE.getConstant(Solution2));
- } // end APIntOps namespace
+ } // end APIntOps namespace
}
/// HowFarToZero - Return the number of times a backedge comparing the specified
@@ -4950,7 +5242,7 @@ SolveQuadraticEquation(const SCEVAddRecExpr *AddRec, ScalarEvolution &SE) {
/// now expressed as a single expression, V = x-y. So the exit test is
/// effectively V != 0. We know and take advantage of the fact that this
/// expression is only used in a compare-with-zero context.
-ScalarEvolution::BackedgeTakenInfo
+ScalarEvolution::ExitLimit
ScalarEvolution::HowFarToZero(const SCEV *V, const Loop *L) {
// If the value is a constant
if (const SCEVConstant *C = dyn_cast<SCEVConstant>(V)) {
@@ -5034,8 +5326,19 @@ ScalarEvolution::HowFarToZero(const SCEV *V, const Loop *L) {
// Handle unitary steps, which cannot wraparound.
// 1*N = -Start; -1*N = Start (mod 2^BW), so:
// N = Distance (as unsigned)
- if (StepC->getValue()->equalsInt(1) || StepC->getValue()->isAllOnesValue())
- return Distance;
+ if (StepC->getValue()->equalsInt(1) || StepC->getValue()->isAllOnesValue()) {
+ ConstantRange CR = getUnsignedRange(Start);
+ const SCEV *MaxBECount;
+ if (!CountDown && CR.getUnsignedMin().isMinValue())
+ // When counting up, the worst starting value is 1, not 0.
+ MaxBECount = CR.getUnsignedMax().isMinValue()
+ ? getConstant(APInt::getMinValue(CR.getBitWidth()))
+ : getConstant(APInt::getMaxValue(CR.getBitWidth()));
+ else
+ MaxBECount = getConstant(CountDown ? CR.getUnsignedMax()
+ : -CR.getUnsignedMin());
+ return ExitLimit(Distance, MaxBECount);
+ }
// If the recurrence is known not to wraparound, unsigned divide computes the
// back edge count. We know that the value will either become zero (and thus
@@ -5062,7 +5365,7 @@ ScalarEvolution::HowFarToZero(const SCEV *V, const Loop *L) {
/// HowFarToNonZero - Return the number of times a backedge checking the
/// specified value for nonzero will execute. If not computable, return
/// CouldNotCompute
-ScalarEvolution::BackedgeTakenInfo
+ScalarEvolution::ExitLimit
ScalarEvolution::HowFarToNonZero(const SCEV *V, const Loop *L) {
// Loops that look like: while (X == 0) are very strange indeed. We don't
// handle them yet except for the trivial case. This could be expanded in the
@@ -5741,7 +6044,7 @@ const SCEV *ScalarEvolution::getBECount(const SCEV *Start,
assert(!isKnownNegative(Step) &&
"This code doesn't handle negative strides yet!");
- const Type *Ty = Start->getType();
+ Type *Ty = Start->getType();
// When Start == End, we have an exact BECount == 0. Short-circuit this case
// here because SCEV may not be able to determine that the unsigned division
@@ -5760,7 +6063,7 @@ const SCEV *ScalarEvolution::getBECount(const SCEV *Start,
if (!NoWrap) {
// Check Add for unsigned overflow.
// TODO: More sophisticated things could be done here.
- const Type *WideTy = IntegerType::get(getContext(),
+ Type *WideTy = IntegerType::get(getContext(),
getTypeSizeInBits(Ty) + 1);
const SCEV *EDiff = getZeroExtendExpr(Diff, WideTy);
const SCEV *ERoundUp = getZeroExtendExpr(RoundUp, WideTy);
@@ -5775,7 +6078,7 @@ const SCEV *ScalarEvolution::getBECount(const SCEV *Start,
/// HowManyLessThans - Return the number of times a backedge containing the
/// specified less-than comparison will execute. If not computable, return
/// CouldNotCompute.
-ScalarEvolution::BackedgeTakenInfo
+ScalarEvolution::ExitLimit
ScalarEvolution::HowManyLessThans(const SCEV *LHS, const SCEV *RHS,
const Loop *L, bool isSigned) {
// Only handle: "ADDREC < LoopInvariant".
@@ -5882,7 +6185,7 @@ ScalarEvolution::HowManyLessThans(const SCEV *LHS, const SCEV *RHS,
if (isa<SCEVCouldNotCompute>(MaxBECount))
MaxBECount = BECount;
- return BackedgeTakenInfo(BECount, MaxBECount);
+ return ExitLimit(BECount, MaxBECount);
}
return getCouldNotCompute();
@@ -6090,6 +6393,15 @@ void ScalarEvolution::releaseMemory() {
FirstUnknown = 0;
ValueExprMap.clear();
+
+ // Free any extra memory created for ExitNotTakenInfo in the unlikely event
+ // that a loop had multiple computable exits.
+ for (DenseMap<const Loop*, BackedgeTakenInfo>::iterator I =
+ BackedgeTakenCounts.begin(), E = BackedgeTakenCounts.end();
+ I != E; ++I) {
+ I->second.clear();
+ }
+
BackedgeTakenCounts.clear();
ConstantEvolutionLoopExitValue.clear();
ValuesAtScopes.clear();
diff --git a/lib/Analysis/ScalarEvolutionExpander.cpp b/lib/Analysis/ScalarEvolutionExpander.cpp
index befe6d2..47f0f32 100644
--- a/lib/Analysis/ScalarEvolutionExpander.cpp
+++ b/lib/Analysis/ScalarEvolutionExpander.cpp
@@ -17,6 +17,7 @@
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/IntrinsicInst.h"
#include "llvm/LLVMContext.h"
+#include "llvm/Support/Debug.h"
#include "llvm/Target/TargetData.h"
#include "llvm/ADT/STLExtras.h"
@@ -26,7 +27,7 @@ using namespace llvm;
/// reusing an existing cast if a suitable one exists, moving an existing
/// cast if a suitable one exists but isn't in the right place, or
/// creating a new one.
-Value *SCEVExpander::ReuseOrCreateCast(Value *V, const Type *Ty,
+Value *SCEVExpander::ReuseOrCreateCast(Value *V, Type *Ty,
Instruction::CastOps Op,
BasicBlock::iterator IP) {
// Check to see if there is already a cast!
@@ -62,7 +63,7 @@ Value *SCEVExpander::ReuseOrCreateCast(Value *V, const Type *Ty,
/// InsertNoopCastOfTo - Insert a cast of V to the specified type,
/// which must be possible with a noop cast, doing what we can to share
/// the casts.
-Value *SCEVExpander::InsertNoopCastOfTo(Value *V, const Type *Ty) {
+Value *SCEVExpander::InsertNoopCastOfTo(Value *V, Type *Ty) {
Instruction::CastOps Op = CastInst::getCastOpcode(V, false, Ty, false);
assert((Op == Instruction::BitCast ||
Op == Instruction::PtrToInt ||
@@ -103,7 +104,8 @@ Value *SCEVExpander::InsertNoopCastOfTo(Value *V, const Type *Ty) {
while ((isa<BitCastInst>(IP) &&
isa<Argument>(cast<BitCastInst>(IP)->getOperand(0)) &&
cast<BitCastInst>(IP)->getOperand(0) != A) ||
- isa<DbgInfoIntrinsic>(IP))
+ isa<DbgInfoIntrinsic>(IP) ||
+ isa<LandingPadInst>(IP))
++IP;
return ReuseOrCreateCast(A, Ty, Op, IP);
}
@@ -113,7 +115,9 @@ Value *SCEVExpander::InsertNoopCastOfTo(Value *V, const Type *Ty) {
BasicBlock::iterator IP = I; ++IP;
if (InvokeInst *II = dyn_cast<InvokeInst>(I))
IP = II->getNormalDest()->begin();
- while (isa<PHINode>(IP) || isa<DbgInfoIntrinsic>(IP)) ++IP;
+ while (isa<PHINode>(IP) || isa<DbgInfoIntrinsic>(IP) ||
+ isa<LandingPadInst>(IP))
+ ++IP;
return ReuseOrCreateCast(I, Ty, Op, IP);
}
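Both cast-placement loops above now also skip landingpad instructions: a landingpad is required to be the first non-PHI instruction of its block, so an expanded cast must be inserted after it, never in front of it.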
@@ -160,7 +164,7 @@ Value *SCEVExpander::InsertBinop(Instruction::BinaryOps Opcode,
}
// If we haven't found this binop, insert it.
- Instruction *BO = cast<Instruction>(Builder.CreateBinOp(Opcode, LHS, RHS, "tmp"));
+ Instruction *BO = cast<Instruction>(Builder.CreateBinOp(Opcode, LHS, RHS));
BO->setDebugLoc(SaveInsertPt->getDebugLoc());
rememberInstruction(BO);
@@ -277,7 +281,7 @@ static bool FactorOutConstant(const SCEV *&S,
/// the list.
///
static void SimplifyAddOperands(SmallVectorImpl<const SCEV *> &Ops,
- const Type *Ty,
+ Type *Ty,
ScalarEvolution &SE) {
unsigned NumAddRecs = 0;
for (unsigned i = Ops.size(); i > 0 && isa<SCEVAddRecExpr>(Ops[i-1]); --i)
@@ -306,7 +310,7 @@ static void SimplifyAddOperands(SmallVectorImpl<const SCEV *> &Ops,
/// into GEP indices.
///
static void SplitAddRecs(SmallVectorImpl<const SCEV *> &Ops,
- const Type *Ty,
+ Type *Ty,
ScalarEvolution &SE) {
// Find the addrecs.
SmallVector<const SCEV *, 8> AddRecs;
@@ -365,10 +369,10 @@ static void SplitAddRecs(SmallVectorImpl<const SCEV *> &Ops,
///
Value *SCEVExpander::expandAddToGEP(const SCEV *const *op_begin,
const SCEV *const *op_end,
- const PointerType *PTy,
- const Type *Ty,
+ PointerType *PTy,
+ Type *Ty,
Value *V) {
- const Type *ElTy = PTy->getElementType();
+ Type *ElTy = PTy->getElementType();
SmallVector<Value *, 4> GepIndices;
SmallVector<const SCEV *, 8> Ops(op_begin, op_end);
bool AnyNonZeroIndices = false;
@@ -423,7 +427,7 @@ Value *SCEVExpander::expandAddToGEP(const SCEV *const *op_begin,
GepIndices.push_back(Scaled);
// Collect struct field index operands.
- while (const StructType *STy = dyn_cast<StructType>(ElTy)) {
+ while (StructType *STy = dyn_cast<StructType>(ElTy)) {
bool FoundFieldNo = false;
// An empty struct has no fields.
if (STy->getNumElements() == 0) break;
@@ -451,7 +455,7 @@ Value *SCEVExpander::expandAddToGEP(const SCEV *const *op_begin,
// appropriate struct type.
for (unsigned i = 0, e = Ops.size(); i != e; ++i)
if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(Ops[i])) {
- const Type *CTy;
+ Type *CTy;
Constant *FieldNo;
if (U->isOffsetOf(CTy, FieldNo) && CTy == STy) {
GepIndices.push_back(FieldNo);
@@ -474,7 +478,7 @@ Value *SCEVExpander::expandAddToGEP(const SCEV *const *op_begin,
}
}
- if (const ArrayType *ATy = dyn_cast<ArrayType>(ElTy))
+ if (ArrayType *ATy = dyn_cast<ArrayType>(ElTy))
ElTy = ATy->getElementType();
else
break;
@@ -494,7 +498,7 @@ Value *SCEVExpander::expandAddToGEP(const SCEV *const *op_begin,
// Fold a GEP with constant operands.
if (Constant *CLHS = dyn_cast<Constant>(V))
if (Constant *CRHS = dyn_cast<Constant>(Idx))
- return ConstantExpr::getGetElementPtr(CLHS, &CRHS, 1);
+ return ConstantExpr::getGetElementPtr(CLHS, CRHS);
// Do a quick scan to see if we have this GEP nearby. If so, reuse it.
unsigned ScanLimit = 6;
@@ -572,8 +576,7 @@ Value *SCEVExpander::expandAddToGEP(const SCEV *const *op_begin,
if (V->getType() != PTy)
Casted = InsertNoopCastOfTo(Casted, PTy);
Value *GEP = Builder.CreateGEP(Casted,
- GepIndices.begin(),
- GepIndices.end(),
+ GepIndices,
"scevgep");
Ops.push_back(SE.getUnknown(GEP));
rememberInstruction(GEP);
@@ -691,7 +694,7 @@ public:
}
Value *SCEVExpander::visitAddExpr(const SCEVAddExpr *S) {
- const Type *Ty = SE.getEffectiveSCEVType(S->getType());
+ Type *Ty = SE.getEffectiveSCEVType(S->getType());
// Collect all the add operands in a loop, along with their associated loops.
// Iterate in reverse so that constants are emitted last, all else equal, and
@@ -717,7 +720,7 @@ Value *SCEVExpander::visitAddExpr(const SCEVAddExpr *S) {
// This is the first operand. Just expand it.
Sum = expand(Op);
++I;
- } else if (const PointerType *PTy = dyn_cast<PointerType>(Sum->getType())) {
+ } else if (PointerType *PTy = dyn_cast<PointerType>(Sum->getType())) {
// The running sum expression is a pointer. Try to form a getelementptr
// at this level with that as the base.
SmallVector<const SCEV *, 4> NewOps;
@@ -731,7 +734,7 @@ Value *SCEVExpander::visitAddExpr(const SCEVAddExpr *S) {
NewOps.push_back(X);
}
Sum = expandAddToGEP(NewOps.begin(), NewOps.end(), PTy, Ty, Sum);
- } else if (const PointerType *PTy = dyn_cast<PointerType>(Op->getType())) {
+ } else if (PointerType *PTy = dyn_cast<PointerType>(Op->getType())) {
// The running sum is an integer, and there's a pointer at this level.
// Try to form a getelementptr. If the running sum is instructions,
// use a SCEVUnknown to avoid re-analyzing them.
@@ -762,7 +765,7 @@ Value *SCEVExpander::visitAddExpr(const SCEVAddExpr *S) {
}
Value *SCEVExpander::visitMulExpr(const SCEVMulExpr *S) {
- const Type *Ty = SE.getEffectiveSCEVType(S->getType());
+ Type *Ty = SE.getEffectiveSCEVType(S->getType());
// Collect all the mul operands in a loop, along with their associated loops.
// Iterate in reverse so that constants are emitted last, all else equal.
@@ -804,7 +807,7 @@ Value *SCEVExpander::visitMulExpr(const SCEVMulExpr *S) {
}
Value *SCEVExpander::visitUDivExpr(const SCEVUDivExpr *S) {
- const Type *Ty = SE.getEffectiveSCEVType(S->getType());
+ Type *Ty = SE.getEffectiveSCEVType(S->getType());
Value *LHS = expandCodeFor(S->getLHS(), Ty);
if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(S->getRHS())) {
@@ -841,81 +844,141 @@ static void ExposePointerBase(const SCEV *&Base, const SCEV *&Rest,
}
}
+/// Determine if this is a well-behaved chain of instructions leading back to
+/// the PHI. If so, it may be reused by expanded expressions.
+bool SCEVExpander::isNormalAddRecExprPHI(PHINode *PN, Instruction *IncV,
+ const Loop *L) {
+ if (IncV->getNumOperands() == 0 || isa<PHINode>(IncV) ||
+ (isa<CastInst>(IncV) && !isa<BitCastInst>(IncV)))
+ return false;
+ // If any of the operands don't dominate the insert position, bail.
+ // Addrec operands are always loop-invariant, so this can only happen
+ // if there are instructions which haven't been hoisted.
+ if (L == IVIncInsertLoop) {
+ for (User::op_iterator OI = IncV->op_begin()+1,
+ OE = IncV->op_end(); OI != OE; ++OI)
+ if (Instruction *OInst = dyn_cast<Instruction>(OI))
+ if (!SE.DT->dominates(OInst, IVIncInsertPos))
+ return false;
+ }
+ // Advance to the next instruction.
+ IncV = dyn_cast<Instruction>(IncV->getOperand(0));
+ if (!IncV)
+ return false;
+
+ if (IncV->mayHaveSideEffects())
+ return false;
+
+ if (IncV != PN)
+ return true;
+
+ return isNormalAddRecExprPHI(PN, IncV, L);
+}
+
+/// Determine if this cyclic phi is in a form that would have been generated by
+/// LSR. We don't care if the phi was actually expanded in this pass, as long
+/// as it is in a low-cost form, for example, no implied multiplication. This
+/// should match any patterns generated by getAddRecExprPHILiterally and
+/// expandAddToGEP.
+bool SCEVExpander::isExpandedAddRecExprPHI(PHINode *PN, Instruction *IncV,
+ const Loop *L) {
+ switch (IncV->getOpcode()) {
+ // Check for a simple Add/Sub or GEP of a loop invariant step.
+ case Instruction::Add:
+ case Instruction::Sub:
+ return IncV->getOperand(0) == PN
+ && L->isLoopInvariant(IncV->getOperand(1));
+ case Instruction::BitCast:
+ IncV = dyn_cast<GetElementPtrInst>(IncV->getOperand(0));
+ if (!IncV)
+ return false;
+ // fall-thru to GEP handling
+ case Instruction::GetElementPtr: {
+ // This must be a pointer addition of constants (pretty) or some number of
+ // address-size elements (ugly).
+ for (Instruction::op_iterator I = IncV->op_begin()+1, E = IncV->op_end();
+ I != E; ++I) {
+ if (isa<Constant>(*I))
+ continue;
+ // ugly geps have 2 operands.
+ // i1* is used by the expander to represent an address-size element.
+ if (IncV->getNumOperands() != 2)
+ return false;
+ unsigned AS = cast<PointerType>(IncV->getType())->getAddressSpace();
+ if (IncV->getType() != Type::getInt1PtrTy(SE.getContext(), AS)
+ && IncV->getType() != Type::getInt8PtrTy(SE.getContext(), AS))
+ return false;
+ // Ensure the operands dominate the insertion point. I don't know of a
+ // case when this would not be true, so this is somewhat untested.
+ if (L == IVIncInsertLoop) {
+ for (User::op_iterator OI = IncV->op_begin()+1,
+ OE = IncV->op_end(); OI != OE; ++OI)
+ if (Instruction *OInst = dyn_cast<Instruction>(OI))
+ if (!SE.DT->dominates(OInst, IVIncInsertPos))
+ return false;
+ }
+ break;
+ }
+ IncV = dyn_cast<Instruction>(IncV->getOperand(0));
+ if (IncV && IncV->getOpcode() == Instruction::BitCast)
+ IncV = dyn_cast<Instruction>(IncV->getOperand(0));
+ return IncV == PN;
+ }
+ default:
+ return false;
+ }
+}
+
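For illustration only (assumed LLVM 3.0 textual IR, not taken from this patch): a "pretty" increment steps by constants, e.g. %iv.next = getelementptr %struct.S* %iv, i32 1, while an "ugly" one has exactly two operands and steps an i8* (or i1*) pointer, which the expander uses to model an address-size element, by a loop-invariant amount, e.g. %iv.next = getelementptr i8* %iv, i64 %step.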
/// getAddRecExprPHILiterally - Helper for expandAddRecExprLiterally. Expand
/// the base addrec, which is the addrec without any non-loop-dominating
/// values, and return the PHI.
PHINode *
SCEVExpander::getAddRecExprPHILiterally(const SCEVAddRecExpr *Normalized,
const Loop *L,
- const Type *ExpandTy,
- const Type *IntTy) {
+ Type *ExpandTy,
+ Type *IntTy) {
assert((!IVIncInsertLoop||IVIncInsertPos) && "Uninitialized insert position");
// Reuse a previously-inserted PHI, if present.
- for (BasicBlock::iterator I = L->getHeader()->begin();
- PHINode *PN = dyn_cast<PHINode>(I); ++I)
- if (SE.isSCEVable(PN->getType()) &&
- (SE.getEffectiveSCEVType(PN->getType()) ==
- SE.getEffectiveSCEVType(Normalized->getType())) &&
- SE.getSCEV(PN) == Normalized)
- if (BasicBlock *LatchBlock = L->getLoopLatch()) {
- Instruction *IncV =
- cast<Instruction>(PN->getIncomingValueForBlock(LatchBlock));
-
- // Determine if this is a well-behaved chain of instructions leading
- // back to the PHI. It probably will be, if we're scanning an inner
- // loop already visited by LSR for example, but it wouldn't have
- // to be.
+ BasicBlock *LatchBlock = L->getLoopLatch();
+ if (LatchBlock) {
+ for (BasicBlock::iterator I = L->getHeader()->begin();
+ PHINode *PN = dyn_cast<PHINode>(I); ++I) {
+ if (!SE.isSCEVable(PN->getType()) ||
+ (SE.getEffectiveSCEVType(PN->getType()) !=
+ SE.getEffectiveSCEVType(Normalized->getType())) ||
+ SE.getSCEV(PN) != Normalized)
+ continue;
+
+ Instruction *IncV =
+ cast<Instruction>(PN->getIncomingValueForBlock(LatchBlock));
+
+ if (LSRMode) {
+ if (!isExpandedAddRecExprPHI(PN, IncV, L))
+ continue;
+ }
+ else {
+ if (!isNormalAddRecExprPHI(PN, IncV, L))
+ continue;
+ }
+ // Ok, the add recurrence looks usable.
+ // Remember this PHI, even in post-inc mode.
+ InsertedValues.insert(PN);
+ // Remember the increment.
+ rememberInstruction(IncV);
+ if (L == IVIncInsertLoop)
do {
- if (IncV->getNumOperands() == 0 || isa<PHINode>(IncV) ||
- (isa<CastInst>(IncV) && !isa<BitCastInst>(IncV))) {
- IncV = 0;
+ if (SE.DT->dominates(IncV, IVIncInsertPos))
break;
- }
- // If any of the operands don't dominate the insert position, bail.
- // Addrec operands are always loop-invariant, so this can only happen
- // if there are instructions which haven't been hoisted.
- if (L == IVIncInsertLoop) {
- for (User::op_iterator OI = IncV->op_begin()+1,
- OE = IncV->op_end(); OI != OE; ++OI)
- if (Instruction *OInst = dyn_cast<Instruction>(OI))
- if (!SE.DT->dominates(OInst, IVIncInsertPos)) {
- IncV = 0;
- break;
- }
- }
- if (!IncV)
- break;
- // Advance to the next instruction.
- IncV = dyn_cast<Instruction>(IncV->getOperand(0));
- if (!IncV)
- break;
- if (IncV->mayHaveSideEffects()) {
- IncV = 0;
- break;
- }
+ // Make sure the increment is where we want it. But don't move it
+ // down past a potential existing post-inc user.
+ IncV->moveBefore(IVIncInsertPos);
+ IVIncInsertPos = IncV;
+ IncV = cast<Instruction>(IncV->getOperand(0));
} while (IncV != PN);
-
- if (IncV) {
- // Ok, the add recurrence looks usable.
- // Remember this PHI, even in post-inc mode.
- InsertedValues.insert(PN);
- // Remember the increment.
- IncV = cast<Instruction>(PN->getIncomingValueForBlock(LatchBlock));
- rememberInstruction(IncV);
- if (L == IVIncInsertLoop)
- do {
- if (SE.DT->dominates(IncV, IVIncInsertPos))
- break;
- // Make sure the increment is where we want it. But don't move it
- // down past a potential existing post-inc user.
- IncV->moveBefore(IVIncInsertPos);
- IVIncInsertPos = IncV;
- IncV = cast<Instruction>(IncV->getOperand(0));
- } while (IncV != PN);
- return PN;
- }
- }
+ return PN;
+ }
+ }
// Save the original insertion point so we can restore it when we're done.
BasicBlock *SaveInsertBB = Builder.GetInsertBlock();
@@ -969,7 +1032,7 @@ SCEVExpander::getAddRecExprPHILiterally(const SCEVAddRecExpr *Normalized,
Value *IncV;
// If the PHI is a pointer, use a GEP, otherwise use an add or sub.
if (isPointer) {
- const PointerType *GEPPtrTy = cast<PointerType>(ExpandTy);
+ PointerType *GEPPtrTy = cast<PointerType>(ExpandTy);
// If the step isn't constant, don't use an implicitly scaled GEP, because
// that would require a multiply inside the loop.
if (!isa<ConstantInt>(StepV))
@@ -978,7 +1041,7 @@ SCEVExpander::getAddRecExprPHILiterally(const SCEVAddRecExpr *Normalized,
const SCEV *const StepArray[1] = { SE.getSCEV(StepV) };
IncV = expandAddToGEP(StepArray, StepArray+1, GEPPtrTy, IntTy, PN);
if (IncV->getType() != PN->getType()) {
- IncV = Builder.CreateBitCast(IncV, PN->getType(), "tmp");
+ IncV = Builder.CreateBitCast(IncV, PN->getType());
rememberInstruction(IncV);
}
} else {
@@ -1001,8 +1064,8 @@ SCEVExpander::getAddRecExprPHILiterally(const SCEVAddRecExpr *Normalized,
}
Value *SCEVExpander::expandAddRecExprLiterally(const SCEVAddRecExpr *S) {
- const Type *STy = S->getType();
- const Type *IntTy = SE.getEffectiveSCEVType(STy);
+ Type *STy = S->getType();
+ Type *IntTy = SE.getEffectiveSCEVType(STy);
const Loop *L = S->getLoop();
// Determine a normalized form of this expression, which is the expression
@@ -1045,7 +1108,7 @@ Value *SCEVExpander::expandAddRecExprLiterally(const SCEVAddRecExpr *S) {
// Expand the core addrec. If we need post-loop scaling, force it to
// expand to an integer type to avoid the need for additional casting.
- const Type *ExpandTy = PostLoopScale ? IntTy : STy;
+ Type *ExpandTy = PostLoopScale ? IntTy : STy;
PHINode *PN = getAddRecExprPHILiterally(Normalized, L, ExpandTy, IntTy);
// Accommodate post-inc mode, if necessary.
@@ -1057,6 +1120,14 @@ Value *SCEVExpander::expandAddRecExprLiterally(const SCEVAddRecExpr *S) {
BasicBlock *LatchBlock = L->getLoopLatch();
assert(LatchBlock && "PostInc mode requires a unique loop latch!");
Result = PN->getIncomingValueForBlock(LatchBlock);
+
+ // For an expansion to use the postinc form, the client must call
+ // expandCodeFor with an InsertPoint that is either outside the PostIncLoop
+ // or dominated by IVIncInsertPos.
+ assert((!isa<Instruction>(Result) ||
+ SE.DT->dominates(cast<Instruction>(Result),
+ Builder.GetInsertPoint())) &&
+ "postinc expansion does not dominate use");
}
// Re-apply any non-loop-dominating scale.
@@ -1069,7 +1140,7 @@ Value *SCEVExpander::expandAddRecExprLiterally(const SCEVAddRecExpr *S) {
// Re-apply any non-loop-dominating offset.
if (PostLoopOffset) {
- if (const PointerType *PTy = dyn_cast<PointerType>(ExpandTy)) {
+ if (PointerType *PTy = dyn_cast<PointerType>(ExpandTy)) {
const SCEV *const OffsetArray[1] = { PostLoopOffset };
Result = expandAddToGEP(OffsetArray, OffsetArray+1, PTy, IntTy, Result);
} else {
@@ -1086,7 +1157,7 @@ Value *SCEVExpander::expandAddRecExprLiterally(const SCEVAddRecExpr *S) {
Value *SCEVExpander::visitAddRecExpr(const SCEVAddRecExpr *S) {
if (!CanonicalMode) return expandAddRecExprLiterally(S);
- const Type *Ty = SE.getEffectiveSCEVType(S->getType());
+ Type *Ty = SE.getEffectiveSCEVType(S->getType());
const Loop *L = S->getLoop();
// First check for an existing canonical IV in a suitable type.
@@ -1110,7 +1181,8 @@ Value *SCEVExpander::visitAddRecExpr(const SCEVAddRecExpr *S) {
BasicBlock::iterator SaveInsertPt = Builder.GetInsertPoint();
BasicBlock::iterator NewInsertPt =
llvm::next(BasicBlock::iterator(cast<Instruction>(V)));
- while (isa<PHINode>(NewInsertPt) || isa<DbgInfoIntrinsic>(NewInsertPt))
+ while (isa<PHINode>(NewInsertPt) || isa<DbgInfoIntrinsic>(NewInsertPt) ||
+ isa<LandingPadInst>(NewInsertPt))
++NewInsertPt;
V = expandCodeFor(SE.getTruncateExpr(SE.getUnknown(V), Ty), 0,
NewInsertPt);
@@ -1132,7 +1204,7 @@ Value *SCEVExpander::visitAddRecExpr(const SCEVAddRecExpr *S) {
// Dig into the expression to find the pointer base for a GEP.
ExposePointerBase(Base, RestArray[0], SE);
// If we found a pointer, expand the AddRec with a GEP.
- if (const PointerType *PTy = dyn_cast<PointerType>(Base->getType())) {
+ if (PointerType *PTy = dyn_cast<PointerType>(Base->getType())) {
// Make sure the Base isn't something exotic, such as a multiplied
// or divided pointer value. In those cases, the result type isn't
// actually a pointer type.
@@ -1216,35 +1288,35 @@ Value *SCEVExpander::visitAddRecExpr(const SCEVAddRecExpr *S) {
}
Value *SCEVExpander::visitTruncateExpr(const SCEVTruncateExpr *S) {
- const Type *Ty = SE.getEffectiveSCEVType(S->getType());
+ Type *Ty = SE.getEffectiveSCEVType(S->getType());
Value *V = expandCodeFor(S->getOperand(),
SE.getEffectiveSCEVType(S->getOperand()->getType()));
- Value *I = Builder.CreateTrunc(V, Ty, "tmp");
+ Value *I = Builder.CreateTrunc(V, Ty);
rememberInstruction(I);
return I;
}
Value *SCEVExpander::visitZeroExtendExpr(const SCEVZeroExtendExpr *S) {
- const Type *Ty = SE.getEffectiveSCEVType(S->getType());
+ Type *Ty = SE.getEffectiveSCEVType(S->getType());
Value *V = expandCodeFor(S->getOperand(),
SE.getEffectiveSCEVType(S->getOperand()->getType()));
- Value *I = Builder.CreateZExt(V, Ty, "tmp");
+ Value *I = Builder.CreateZExt(V, Ty);
rememberInstruction(I);
return I;
}
Value *SCEVExpander::visitSignExtendExpr(const SCEVSignExtendExpr *S) {
- const Type *Ty = SE.getEffectiveSCEVType(S->getType());
+ Type *Ty = SE.getEffectiveSCEVType(S->getType());
Value *V = expandCodeFor(S->getOperand(),
SE.getEffectiveSCEVType(S->getOperand()->getType()));
- Value *I = Builder.CreateSExt(V, Ty, "tmp");
+ Value *I = Builder.CreateSExt(V, Ty);
rememberInstruction(I);
return I;
}
Value *SCEVExpander::visitSMaxExpr(const SCEVSMaxExpr *S) {
Value *LHS = expand(S->getOperand(S->getNumOperands()-1));
- const Type *Ty = LHS->getType();
+ Type *Ty = LHS->getType();
for (int i = S->getNumOperands()-2; i >= 0; --i) {
// In the case of mixed integer and pointer types, do the
// rest of the comparisons as integer.
@@ -1253,7 +1325,7 @@ Value *SCEVExpander::visitSMaxExpr(const SCEVSMaxExpr *S) {
LHS = InsertNoopCastOfTo(LHS, Ty);
}
Value *RHS = expandCodeFor(S->getOperand(i), Ty);
- Value *ICmp = Builder.CreateICmpSGT(LHS, RHS, "tmp");
+ Value *ICmp = Builder.CreateICmpSGT(LHS, RHS);
rememberInstruction(ICmp);
Value *Sel = Builder.CreateSelect(ICmp, LHS, RHS, "smax");
rememberInstruction(Sel);
@@ -1268,7 +1340,7 @@ Value *SCEVExpander::visitSMaxExpr(const SCEVSMaxExpr *S) {
Value *SCEVExpander::visitUMaxExpr(const SCEVUMaxExpr *S) {
Value *LHS = expand(S->getOperand(S->getNumOperands()-1));
- const Type *Ty = LHS->getType();
+ Type *Ty = LHS->getType();
for (int i = S->getNumOperands()-2; i >= 0; --i) {
// In the case of mixed integer and pointer types, do the
// rest of the comparisons as integer.
@@ -1277,7 +1349,7 @@ Value *SCEVExpander::visitUMaxExpr(const SCEVUMaxExpr *S) {
LHS = InsertNoopCastOfTo(LHS, Ty);
}
Value *RHS = expandCodeFor(S->getOperand(i), Ty);
- Value *ICmp = Builder.CreateICmpUGT(LHS, RHS, "tmp");
+ Value *ICmp = Builder.CreateICmpUGT(LHS, RHS);
rememberInstruction(ICmp);
Value *Sel = Builder.CreateSelect(ICmp, LHS, RHS, "umax");
rememberInstruction(Sel);
@@ -1290,7 +1362,7 @@ Value *SCEVExpander::visitUMaxExpr(const SCEVUMaxExpr *S) {
return LHS;
}
-Value *SCEVExpander::expandCodeFor(const SCEV *SH, const Type *Ty,
+Value *SCEVExpander::expandCodeFor(const SCEV *SH, Type *Ty,
Instruction *I) {
BasicBlock::iterator IP = I;
while (isInsertedInstruction(IP) || isa<DbgInfoIntrinsic>(IP))
@@ -1299,7 +1371,7 @@ Value *SCEVExpander::expandCodeFor(const SCEV *SH, const Type *Ty,
return expandCodeFor(SH, Ty);
}
-Value *SCEVExpander::expandCodeFor(const SCEV *SH, const Type *Ty) {
+Value *SCEVExpander::expandCodeFor(const SCEV *SH, Type *Ty) {
// Expand the code for this SCEV.
Value *V = expand(SH);
if (Ty) {
@@ -1325,7 +1397,7 @@ Value *SCEVExpander::expand(const SCEV *S) {
// after the PHIs (and after any other instructions that we've inserted
// there) so that it is guaranteed to dominate any user inside the loop.
if (L && SE.hasComputableLoopEvolution(S, L) && !PostIncLoops.count(L))
- InsertPt = L->getHeader()->getFirstNonPHI();
+ InsertPt = L->getHeader()->getFirstInsertionPt();
while (isInsertedInstruction(InsertPt) || isa<DbgInfoIntrinsic>(InsertPt))
InsertPt = llvm::next(BasicBlock::iterator(InsertPt));
break;
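getFirstInsertionPt is the EH-aware replacement for getFirstNonPHI here: it skips past any landingpad as well as the PHIs, returning the first position in the block where new instructions may legally be inserted.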
@@ -1346,8 +1418,12 @@ Value *SCEVExpander::expand(const SCEV *S) {
Value *V = visit(S);
// Remember the expanded value for this SCEV at this location.
- if (PostIncLoops.empty())
- InsertedExpressions[std::make_pair(S, InsertPt)] = V;
+ //
+ // This is independent of PostIncLoops. The mapped value simply materializes
+ // the expression at this insertion point. If the mapped value happened to be
+ // a postinc expansion, it could be reused by a non postinc user, but only if
+ // its insertion point was already at the head of the loop.
+ InsertedExpressions[std::make_pair(S, InsertPt)] = V;
restoreInsertPoint(SaveInsertBB, SaveInsertPt);
return V;
@@ -1384,7 +1460,7 @@ void SCEVExpander::restoreInsertPoint(BasicBlock *BB, BasicBlock::iterator I) {
/// starts at zero and steps by one on each iteration.
PHINode *
SCEVExpander::getOrInsertCanonicalInductionVariable(const Loop *L,
- const Type *Ty) {
+ Type *Ty) {
assert(Ty->isIntegerTy() && "Can only insert integer induction variables!");
// Build a SCEV for {0,+,1}<L>.
@@ -1401,3 +1477,102 @@ SCEVExpander::getOrInsertCanonicalInductionVariable(const Loop *L,
return V;
}
+
+/// hoistStep - Attempt to hoist an IV increment above a potential use.
+///
+/// To successfully hoist, two criteria must be met:
+/// - IncV operands dominate InsertPos and
+/// - InsertPos dominates IncV
+///
+/// Meeting the second condition means that we don't need to check all of IncV's
+/// existing uses (it's moving up in the domtree).
+///
+/// This does not yet recursively hoist the operands, although that would
+/// not be difficult.
+///
+/// This does not require a SCEVExpander instance and could be replaced by a
+/// general code-insertion helper.
+bool SCEVExpander::hoistStep(Instruction *IncV, Instruction *InsertPos,
+ const DominatorTree *DT) {
+ if (DT->dominates(IncV, InsertPos))
+ return true;
+
+ if (!DT->dominates(InsertPos->getParent(), IncV->getParent()))
+ return false;
+
+ if (IncV->mayHaveSideEffects())
+ return false;
+
+ // Attempt to hoist IncV
+ for (User::op_iterator OI = IncV->op_begin(), OE = IncV->op_end();
+ OI != OE; ++OI) {
+ Instruction *OInst = dyn_cast<Instruction>(OI);
+ if (OInst && !DT->dominates(OInst, InsertPos))
+ return false;
+ }
+ IncV->moveBefore(InsertPos);
+ return true;
+}
+
+/// replaceCongruentIVs - Check for congruent phis in this loop header and
+/// replace them with their most canonical representative. Return the number of
+/// phis eliminated.
+///
+/// This does not depend on any SCEVExpander state but should be used in
+/// the same context that SCEVExpander is used.
+unsigned SCEVExpander::replaceCongruentIVs(Loop *L, const DominatorTree *DT,
+ SmallVectorImpl<WeakVH> &DeadInsts) {
+ unsigned NumElim = 0;
+ DenseMap<const SCEV *, PHINode *> ExprToIVMap;
+ for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I) {
+ PHINode *Phi = cast<PHINode>(I);
+ if (!SE.isSCEVable(Phi->getType()))
+ continue;
+
+ PHINode *&OrigPhiRef = ExprToIVMap[SE.getSCEV(Phi)];
+ if (!OrigPhiRef) {
+ OrigPhiRef = Phi;
+ continue;
+ }
+
+ // If one phi derives from the other via GEPs, types may differ.
+ // We could consider adding a bitcast here to handle it.
+ if (OrigPhiRef->getType() != Phi->getType())
+ continue;
+
+ if (BasicBlock *LatchBlock = L->getLoopLatch()) {
+ Instruction *OrigInc =
+ cast<Instruction>(OrigPhiRef->getIncomingValueForBlock(LatchBlock));
+ Instruction *IsomorphicInc =
+ cast<Instruction>(Phi->getIncomingValueForBlock(LatchBlock));
+
+ // If this phi is more canonical, swap it with the original.
+ if (!isExpandedAddRecExprPHI(OrigPhiRef, OrigInc, L)
+ && isExpandedAddRecExprPHI(Phi, IsomorphicInc, L)) {
+ std::swap(OrigPhiRef, Phi);
+ std::swap(OrigInc, IsomorphicInc);
+ }
+ // Replacing the congruent phi is sufficient because acyclic redundancy
+ // elimination, CSE/GVN, should handle the rest. However, once SCEV proves
+ // that a phi is congruent, it's often the head of an IV user cycle that
+ // is isomorphic with the original phi. So it's worth eagerly cleaning up
+ // the common case of a single IV increment.
+ if (OrigInc != IsomorphicInc &&
+ OrigInc->getType() == IsomorphicInc->getType() &&
+ SE.getSCEV(OrigInc) == SE.getSCEV(IsomorphicInc) &&
+ hoistStep(OrigInc, IsomorphicInc, DT)) {
+ DEBUG_WITH_TYPE(DebugType, dbgs()
+ << "INDVARS: Eliminated congruent iv.inc: "
+ << *IsomorphicInc << '\n');
+ IsomorphicInc->replaceAllUsesWith(OrigInc);
+ DeadInsts.push_back(IsomorphicInc);
+ }
+ }
+ DEBUG_WITH_TYPE(DebugType, dbgs()
+ << "INDVARS: Eliminated congruent iv: " << *Phi << '\n');
+ ++NumElim;
+ Phi->replaceAllUsesWith(OrigPhiRef);
+ DeadInsts.push_back(Phi);
+ }
+ return NumElim;
+}
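At the source level, congruent phis typically come from loops that maintain several counters with identical evolution. A minimal sketch of an input that yields two phis with the same SCEV (hypothetical example, not from this patch):

    unsigned sumPairs(const int *a, const int *b, unsigned n) {
      unsigned s = 0;
      // i and j lower to distinct IR phis with the identical SCEV {0,+,1};
      // replaceCongruentIVs can replace one with the other and mark the
      // redundant phi (and its increment) dead.
      for (unsigned i = 0, j = 0; i != n; ++i, ++j)
        s += (unsigned)(a[i] + b[j]);
      return s;
    }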
diff --git a/lib/Analysis/ScalarEvolutionNormalization.cpp b/lib/Analysis/ScalarEvolutionNormalization.cpp
index 60e630a..c66ecd6 100644
--- a/lib/Analysis/ScalarEvolutionNormalization.cpp
+++ b/lib/Analysis/ScalarEvolutionNormalization.cpp
@@ -60,20 +60,40 @@ static bool IVUseShouldUsePostIncValue(Instruction *User, Value *Operand,
return true;
}
-const SCEV *llvm::TransformForPostIncUse(TransformKind Kind,
- const SCEV *S,
- Instruction *User,
- Value *OperandValToReplace,
- PostIncLoopSet &Loops,
- ScalarEvolution &SE,
- DominatorTree &DT) {
- if (isa<SCEVConstant>(S) || isa<SCEVUnknown>(S))
- return S;
+namespace {
+
+/// Hold the state used during post-inc expression transformation, including a
+/// map of transformed expressions.
+class PostIncTransform {
+ TransformKind Kind;
+ PostIncLoopSet &Loops;
+ ScalarEvolution &SE;
+ DominatorTree &DT;
+
+ DenseMap<const SCEV*, const SCEV*> Transformed;
+
+public:
+ PostIncTransform(TransformKind kind, PostIncLoopSet &loops,
+ ScalarEvolution &se, DominatorTree &dt):
+ Kind(kind), Loops(loops), SE(se), DT(dt) {}
+
+ const SCEV *TransformSubExpr(const SCEV *S, Instruction *User,
+ Value *OperandValToReplace);
+
+protected:
+ const SCEV *TransformImpl(const SCEV *S, Instruction *User,
+ Value *OperandValToReplace);
+};
+
+} // namespace
+
+/// Implement post-inc transformation for all valid expression types.
+const SCEV *PostIncTransform::
+TransformImpl(const SCEV *S, Instruction *User, Value *OperandValToReplace) {
if (const SCEVCastExpr *X = dyn_cast<SCEVCastExpr>(S)) {
const SCEV *O = X->getOperand();
- const SCEV *N = TransformForPostIncUse(Kind, O, User, OperandValToReplace,
- Loops, SE, DT);
+ const SCEV *N = TransformSubExpr(O, User, OperandValToReplace);
if (O != N)
switch (S->getSCEVType()) {
case scZeroExtend: return SE.getZeroExtendExpr(N, S->getType());
@@ -93,9 +113,7 @@ const SCEV *llvm::TransformForPostIncUse(TransformKind Kind,
// Transform each operand.
for (SCEVNAryExpr::op_iterator I = AR->op_begin(), E = AR->op_end();
I != E; ++I) {
- const SCEV *O = *I;
- const SCEV *N = TransformForPostIncUse(Kind, O, LUser, 0, Loops, SE, DT);
- Operands.push_back(N);
+ Operands.push_back(TransformSubExpr(*I, LUser, 0));
}
// Conservatively use AnyWrap until/unless we need FlagNW.
const SCEV *Result = SE.getAddRecExpr(Operands, L, SCEV::FlagAnyWrap);
@@ -104,8 +122,8 @@ const SCEV *llvm::TransformForPostIncUse(TransformKind Kind,
case NormalizeAutodetect:
if (IVUseShouldUsePostIncValue(User, OperandValToReplace, L, &DT)) {
const SCEV *TransformedStep =
- TransformForPostIncUse(Kind, AR->getStepRecurrence(SE),
- User, OperandValToReplace, Loops, SE, DT);
+ TransformSubExpr(AR->getStepRecurrence(SE),
+ User, OperandValToReplace);
Result = SE.getMinusSCEV(Result, TransformedStep);
Loops.insert(L);
}
@@ -114,24 +132,20 @@ const SCEV *llvm::TransformForPostIncUse(TransformKind Kind,
// sometimes fails to canonicalize two equal SCEVs to exactly the same
// form. It's possibly a pessimization when this happens, but it isn't a
// correctness problem, so disable this assert for now.
- assert(S == TransformForPostIncUse(Denormalize, Result,
- User, OperandValToReplace,
- Loops, SE, DT) &&
+ assert(S == TransformSubExpr(Result, User, OperandValToReplace) &&
"SCEV normalization is not invertible!");
#endif
break;
case Normalize:
if (Loops.count(L)) {
const SCEV *TransformedStep =
- TransformForPostIncUse(Kind, AR->getStepRecurrence(SE),
- User, OperandValToReplace, Loops, SE, DT);
+ TransformSubExpr(AR->getStepRecurrence(SE),
+ User, OperandValToReplace);
Result = SE.getMinusSCEV(Result, TransformedStep);
}
#if 0
// See the comment on the assert above.
- assert(S == TransformForPostIncUse(Denormalize, Result,
- User, OperandValToReplace,
- Loops, SE, DT) &&
+ assert(S == TransformSubExpr(Result, User, OperandValToReplace) &&
"SCEV normalization is not invertible!");
#endif
break;
@@ -150,8 +164,7 @@ const SCEV *llvm::TransformForPostIncUse(TransformKind Kind,
for (SCEVNAryExpr::op_iterator I = X->op_begin(), E = X->op_end();
I != E; ++I) {
const SCEV *O = *I;
- const SCEV *N = TransformForPostIncUse(Kind, O, User, OperandValToReplace,
- Loops, SE, DT);
+ const SCEV *N = TransformSubExpr(O, User, OperandValToReplace);
Changed |= N != O;
Operands.push_back(N);
}
@@ -170,10 +183,8 @@ const SCEV *llvm::TransformForPostIncUse(TransformKind Kind,
if (const SCEVUDivExpr *X = dyn_cast<SCEVUDivExpr>(S)) {
const SCEV *LO = X->getLHS();
const SCEV *RO = X->getRHS();
- const SCEV *LN = TransformForPostIncUse(Kind, LO, User, OperandValToReplace,
- Loops, SE, DT);
- const SCEV *RN = TransformForPostIncUse(Kind, RO, User, OperandValToReplace,
- Loops, SE, DT);
+ const SCEV *LN = TransformSubExpr(LO, User, OperandValToReplace);
+ const SCEV *RN = TransformSubExpr(RO, User, OperandValToReplace);
if (LO != LN || RO != RN)
return SE.getUDivExpr(LN, RN);
return S;
@@ -182,3 +193,33 @@ const SCEV *llvm::TransformForPostIncUse(TransformKind Kind,
llvm_unreachable("Unexpected SCEV kind!");
return 0;
}
+
+/// Manage recursive transformation across an expression DAG. Revisiting
+/// expressions would lead to exponential recursion.
+const SCEV *PostIncTransform::
+TransformSubExpr(const SCEV *S, Instruction *User, Value *OperandValToReplace) {
+
+ if (isa<SCEVConstant>(S) || isa<SCEVUnknown>(S))
+ return S;
+
+ const SCEV *Result = Transformed.lookup(S);
+ if (Result)
+ return Result;
+
+ Result = TransformImpl(S, User, OperandValToReplace);
+ Transformed[S] = Result;
+ return Result;
+}
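The Transformed map is what keeps this rewrite linear: SCEV expressions form a DAG with sharing, so an unmemoized recursion re-derives shared subtrees exponentially often. A self-contained C++ sketch of the same caching pattern on a toy DAG (all names illustrative, not LLVM API):

    #include <cstdio>
    #include <unordered_map>

    // A toy immutable expression DAG: leaves carry a value, interior
    // nodes have two operands.
    struct Expr {
      const Expr *L, *R;  // null for leaves
      int Val;            // leaf value
    };

    static int Visits = 0;

    // Rewrite every leaf to Val+1, visiting each distinct node exactly
    // once. Memo plays the role of the Transformed DenseMap above.
    const Expr *transform(const Expr *E,
                          std::unordered_map<const Expr *, const Expr *> &Memo) {
      auto It = Memo.find(E);
      if (It != Memo.end())
        return It->second;          // already transformed: reuse the result
      ++Visits;
      const Expr *Res;
      if (!E->L)                    // leaf
        Res = new Expr{nullptr, nullptr, E->Val + 1};
      else                          // interior node: recurse into operands
        Res = new Expr{transform(E->L, Memo), transform(E->R, Memo), 0};
      Memo[E] = Res;
      return Res;
    }

    int main() {
      Expr Leaf{nullptr, nullptr, 1};
      Expr A{&Leaf, &Leaf, 0};      // shares Leaf twice
      Expr Root{&A, &A, 0};         // shares A twice: 3 distinct nodes
      std::unordered_map<const Expr *, const Expr *> Memo;
      transform(&Root, Memo);
      printf("distinct nodes visited: %d\n", Visits);  // 3, not 7
      return 0;
    }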
+
+/// Top-level driver for transforming an expression DAG into its requested
+/// post-inc form (either "Normalized" or "Denormalized").
+const SCEV *llvm::TransformForPostIncUse(TransformKind Kind,
+ const SCEV *S,
+ Instruction *User,
+ Value *OperandValToReplace,
+ PostIncLoopSet &Loops,
+ ScalarEvolution &SE,
+ DominatorTree &DT) {
+ PostIncTransform Transform(Kind, Loops, SE, DT);
+ return Transform.TransformSubExpr(S, User, OperandValToReplace);
+}
diff --git a/lib/Analysis/ValueTracking.cpp b/lib/Analysis/ValueTracking.cpp
index 455c910..4d94f61 100644
--- a/lib/Analysis/ValueTracking.cpp
+++ b/lib/Analysis/ValueTracking.cpp
@@ -34,7 +34,7 @@ const unsigned MaxDepth = 6;
/// getBitWidth - Returns the bitwidth of the given scalar or pointer type (if
/// unknown returns 0). For vector types, returns the element type's bitwidth.
-static unsigned getBitWidth(const Type *Ty, const TargetData *TD) {
+static unsigned getBitWidth(Type *Ty, const TargetData *TD) {
if (unsigned BitWidth = Ty->getScalarSizeInBits())
return BitWidth;
assert(isa<PointerType>(Ty) && "Expected a pointer type!");
@@ -103,7 +103,7 @@ void llvm::ComputeMaskedBits(Value *V, const APInt &Mask,
if (GlobalValue *GV = dyn_cast<GlobalValue>(V)) {
unsigned Align = GV->getAlignment();
if (Align == 0 && TD && GV->getType()->getElementType()->isSized()) {
- const Type *ObjectType = GV->getType()->getElementType();
+ Type *ObjectType = GV->getType()->getElementType();
// If the object is defined in the current Module, we'll be giving
// it the preferred alignment. Otherwise, we have to assume that it
// may only have the minimum ABI alignment.
@@ -268,7 +268,7 @@ void llvm::ComputeMaskedBits(Value *V, const APInt &Mask,
// FALL THROUGH and handle them the same as zext/trunc.
case Instruction::ZExt:
case Instruction::Trunc: {
- const Type *SrcTy = I->getOperand(0)->getType();
+ Type *SrcTy = I->getOperand(0)->getType();
unsigned SrcBitWidth;
// Note that we handle pointer operands here because of inttoptr/ptrtoint
@@ -291,7 +291,7 @@ void llvm::ComputeMaskedBits(Value *V, const APInt &Mask,
return;
}
case Instruction::BitCast: {
- const Type *SrcTy = I->getOperand(0)->getType();
+ Type *SrcTy = I->getOperand(0)->getType();
if ((SrcTy->isIntegerTy() || SrcTy->isPointerTy()) &&
// TODO: For now, not handling conversions like:
// (bitcast i64 %x to <2 x i32>)
@@ -559,7 +559,7 @@ void llvm::ComputeMaskedBits(Value *V, const APInt &Mask,
gep_type_iterator GTI = gep_type_begin(I);
for (unsigned i = 1, e = I->getNumOperands(); i != e; ++i, ++GTI) {
Value *Index = I->getOperand(i);
- if (const StructType *STy = dyn_cast<StructType>(*GTI)) {
+ if (StructType *STy = dyn_cast<StructType>(*GTI)) {
// Handle struct member offset arithmetic.
if (!TD) return;
const StructLayout *SL = TD->getStructLayout(STy);
@@ -569,7 +569,7 @@ void llvm::ComputeMaskedBits(Value *V, const APInt &Mask,
CountTrailingZeros_64(Offset));
} else {
// Handle array index arithmetic.
- const Type *IndexedTy = GTI.getIndexedType();
+ Type *IndexedTy = GTI.getIndexedType();
if (!IndexedTy->isSized()) return;
unsigned GEPOpiBits = Index->getType()->getScalarSizeInBits();
uint64_t TypeSize = TD ? TD->getTypeAllocSize(IndexedTy) : 1;
@@ -898,7 +898,7 @@ unsigned llvm::ComputeNumSignBits(Value *V, const TargetData *TD,
assert((TD || V->getType()->isIntOrIntVectorTy()) &&
"ComputeNumSignBits requires a TargetData object to operate "
"on non-integer values!");
- const Type *Ty = V->getType();
+ Type *Ty = V->getType();
unsigned TyBits = TD ? TD->getTypeSizeInBits(V->getType()->getScalarType()) :
Ty->getScalarSizeInBits();
unsigned Tmp, Tmp2;
@@ -1078,7 +1078,7 @@ bool llvm::ComputeMultiple(Value *V, unsigned Base, Value *&Multiple,
assert(Depth <= MaxDepth && "Limit Search Depth");
assert(V->getType()->isIntegerTy() && "Not integer or pointer type!");
- const Type *T = V->getType();
+ Type *T = V->getType();
ConstantInt *CI = dyn_cast<ConstantInt>(V);
@@ -1315,11 +1315,11 @@ Value *llvm::isBytewiseValue(Value *V) {
// indices from Idxs that should be left out when inserting into the resulting
// struct. To is the result struct built so far; new insertvalue instructions
// build on that.
-static Value *BuildSubAggregate(Value *From, Value* To, const Type *IndexedType,
+static Value *BuildSubAggregate(Value *From, Value* To, Type *IndexedType,
SmallVector<unsigned, 10> &Idxs,
unsigned IdxSkip,
Instruction *InsertBefore) {
- const llvm::StructType *STy = llvm::dyn_cast<llvm::StructType>(IndexedType);
+ llvm::StructType *STy = llvm::dyn_cast<llvm::StructType>(IndexedType);
if (STy) {
// Save the original To argument so we can modify it
Value *OrigTo = To;
@@ -1358,8 +1358,7 @@ static Value *BuildSubAggregate(Value *From, Value* To, const Type *IndexedType,
return NULL;
// Insert the value in the new (sub) aggregate
- return llvm::InsertValueInst::Create(To, V,
- ArrayRef<unsigned>(Idxs).slice(IdxSkip),
+ return llvm::InsertValueInst::Create(To, V, makeArrayRef(Idxs).slice(IdxSkip),
"tmp", InsertBefore);
}
@@ -1378,7 +1377,7 @@ static Value *BuildSubAggregate(Value *From, Value* To, const Type *IndexedType,
static Value *BuildSubAggregate(Value *From, ArrayRef<unsigned> idx_range,
Instruction *InsertBefore) {
assert(InsertBefore && "Must have someplace to insert!");
- const Type *IndexedType = ExtractValueInst::getIndexedType(From->getType(),
+ Type *IndexedType = ExtractValueInst::getIndexedType(From->getType(),
idx_range);
Value *To = UndefValue::get(IndexedType);
SmallVector<unsigned, 10> Idxs(idx_range.begin(), idx_range.end());
@@ -1404,7 +1403,7 @@ Value *llvm::FindInsertedValue(Value *V, ArrayRef<unsigned> idx_range,
&& "Not looking at a struct or array?");
assert(ExtractValueInst::getIndexedType(V->getType(), idx_range)
&& "Invalid indices for type?");
- const CompositeType *PTy = cast<CompositeType>(V->getType());
+ CompositeType *PTy = cast<CompositeType>(V->getType());
if (isa<UndefValue>(V))
return UndefValue::get(ExtractValueInst::getIndexedType(PTy,
@@ -1435,9 +1434,7 @@ Value *llvm::FindInsertedValue(Value *V, ArrayRef<unsigned> idx_range,
// %C = insertvalue {i32, i32 } %A, i32 11, 1
// which allows the unused 0,0 element from the nested struct to be
// removed.
- return BuildSubAggregate(V,
- ArrayRef<unsigned>(idx_range.begin(),
- req_idx),
+ return BuildSubAggregate(V, makeArrayRef(idx_range.begin(), req_idx),
InsertBefore);
else
// We can't handle this without inserting insertvalues
@@ -1455,7 +1452,7 @@ Value *llvm::FindInsertedValue(Value *V, ArrayRef<unsigned> idx_range,
// requested (though possibly only partially). Now we recursively look at
// the inserted value, passing any remaining indices.
return FindInsertedValue(I->getInsertedValueOperand(),
- ArrayRef<unsigned>(req_idx, idx_range.end()),
+ makeArrayRef(req_idx, idx_range.end()),
InsertBefore);
} else if (ExtractValueInst *I = dyn_cast<ExtractValueInst>(V)) {
// If we're extracting a value from an aggregate that was extracted from
@@ -1506,7 +1503,7 @@ Value *llvm::GetPointerBaseWithConstantOffset(Value *Ptr, int64_t &Offset,
if (OpC->isZero()) continue;
// Handle a struct and array indices which add their offset to the pointer.
- if (const StructType *STy = dyn_cast<StructType>(*GTI)) {
+ if (StructType *STy = dyn_cast<StructType>(*GTI)) {
Offset += TD.getStructLayout(STy)->getElementOffset(OpC->getZExtValue());
} else {
uint64_t Size = TD.getTypeAllocSize(GTI.getIndexedType());
@@ -1557,8 +1554,8 @@ bool llvm::GetConstantStringInfo(const Value *V, std::string &Str,
return false;
// Make sure the index-ee is a pointer to array of i8.
- const PointerType *PT = cast<PointerType>(GEP->getOperand(0)->getType());
- const ArrayType *AT = dyn_cast<ArrayType>(PT->getElementType());
+ PointerType *PT = cast<PointerType>(GEP->getOperand(0)->getType());
+ ArrayType *AT = dyn_cast<ArrayType>(PT->getElementType());
if (AT == 0 || !AT->getElementType()->isIntegerTy(8))
return false;
diff --git a/lib/Archive/CMakeLists.txt b/lib/Archive/CMakeLists.txt
index 7ff478a..b52974e 100644
--- a/lib/Archive/CMakeLists.txt
+++ b/lib/Archive/CMakeLists.txt
@@ -3,3 +3,9 @@ add_llvm_library(LLVMArchive
ArchiveReader.cpp
ArchiveWriter.cpp
)
+
+add_llvm_library_dependencies(LLVMArchive
+ LLVMBitReader
+ LLVMCore
+ LLVMSupport
+ )
diff --git a/lib/AsmParser/CMakeLists.txt b/lib/AsmParser/CMakeLists.txt
index 985ebe2..7496015 100644
--- a/lib/AsmParser/CMakeLists.txt
+++ b/lib/AsmParser/CMakeLists.txt
@@ -4,3 +4,8 @@ add_llvm_library(LLVMAsmParser
LLParser.cpp
Parser.cpp
)
+
+add_llvm_library_dependencies(LLVMAsmParser
+ LLVMCore
+ LLVMSupport
+ )
diff --git a/lib/AsmParser/LLLexer.cpp b/lib/AsmParser/LLLexer.cpp
index 3c63106..d0dd986 100644
--- a/lib/AsmParser/LLLexer.cpp
+++ b/lib/AsmParser/LLLexer.cpp
@@ -506,6 +506,15 @@ lltok::Kind LLLexer::LexIdentifier() {
KEYWORD(deplibs);
KEYWORD(datalayout);
KEYWORD(volatile);
+ KEYWORD(atomic);
+ KEYWORD(unordered);
+ KEYWORD(monotonic);
+ KEYWORD(acquire);
+ KEYWORD(release);
+ KEYWORD(acq_rel);
+ KEYWORD(seq_cst);
+ KEYWORD(singlethread);
+
KEYWORD(nuw);
KEYWORD(nsw);
KEYWORD(exact);
@@ -549,6 +558,7 @@ lltok::Kind LLLexer::LexIdentifier() {
KEYWORD(readnone);
KEYWORD(readonly);
KEYWORD(uwtable);
+ KEYWORD(returns_twice);
KEYWORD(inlinehint);
KEYWORD(noinline);
@@ -559,7 +569,6 @@ lltok::Kind LLLexer::LexIdentifier() {
KEYWORD(noredzone);
KEYWORD(noimplicitfloat);
KEYWORD(naked);
- KEYWORD(hotpatch);
KEYWORD(nonlazybind);
KEYWORD(type);
@@ -570,8 +579,16 @@ lltok::Kind LLLexer::LexIdentifier() {
KEYWORD(oeq); KEYWORD(one); KEYWORD(olt); KEYWORD(ogt); KEYWORD(ole);
KEYWORD(oge); KEYWORD(ord); KEYWORD(uno); KEYWORD(ueq); KEYWORD(une);
+ KEYWORD(xchg); KEYWORD(nand); KEYWORD(max); KEYWORD(min); KEYWORD(umax);
+ KEYWORD(umin);
+
KEYWORD(x);
KEYWORD(blockaddress);
+
+ KEYWORD(personality);
+ KEYWORD(cleanup);
+ KEYWORD(catch);
+ KEYWORD(filter);
#undef KEYWORD
// Keywords for types.
@@ -624,12 +641,16 @@ lltok::Kind LLLexer::LexIdentifier() {
INSTKEYWORD(switch, Switch);
INSTKEYWORD(indirectbr, IndirectBr);
INSTKEYWORD(invoke, Invoke);
+ INSTKEYWORD(resume, Resume);
INSTKEYWORD(unwind, Unwind);
INSTKEYWORD(unreachable, Unreachable);
INSTKEYWORD(alloca, Alloca);
INSTKEYWORD(load, Load);
INSTKEYWORD(store, Store);
+ INSTKEYWORD(cmpxchg, AtomicCmpXchg);
+ INSTKEYWORD(atomicrmw, AtomicRMW);
+ INSTKEYWORD(fence, Fence);
INSTKEYWORD(getelementptr, GetElementPtr);
INSTKEYWORD(extractelement, ExtractElement);
@@ -637,6 +658,7 @@ lltok::Kind LLLexer::LexIdentifier() {
INSTKEYWORD(shufflevector, ShuffleVector);
INSTKEYWORD(extractvalue, ExtractValue);
INSTKEYWORD(insertvalue, InsertValue);
+ INSTKEYWORD(landingpad, LandingPad);
#undef INSTKEYWORD
// Check for [us]0x[0-9A-Fa-f]+ which are Hexadecimal constant generated by
@@ -704,17 +726,17 @@ lltok::Kind LLLexer::Lex0x() {
case 'K':
// F80HexFPConstant - x87 long double in hexadecimal format (10 bytes)
FP80HexToIntPair(TokStart+3, CurPtr, Pair);
- APFloatVal = APFloat(APInt(80, 2, Pair));
+ APFloatVal = APFloat(APInt(80, Pair));
return lltok::APFloat;
case 'L':
// F128HexFPConstant - IEEE 128-bit in hexadecimal format (16 bytes)
HexToIntPair(TokStart+3, CurPtr, Pair);
- APFloatVal = APFloat(APInt(128, 2, Pair), true);
+ APFloatVal = APFloat(APInt(128, Pair), true);
return lltok::APFloat;
case 'M':
// PPC128HexFPConstant - PowerPC 128-bit in hexadecimal format (16 bytes)
HexToIntPair(TokStart+3, CurPtr, Pair);
- APFloatVal = APFloat(APInt(128, 2, Pair));
+ APFloatVal = APFloat(APInt(128, Pair));
return lltok::APFloat;
}
}
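All three cases switch from the old word-count-plus-pointer APInt constructor to the ArrayRef-based one: Pair is a two-element uint64_t buffer filled by HexToIntPair/FP80HexToIntPair, and a C array converts to ArrayRef<uint64_t> implicitly, making the explicit count of 2 redundant.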
diff --git a/lib/AsmParser/LLParser.cpp b/lib/AsmParser/LLParser.cpp
index cfc31f3..cafaab0 100644
--- a/lib/AsmParser/LLParser.cpp
+++ b/lib/AsmParser/LLParser.cpp
@@ -26,7 +26,7 @@
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
-static std::string getTypeString(const Type *T) {
+static std::string getTypeString(Type *T) {
std::string Result;
raw_string_ostream Tmp(Result);
Tmp << *T;
@@ -120,6 +120,9 @@ bool LLParser::ValidateEndOfModule() {
for (Module::iterator FI = M->begin(), FE = M->end(); FI != FE; )
UpgradeCallsToIntrinsic(FI++); // must be post-increment, as we remove
+ // Upgrade to new EH scheme. N.B. This will go away in 3.1.
+ UpgradeExceptionHandling(M);
+
// Check debug info intrinsics.
CheckDebugInfoIntrinsics(M);
return false;
@@ -744,9 +747,9 @@ bool LLParser::ParseGlobal(const std::string &Name, LocTy NameLoc,
/// GetGlobalVal - Get a value with the specified name or ID, creating a
/// forward reference record if needed. This can return null if the value
/// exists but does not have the right type.
-GlobalValue *LLParser::GetGlobalVal(const std::string &Name, const Type *Ty,
+GlobalValue *LLParser::GetGlobalVal(const std::string &Name, Type *Ty,
LocTy Loc) {
- const PointerType *PTy = dyn_cast<PointerType>(Ty);
+ PointerType *PTy = dyn_cast<PointerType>(Ty);
if (PTy == 0) {
Error(Loc, "global variable reference must have pointer type");
return 0;
@@ -775,7 +778,7 @@ GlobalValue *LLParser::GetGlobalVal(const std::string &Name, const Type *Ty,
// Otherwise, create a new forward reference for this value and remember it.
GlobalValue *FwdVal;
- if (const FunctionType *FT = dyn_cast<FunctionType>(PTy->getElementType()))
+ if (FunctionType *FT = dyn_cast<FunctionType>(PTy->getElementType()))
FwdVal = Function::Create(FT, GlobalValue::ExternalWeakLinkage, Name, M);
else
FwdVal = new GlobalVariable(*M, PTy->getElementType(), false,
@@ -785,8 +788,8 @@ GlobalValue *LLParser::GetGlobalVal(const std::string &Name, const Type *Ty,
return FwdVal;
}
-GlobalValue *LLParser::GetGlobalVal(unsigned ID, const Type *Ty, LocTy Loc) {
- const PointerType *PTy = dyn_cast<PointerType>(Ty);
+GlobalValue *LLParser::GetGlobalVal(unsigned ID, Type *Ty, LocTy Loc) {
+ PointerType *PTy = dyn_cast<PointerType>(Ty);
if (PTy == 0) {
Error(Loc, "global variable reference must have pointer type");
return 0;
@@ -813,7 +816,7 @@ GlobalValue *LLParser::GetGlobalVal(unsigned ID, const Type *Ty, LocTy Loc) {
// Otherwise, create a new forward reference for this value and remember it.
GlobalValue *FwdVal;
- if (const FunctionType *FT = dyn_cast<FunctionType>(PTy->getElementType()))
+ if (FunctionType *FT = dyn_cast<FunctionType>(PTy->getElementType()))
FwdVal = Function::Create(FT, GlobalValue::ExternalWeakLinkage, "", M);
else
FwdVal = new GlobalVariable(*M, PTy->getElementType(), false,
@@ -908,6 +911,7 @@ bool LLParser::ParseOptionalAttrs(unsigned &Attrs, unsigned AttrKind) {
case lltok::kw_noreturn: Attrs |= Attribute::NoReturn; break;
case lltok::kw_nounwind: Attrs |= Attribute::NoUnwind; break;
case lltok::kw_uwtable: Attrs |= Attribute::UWTable; break;
+ case lltok::kw_returns_twice: Attrs |= Attribute::ReturnsTwice; break;
case lltok::kw_noinline: Attrs |= Attribute::NoInline; break;
case lltok::kw_readnone: Attrs |= Attribute::ReadNone; break;
case lltok::kw_readonly: Attrs |= Attribute::ReadOnly; break;
@@ -919,7 +923,6 @@ bool LLParser::ParseOptionalAttrs(unsigned &Attrs, unsigned AttrKind) {
case lltok::kw_noredzone: Attrs |= Attribute::NoRedZone; break;
case lltok::kw_noimplicitfloat: Attrs |= Attribute::NoImplicitFloat; break;
case lltok::kw_naked: Attrs |= Attribute::Naked; break;
- case lltok::kw_hotpatch: Attrs |= Attribute::Hotpatch; break;
case lltok::kw_nonlazybind: Attrs |= Attribute::NonLazyBind; break;
case lltok::kw_alignstack: {
@@ -1145,6 +1148,32 @@ bool LLParser::ParseOptionalCommaAlign(unsigned &Alignment,
return false;
}
+/// ParseScopeAndOrdering
+/// if isAtomic: ::= 'singlethread'? AtomicOrdering
+/// else: ::=
+///
+/// This sets Scope and Ordering to the parsed values.
+bool LLParser::ParseScopeAndOrdering(bool isAtomic, SynchronizationScope &Scope,
+ AtomicOrdering &Ordering) {
+ if (!isAtomic)
+ return false;
+
+ Scope = CrossThread;
+ if (EatIfPresent(lltok::kw_singlethread))
+ Scope = SingleThread;
+ switch (Lex.getKind()) {
+ default: return TokError("Expected ordering on atomic instruction");
+ case lltok::kw_unordered: Ordering = Unordered; break;
+ case lltok::kw_monotonic: Ordering = Monotonic; break;
+ case lltok::kw_acquire: Ordering = Acquire; break;
+ case lltok::kw_release: Ordering = Release; break;
+ case lltok::kw_acq_rel: Ordering = AcquireRelease; break;
+ case lltok::kw_seq_cst: Ordering = SequentiallyConsistent; break;
+ }
+ Lex.Lex();
+ return false;
+}
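As an instance of this grammar (assumed LLVM 3.0 textual IR): fence seq_cst leaves Scope at the default CrossThread, while fence singlethread seq_cst consumes the optional scope token first and yields SingleThread.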
+
/// ParseOptionalStackAlignment
/// ::= /* empty */
/// ::= 'alignstack' '(' 4 ')'
@@ -1237,7 +1266,7 @@ bool LLParser::ParseType(Type *&Result, bool AllowVoid) {
// If the type hasn't been defined yet, create a forward definition and
// remember where that forward def'n was seen (in case it never is defined).
if (Entry.first == 0) {
- Entry.first = StructType::createNamed(Context, Lex.getStrVal());
+ Entry.first = StructType::create(Context, Lex.getStrVal());
Entry.second = Lex.getLoc();
}
Result = Entry.first;
@@ -1254,7 +1283,7 @@ bool LLParser::ParseType(Type *&Result, bool AllowVoid) {
// If the type hasn't been defined yet, create a forward definition and
// remember where that forward def'n was seen (in case it never is defined).
if (Entry.first == 0) {
- Entry.first = StructType::createNamed(Context, "");
+ Entry.first = StructType::create(Context);
Entry.second = Lex.getLoc();
}
Result = Entry.first;
@@ -1476,7 +1505,7 @@ bool LLParser::ParseStructDefinition(SMLoc TypeLoc, StringRef Name,
// If this type number has never been uttered, create it.
if (Entry.first == 0)
- Entry.first = StructType::createNamed(Context, Name);
+ Entry.first = StructType::create(Context, Name);
ResultTy = Entry.first;
return false;
}
@@ -1502,7 +1531,7 @@ bool LLParser::ParseStructDefinition(SMLoc TypeLoc, StringRef Name,
// If this type number has never been uttered, create it.
if (Entry.first == 0)
- Entry.first = StructType::createNamed(Context, Name);
+ Entry.first = StructType::create(Context, Name);
StructType *STy = cast<StructType>(Entry.first);
@@ -1668,7 +1697,7 @@ bool LLParser::PerFunctionState::FinishFunction() {
/// forward reference record if needed. This can return null if the value
/// exists but does not have the right type.
Value *LLParser::PerFunctionState::GetVal(const std::string &Name,
- const Type *Ty, LocTy Loc) {
+ Type *Ty, LocTy Loc) {
// Look this name up in the normal function symbol table.
Value *Val = F.getValueSymbolTable().lookup(Name);
@@ -1709,7 +1738,7 @@ Value *LLParser::PerFunctionState::GetVal(const std::string &Name,
return FwdVal;
}
-Value *LLParser::PerFunctionState::GetVal(unsigned ID, const Type *Ty,
+Value *LLParser::PerFunctionState::GetVal(unsigned ID, Type *Ty,
LocTy Loc) {
// Look this name up in the normal function symbol table.
Value *Val = ID < NumberedVals.size() ? NumberedVals[ID] : 0;
@@ -2273,16 +2302,11 @@ bool LLParser::ParseValID(ValID &ID, PerFunctionState *PFS) {
if (Elts.size() == 0 || !Elts[0]->getType()->isPointerTy())
return Error(ID.Loc, "getelementptr requires pointer operand");
- if (!GetElementPtrInst::getIndexedType(Elts[0]->getType(),
- (Value**)(Elts.data() + 1),
- Elts.size() - 1))
+ ArrayRef<Constant *> Indices(Elts.begin() + 1, Elts.end());
+ if (!GetElementPtrInst::getIndexedType(Elts[0]->getType(), Indices))
return Error(ID.Loc, "invalid indices for getelementptr");
- ID.ConstantVal = InBounds ?
- ConstantExpr::getInBoundsGetElementPtr(Elts[0],
- Elts.data() + 1,
- Elts.size() - 1) :
- ConstantExpr::getGetElementPtr(Elts[0],
- Elts.data() + 1, Elts.size() - 1);
+ ID.ConstantVal = ConstantExpr::getGetElementPtr(Elts[0], Indices,
+ InBounds);
} else if (Opc == Instruction::Select) {
if (Elts.size() != 3)
return Error(ID.Loc, "expected three operands to select");
@@ -2323,7 +2347,7 @@ bool LLParser::ParseValID(ValID &ID, PerFunctionState *PFS) {
}
/// ParseGlobalValue - Parse a global value with the specified type.
-bool LLParser::ParseGlobalValue(const Type *Ty, Constant *&C) {
+bool LLParser::ParseGlobalValue(Type *Ty, Constant *&C) {
C = 0;
ValID ID;
Value *V = NULL;
@@ -2410,7 +2434,7 @@ bool LLParser::ParseMetadataValue(ValID &ID, PerFunctionState *PFS) {
// Function Parsing.
//===----------------------------------------------------------------------===//
-bool LLParser::ConvertValIDToValue(const Type *Ty, ValID &ID, Value *&V,
+bool LLParser::ConvertValIDToValue(Type *Ty, ValID &ID, Value *&V,
PerFunctionState *PFS) {
if (Ty->isFunctionTy())
return Error(ID.Loc, "functions are not values, refer to them as pointers");
@@ -2426,8 +2450,8 @@ bool LLParser::ConvertValIDToValue(const Type *Ty, ValID &ID, Value *&V,
V = PFS->GetVal(ID.StrVal, Ty, ID.Loc);
return (V == 0);
case ValID::t_InlineAsm: {
- const PointerType *PTy = dyn_cast<PointerType>(Ty);
- const FunctionType *FTy =
+ PointerType *PTy = dyn_cast<PointerType>(Ty);
+ FunctionType *FTy =
PTy ? dyn_cast<FunctionType>(PTy->getElementType()) : 0;
if (!FTy || !InlineAsm::Verify(FTy, ID.StrVal2))
return Error(ID.Loc, "invalid type for inline asm constraint string");
@@ -2506,7 +2530,7 @@ bool LLParser::ConvertValIDToValue(const Type *Ty, ValID &ID, Value *&V,
return false;
case ValID::t_ConstantStruct:
case ValID::t_PackedConstantStruct:
- if (const StructType *ST = dyn_cast<StructType>(Ty)) {
+ if (StructType *ST = dyn_cast<StructType>(Ty)) {
if (ST->getNumElements() != ID.UIntVal)
return Error(ID.Loc,
"initializer with struct type has wrong # elements");
@@ -2519,15 +2543,15 @@ bool LLParser::ConvertValIDToValue(const Type *Ty, ValID &ID, Value *&V,
return Error(ID.Loc, "element " + Twine(i) +
" of struct initializer doesn't match struct element type");
- V = ConstantStruct::get(ST, ArrayRef<Constant*>(ID.ConstantStructElts,
- ID.UIntVal));
+ V = ConstantStruct::get(ST, makeArrayRef(ID.ConstantStructElts,
+ ID.UIntVal));
} else
return Error(ID.Loc, "constant expression type mismatch");
return false;
}
}
-bool LLParser::ParseValue(const Type *Ty, Value *&V, PerFunctionState *PFS) {
+bool LLParser::ParseValue(Type *Ty, Value *&V, PerFunctionState *PFS) {
V = 0;
ValID ID;
return ParseValID(ID, PFS) ||
@@ -2671,9 +2695,9 @@ bool LLParser::ParseFunctionHeader(Function *&Fn, bool isDefine) {
if (PAL.paramHasAttr(1, Attribute::StructRet) && !RetType->isVoidTy())
return Error(RetTypeLoc, "functions with 'sret' argument must return void");
- const FunctionType *FT =
+ FunctionType *FT =
FunctionType::get(RetType, ParamTypeList, isVarArg);
- const PointerType *PFT = PointerType::getUnqual(FT);
+ PointerType *PFT = PointerType::getUnqual(FT);
Fn = 0;
if (!FunctionName.empty()) {
@@ -2864,6 +2888,7 @@ int LLParser::ParseInstruction(Instruction *&Inst, BasicBlock *BB,
case lltok::kw_switch: return ParseSwitch(Inst, PFS);
case lltok::kw_indirectbr: return ParseIndirectBr(Inst, PFS);
case lltok::kw_invoke: return ParseInvoke(Inst, PFS);
+ case lltok::kw_resume: return ParseResume(Inst, PFS);
// Binary Operators.
case lltok::kw_add:
case lltok::kw_sub:
@@ -2923,13 +2948,18 @@ int LLParser::ParseInstruction(Instruction *&Inst, BasicBlock *BB,
case lltok::kw_insertelement: return ParseInsertElement(Inst, PFS);
case lltok::kw_shufflevector: return ParseShuffleVector(Inst, PFS);
case lltok::kw_phi: return ParsePHI(Inst, PFS);
+ case lltok::kw_landingpad: return ParseLandingPad(Inst, PFS);
case lltok::kw_call: return ParseCall(Inst, PFS, false);
case lltok::kw_tail: return ParseCall(Inst, PFS, true);
// Memory.
case lltok::kw_alloca: return ParseAlloc(Inst, PFS);
case lltok::kw_load: return ParseLoad(Inst, PFS, false);
case lltok::kw_store: return ParseStore(Inst, PFS, false);
+ case lltok::kw_cmpxchg: return ParseCmpXchg(Inst, PFS);
+ case lltok::kw_atomicrmw: return ParseAtomicRMW(Inst, PFS);
+ case lltok::kw_fence: return ParseFence(Inst, PFS);
case lltok::kw_volatile:
+    // For backward compatibility; the canonical position of 'volatile' is
+    // after the 'load'/'store' keyword.
if (EatIfPresent(lltok::kw_load))
return ParseLoad(Inst, PFS, true);
else if (EatIfPresent(lltok::kw_store))
@@ -3162,8 +3192,8 @@ bool LLParser::ParseInvoke(Instruction *&Inst, PerFunctionState &PFS) {
// If RetType is a non-function pointer type, then this is the short syntax
// for the call, which means that RetType is just the return type. Infer the
// rest of the function argument types from the arguments that are present.
- const PointerType *PFTy = 0;
- const FunctionType *Ty = 0;
+ PointerType *PFTy = 0;
+ FunctionType *Ty = 0;
if (!(PFTy = dyn_cast<PointerType>(RetType)) ||
!(Ty = dyn_cast<FunctionType>(PFTy->getElementType()))) {
// Pull out the types of all of the arguments...
@@ -3194,7 +3224,7 @@ bool LLParser::ParseInvoke(Instruction *&Inst, PerFunctionState &PFS) {
FunctionType::param_iterator I = Ty->param_begin();
FunctionType::param_iterator E = Ty->param_end();
for (unsigned i = 0, e = ArgList.size(); i != e; ++i) {
- const Type *ExpectedTy = 0;
+ Type *ExpectedTy = 0;
if (I != E) {
ExpectedTy = *I++;
} else if (!Ty->isVarArg()) {
@@ -3225,7 +3255,17 @@ bool LLParser::ParseInvoke(Instruction *&Inst, PerFunctionState &PFS) {
return false;
}
+/// ParseResume
+/// ::= 'resume' TypeAndValue
+bool LLParser::ParseResume(Instruction *&Inst, PerFunctionState &PFS) {
+ Value *Exn; LocTy ExnLoc;
+ if (ParseTypeAndValue(Exn, ExnLoc, PFS))
+ return true;
+ ResumeInst *RI = ResumeInst::Create(Exn);
+ Inst = RI;
+ return false;
+}
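+
+// For reference, a form the production above accepts (the operand name %exn
+// is illustrative):
+//   resume { i8*, i32 } %exn
+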
//===----------------------------------------------------------------------===//
// Binary Operators.
@@ -3473,6 +3513,56 @@ int LLParser::ParsePHI(Instruction *&Inst, PerFunctionState &PFS) {
return AteExtraComma ? InstExtraComma : InstNormal;
}
+/// ParseLandingPad
+/// ::= 'landingpad' Type 'personality' TypeAndValue 'cleanup'? Clause+
+/// Clause
+/// ::= 'catch' TypeAndValue
+/// ::= 'filter'
+/// ::= 'filter' TypeAndValue ( ',' TypeAndValue )*
+bool LLParser::ParseLandingPad(Instruction *&Inst, PerFunctionState &PFS) {
+ Type *Ty = 0; LocTy TyLoc;
+ Value *PersFn; LocTy PersFnLoc;
+
+ if (ParseType(Ty, TyLoc) ||
+ ParseToken(lltok::kw_personality, "expected 'personality'") ||
+ ParseTypeAndValue(PersFn, PersFnLoc, PFS))
+ return true;
+
+ LandingPadInst *LP = LandingPadInst::Create(Ty, PersFn, 0);
+ LP->setCleanup(EatIfPresent(lltok::kw_cleanup));
+
+ while (Lex.getKind() == lltok::kw_catch || Lex.getKind() == lltok::kw_filter){
+ LandingPadInst::ClauseType CT;
+ if (EatIfPresent(lltok::kw_catch))
+ CT = LandingPadInst::Catch;
+ else if (EatIfPresent(lltok::kw_filter))
+ CT = LandingPadInst::Filter;
+ else
+ return TokError("expected 'catch' or 'filter' clause type");
+
+ Value *V; LocTy VLoc;
+ if (ParseTypeAndValue(V, VLoc, PFS)) {
+ delete LP;
+ return true;
+ }
+
+    // A 'catch' clause expects a non-array constant; a 'filter' clause
+    // expects an array constant.
+ if (CT == LandingPadInst::Catch) {
+ if (isa<ArrayType>(V->getType()))
+ Error(VLoc, "'catch' clause has an invalid type");
+ } else {
+ if (!isa<ArrayType>(V->getType()))
+ Error(VLoc, "'filter' clause has an invalid type");
+ }
+
+ LP->addClause(V);
+ }
+
+ Inst = LP;
+ return false;
+}
+
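+// For reference, a landing pad the parser above accepts (personality and
+// type-info names are illustrative):
+//   %lp = landingpad { i8*, i32 } personality i32 (...)* @__gxx_personality_v0
+//           cleanup
+//           catch i8* @_ZTIi
+//           filter [1 x i8*] [i8* @_ZTId]
+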
/// ParseCall
/// ::= 'tail'? 'call' OptionalCallingConv OptionalAttrs Type Value
/// ParameterList OptionalAttrs
@@ -3498,8 +3588,8 @@ bool LLParser::ParseCall(Instruction *&Inst, PerFunctionState &PFS,
// If RetType is a non-function pointer type, then this is the short syntax
// for the call, which means that RetType is just the return type. Infer the
// rest of the function argument types from the arguments that are present.
- const PointerType *PFTy = 0;
- const FunctionType *Ty = 0;
+ PointerType *PFTy = 0;
+ FunctionType *Ty = 0;
if (!(PFTy = dyn_cast<PointerType>(RetType)) ||
!(Ty = dyn_cast<FunctionType>(PFTy->getElementType()))) {
// Pull out the types of all of the arguments...
@@ -3530,7 +3620,7 @@ bool LLParser::ParseCall(Instruction *&Inst, PerFunctionState &PFS,
FunctionType::param_iterator I = Ty->param_begin();
FunctionType::param_iterator E = Ty->param_end();
for (unsigned i = 0, e = ArgList.size(); i != e; ++i) {
- const Type *ExpectedTy = 0;
+ Type *ExpectedTy = 0;
if (I != E) {
ExpectedTy = *I++;
} else if (!Ty->isVarArg()) {
@@ -3596,34 +3686,85 @@ int LLParser::ParseAlloc(Instruction *&Inst, PerFunctionState &PFS) {
}
/// ParseLoad
-/// ::= 'volatile'? 'load' TypeAndValue (',' OptionalInfo)?
+/// ::= 'load' 'volatile'? TypeAndValue (',' 'align' i32)?
+/// ::= 'load' 'atomic' 'volatile'? TypeAndValue
+/// 'singlethread'? AtomicOrdering (',' 'align' i32)?
+/// Compatibility:
+/// ::= 'volatile' 'load' TypeAndValue (',' 'align' i32)?
int LLParser::ParseLoad(Instruction *&Inst, PerFunctionState &PFS,
bool isVolatile) {
Value *Val; LocTy Loc;
unsigned Alignment = 0;
bool AteExtraComma = false;
+ bool isAtomic = false;
+ AtomicOrdering Ordering = NotAtomic;
+ SynchronizationScope Scope = CrossThread;
+
+ if (Lex.getKind() == lltok::kw_atomic) {
+ if (isVolatile)
+ return TokError("mixing atomic with old volatile placement");
+ isAtomic = true;
+ Lex.Lex();
+ }
+
+ if (Lex.getKind() == lltok::kw_volatile) {
+ if (isVolatile)
+      return TokError("duplicate volatile before and after load");
+ isVolatile = true;
+ Lex.Lex();
+ }
+
if (ParseTypeAndValue(Val, Loc, PFS) ||
+ ParseScopeAndOrdering(isAtomic, Scope, Ordering) ||
ParseOptionalCommaAlign(Alignment, AteExtraComma))
return true;
if (!Val->getType()->isPointerTy() ||
!cast<PointerType>(Val->getType())->getElementType()->isFirstClassType())
return Error(Loc, "load operand must be a pointer to a first class type");
+ if (isAtomic && !Alignment)
+ return Error(Loc, "atomic load must have explicit non-zero alignment");
+ if (Ordering == Release || Ordering == AcquireRelease)
+ return Error(Loc, "atomic load cannot use Release ordering");
- Inst = new LoadInst(Val, "", isVolatile, Alignment);
+ Inst = new LoadInst(Val, "", isVolatile, Alignment, Ordering, Scope);
return AteExtraComma ? InstExtraComma : InstNormal;
}
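+
+// For reference, forms the grammar above accepts (names and alignment are
+// illustrative):
+//   %v = load i32* %p, align 4
+//   %v = load atomic volatile i32* %p singlethread acquire, align 4
+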
/// ParseStore
-/// ::= 'volatile'? 'store' TypeAndValue ',' TypeAndValue (',' 'align' i32)?
+
+/// ::= 'store' 'atomic' 'volatile'? TypeAndValue ',' TypeAndValue
+/// 'singlethread'? AtomicOrdering (',' 'align' i32)?
+/// Compatibility:
+/// ::= 'volatile' 'store' TypeAndValue ',' TypeAndValue (',' 'align' i32)?
int LLParser::ParseStore(Instruction *&Inst, PerFunctionState &PFS,
bool isVolatile) {
Value *Val, *Ptr; LocTy Loc, PtrLoc;
unsigned Alignment = 0;
bool AteExtraComma = false;
+ bool isAtomic = false;
+ AtomicOrdering Ordering = NotAtomic;
+ SynchronizationScope Scope = CrossThread;
+
+ if (Lex.getKind() == lltok::kw_atomic) {
+ if (isVolatile)
+ return TokError("mixing atomic with old volatile placement");
+ isAtomic = true;
+ Lex.Lex();
+ }
+
+ if (Lex.getKind() == lltok::kw_volatile) {
+ if (isVolatile)
+ return TokError("duplicate volatile before and after store");
+ isVolatile = true;
+ Lex.Lex();
+ }
+
if (ParseTypeAndValue(Val, Loc, PFS) ||
ParseToken(lltok::comma, "expected ',' after store operand") ||
ParseTypeAndValue(Ptr, PtrLoc, PFS) ||
+ ParseScopeAndOrdering(isAtomic, Scope, Ordering) ||
ParseOptionalCommaAlign(Alignment, AteExtraComma))
return true;
@@ -3633,11 +3774,131 @@ int LLParser::ParseStore(Instruction *&Inst, PerFunctionState &PFS,
return Error(Loc, "store operand must be a first class value");
if (cast<PointerType>(Ptr->getType())->getElementType() != Val->getType())
return Error(Loc, "stored value and pointer type do not match");
+ if (isAtomic && !Alignment)
+ return Error(Loc, "atomic store must have explicit non-zero alignment");
+ if (Ordering == Acquire || Ordering == AcquireRelease)
+ return Error(Loc, "atomic store cannot use Acquire ordering");
+
+ Inst = new StoreInst(Val, Ptr, isVolatile, Alignment, Ordering, Scope);
+ return AteExtraComma ? InstExtraComma : InstNormal;
+}
- Inst = new StoreInst(Val, Ptr, isVolatile, Alignment);
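+
+// For reference, forms the grammar above accepts (names and alignment are
+// illustrative):
+//   store i32 %v, i32* %p, align 4
+//   store atomic i32 %v, i32* %p release, align 4
+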
+/// ParseCmpXchg
+/// ::= 'cmpxchg' 'volatile'? TypeAndValue ',' TypeAndValue ',' TypeAndValue
+/// 'singlethread'? AtomicOrdering
+int LLParser::ParseCmpXchg(Instruction *&Inst, PerFunctionState &PFS) {
+ Value *Ptr, *Cmp, *New; LocTy PtrLoc, CmpLoc, NewLoc;
+ bool AteExtraComma = false;
+ AtomicOrdering Ordering = NotAtomic;
+ SynchronizationScope Scope = CrossThread;
+ bool isVolatile = false;
+
+ if (EatIfPresent(lltok::kw_volatile))
+ isVolatile = true;
+
+ if (ParseTypeAndValue(Ptr, PtrLoc, PFS) ||
+ ParseToken(lltok::comma, "expected ',' after cmpxchg address") ||
+ ParseTypeAndValue(Cmp, CmpLoc, PFS) ||
+ ParseToken(lltok::comma, "expected ',' after cmpxchg cmp operand") ||
+ ParseTypeAndValue(New, NewLoc, PFS) ||
+ ParseScopeAndOrdering(true /*Always atomic*/, Scope, Ordering))
+ return true;
+
+ if (Ordering == Unordered)
+ return TokError("cmpxchg cannot be unordered");
+ if (!Ptr->getType()->isPointerTy())
+ return Error(PtrLoc, "cmpxchg operand must be a pointer");
+ if (cast<PointerType>(Ptr->getType())->getElementType() != Cmp->getType())
+ return Error(CmpLoc, "compare value and pointer type do not match");
+ if (cast<PointerType>(Ptr->getType())->getElementType() != New->getType())
+ return Error(NewLoc, "new value and pointer type do not match");
+ if (!New->getType()->isIntegerTy())
+ return Error(NewLoc, "cmpxchg operand must be an integer");
+ unsigned Size = New->getType()->getPrimitiveSizeInBits();
+ if (Size < 8 || (Size & (Size - 1)))
+    return Error(NewLoc, "cmpxchg operand must be a power-of-two byte-sized"
+                 " integer");
+
+ AtomicCmpXchgInst *CXI =
+ new AtomicCmpXchgInst(Ptr, Cmp, New, Ordering, Scope);
+ CXI->setVolatile(isVolatile);
+ Inst = CXI;
return AteExtraComma ? InstExtraComma : InstNormal;
}
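+
+// For reference (in 3.0, cmpxchg takes a single ordering and yields the old
+// value; names are illustrative):
+//   %old = cmpxchg i32* %p, i32 %cmp, i32 %new seq_cst
+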
+/// ParseAtomicRMW
+/// ::= 'atomicrmw' 'volatile'? BinOp TypeAndValue ',' TypeAndValue
+/// 'singlethread'? AtomicOrdering
+int LLParser::ParseAtomicRMW(Instruction *&Inst, PerFunctionState &PFS) {
+ Value *Ptr, *Val; LocTy PtrLoc, ValLoc;
+ bool AteExtraComma = false;
+ AtomicOrdering Ordering = NotAtomic;
+ SynchronizationScope Scope = CrossThread;
+ bool isVolatile = false;
+ AtomicRMWInst::BinOp Operation;
+
+ if (EatIfPresent(lltok::kw_volatile))
+ isVolatile = true;
+
+ switch (Lex.getKind()) {
+ default: return TokError("expected binary operation in atomicrmw");
+ case lltok::kw_xchg: Operation = AtomicRMWInst::Xchg; break;
+ case lltok::kw_add: Operation = AtomicRMWInst::Add; break;
+ case lltok::kw_sub: Operation = AtomicRMWInst::Sub; break;
+ case lltok::kw_and: Operation = AtomicRMWInst::And; break;
+ case lltok::kw_nand: Operation = AtomicRMWInst::Nand; break;
+ case lltok::kw_or: Operation = AtomicRMWInst::Or; break;
+ case lltok::kw_xor: Operation = AtomicRMWInst::Xor; break;
+ case lltok::kw_max: Operation = AtomicRMWInst::Max; break;
+ case lltok::kw_min: Operation = AtomicRMWInst::Min; break;
+ case lltok::kw_umax: Operation = AtomicRMWInst::UMax; break;
+ case lltok::kw_umin: Operation = AtomicRMWInst::UMin; break;
+ }
+ Lex.Lex(); // Eat the operation.
+
+ if (ParseTypeAndValue(Ptr, PtrLoc, PFS) ||
+ ParseToken(lltok::comma, "expected ',' after atomicrmw address") ||
+ ParseTypeAndValue(Val, ValLoc, PFS) ||
+ ParseScopeAndOrdering(true /*Always atomic*/, Scope, Ordering))
+ return true;
+
+ if (Ordering == Unordered)
+ return TokError("atomicrmw cannot be unordered");
+ if (!Ptr->getType()->isPointerTy())
+ return Error(PtrLoc, "atomicrmw operand must be a pointer");
+ if (cast<PointerType>(Ptr->getType())->getElementType() != Val->getType())
+ return Error(ValLoc, "atomicrmw value and pointer type do not match");
+ if (!Val->getType()->isIntegerTy())
+ return Error(ValLoc, "atomicrmw operand must be an integer");
+ unsigned Size = Val->getType()->getPrimitiveSizeInBits();
+ if (Size < 8 || (Size & (Size - 1)))
+    return Error(ValLoc, "atomicrmw operand must be a power-of-two byte-sized"
+                 " integer");
+
+ AtomicRMWInst *RMWI =
+ new AtomicRMWInst(Operation, Ptr, Val, Ordering, Scope);
+ RMWI->setVolatile(isVolatile);
+ Inst = RMWI;
+ return AteExtraComma ? InstExtraComma : InstNormal;
+}
+
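+// For reference (operation, names, and orderings are illustrative):
+//   %old = atomicrmw add i32* %p, i32 1 seq_cst
+//   %old = atomicrmw volatile xchg i32* %p, i32 %v singlethread monotonic
+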
+/// ParseFence
+/// ::= 'fence' 'singlethread'? AtomicOrdering
+int LLParser::ParseFence(Instruction *&Inst, PerFunctionState &PFS) {
+ AtomicOrdering Ordering = NotAtomic;
+ SynchronizationScope Scope = CrossThread;
+ if (ParseScopeAndOrdering(true /*Always atomic*/, Scope, Ordering))
+ return true;
+
+ if (Ordering == Unordered)
+ return TokError("fence cannot be unordered");
+ if (Ordering == Monotonic)
+ return TokError("fence cannot be monotonic");
+
+ Inst = new FenceInst(Context, Ordering, Scope);
+ return InstNormal;
+}
+
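+// For reference:
+//   fence seq_cst
+//   fence singlethread acquire
+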
/// ParseGetElementPtr
/// ::= 'getelementptr' 'inbounds'? TypeAndValue (',' TypeAndValue)*
int LLParser::ParseGetElementPtr(Instruction *&Inst, PerFunctionState &PFS) {
@@ -3663,10 +3924,9 @@ int LLParser::ParseGetElementPtr(Instruction *&Inst, PerFunctionState &PFS) {
Indices.push_back(Val);
}
- if (!GetElementPtrInst::getIndexedType(Ptr->getType(),
- Indices.begin(), Indices.end()))
+ if (!GetElementPtrInst::getIndexedType(Ptr->getType(), Indices))
return Error(Loc, "invalid getelementptr indices");
- Inst = GetElementPtrInst::Create(Ptr, Indices.begin(), Indices.end());
+ Inst = GetElementPtrInst::Create(Ptr, Indices);
if (InBounds)
cast<GetElementPtrInst>(Inst)->setIsInBounds(true);
return AteExtraComma ? InstExtraComma : InstNormal;
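+
+// For reference, a form the grammar above accepts (types and names are
+// illustrative):
+//   %elt = getelementptr inbounds [4 x i32]* %arr, i64 0, i64 2
+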
diff --git a/lib/AsmParser/LLParser.h b/lib/AsmParser/LLParser.h
index 9630657..cbc3c23 100644
--- a/lib/AsmParser/LLParser.h
+++ b/lib/AsmParser/LLParser.h
@@ -15,6 +15,7 @@
#define LLVM_ASMPARSER_LLPARSER_H
#include "LLLexer.h"
+#include "llvm/Instructions.h"
#include "llvm/Module.h"
#include "llvm/Type.h"
#include "llvm/ADT/DenseMap.h"
@@ -142,8 +143,8 @@ namespace llvm {
/// GetGlobalVal - Get a value with the specified name or ID, creating a
/// forward reference record if needed. This can return null if the value
/// exists but does not have the right type.
- GlobalValue *GetGlobalVal(const std::string &N, const Type *Ty, LocTy Loc);
- GlobalValue *GetGlobalVal(unsigned ID, const Type *Ty, LocTy Loc);
+ GlobalValue *GetGlobalVal(const std::string &N, Type *Ty, LocTy Loc);
+ GlobalValue *GetGlobalVal(unsigned ID, Type *Ty, LocTy Loc);
// Helper Routines.
bool ParseToken(lltok::Kind T, const char *ErrMsg);
@@ -178,6 +179,8 @@ namespace llvm {
bool ParseOptionalVisibility(unsigned &Visibility);
bool ParseOptionalCallingConv(CallingConv::ID &CC);
bool ParseOptionalAlignment(unsigned &Alignment);
+ bool ParseScopeAndOrdering(bool isAtomic, SynchronizationScope &Scope,
+ AtomicOrdering &Ordering);
bool ParseOptionalStackAlignment(unsigned &Alignment);
bool ParseOptionalCommaAlign(unsigned &Alignment, bool &AteExtraComma);
bool ParseIndexList(SmallVectorImpl<unsigned> &Indices,bool &AteExtraComma);
@@ -249,8 +252,8 @@ namespace llvm {
/// GetVal - Get a value with the specified name or ID, creating a
/// forward reference record if needed. This can return null if the value
/// exists but does not have the right type.
- Value *GetVal(const std::string &Name, const Type *Ty, LocTy Loc);
- Value *GetVal(unsigned ID, const Type *Ty, LocTy Loc);
+ Value *GetVal(const std::string &Name, Type *Ty, LocTy Loc);
+ Value *GetVal(unsigned ID, Type *Ty, LocTy Loc);
/// SetInstName - After an instruction is parsed and inserted into its
/// basic block, this installs its name.
@@ -269,14 +272,14 @@ namespace llvm {
BasicBlock *DefineBB(const std::string &Name, LocTy Loc);
};
- bool ConvertValIDToValue(const Type *Ty, ValID &ID, Value *&V,
+ bool ConvertValIDToValue(Type *Ty, ValID &ID, Value *&V,
PerFunctionState *PFS);
- bool ParseValue(const Type *Ty, Value *&V, PerFunctionState *PFS);
- bool ParseValue(const Type *Ty, Value *&V, PerFunctionState &PFS) {
+ bool ParseValue(Type *Ty, Value *&V, PerFunctionState *PFS);
+ bool ParseValue(Type *Ty, Value *&V, PerFunctionState &PFS) {
return ParseValue(Ty, V, &PFS);
}
- bool ParseValue(const Type *Ty, Value *&V, LocTy &Loc,
+ bool ParseValue(Type *Ty, Value *&V, LocTy &Loc,
PerFunctionState &PFS) {
Loc = Lex.getLoc();
return ParseValue(Ty, V, &PFS);
@@ -310,7 +313,7 @@ namespace llvm {
// Constant Parsing.
bool ParseValID(ValID &ID, PerFunctionState *PFS = NULL);
- bool ParseGlobalValue(const Type *Ty, Constant *&V);
+ bool ParseGlobalValue(Type *Ty, Constant *&V);
bool ParseGlobalTypeAndValue(Constant *&V);
bool ParseGlobalValueVector(SmallVectorImpl<Constant*> &Elts);
bool ParseMetadataListValue(ValID &ID, PerFunctionState *PFS);
@@ -344,6 +347,7 @@ namespace llvm {
bool ParseSwitch(Instruction *&Inst, PerFunctionState &PFS);
bool ParseIndirectBr(Instruction *&Inst, PerFunctionState &PFS);
bool ParseInvoke(Instruction *&Inst, PerFunctionState &PFS);
+ bool ParseResume(Instruction *&Inst, PerFunctionState &PFS);
bool ParseArithmetic(Instruction *&I, PerFunctionState &PFS, unsigned Opc,
unsigned OperandType);
@@ -356,10 +360,14 @@ namespace llvm {
bool ParseInsertElement(Instruction *&I, PerFunctionState &PFS);
bool ParseShuffleVector(Instruction *&I, PerFunctionState &PFS);
int ParsePHI(Instruction *&I, PerFunctionState &PFS);
+ bool ParseLandingPad(Instruction *&I, PerFunctionState &PFS);
bool ParseCall(Instruction *&I, PerFunctionState &PFS, bool isTail);
int ParseAlloc(Instruction *&I, PerFunctionState &PFS);
int ParseLoad(Instruction *&I, PerFunctionState &PFS, bool isVolatile);
int ParseStore(Instruction *&I, PerFunctionState &PFS, bool isVolatile);
+ int ParseCmpXchg(Instruction *&I, PerFunctionState &PFS);
+ int ParseAtomicRMW(Instruction *&I, PerFunctionState &PFS);
+ int ParseFence(Instruction *&I, PerFunctionState &PFS);
int ParseGetElementPtr(Instruction *&I, PerFunctionState &PFS);
int ParseExtractValue(Instruction *&I, PerFunctionState &PFS);
int ParseInsertValue(Instruction *&I, PerFunctionState &PFS);
diff --git a/lib/AsmParser/LLToken.h b/lib/AsmParser/LLToken.h
index a5f89fc..8f16772 100644
--- a/lib/AsmParser/LLToken.h
+++ b/lib/AsmParser/LLToken.h
@@ -53,6 +53,9 @@ namespace lltok {
kw_deplibs,
kw_datalayout,
kw_volatile,
+ kw_atomic,
+ kw_unordered, kw_monotonic, kw_acquire, kw_release, kw_acq_rel, kw_seq_cst,
+ kw_singlethread,
kw_nuw,
kw_nsw,
kw_exact,
@@ -87,6 +90,7 @@ namespace lltok {
kw_readnone,
kw_readonly,
kw_uwtable,
+ kw_returns_twice,
kw_inlinehint,
kw_noinline,
@@ -97,7 +101,6 @@ namespace lltok {
kw_noredzone,
kw_noimplicitfloat,
kw_naked,
- kw_hotpatch,
kw_nonlazybind,
kw_type,
@@ -107,6 +110,9 @@ namespace lltok {
kw_uge, kw_oeq, kw_one, kw_olt, kw_ogt, kw_ole, kw_oge, kw_ord, kw_uno,
kw_ueq, kw_une,
+ // atomicrmw operations that aren't also instruction keywords.
+ kw_xchg, kw_nand, kw_max, kw_min, kw_umax, kw_umin,
+
// Instruction Opcodes (Opcode in UIntVal).
kw_add, kw_fadd, kw_sub, kw_fsub, kw_mul, kw_fmul,
kw_udiv, kw_sdiv, kw_fdiv,
@@ -118,10 +124,13 @@ namespace lltok {
kw_fptoui, kw_fptosi, kw_inttoptr, kw_ptrtoint, kw_bitcast,
kw_select, kw_va_arg,
- kw_ret, kw_br, kw_switch, kw_indirectbr, kw_invoke, kw_unwind,
+ kw_landingpad, kw_personality, kw_cleanup, kw_catch, kw_filter,
+
+ kw_ret, kw_br, kw_switch, kw_indirectbr, kw_invoke, kw_unwind, kw_resume,
kw_unreachable,
- kw_alloca, kw_load, kw_store, kw_getelementptr,
+ kw_alloca, kw_load, kw_store, kw_fence, kw_cmpxchg, kw_atomicrmw,
+ kw_getelementptr,
kw_extractelement, kw_insertelement, kw_shufflevector,
kw_extractvalue, kw_insertvalue, kw_blockaddress,
diff --git a/lib/Bitcode/Reader/BitcodeReader.cpp b/lib/Bitcode/Reader/BitcodeReader.cpp
index 24c2994..46565f3 100644
--- a/lib/Bitcode/Reader/BitcodeReader.cpp
+++ b/lib/Bitcode/Reader/BitcodeReader.cpp
@@ -107,7 +107,7 @@ static int GetDecodedCastOpcode(unsigned Val) {
case bitc::CAST_BITCAST : return Instruction::BitCast;
}
}
-static int GetDecodedBinaryOpcode(unsigned Val, const Type *Ty) {
+static int GetDecodedBinaryOpcode(unsigned Val, Type *Ty) {
switch (Val) {
default: return -1;
case bitc::BINOP_ADD:
@@ -131,6 +131,44 @@ static int GetDecodedBinaryOpcode(unsigned Val, const Type *Ty) {
}
}
+static AtomicRMWInst::BinOp GetDecodedRMWOperation(unsigned Val) {
+ switch (Val) {
+ default: return AtomicRMWInst::BAD_BINOP;
+ case bitc::RMW_XCHG: return AtomicRMWInst::Xchg;
+ case bitc::RMW_ADD: return AtomicRMWInst::Add;
+ case bitc::RMW_SUB: return AtomicRMWInst::Sub;
+ case bitc::RMW_AND: return AtomicRMWInst::And;
+ case bitc::RMW_NAND: return AtomicRMWInst::Nand;
+ case bitc::RMW_OR: return AtomicRMWInst::Or;
+ case bitc::RMW_XOR: return AtomicRMWInst::Xor;
+ case bitc::RMW_MAX: return AtomicRMWInst::Max;
+ case bitc::RMW_MIN: return AtomicRMWInst::Min;
+ case bitc::RMW_UMAX: return AtomicRMWInst::UMax;
+ case bitc::RMW_UMIN: return AtomicRMWInst::UMin;
+ }
+}
+
+static AtomicOrdering GetDecodedOrdering(unsigned Val) {
+ switch (Val) {
+ case bitc::ORDERING_NOTATOMIC: return NotAtomic;
+ case bitc::ORDERING_UNORDERED: return Unordered;
+ case bitc::ORDERING_MONOTONIC: return Monotonic;
+ case bitc::ORDERING_ACQUIRE: return Acquire;
+ case bitc::ORDERING_RELEASE: return Release;
+ case bitc::ORDERING_ACQREL: return AcquireRelease;
+ default: // Map unknown orderings to sequentially-consistent.
+ case bitc::ORDERING_SEQCST: return SequentiallyConsistent;
+ }
+}
+
+static SynchronizationScope GetDecodedSynchScope(unsigned Val) {
+ switch (Val) {
+ case bitc::SYNCHSCOPE_SINGLETHREAD: return SingleThread;
+ default: // Map unknown scopes to cross-thread.
+ case bitc::SYNCHSCOPE_CROSSTHREAD: return CrossThread;
+ }
+}
+
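+// Note: the three decoders above must remain the inverses of
+// GetEncodedRMWOperation, GetEncodedOrdering, and GetEncodedSynchScope in
+// BitcodeWriter.cpp; e.g. GetDecodedOrdering(bitc::ORDERING_ACQUIRE) returns
+// Acquire, which the writer encodes back to bitc::ORDERING_ACQUIRE.
+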
namespace llvm {
namespace {
/// @brief A class for maintaining the slot number definition
@@ -142,7 +180,7 @@ namespace {
void *operator new(size_t s) {
return User::operator new(s, 1);
}
- explicit ConstantPlaceHolder(const Type *Ty, LLVMContext& Context)
+ explicit ConstantPlaceHolder(Type *Ty, LLVMContext& Context)
: ConstantExpr(Ty, Instruction::UserOp1, &Op<0>(), 1) {
Op<0>() = UndefValue::get(Type::getInt32Ty(Context));
}
@@ -198,7 +236,7 @@ void BitcodeReaderValueList::AssignValue(Value *V, unsigned Idx) {
Constant *BitcodeReaderValueList::getConstantFwdRef(unsigned Idx,
- const Type *Ty) {
+ Type *Ty) {
if (Idx >= size())
resize(Idx + 1);
@@ -213,7 +251,7 @@ Constant *BitcodeReaderValueList::getConstantFwdRef(unsigned Idx,
return C;
}
-Value *BitcodeReaderValueList::getValueFwdRef(unsigned Idx, const Type *Ty) {
+Value *BitcodeReaderValueList::getValueFwdRef(unsigned Idx, Type *Ty) {
if (Idx >= size())
resize(Idx + 1);
@@ -362,7 +400,7 @@ Type *BitcodeReader::getTypeByID(unsigned ID) {
// If we have a forward reference, the only possible case is when it is to a
// named struct. Just create a placeholder for now.
- return TypeList[ID] = StructType::createNamed(Context, "");
+ return TypeList[ID] = StructType::create(Context);
}
/// FIXME: Remove in LLVM 3.1, only used by ParseOldTypeTable.
@@ -630,7 +668,7 @@ bool BitcodeReader::ParseTypeTableBody() {
Res->setName(TypeName);
TypeList[NumRecords] = 0;
} else // Otherwise, create a new struct.
- Res = StructType::createNamed(Context, TypeName);
+ Res = StructType::create(Context, TypeName);
TypeName.clear();
SmallVector<Type*, 8> EltTys;
@@ -659,7 +697,7 @@ bool BitcodeReader::ParseTypeTableBody() {
Res->setName(TypeName);
TypeList[NumRecords] = 0;
} else // Otherwise, create a new struct with no body.
- Res = StructType::createNamed(Context, TypeName);
+ Res = StructType::create(Context, TypeName);
TypeName.clear();
ResultTy = Res;
break;
@@ -793,7 +831,7 @@ RestartScan:
break;
case bitc::TYPE_CODE_OPAQUE: // OPAQUE
if (NextTypeID < TypeList.size() && TypeList[NextTypeID] == 0)
- ResultTy = StructType::createNamed(Context, "");
+ ResultTy = StructType::create(Context);
break;
case bitc::TYPE_CODE_STRUCT_OLD: {// STRUCT_OLD
if (NextTypeID >= TypeList.size()) break;
@@ -804,7 +842,7 @@ RestartScan:
// Set a type.
if (TypeList[NextTypeID] == 0)
- TypeList[NextTypeID] = StructType::createNamed(Context, "");
+ TypeList[NextTypeID] = StructType::create(Context);
std::vector<Type*> EltTys;
for (unsigned i = 1, e = Record.size(); i != e; ++i) {
@@ -923,7 +961,7 @@ bool BitcodeReader::ParseOldTypeSymbolTable() {
// Only apply the type name to a struct type with no name.
if (StructType *STy = dyn_cast<StructType>(TypeList[TypeID]))
- if (!STy->isAnonymous() && !STy->hasName())
+ if (!STy->isLiteral() && !STy->hasName())
STy->setName(TypeName);
TypeName.clear();
break;
@@ -1063,7 +1101,7 @@ bool BitcodeReader::ParseMetadata() {
unsigned Size = Record.size();
SmallVector<Value*, 8> Elts;
for (unsigned i = 0; i != Size; i += 2) {
- const Type *Ty = getTypeByID(Record[i]);
+ Type *Ty = getTypeByID(Record[i]);
if (!Ty) return Error("Invalid METADATA_NODE record");
if (Ty->isMetadataTy())
Elts.push_back(MDValueList.getValueFwdRef(Record[i+1]));
@@ -1163,7 +1201,7 @@ bool BitcodeReader::ParseConstants() {
SmallVector<uint64_t, 64> Record;
// Read all the records for this value table.
- const Type *CurTy = Type::getInt32Ty(Context);
+ Type *CurTy = Type::getInt32Ty(Context);
unsigned NextCstNo = ValueList.size();
while (1) {
unsigned Code = Stream.ReadCode();
@@ -1218,7 +1256,7 @@ bool BitcodeReader::ParseConstants() {
Words[i] = DecodeSignRotatedValue(Record[i]);
V = ConstantInt::get(Context,
APInt(cast<IntegerType>(CurTy)->getBitWidth(),
- NumWords, &Words[0]));
+ Words));
break;
}
case bitc::CST_CODE_FLOAT: { // FLOAT: [fpval]
@@ -1233,11 +1271,11 @@ bool BitcodeReader::ParseConstants() {
uint64_t Rearrange[2];
Rearrange[0] = (Record[1] & 0xffffLL) | (Record[0] << 16);
Rearrange[1] = Record[0] >> 48;
- V = ConstantFP::get(Context, APFloat(APInt(80, 2, Rearrange)));
+ V = ConstantFP::get(Context, APFloat(APInt(80, Rearrange)));
} else if (CurTy->isFP128Ty())
- V = ConstantFP::get(Context, APFloat(APInt(128, 2, &Record[0]), true));
+ V = ConstantFP::get(Context, APFloat(APInt(128, Record), true));
else if (CurTy->isPPC_FP128Ty())
- V = ConstantFP::get(Context, APFloat(APInt(128, 2, &Record[0])));
+ V = ConstantFP::get(Context, APFloat(APInt(128, Record)));
else
V = UndefValue::get(CurTy);
break;
@@ -1250,18 +1288,18 @@ bool BitcodeReader::ParseConstants() {
unsigned Size = Record.size();
std::vector<Constant*> Elts;
- if (const StructType *STy = dyn_cast<StructType>(CurTy)) {
+ if (StructType *STy = dyn_cast<StructType>(CurTy)) {
for (unsigned i = 0; i != Size; ++i)
Elts.push_back(ValueList.getConstantFwdRef(Record[i],
STy->getElementType(i)));
V = ConstantStruct::get(STy, Elts);
- } else if (const ArrayType *ATy = dyn_cast<ArrayType>(CurTy)) {
- const Type *EltTy = ATy->getElementType();
+ } else if (ArrayType *ATy = dyn_cast<ArrayType>(CurTy)) {
+ Type *EltTy = ATy->getElementType();
for (unsigned i = 0; i != Size; ++i)
Elts.push_back(ValueList.getConstantFwdRef(Record[i], EltTy));
V = ConstantArray::get(ATy, Elts);
- } else if (const VectorType *VTy = dyn_cast<VectorType>(CurTy)) {
- const Type *EltTy = VTy->getElementType();
+ } else if (VectorType *VTy = dyn_cast<VectorType>(CurTy)) {
+ Type *EltTy = VTy->getElementType();
for (unsigned i = 0; i != Size; ++i)
Elts.push_back(ValueList.getConstantFwdRef(Record[i], EltTy));
V = ConstantVector::get(Elts);
@@ -1274,8 +1312,8 @@ bool BitcodeReader::ParseConstants() {
if (Record.empty())
return Error("Invalid CST_AGGREGATE record");
- const ArrayType *ATy = cast<ArrayType>(CurTy);
- const Type *EltTy = ATy->getElementType();
+ ArrayType *ATy = cast<ArrayType>(CurTy);
+ Type *EltTy = ATy->getElementType();
unsigned Size = Record.size();
std::vector<Constant*> Elts;
@@ -1288,8 +1326,8 @@ bool BitcodeReader::ParseConstants() {
if (Record.empty())
return Error("Invalid CST_AGGREGATE record");
- const ArrayType *ATy = cast<ArrayType>(CurTy);
- const Type *EltTy = ATy->getElementType();
+ ArrayType *ATy = cast<ArrayType>(CurTy);
+ Type *EltTy = ATy->getElementType();
unsigned Size = Record.size();
std::vector<Constant*> Elts;
@@ -1335,7 +1373,7 @@ bool BitcodeReader::ParseConstants() {
if (Opc < 0) {
V = UndefValue::get(CurTy); // Unknown cast.
} else {
- const Type *OpTy = getTypeByID(Record[1]);
+ Type *OpTy = getTypeByID(Record[1]);
if (!OpTy) return Error("Invalid CE_CAST record");
Constant *Op = ValueList.getConstantFwdRef(Record[2], OpTy);
V = ConstantExpr::getCast(Opc, Op, CurTy);
@@ -1347,16 +1385,14 @@ bool BitcodeReader::ParseConstants() {
if (Record.size() & 1) return Error("Invalid CE_GEP record");
SmallVector<Constant*, 16> Elts;
for (unsigned i = 0, e = Record.size(); i != e; i += 2) {
- const Type *ElTy = getTypeByID(Record[i]);
+ Type *ElTy = getTypeByID(Record[i]);
if (!ElTy) return Error("Invalid CE_GEP record");
Elts.push_back(ValueList.getConstantFwdRef(Record[i+1], ElTy));
}
- if (BitCode == bitc::CST_CODE_CE_INBOUNDS_GEP)
- V = ConstantExpr::getInBoundsGetElementPtr(Elts[0], &Elts[1],
- Elts.size()-1);
- else
- V = ConstantExpr::getGetElementPtr(Elts[0], &Elts[1],
- Elts.size()-1);
+ ArrayRef<Constant *> Indices(Elts.begin() + 1, Elts.end());
+ V = ConstantExpr::getGetElementPtr(Elts[0], Indices,
+ BitCode ==
+ bitc::CST_CODE_CE_INBOUNDS_GEP);
break;
}
case bitc::CST_CODE_CE_SELECT: // CE_SELECT: [opval#, opval#, opval#]
@@ -1368,7 +1404,7 @@ bool BitcodeReader::ParseConstants() {
break;
case bitc::CST_CODE_CE_EXTRACTELT: { // CE_EXTRACTELT: [opty, opval, opval]
if (Record.size() < 3) return Error("Invalid CE_EXTRACTELT record");
- const VectorType *OpTy =
+ VectorType *OpTy =
dyn_cast_or_null<VectorType>(getTypeByID(Record[0]));
if (OpTy == 0) return Error("Invalid CE_EXTRACTELT record");
Constant *Op0 = ValueList.getConstantFwdRef(Record[1], OpTy);
@@ -1377,7 +1413,7 @@ bool BitcodeReader::ParseConstants() {
break;
}
case bitc::CST_CODE_CE_INSERTELT: { // CE_INSERTELT: [opval, opval, opval]
- const VectorType *OpTy = dyn_cast<VectorType>(CurTy);
+ VectorType *OpTy = dyn_cast<VectorType>(CurTy);
if (Record.size() < 3 || OpTy == 0)
return Error("Invalid CE_INSERTELT record");
Constant *Op0 = ValueList.getConstantFwdRef(Record[0], OpTy);
@@ -1388,26 +1424,26 @@ bool BitcodeReader::ParseConstants() {
break;
}
case bitc::CST_CODE_CE_SHUFFLEVEC: { // CE_SHUFFLEVEC: [opval, opval, opval]
- const VectorType *OpTy = dyn_cast<VectorType>(CurTy);
+ VectorType *OpTy = dyn_cast<VectorType>(CurTy);
if (Record.size() < 3 || OpTy == 0)
return Error("Invalid CE_SHUFFLEVEC record");
Constant *Op0 = ValueList.getConstantFwdRef(Record[0], OpTy);
Constant *Op1 = ValueList.getConstantFwdRef(Record[1], OpTy);
- const Type *ShufTy = VectorType::get(Type::getInt32Ty(Context),
+ Type *ShufTy = VectorType::get(Type::getInt32Ty(Context),
OpTy->getNumElements());
Constant *Op2 = ValueList.getConstantFwdRef(Record[2], ShufTy);
V = ConstantExpr::getShuffleVector(Op0, Op1, Op2);
break;
}
case bitc::CST_CODE_CE_SHUFVEC_EX: { // [opty, opval, opval, opval]
- const VectorType *RTy = dyn_cast<VectorType>(CurTy);
- const VectorType *OpTy =
+ VectorType *RTy = dyn_cast<VectorType>(CurTy);
+ VectorType *OpTy =
dyn_cast_or_null<VectorType>(getTypeByID(Record[0]));
if (Record.size() < 4 || RTy == 0 || OpTy == 0)
return Error("Invalid CE_SHUFVEC_EX record");
Constant *Op0 = ValueList.getConstantFwdRef(Record[1], OpTy);
Constant *Op1 = ValueList.getConstantFwdRef(Record[2], OpTy);
- const Type *ShufTy = VectorType::get(Type::getInt32Ty(Context),
+ Type *ShufTy = VectorType::get(Type::getInt32Ty(Context),
RTy->getNumElements());
Constant *Op2 = ValueList.getConstantFwdRef(Record[3], ShufTy);
V = ConstantExpr::getShuffleVector(Op0, Op1, Op2);
@@ -1415,7 +1451,7 @@ bool BitcodeReader::ParseConstants() {
}
case bitc::CST_CODE_CE_CMP: { // CE_CMP: [opty, opval, opval, pred]
if (Record.size() < 4) return Error("Invalid CE_CMP record");
- const Type *OpTy = getTypeByID(Record[0]);
+ Type *OpTy = getTypeByID(Record[0]);
if (OpTy == 0) return Error("Invalid CE_CMP record");
Constant *Op0 = ValueList.getConstantFwdRef(Record[1], OpTy);
Constant *Op1 = ValueList.getConstantFwdRef(Record[2], OpTy);
@@ -1442,14 +1478,14 @@ bool BitcodeReader::ParseConstants() {
AsmStr += (char)Record[2+i];
for (unsigned i = 0; i != ConstStrSize; ++i)
ConstrStr += (char)Record[3+AsmStrSize+i];
- const PointerType *PTy = cast<PointerType>(CurTy);
+ PointerType *PTy = cast<PointerType>(CurTy);
V = InlineAsm::get(cast<FunctionType>(PTy->getElementType()),
AsmStr, ConstrStr, HasSideEffects, IsAlignStack);
break;
}
case bitc::CST_CODE_BLOCKADDRESS:{
if (Record.size() < 3) return Error("Invalid CE_BLOCKADDRESS record");
- const Type *FnTy = getTypeByID(Record[0]);
+ Type *FnTy = getTypeByID(Record[0]);
if (FnTy == 0) return Error("Invalid CE_BLOCKADDRESS record");
Function *Fn =
dyn_cast_or_null<Function>(ValueList.getConstantFwdRef(Record[1],FnTy));
@@ -1662,7 +1698,7 @@ bool BitcodeReader::ParseModule() {
case bitc::MODULE_CODE_GLOBALVAR: {
if (Record.size() < 6)
return Error("Invalid MODULE_CODE_GLOBALVAR record");
- const Type *Ty = getTypeByID(Record[0]);
+ Type *Ty = getTypeByID(Record[0]);
if (!Ty) return Error("Invalid MODULE_CODE_GLOBALVAR record");
if (!Ty->isPointerTy())
return Error("Global not a pointer type!");
@@ -1711,11 +1747,11 @@ bool BitcodeReader::ParseModule() {
case bitc::MODULE_CODE_FUNCTION: {
if (Record.size() < 8)
return Error("Invalid MODULE_CODE_FUNCTION record");
- const Type *Ty = getTypeByID(Record[0]);
+ Type *Ty = getTypeByID(Record[0]);
if (!Ty) return Error("Invalid MODULE_CODE_FUNCTION record");
if (!Ty->isPointerTy())
return Error("Function not a pointer type!");
- const FunctionType *FTy =
+ FunctionType *FTy =
dyn_cast<FunctionType>(cast<PointerType>(Ty)->getElementType());
if (!FTy)
return Error("Function not a pointer to function type!");
@@ -1757,7 +1793,7 @@ bool BitcodeReader::ParseModule() {
case bitc::MODULE_CODE_ALIAS: {
if (Record.size() < 3)
return Error("Invalid MODULE_ALIAS record");
- const Type *Ty = getTypeByID(Record[0]);
+ Type *Ty = getTypeByID(Record[0]);
if (!Ty) return Error("Invalid MODULE_ALIAS record");
if (!Ty->isPointerTy())
return Error("Function not a pointer type!");
@@ -1823,9 +1859,9 @@ bool BitcodeReader::ParseBitcodeInto(Module *M) {
if (Code != bitc::ENTER_SUBBLOCK) {
- // The ranlib in xcode 4 will align archive members by appending newlines to the
- // end of them. If this file size is a multiple of 4 but not 8, we have to read and
- // ignore these final 4 bytes :-(
+      // The ranlib in Xcode 4 will align archive members by appending newlines
+      // to the end of them. If the file size is a multiple of 4 but not 8, we
+      // have to read and ignore these final 4 bytes :-(
if (Stream.GetAbbrevIDWidth() == 2 && Code == 2 &&
Stream.Read(6) == 2 && Stream.Read(24) == 0xa0a0a &&
Stream.AtEndOfStream())
@@ -2160,7 +2196,7 @@ bool BitcodeReader::ParseFunctionBody(Function *F) {
OpNum+2 != Record.size())
return Error("Invalid CAST record");
- const Type *ResTy = getTypeByID(Record[OpNum]);
+ Type *ResTy = getTypeByID(Record[OpNum]);
int Opc = GetDecodedCastOpcode(Record[OpNum+1]);
if (Opc == -1 || ResTy == 0)
return Error("Invalid CAST record");
@@ -2183,7 +2219,7 @@ bool BitcodeReader::ParseFunctionBody(Function *F) {
GEPIdx.push_back(Op);
}
- I = GetElementPtrInst::Create(BasePtr, GEPIdx.begin(), GEPIdx.end());
+ I = GetElementPtrInst::Create(BasePtr, GEPIdx);
InstructionList.push_back(I);
if (BitCode == bitc::FUNC_CODE_INST_INBOUNDS_GEP)
cast<GetElementPtrInst>(I)->setIsInBounds(true);
@@ -2261,8 +2297,8 @@ bool BitcodeReader::ParseFunctionBody(Function *F) {
return Error("Invalid SELECT record");
// select condition can be either i1 or [N x i1]
- if (const VectorType* vector_type =
- dyn_cast<const VectorType>(Cond->getType())) {
+ if (VectorType* vector_type =
+ dyn_cast<VectorType>(Cond->getType())) {
// expect <n x i1>
if (vector_type->getElementType() != Type::getInt1Ty(Context))
return Error("Invalid SELECT condition type");
@@ -2381,7 +2417,7 @@ bool BitcodeReader::ParseFunctionBody(Function *F) {
case bitc::FUNC_CODE_INST_SWITCH: { // SWITCH: [opty, op0, op1, ...]
if (Record.size() < 3 || (Record.size() & 1) == 0)
return Error("Invalid SWITCH record");
- const Type *OpTy = getTypeByID(Record[0]);
+ Type *OpTy = getTypeByID(Record[0]);
Value *Cond = getFnValueByID(Record[1], OpTy);
BasicBlock *Default = getBasicBlock(Record[2]);
if (OpTy == 0 || Cond == 0 || Default == 0)
@@ -2405,7 +2441,7 @@ bool BitcodeReader::ParseFunctionBody(Function *F) {
case bitc::FUNC_CODE_INST_INDIRECTBR: { // INDIRECTBR: [opty, op0, op1, ...]
if (Record.size() < 2)
return Error("Invalid INDIRECTBR record");
- const Type *OpTy = getTypeByID(Record[0]);
+ Type *OpTy = getTypeByID(Record[0]);
Value *Address = getFnValueByID(Record[1], OpTy);
if (OpTy == 0 || Address == 0)
return Error("Invalid INDIRECTBR record");
@@ -2437,8 +2473,8 @@ bool BitcodeReader::ParseFunctionBody(Function *F) {
if (getValueTypePair(Record, OpNum, NextValueNo, Callee))
return Error("Invalid INVOKE record");
- const PointerType *CalleeTy = dyn_cast<PointerType>(Callee->getType());
- const FunctionType *FTy = !CalleeTy ? 0 :
+ PointerType *CalleeTy = dyn_cast<PointerType>(Callee->getType());
+ FunctionType *FTy = !CalleeTy ? 0 :
dyn_cast<FunctionType>(CalleeTy->getElementType());
// Check that the right number of fixed parameters are here.
@@ -2472,6 +2508,15 @@ bool BitcodeReader::ParseFunctionBody(Function *F) {
cast<InvokeInst>(I)->setAttributes(PAL);
break;
}
+ case bitc::FUNC_CODE_INST_RESUME: { // RESUME: [opval]
+ unsigned Idx = 0;
+ Value *Val = 0;
+ if (getValueTypePair(Record, Idx, NextValueNo, Val))
+ return Error("Invalid RESUME record");
+ I = ResumeInst::Create(Val);
+ InstructionList.push_back(I);
+ break;
+ }
case bitc::FUNC_CODE_INST_UNWIND: // UNWIND
I = new UnwindInst(Context);
InstructionList.push_back(I);
@@ -2483,7 +2528,7 @@ bool BitcodeReader::ParseFunctionBody(Function *F) {
case bitc::FUNC_CODE_INST_PHI: { // PHI: [ty, val0,bb0, ...]
if (Record.size() < 1 || ((Record.size()-1)&1))
return Error("Invalid PHI record");
- const Type *Ty = getTypeByID(Record[0]);
+ Type *Ty = getTypeByID(Record[0]);
if (!Ty) return Error("Invalid PHI record");
PHINode *PN = PHINode::Create(Ty, (Record.size()-1)/2);
@@ -2499,12 +2544,51 @@ bool BitcodeReader::ParseFunctionBody(Function *F) {
break;
}
+ case bitc::FUNC_CODE_INST_LANDINGPAD: {
+ // LANDINGPAD: [ty, val, val, num, (id0,val0 ...)?]
+ unsigned Idx = 0;
+ if (Record.size() < 4)
+ return Error("Invalid LANDINGPAD record");
+ Type *Ty = getTypeByID(Record[Idx++]);
+ if (!Ty) return Error("Invalid LANDINGPAD record");
+ Value *PersFn = 0;
+ if (getValueTypePair(Record, Idx, NextValueNo, PersFn))
+ return Error("Invalid LANDINGPAD record");
+
+ bool IsCleanup = !!Record[Idx++];
+ unsigned NumClauses = Record[Idx++];
+ LandingPadInst *LP = LandingPadInst::Create(Ty, PersFn, NumClauses);
+ LP->setCleanup(IsCleanup);
+ for (unsigned J = 0; J != NumClauses; ++J) {
+ LandingPadInst::ClauseType CT =
+ LandingPadInst::ClauseType(Record[Idx++]); (void)CT;
+ Value *Val;
+
+ if (getValueTypePair(Record, Idx, NextValueNo, Val)) {
+ delete LP;
+ return Error("Invalid LANDINGPAD record");
+ }
+
+      assert((CT != LandingPadInst::Catch ||
+              !isa<ArrayType>(Val->getType())) &&
+             "Catch clause has an invalid type!");
+      assert((CT != LandingPadInst::Filter ||
+              isa<ArrayType>(Val->getType())) &&
+             "Filter clause has an invalid type!");
+ LP->addClause(Val);
+ }
+
+ I = LP;
+ InstructionList.push_back(I);
+ break;
+ }
+
case bitc::FUNC_CODE_INST_ALLOCA: { // ALLOCA: [instty, opty, op, align]
if (Record.size() != 4)
return Error("Invalid ALLOCA record");
- const PointerType *Ty =
+ PointerType *Ty =
dyn_cast_or_null<PointerType>(getTypeByID(Record[0]));
- const Type *OpTy = getTypeByID(Record[1]);
+ Type *OpTy = getTypeByID(Record[1]);
Value *Size = getFnValueByID(Record[2], OpTy);
unsigned Align = Record[3];
if (!Ty || !Size) return Error("Invalid ALLOCA record");
@@ -2523,6 +2607,28 @@ bool BitcodeReader::ParseFunctionBody(Function *F) {
InstructionList.push_back(I);
break;
}
+ case bitc::FUNC_CODE_INST_LOADATOMIC: {
+ // LOADATOMIC: [opty, op, align, vol, ordering, synchscope]
+ unsigned OpNum = 0;
+ Value *Op;
+ if (getValueTypePair(Record, OpNum, NextValueNo, Op) ||
+ OpNum+4 != Record.size())
+ return Error("Invalid LOADATOMIC record");
+
+ AtomicOrdering Ordering = GetDecodedOrdering(Record[OpNum+2]);
+ if (Ordering == NotAtomic || Ordering == Release ||
+ Ordering == AcquireRelease)
+ return Error("Invalid LOADATOMIC record");
+ if (Ordering != NotAtomic && Record[OpNum] == 0)
+ return Error("Invalid LOADATOMIC record");
+ SynchronizationScope SynchScope = GetDecodedSynchScope(Record[OpNum+3]);
+
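+        // Alignment is encoded as log2(alignment)+1, with 0 meaning
+        // "unspecified"; (1 << A) >> 1 inverts that encoding.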
+ I = new LoadInst(Op, "", Record[OpNum+1], (1 << Record[OpNum]) >> 1,
+ Ordering, SynchScope);
+ InstructionList.push_back(I);
+ break;
+ }
case bitc::FUNC_CODE_INST_STORE: { // STORE2:[ptrty, ptr, val, align, vol]
unsigned OpNum = 0;
Value *Val, *Ptr;
@@ -2536,6 +2642,83 @@ bool BitcodeReader::ParseFunctionBody(Function *F) {
InstructionList.push_back(I);
break;
}
+ case bitc::FUNC_CODE_INST_STOREATOMIC: {
+ // STOREATOMIC: [ptrty, ptr, val, align, vol, ordering, synchscope]
+ unsigned OpNum = 0;
+ Value *Val, *Ptr;
+ if (getValueTypePair(Record, OpNum, NextValueNo, Ptr) ||
+ getValue(Record, OpNum,
+ cast<PointerType>(Ptr->getType())->getElementType(), Val) ||
+ OpNum+4 != Record.size())
+ return Error("Invalid STOREATOMIC record");
+
+ AtomicOrdering Ordering = GetDecodedOrdering(Record[OpNum+2]);
+ if (Ordering == NotAtomic || Ordering == Acquire ||
+ Ordering == AcquireRelease)
+ return Error("Invalid STOREATOMIC record");
+ SynchronizationScope SynchScope = GetDecodedSynchScope(Record[OpNum+3]);
+ if (Ordering != NotAtomic && Record[OpNum] == 0)
+ return Error("Invalid STOREATOMIC record");
+
+ I = new StoreInst(Val, Ptr, Record[OpNum+1], (1 << Record[OpNum]) >> 1,
+ Ordering, SynchScope);
+ InstructionList.push_back(I);
+ break;
+ }
+ case bitc::FUNC_CODE_INST_CMPXCHG: {
+ // CMPXCHG:[ptrty, ptr, cmp, new, vol, ordering, synchscope]
+ unsigned OpNum = 0;
+ Value *Ptr, *Cmp, *New;
+ if (getValueTypePair(Record, OpNum, NextValueNo, Ptr) ||
+ getValue(Record, OpNum,
+ cast<PointerType>(Ptr->getType())->getElementType(), Cmp) ||
+ getValue(Record, OpNum,
+ cast<PointerType>(Ptr->getType())->getElementType(), New) ||
+ OpNum+3 != Record.size())
+ return Error("Invalid CMPXCHG record");
+ AtomicOrdering Ordering = GetDecodedOrdering(Record[OpNum+1]);
+ if (Ordering == NotAtomic || Ordering == Unordered)
+ return Error("Invalid CMPXCHG record");
+ SynchronizationScope SynchScope = GetDecodedSynchScope(Record[OpNum+2]);
+ I = new AtomicCmpXchgInst(Ptr, Cmp, New, Ordering, SynchScope);
+ cast<AtomicCmpXchgInst>(I)->setVolatile(Record[OpNum]);
+ InstructionList.push_back(I);
+ break;
+ }
+ case bitc::FUNC_CODE_INST_ATOMICRMW: {
+ // ATOMICRMW:[ptrty, ptr, val, op, vol, ordering, synchscope]
+ unsigned OpNum = 0;
+ Value *Ptr, *Val;
+ if (getValueTypePair(Record, OpNum, NextValueNo, Ptr) ||
+ getValue(Record, OpNum,
+ cast<PointerType>(Ptr->getType())->getElementType(), Val) ||
+ OpNum+4 != Record.size())
+ return Error("Invalid ATOMICRMW record");
+ AtomicRMWInst::BinOp Operation = GetDecodedRMWOperation(Record[OpNum]);
+ if (Operation < AtomicRMWInst::FIRST_BINOP ||
+ Operation > AtomicRMWInst::LAST_BINOP)
+ return Error("Invalid ATOMICRMW record");
+ AtomicOrdering Ordering = GetDecodedOrdering(Record[OpNum+2]);
+ if (Ordering == NotAtomic || Ordering == Unordered)
+ return Error("Invalid ATOMICRMW record");
+ SynchronizationScope SynchScope = GetDecodedSynchScope(Record[OpNum+3]);
+ I = new AtomicRMWInst(Operation, Ptr, Val, Ordering, SynchScope);
+ cast<AtomicRMWInst>(I)->setVolatile(Record[OpNum+1]);
+ InstructionList.push_back(I);
+ break;
+ }
+ case bitc::FUNC_CODE_INST_FENCE: { // FENCE:[ordering, synchscope]
+ if (2 != Record.size())
+ return Error("Invalid FENCE record");
+ AtomicOrdering Ordering = GetDecodedOrdering(Record[0]);
+ if (Ordering == NotAtomic || Ordering == Unordered ||
+ Ordering == Monotonic)
+ return Error("Invalid FENCE record");
+ SynchronizationScope SynchScope = GetDecodedSynchScope(Record[1]);
+ I = new FenceInst(Context, Ordering, SynchScope);
+ InstructionList.push_back(I);
+ break;
+ }
case bitc::FUNC_CODE_INST_CALL: {
// CALL: [paramattrs, cc, fnty, fnid, arg0, arg1...]
if (Record.size() < 3)
@@ -2549,8 +2732,8 @@ bool BitcodeReader::ParseFunctionBody(Function *F) {
if (getValueTypePair(Record, OpNum, NextValueNo, Callee))
return Error("Invalid CALL record");
- const PointerType *OpTy = dyn_cast<PointerType>(Callee->getType());
- const FunctionType *FTy = 0;
+ PointerType *OpTy = dyn_cast<PointerType>(Callee->getType());
+ FunctionType *FTy = 0;
if (OpTy) FTy = dyn_cast<FunctionType>(OpTy->getElementType());
if (!FTy || Record.size() < FTy->getNumParams()+OpNum)
return Error("Invalid CALL record");
@@ -2589,9 +2772,9 @@ bool BitcodeReader::ParseFunctionBody(Function *F) {
case bitc::FUNC_CODE_INST_VAARG: { // VAARG: [valistty, valist, instty]
if (Record.size() < 3)
return Error("Invalid VAARG record");
- const Type *OpTy = getTypeByID(Record[0]);
+ Type *OpTy = getTypeByID(Record[0]);
Value *Op = getFnValueByID(Record[1], OpTy);
- const Type *ResTy = getTypeByID(Record[2]);
+ Type *ResTy = getTypeByID(Record[2]);
if (!OpTy || !Op || !ResTy)
return Error("Invalid VAARG record");
I = new VAArgInst(Op, ResTy);
@@ -2756,6 +2939,9 @@ bool BitcodeReader::MaterializeModule(Module *M, std::string *ErrInfo) {
}
std::vector<std::pair<Function*, Function*> >().swap(UpgradedIntrinsics);
+ // Upgrade to new EH scheme. N.B. This will go away in 3.1.
+ UpgradeExceptionHandling(M);
+
// Check debug info intrinsics.
CheckDebugInfoIntrinsics(TheModule);
diff --git a/lib/Bitcode/Reader/BitcodeReader.h b/lib/Bitcode/Reader/BitcodeReader.h
index 1b3bf1a..6e6118c 100644
--- a/lib/Bitcode/Reader/BitcodeReader.h
+++ b/lib/Bitcode/Reader/BitcodeReader.h
@@ -76,8 +76,8 @@ public:
ValuePtrs.resize(N);
}
- Constant *getConstantFwdRef(unsigned Idx, const Type *Ty);
- Value *getValueFwdRef(unsigned Idx, const Type *Ty);
+ Constant *getConstantFwdRef(unsigned Idx, Type *Ty);
+ Value *getValueFwdRef(unsigned Idx, Type *Ty);
void AssignValue(Value *V, unsigned Idx);
@@ -212,7 +212,7 @@ public:
private:
Type *getTypeByID(unsigned ID);
Type *getTypeByIDOrNull(unsigned ID);
- Value *getFnValueByID(unsigned ID, const Type *Ty) {
+ Value *getFnValueByID(unsigned ID, Type *Ty) {
if (Ty && Ty->isMetadataTy())
return MDValueList.getValueFwdRef(ID);
return ValueList.getValueFwdRef(ID, Ty);
@@ -248,7 +248,7 @@ private:
return ResVal == 0;
}
bool getValue(SmallVector<uint64_t, 64> &Record, unsigned &Slot,
- const Type *Ty, Value *&ResVal) {
+ Type *Ty, Value *&ResVal) {
if (Slot == Record.size()) return true;
unsigned ValNo = (unsigned)Record[Slot++];
ResVal = getFnValueByID(ValNo, Ty);
diff --git a/lib/Bitcode/Reader/CMakeLists.txt b/lib/Bitcode/Reader/CMakeLists.txt
index 693d431..37bebc4 100644
--- a/lib/Bitcode/Reader/CMakeLists.txt
+++ b/lib/Bitcode/Reader/CMakeLists.txt
@@ -2,3 +2,8 @@ add_llvm_library(LLVMBitReader
BitReader.cpp
BitcodeReader.cpp
)
+
+add_llvm_library_dependencies(LLVMBitReader
+ LLVMCore
+ LLVMSupport
+ )
diff --git a/lib/Bitcode/Writer/BitcodeWriter.cpp b/lib/Bitcode/Writer/BitcodeWriter.cpp
index 85d67ce..5b3d969 100644
--- a/lib/Bitcode/Writer/BitcodeWriter.cpp
+++ b/lib/Bitcode/Writer/BitcodeWriter.cpp
@@ -58,7 +58,6 @@ enum {
FUNCTION_INST_UNREACHABLE_ABBREV
};
-
static unsigned GetEncodedCastOpcode(unsigned Opcode) {
switch (Opcode) {
default: llvm_unreachable("Unknown cast instruction!");
@@ -101,6 +100,44 @@ static unsigned GetEncodedBinaryOpcode(unsigned Opcode) {
}
}
+static unsigned GetEncodedRMWOperation(AtomicRMWInst::BinOp Op) {
+ switch (Op) {
+ default: llvm_unreachable("Unknown RMW operation!");
+ case AtomicRMWInst::Xchg: return bitc::RMW_XCHG;
+ case AtomicRMWInst::Add: return bitc::RMW_ADD;
+ case AtomicRMWInst::Sub: return bitc::RMW_SUB;
+ case AtomicRMWInst::And: return bitc::RMW_AND;
+ case AtomicRMWInst::Nand: return bitc::RMW_NAND;
+ case AtomicRMWInst::Or: return bitc::RMW_OR;
+ case AtomicRMWInst::Xor: return bitc::RMW_XOR;
+ case AtomicRMWInst::Max: return bitc::RMW_MAX;
+ case AtomicRMWInst::Min: return bitc::RMW_MIN;
+ case AtomicRMWInst::UMax: return bitc::RMW_UMAX;
+ case AtomicRMWInst::UMin: return bitc::RMW_UMIN;
+ }
+}
+
+static unsigned GetEncodedOrdering(AtomicOrdering Ordering) {
+ switch (Ordering) {
+ default: llvm_unreachable("Unknown atomic ordering");
+ case NotAtomic: return bitc::ORDERING_NOTATOMIC;
+ case Unordered: return bitc::ORDERING_UNORDERED;
+ case Monotonic: return bitc::ORDERING_MONOTONIC;
+ case Acquire: return bitc::ORDERING_ACQUIRE;
+ case Release: return bitc::ORDERING_RELEASE;
+ case AcquireRelease: return bitc::ORDERING_ACQREL;
+ case SequentiallyConsistent: return bitc::ORDERING_SEQCST;
+ }
+}
+
+static unsigned GetEncodedSynchScope(SynchronizationScope SynchScope) {
+ switch (SynchScope) {
+ default: llvm_unreachable("Unknown synchronization scope");
+ case SingleThread: return bitc::SYNCHSCOPE_SINGLETHREAD;
+ case CrossThread: return bitc::SYNCHSCOPE_CROSSTHREAD;
+ }
+}
+
static void WriteStringRecord(unsigned Code, StringRef Str,
unsigned AbbrevToUse, BitstreamWriter &Stream) {
SmallVector<unsigned, 64> Vals;
@@ -199,7 +236,6 @@ static void WriteTypeTable(const ValueEnumerator &VE, BitstreamWriter &Stream) {
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed,
Log2_32_Ceil(VE.getTypes().size()+1)));
unsigned StructNamedAbbrev = Stream.EmitAbbrev(Abbv);
-
// Abbrev for TYPE_CODE_ARRAY.
Abbv = new BitCodeAbbrev();
@@ -216,7 +252,7 @@ static void WriteTypeTable(const ValueEnumerator &VE, BitstreamWriter &Stream) {
// Loop over all of the types, emitting each in turn.
for (unsigned i = 0, e = TypeList.size(); i != e; ++i) {
- const Type *T = TypeList[i];
+ Type *T = TypeList[i];
int AbbrevToUse = 0;
unsigned Code = 0;
@@ -237,7 +273,7 @@ static void WriteTypeTable(const ValueEnumerator &VE, BitstreamWriter &Stream) {
TypeVals.push_back(cast<IntegerType>(T)->getBitWidth());
break;
case Type::PointerTyID: {
- const PointerType *PTy = cast<PointerType>(T);
+ PointerType *PTy = cast<PointerType>(T);
// POINTER: [pointee type, address space]
Code = bitc::TYPE_CODE_POINTER;
TypeVals.push_back(VE.getTypeID(PTy->getElementType()));
@@ -247,7 +283,7 @@ static void WriteTypeTable(const ValueEnumerator &VE, BitstreamWriter &Stream) {
break;
}
case Type::FunctionTyID: {
- const FunctionType *FT = cast<FunctionType>(T);
+ FunctionType *FT = cast<FunctionType>(T);
// FUNCTION: [isvararg, attrid, retty, paramty x N]
Code = bitc::TYPE_CODE_FUNCTION;
TypeVals.push_back(FT->isVarArg());
@@ -259,7 +295,7 @@ static void WriteTypeTable(const ValueEnumerator &VE, BitstreamWriter &Stream) {
break;
}
case Type::StructTyID: {
- const StructType *ST = cast<StructType>(T);
+ StructType *ST = cast<StructType>(T);
// STRUCT: [ispacked, eltty x N]
TypeVals.push_back(ST->isPacked());
// Output all of the element types.
@@ -267,7 +303,7 @@ static void WriteTypeTable(const ValueEnumerator &VE, BitstreamWriter &Stream) {
E = ST->element_end(); I != E; ++I)
TypeVals.push_back(VE.getTypeID(*I));
- if (ST->isAnonymous()) {
+ if (ST->isLiteral()) {
Code = bitc::TYPE_CODE_STRUCT_ANON;
AbbrevToUse = StructAnonAbbrev;
} else {
@@ -286,7 +322,7 @@ static void WriteTypeTable(const ValueEnumerator &VE, BitstreamWriter &Stream) {
break;
}
case Type::ArrayTyID: {
- const ArrayType *AT = cast<ArrayType>(T);
+ ArrayType *AT = cast<ArrayType>(T);
// ARRAY: [numelts, eltty]
Code = bitc::TYPE_CODE_ARRAY;
TypeVals.push_back(AT->getNumElements());
@@ -295,7 +331,7 @@ static void WriteTypeTable(const ValueEnumerator &VE, BitstreamWriter &Stream) {
break;
}
case Type::VectorTyID: {
- const VectorType *VT = cast<VectorType>(T);
+ VectorType *VT = cast<VectorType>(T);
// VECTOR [numelts, eltty]
Code = bitc::TYPE_CODE_VECTOR;
TypeVals.push_back(VT->getNumElements());
@@ -372,14 +408,15 @@ static void WriteModuleInfo(const Module *M, const ValueEnumerator &VE,
GV != E; ++GV) {
MaxAlignment = std::max(MaxAlignment, GV->getAlignment());
MaxGlobalType = std::max(MaxGlobalType, VE.getTypeID(GV->getType()));
-
- if (!GV->hasSection()) continue;
- // Give section names unique ID's.
- unsigned &Entry = SectionMap[GV->getSection()];
- if (Entry != 0) continue;
- WriteStringRecord(bitc::MODULE_CODE_SECTIONNAME, GV->getSection(),
- 0/*TODO*/, Stream);
- Entry = SectionMap.size();
+ if (GV->hasSection()) {
+ // Give section names unique ID's.
+ unsigned &Entry = SectionMap[GV->getSection()];
+ if (!Entry) {
+ WriteStringRecord(bitc::MODULE_CODE_SECTIONNAME, GV->getSection(),
+ 0/*TODO*/, Stream);
+ Entry = SectionMap.size();
+ }
+ }
}
for (Module::const_iterator F = M->begin(), E = M->end(); F != E; ++F) {
MaxAlignment = std::max(MaxAlignment, F->getAlignment());
@@ -716,7 +753,7 @@ static void WriteConstants(unsigned FirstVal, unsigned LastVal,
SmallVector<uint64_t, 64> Record;
const ValueEnumerator::ValueList &Vals = VE.getValues();
- const Type *LastTy = 0;
+ Type *LastTy = 0;
for (unsigned i = FirstVal; i != LastVal; ++i) {
const Value *V = Vals[i].first;
// If we need to switch types, do so now.
@@ -781,7 +818,7 @@ static void WriteConstants(unsigned FirstVal, unsigned LastVal,
}
} else if (const ConstantFP *CFP = dyn_cast<ConstantFP>(C)) {
Code = bitc::CST_CODE_FLOAT;
- const Type *Ty = CFP->getType();
+ Type *Ty = CFP->getType();
if (Ty->isFloatTy() || Ty->isDoubleTy()) {
Record.push_back(CFP->getValueAPF().bitcastToAPInt().getZExtValue());
} else if (Ty->isX86_FP80Ty()) {
@@ -1083,8 +1120,8 @@ static void WriteInstruction(const Instruction &I, unsigned InstID,
case Instruction::Invoke: {
const InvokeInst *II = cast<InvokeInst>(&I);
const Value *Callee(II->getCalledValue());
- const PointerType *PTy = cast<PointerType>(Callee->getType());
- const FunctionType *FTy = cast<FunctionType>(PTy->getElementType());
+ PointerType *PTy = cast<PointerType>(Callee->getType());
+ FunctionType *FTy = cast<FunctionType>(PTy->getElementType());
Code = bitc::FUNC_CODE_INST_INVOKE;
Vals.push_back(VE.getAttributeID(II->getAttributes()));
@@ -1105,6 +1142,10 @@ static void WriteInstruction(const Instruction &I, unsigned InstID,
}
break;
}
+ case Instruction::Resume:
+ Code = bitc::FUNC_CODE_INST_RESUME;
+ PushValueAndType(I.getOperand(0), InstID, Vals, VE);
+ break;
case Instruction::Unwind:
Code = bitc::FUNC_CODE_INST_UNWIND;
break;
@@ -1124,6 +1165,23 @@ static void WriteInstruction(const Instruction &I, unsigned InstID,
break;
}
+ case Instruction::LandingPad: {
+ const LandingPadInst &LP = cast<LandingPadInst>(I);
+ Code = bitc::FUNC_CODE_INST_LANDINGPAD;
+ Vals.push_back(VE.getTypeID(LP.getType()));
+ PushValueAndType(LP.getPersonalityFn(), InstID, Vals, VE);
+ Vals.push_back(LP.isCleanup());
+ Vals.push_back(LP.getNumClauses());
+ for (unsigned I = 0, E = LP.getNumClauses(); I != E; ++I) {
+ if (LP.isCatch(I))
+ Vals.push_back(LandingPadInst::Catch);
+ else
+ Vals.push_back(LandingPadInst::Filter);
+ PushValueAndType(LP.getClause(I), InstID, Vals, VE);
+ }
+ break;
+ }
+
case Instruction::Alloca:
Code = bitc::FUNC_CODE_INST_ALLOCA;
Vals.push_back(VE.getTypeID(I.getType()));
@@ -1133,24 +1191,66 @@ static void WriteInstruction(const Instruction &I, unsigned InstID,
break;
case Instruction::Load:
- Code = bitc::FUNC_CODE_INST_LOAD;
- if (!PushValueAndType(I.getOperand(0), InstID, Vals, VE)) // ptr
- AbbrevToUse = FUNCTION_INST_LOAD_ABBREV;
-
+ if (cast<LoadInst>(I).isAtomic()) {
+ Code = bitc::FUNC_CODE_INST_LOADATOMIC;
+ PushValueAndType(I.getOperand(0), InstID, Vals, VE);
+ } else {
+ Code = bitc::FUNC_CODE_INST_LOAD;
+ if (!PushValueAndType(I.getOperand(0), InstID, Vals, VE)) // ptr
+ AbbrevToUse = FUNCTION_INST_LOAD_ABBREV;
+ }
Vals.push_back(Log2_32(cast<LoadInst>(I).getAlignment())+1);
Vals.push_back(cast<LoadInst>(I).isVolatile());
+ if (cast<LoadInst>(I).isAtomic()) {
+ Vals.push_back(GetEncodedOrdering(cast<LoadInst>(I).getOrdering()));
+ Vals.push_back(GetEncodedSynchScope(cast<LoadInst>(I).getSynchScope()));
+ }
break;
case Instruction::Store:
- Code = bitc::FUNC_CODE_INST_STORE;
+ if (cast<StoreInst>(I).isAtomic())
+ Code = bitc::FUNC_CODE_INST_STOREATOMIC;
+ else
+ Code = bitc::FUNC_CODE_INST_STORE;
PushValueAndType(I.getOperand(1), InstID, Vals, VE); // ptrty + ptr
Vals.push_back(VE.getValueID(I.getOperand(0))); // val.
Vals.push_back(Log2_32(cast<StoreInst>(I).getAlignment())+1);
Vals.push_back(cast<StoreInst>(I).isVolatile());
+ if (cast<StoreInst>(I).isAtomic()) {
+ Vals.push_back(GetEncodedOrdering(cast<StoreInst>(I).getOrdering()));
+ Vals.push_back(GetEncodedSynchScope(cast<StoreInst>(I).getSynchScope()));
+ }
+ break;
+ case Instruction::AtomicCmpXchg:
+ Code = bitc::FUNC_CODE_INST_CMPXCHG;
+ PushValueAndType(I.getOperand(0), InstID, Vals, VE); // ptrty + ptr
+ Vals.push_back(VE.getValueID(I.getOperand(1))); // cmp.
+ Vals.push_back(VE.getValueID(I.getOperand(2))); // newval.
+ Vals.push_back(cast<AtomicCmpXchgInst>(I).isVolatile());
+ Vals.push_back(GetEncodedOrdering(
+ cast<AtomicCmpXchgInst>(I).getOrdering()));
+ Vals.push_back(GetEncodedSynchScope(
+ cast<AtomicCmpXchgInst>(I).getSynchScope()));
+ break;
+ case Instruction::AtomicRMW:
+ Code = bitc::FUNC_CODE_INST_ATOMICRMW;
+ PushValueAndType(I.getOperand(0), InstID, Vals, VE); // ptrty + ptr
+ Vals.push_back(VE.getValueID(I.getOperand(1))); // val.
+ Vals.push_back(GetEncodedRMWOperation(
+ cast<AtomicRMWInst>(I).getOperation()));
+ Vals.push_back(cast<AtomicRMWInst>(I).isVolatile());
+ Vals.push_back(GetEncodedOrdering(cast<AtomicRMWInst>(I).getOrdering()));
+ Vals.push_back(GetEncodedSynchScope(
+ cast<AtomicRMWInst>(I).getSynchScope()));
+ break;
+ case Instruction::Fence:
+ Code = bitc::FUNC_CODE_INST_FENCE;
+ Vals.push_back(GetEncodedOrdering(cast<FenceInst>(I).getOrdering()));
+ Vals.push_back(GetEncodedSynchScope(cast<FenceInst>(I).getSynchScope()));
break;
case Instruction::Call: {
const CallInst &CI = cast<CallInst>(I);
- const PointerType *PTy = cast<PointerType>(CI.getCalledValue()->getType());
- const FunctionType *FTy = cast<FunctionType>(PTy->getElementType());
+ PointerType *PTy = cast<PointerType>(CI.getCalledValue()->getType());
+ FunctionType *FTy = cast<FunctionType>(PTy->getElementType());
Code = bitc::FUNC_CODE_INST_CALL;
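For every atomic form above the pattern is the same: the record shares its leading fields with the non-atomic variant and appends the encoded memory ordering and synchronization scope as trailing fields, so readers of the plain records are unaffected. A schematic sketch for the atomic-load case, with hypothetical stand-ins for the GetEncodedOrdering/GetEncodedSynchScope helpers:

#include <cstdint>
#include <vector>

// Hypothetical stand-ins for the writer's ordering/scope encodings.
enum Ordering { Unordered = 1, Monotonic, Acquire, Release,
                AcquireRelease, SequentiallyConsistent };
enum SynchScope { SingleThread = 0, CrossThread = 1 };

// Sketch: a LOADATOMIC record is a LOAD record (ptr, alignment,
// volatile) with two extra trailing fields.
static void encodeAtomicLoad(std::vector<uint64_t> &Vals, uint64_t PtrID,
                             unsigned AlignLog2, bool Volatile,
                             Ordering O, SynchScope S) {
  Vals.push_back(PtrID);
  Vals.push_back(AlignLog2 + 1); // Log2_32(align) + 1, as above
  Vals.push_back(Volatile);
  Vals.push_back(O);             // GetEncodedOrdering would go here
  Vals.push_back(S);             // GetEncodedSynchScope would go here
}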
diff --git a/lib/Bitcode/Writer/CMakeLists.txt b/lib/Bitcode/Writer/CMakeLists.txt
index f097b09..3cf9056 100644
--- a/lib/Bitcode/Writer/CMakeLists.txt
+++ b/lib/Bitcode/Writer/CMakeLists.txt
@@ -4,3 +4,8 @@ add_llvm_library(LLVMBitWriter
BitcodeWriterPass.cpp
ValueEnumerator.cpp
)
+
+add_llvm_library_dependencies(LLVMBitWriter
+ LLVMCore
+ LLVMSupport
+ )
diff --git a/lib/Bitcode/Writer/ValueEnumerator.cpp b/lib/Bitcode/Writer/ValueEnumerator.cpp
index b68bf92..9ae9905 100644
--- a/lib/Bitcode/Writer/ValueEnumerator.cpp
+++ b/lib/Bitcode/Writer/ValueEnumerator.cpp
@@ -315,7 +315,7 @@ void ValueEnumerator::EnumerateValue(const Value *V) {
}
-void ValueEnumerator::EnumerateType(const Type *Ty) {
+void ValueEnumerator::EnumerateType(Type *Ty) {
unsigned *TypeID = &TypeMap[Ty];
// We've already seen this type.
@@ -325,8 +325,8 @@ void ValueEnumerator::EnumerateType(const Type *Ty) {
// If it is a non-anonymous struct, mark the type as being visited so that we
// don't recursively visit it. This is safe because we allow forward
// references of these in the bitcode reader.
- if (const StructType *STy = dyn_cast<StructType>(Ty))
- if (!STy->isAnonymous())
+ if (StructType *STy = dyn_cast<StructType>(Ty))
+ if (!STy->isLiteral())
*TypeID = ~0U;
// Enumerate all of the subtypes before we enumerate this type. This ensures
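EnumerateType can recurse into a named struct that (indirectly) contains itself, so it pre-marks the type with the sentinel ~0U before walking subtypes; literal structs are uniqued structurally and cannot be self-referential, so they skip the mark. A toy sketch of the pre-mark-then-visit pattern, with a hypothetical Ty node and std::map (whose references stay valid across insertions) in place of the DenseMap:

#include <map>
#include <vector>

struct Ty {                  // toy type node; named structs may self-refer
  std::vector<Ty*> Subtypes;
  bool Named;
};

static std::map<Ty*, unsigned> TypeIDs;
static unsigned NextID = 1;

static void enumerateType(Ty *T) {
  unsigned &ID = TypeIDs[T];
  if (ID) return;            // already numbered, or visit in progress
  if (T->Named)
    ID = ~0u;                // sentinel blocks infinite recursion
  for (size_t i = 0; i != T->Subtypes.size(); ++i)
    enumerateType(T->Subtypes[i]); // subtypes first, so lower IDs
  ID = NextID++;             // replace the sentinel with the real ID
}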
diff --git a/lib/Bitcode/Writer/ValueEnumerator.h b/lib/Bitcode/Writer/ValueEnumerator.h
index 6617b60..b6fc920 100644
--- a/lib/Bitcode/Writer/ValueEnumerator.h
+++ b/lib/Bitcode/Writer/ValueEnumerator.h
@@ -35,12 +35,12 @@ class MDSymbolTable;
class ValueEnumerator {
public:
- typedef std::vector<const Type*> TypeList;
+ typedef std::vector<Type*> TypeList;
// For each value, we remember its Value* and occurrence frequency.
typedef std::vector<std::pair<const Value*, unsigned> > ValueList;
private:
- typedef DenseMap<const Type*, unsigned> TypeMapType;
+ typedef DenseMap<Type*, unsigned> TypeMapType;
TypeMapType TypeMap;
TypeList Types;
@@ -85,7 +85,7 @@ public:
unsigned getValueID(const Value *V) const;
- unsigned getTypeID(const Type *T) const {
+ unsigned getTypeID(Type *T) const {
TypeMapType::const_iterator I = TypeMap.find(T);
assert(I != TypeMap.end() && "Type not in ValueEnumerator!");
return I->second-1;
@@ -140,7 +140,7 @@ private:
void EnumerateFunctionLocalMetadata(const MDNode *N);
void EnumerateNamedMDNode(const NamedMDNode *NMD);
void EnumerateValue(const Value *V);
- void EnumerateType(const Type *T);
+ void EnumerateType(Type *T);
void EnumerateOperandType(const Value *V);
void EnumerateAttributes(const AttrListPtr &PAL);
diff --git a/lib/CMakeLists.txt b/lib/CMakeLists.txt
index 80118f0..fb63c63 100644
--- a/lib/CMakeLists.txt
+++ b/lib/CMakeLists.txt
@@ -1,4 +1,4 @@
-# `Support' library is added on the top-level CMakeLists.txt
+# `Support' and `TableGen' libraries are added on the top-level CMakeLists.txt
add_subdirectory(VMCore)
add_subdirectory(CodeGen)
@@ -7,8 +7,8 @@ add_subdirectory(Transforms)
add_subdirectory(Linker)
add_subdirectory(Analysis)
add_subdirectory(MC)
-add_subdirectory(CompilerDriver)
add_subdirectory(Object)
+add_subdirectory(DebugInfo)
add_subdirectory(ExecutionEngine)
add_subdirectory(Target)
add_subdirectory(AsmParser)
diff --git a/lib/CodeGen/Analysis.cpp b/lib/CodeGen/Analysis.cpp
index 125e641..fafc010 100644
--- a/lib/CodeGen/Analysis.cpp
+++ b/lib/CodeGen/Analysis.cpp
@@ -31,7 +31,7 @@ using namespace llvm;
/// of insertvalue or extractvalue indices that identify a member, return
/// the linearized index of the start of the member.
///
-unsigned llvm::ComputeLinearIndex(const Type *Ty,
+unsigned llvm::ComputeLinearIndex(Type *Ty,
const unsigned *Indices,
const unsigned *IndicesEnd,
unsigned CurIndex) {
@@ -40,7 +40,7 @@ unsigned llvm::ComputeLinearIndex(const Type *Ty,
return CurIndex;
// Given a struct type, recursively traverse the elements.
- if (const StructType *STy = dyn_cast<StructType>(Ty)) {
+ if (StructType *STy = dyn_cast<StructType>(Ty)) {
for (StructType::element_iterator EB = STy->element_begin(),
EI = EB,
EE = STy->element_end();
@@ -52,8 +52,8 @@ unsigned llvm::ComputeLinearIndex(const Type *Ty,
return CurIndex;
}
// Given an array type, recursively traverse the elements.
- else if (const ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
- const Type *EltTy = ATy->getElementType();
+ else if (ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
+ Type *EltTy = ATy->getElementType();
for (unsigned i = 0, e = ATy->getNumElements(); i != e; ++i) {
if (Indices && *Indices == i)
return ComputeLinearIndex(EltTy, Indices+1, IndicesEnd, CurIndex);
@@ -72,12 +72,12 @@ unsigned llvm::ComputeLinearIndex(const Type *Ty,
/// If Offsets is non-null, it points to a vector to be filled in
/// with the in-memory offsets of each of the individual values.
///
-void llvm::ComputeValueVTs(const TargetLowering &TLI, const Type *Ty,
+void llvm::ComputeValueVTs(const TargetLowering &TLI, Type *Ty,
SmallVectorImpl<EVT> &ValueVTs,
SmallVectorImpl<uint64_t> *Offsets,
uint64_t StartingOffset) {
// Given a struct type, recursively traverse the elements.
- if (const StructType *STy = dyn_cast<StructType>(Ty)) {
+ if (StructType *STy = dyn_cast<StructType>(Ty)) {
const StructLayout *SL = TLI.getTargetData()->getStructLayout(STy);
for (StructType::element_iterator EB = STy->element_begin(),
EI = EB,
@@ -88,8 +88,8 @@ void llvm::ComputeValueVTs(const TargetLowering &TLI, const Type *Ty,
return;
}
// Given an array type, recursively traverse the elements.
- if (const ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
- const Type *EltTy = ATy->getElementType();
+ if (ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
+ Type *EltTy = ATy->getElementType();
uint64_t EltSize = TLI.getTargetData()->getTypeAllocSize(EltTy);
for (unsigned i = 0, e = ATy->getNumElements(); i != e; ++i)
ComputeValueVTs(TLI, EltTy, ValueVTs, Offsets,
diff --git a/lib/CodeGen/AsmPrinter/ARMException.cpp b/lib/CodeGen/AsmPrinter/ARMException.cpp
index 5861fa4..3f23873 100644
--- a/lib/CodeGen/AsmPrinter/ARMException.cpp
+++ b/lib/CodeGen/AsmPrinter/ARMException.cpp
@@ -17,7 +17,6 @@
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
-#include "llvm/CodeGen/MachineLocation.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
@@ -27,7 +26,6 @@
#include "llvm/Target/Mangler.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Target/TargetFrameLowering.h"
-#include "llvm/Target/TargetLoweringObjectFile.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
#include "llvm/Target/TargetRegisterInfo.h"
diff --git a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
index 7f314ee..1999f36 100644
--- a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
+++ b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
@@ -33,7 +33,6 @@
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/Target/Mangler.h"
-#include "llvm/Target/TargetAsmInfo.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetLowering.h"
@@ -45,6 +44,7 @@
#include "llvm/ADT/Statistic.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/Format.h"
+#include "llvm/Support/MathExtras.h"
#include "llvm/Support/Timer.h"
using namespace llvm;
@@ -290,10 +290,10 @@ void AsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) {
// Handle common and BSS local symbols (.lcomm).
if (GVKind.isCommon() || GVKind.isBSSLocal()) {
if (Size == 0) Size = 1; // .comm Foo, 0 is undefined, avoid it.
+ unsigned Align = 1 << AlignLog;
// Handle common symbols.
if (GVKind.isCommon()) {
- unsigned Align = 1 << AlignLog;
if (!getObjFileLowering().getCommDirectiveSupportsAlignment())
Align = 0;
@@ -307,17 +307,17 @@ void AsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) {
const MCSection *TheSection =
getObjFileLowering().SectionForGlobal(GV, GVKind, Mang, TM);
// .zerofill __DATA, __bss, _foo, 400, 5
- OutStreamer.EmitZerofill(TheSection, GVSym, Size, 1 << AlignLog);
+ OutStreamer.EmitZerofill(TheSection, GVSym, Size, Align);
return;
}
- if (MAI->hasLCOMMDirective()) {
+ if (MAI->getLCOMMDirectiveType() != LCOMM::None &&
+ (MAI->getLCOMMDirectiveType() != LCOMM::NoAlignment || Align == 1)) {
// .lcomm _foo, 42
- OutStreamer.EmitLocalCommonSymbol(GVSym, Size);
+ OutStreamer.EmitLocalCommonSymbol(GVSym, Size, Align);
return;
}
- unsigned Align = 1 << AlignLog;
if (!getObjFileLowering().getCommDirectiveSupportsAlignment())
Align = 0;
@@ -474,8 +474,10 @@ void AsmPrinter::EmitFunctionHeader() {
void AsmPrinter::EmitFunctionEntryLabel() {
// The function label could have already been emitted if two symbols end up
// conflicting due to asm renaming. Detect this and emit an error.
- if (CurrentFnSym->isUndefined())
+ if (CurrentFnSym->isUndefined()) {
+ OutStreamer.ForceCodeRegion();
return OutStreamer.EmitLabel(CurrentFnSym);
+ }
report_fatal_error("'" + Twine(CurrentFnSym->getName()) +
"' label emitted multiple times to assembly file");
@@ -620,6 +622,9 @@ void AsmPrinter::emitPrologLabel(const MachineInstr &MI) {
if (needsCFIMoves() == CFI_M_None)
return;
+ if (MMI->getCompactUnwindEncoding() != 0)
+ OutStreamer.EmitCompactUnwindEncoding(MMI->getCompactUnwindEncoding());
+
MachineModuleInfo &MMI = MF->getMMI();
std::vector<MachineMove> &Moves = MMI.getFrameMoves();
bool FoundOne = false;
@@ -878,7 +883,7 @@ bool AsmPrinter::doFinalization(Module &M) {
I != E; ++I) {
MCSymbol *Name = Mang->getSymbol(I);
- const GlobalValue *GV = cast<GlobalValue>(I->getAliasedGlobal());
+ const GlobalValue *GV = I->getAliasedGlobal();
MCSymbol *Target = Mang->getSymbol(GV);
if (I->hasExternalLinkage() || !MAI->getWeakRefDirective())
@@ -1009,7 +1014,7 @@ void AsmPrinter::EmitConstantPool() {
unsigned NewOffset = (Offset + AlignMask) & ~AlignMask;
OutStreamer.EmitFill(NewOffset - Offset, 0/*fillval*/, 0/*addrspace*/);
- const Type *Ty = CPE.getType();
+ Type *Ty = CPE.getType();
Offset = NewOffset + TM.getTargetData()->getTypeAllocSize(Ty);
OutStreamer.EmitLabel(GetCPISymbol(CPI));
@@ -1055,6 +1060,15 @@ void AsmPrinter::EmitJumpTableInfo() {
EmitAlignment(Log2_32(MJTI->getEntryAlignment(*TM.getTargetData())));
+ // If we know the form of the jump table, go ahead and tag it as such.
+ if (!JTInDiffSection) {
+ if (MJTI->getEntryKind() == MachineJumpTableInfo::EK_LabelDifference32) {
+ OutStreamer.EmitJumpTable32Region();
+ } else {
+ OutStreamer.EmitDataRegion();
+ }
+ }
+
for (unsigned JTI = 0, e = JT.size(); JTI != e; ++JTI) {
const std::vector<MachineBasicBlock*> &JTBBs = JT[JTI].MBBs;
@@ -1226,22 +1240,53 @@ void AsmPrinter::EmitLLVMUsedList(const Constant *List) {
}
}
-/// EmitXXStructorList - Emit the ctor or dtor list. This just prints out the
-/// function pointers, ignoring the init priority.
+typedef std::pair<int, Constant*> Structor;
+
+static bool priority_order(const Structor& lhs, const Structor& rhs) {
+ return lhs.first < rhs.first;
+}
+
+/// EmitXXStructorList - Emit the ctor or dtor list taking into account the init
+/// priority.
void AsmPrinter::EmitXXStructorList(const Constant *List) {
// Should be an array of '{ int, void ()* }' structs. The first value is the
- // init priority, which we ignore.
+ // init priority.
if (!isa<ConstantArray>(List)) return;
- const ConstantArray *InitList = cast<ConstantArray>(List);
- for (unsigned i = 0, e = InitList->getNumOperands(); i != e; ++i)
- if (ConstantStruct *CS = dyn_cast<ConstantStruct>(InitList->getOperand(i))){
- if (CS->getNumOperands() != 2) return; // Not array of 2-element structs.
-
- if (CS->getOperand(1)->isNullValue())
- return; // Found a null terminator, exit printing.
- // Emit the function pointer.
- EmitGlobalConstant(CS->getOperand(1));
- }
+
+ // Sanity check the structors list.
+ const ConstantArray *InitList = dyn_cast<ConstantArray>(List);
+ if (!InitList) return; // Not an array!
+ StructType *ETy = dyn_cast<StructType>(InitList->getType()->getElementType());
+ if (!ETy || ETy->getNumElements() != 2) return; // Not an array of pairs!
+ if (!isa<IntegerType>(ETy->getTypeAtIndex(0U)) ||
+ !isa<PointerType>(ETy->getTypeAtIndex(1U))) return; // Not (int, ptr).
+
+ // Gather the structors in a form that's convenient for sorting by priority.
+ SmallVector<Structor, 8> Structors;
+ for (unsigned i = 0, e = InitList->getNumOperands(); i != e; ++i) {
+ ConstantStruct *CS = dyn_cast<ConstantStruct>(InitList->getOperand(i));
+ if (!CS) continue; // Malformed.
+ if (CS->getOperand(1)->isNullValue())
+ break; // Found a null terminator, skip the rest.
+ ConstantInt *Priority = dyn_cast<ConstantInt>(CS->getOperand(0));
+ if (!Priority) continue; // Malformed.
+ Structors.push_back(std::make_pair(Priority->getLimitedValue(65535),
+ CS->getOperand(1)));
+ }
+
+ // Emit the function pointers in reverse priority order.
+ switch (MAI->getStructorOutputOrder()) {
+ case Structors::None:
+ break;
+ case Structors::PriorityOrder:
+ std::sort(Structors.begin(), Structors.end(), priority_order);
+ break;
+ case Structors::ReversePriorityOrder:
+ std::sort(Structors.rbegin(), Structors.rend(), priority_order);
+ break;
+ }
+ for (unsigned i = 0, e = Structors.size(); i != e; ++i)
+ EmitGlobalConstant(Structors[i].second);
}
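The rewrite above turns EmitXXStructorList from "print pointers, ignore priority" into gather-validate-sort-emit: malformed entries are skipped, a null function pointer terminates the list, and priorities are clamped to 65535 before sorting. The core shape, reduced to a self-contained sketch with a hypothetical orderStructors helper:

#include <algorithm>
#include <utility>
#include <vector>

typedef std::pair<int, const void*> Structor; // (priority, function)

static bool byPriority(const Structor &L, const Structor &R) {
  return L.first < R.first;
}

// Sketch: gather valid entries, stop at the null terminator, clamp the
// priority as getLimitedValue(65535) does, then sort.
static std::vector<Structor>
orderStructors(const std::vector<Structor> &Raw) {
  std::vector<Structor> Out;
  for (size_t i = 0; i != Raw.size(); ++i) {
    if (!Raw[i].second)
      break;                                  // null terminator
    Out.push_back(std::make_pair(std::min(Raw[i].first, 65535),
                                 Raw[i].second));
  }
  std::sort(Out.begin(), Out.end(), byPriority);
  return Out;
}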
//===--------------------------------------------------------------------===//
@@ -1406,8 +1451,7 @@ static const MCExpr *LowerConstant(const Constant *CV, AsmPrinter &AP) {
// Generate a symbolic expression for the byte address
const Constant *PtrVal = CE->getOperand(0);
SmallVector<Value*, 8> IdxVec(CE->op_begin()+1, CE->op_end());
- int64_t Offset = TD.getIndexedOffset(PtrVal->getType(), &IdxVec[0],
- IdxVec.size());
+ int64_t Offset = TD.getIndexedOffset(PtrVal->getType(), IdxVec);
const MCExpr *Base = LowerConstant(CE->getOperand(0), AP);
if (Offset == 0)
@@ -1447,7 +1491,7 @@ static const MCExpr *LowerConstant(const Constant *CV, AsmPrinter &AP) {
// Support only foldable casts to/from pointers that can be eliminated by
// changing the pointer to the appropriately sized integer type.
Constant *Op = CE->getOperand(0);
- const Type *Ty = CE->getType();
+ Type *Ty = CE->getType();
const MCExpr *OpExpr = LowerConstant(Op, AP);
@@ -1496,12 +1540,67 @@ static const MCExpr *LowerConstant(const Constant *CV, AsmPrinter &AP) {
static void EmitGlobalConstantImpl(const Constant *C, unsigned AddrSpace,
AsmPrinter &AP);
+/// isRepeatedByteSequence - Determine whether the given value is
+/// composed of a repeated sequence of identical bytes and return the
+/// byte value. If it is not a repeated sequence, return -1.
+static int isRepeatedByteSequence(const Value *V, TargetMachine &TM) {
+
+ if (const ConstantInt *CI = dyn_cast<ConstantInt>(V)) {
+ if (CI->getBitWidth() > 64) return -1;
+
+ uint64_t Size = TM.getTargetData()->getTypeAllocSize(V->getType());
+ uint64_t Value = CI->getZExtValue();
+
+ // Make sure the constant is at least 8 bits long and has a power
+ // of 2 bit width. This guarantees the constant bit width is
+ // always a multiple of 8 bits, avoiding issues with padding out
+ // to Size and other such corner cases.
+ if (CI->getBitWidth() < 8 || !isPowerOf2_64(CI->getBitWidth())) return -1;
+
+ uint8_t Byte = static_cast<uint8_t>(Value);
+
+ for (unsigned i = 1; i < Size; ++i) {
+ Value >>= 8;
+ if (static_cast<uint8_t>(Value) != Byte) return -1;
+ }
+ return Byte;
+ }
+ if (const ConstantArray *CA = dyn_cast<ConstantArray>(V)) {
+ // Make sure all array elements are sequences of the same repeated
+ // byte.
+ if (CA->getNumOperands() == 0) return -1;
+
+ int Byte = isRepeatedByteSequence(CA->getOperand(0), TM);
+ if (Byte == -1) return -1;
+
+ for (unsigned i = 1, e = CA->getNumOperands(); i != e; ++i) {
+ int ThisByte = isRepeatedByteSequence(CA->getOperand(i), TM);
+ if (ThisByte == -1) return -1;
+ if (Byte != ThisByte) return -1;
+ }
+ return Byte;
+ }
+
+ return -1;
+}
+
static void EmitGlobalConstantArray(const ConstantArray *CA, unsigned AddrSpace,
AsmPrinter &AP) {
if (AddrSpace != 0 || !CA->isString()) {
- // Not a string. Print the values in successive locations
- for (unsigned i = 0, e = CA->getNumOperands(); i != e; ++i)
- EmitGlobalConstantImpl(CA->getOperand(i), AddrSpace, AP);
+ // Not a string. Print the values in successive locations.
+
+ // See if we can aggregate some values. Make sure it can be
+ // represented as a series of bytes of the constant value.
+ int Value = isRepeatedByteSequence(CA, AP.TM);
+
+ if (Value != -1) {
+ uint64_t Bytes = AP.TM.getTargetData()->getTypeAllocSize(CA->getType());
+ AP.OutStreamer.EmitFill(Bytes, Value, AddrSpace);
+ }
+ else {
+ for (unsigned i = 0, e = CA->getNumOperands(); i != e; ++i)
+ EmitGlobalConstantImpl(CA->getOperand(i), AddrSpace, AP);
+ }
return;
}
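The payoff of isRepeatedByteSequence is in EmitGlobalConstantArray above: an aggregate whose every byte is identical becomes a single EmitFill of getTypeAllocSize bytes instead of one directive per element. A standalone sketch of the integer case, assuming (as the original requires) a power-of-two bit width of at least 8:

#include <cstdint>

// Sketch: return the repeated byte of a ByteWidth-byte Value, or -1 if
// its bytes differ; mirrors the ConstantInt case above.
static int repeatedByte(uint64_t Value, unsigned ByteWidth) {
  uint8_t Byte = static_cast<uint8_t>(Value);
  for (unsigned i = 1; i < ByteWidth; ++i) {
    Value >>= 8;
    if (static_cast<uint8_t>(Value) != Byte)
      return -1;
  }
  return Byte;
}

// repeatedByte(0xCCCCCCCCu, 4) == 0xCC, so a 4-byte fill suffices;
// repeatedByte(0x00FFu, 2) == -1, so elements are emitted one by one.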
diff --git a/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp b/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp
index dd5b0e2..4d6c281 100644
--- a/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp
+++ b/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp
@@ -13,7 +13,7 @@
#define DEBUG_TYPE "asm-printer"
#include "llvm/CodeGen/AsmPrinter.h"
-#include "llvm/CodeGen/MachineLocation.h"
+#include "llvm/MC/MachineLocation.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCSection.h"
#include "llvm/MC/MCStreamer.h"
diff --git a/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp b/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp
index 5ac455e..8eda889 100644
--- a/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp
+++ b/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp
@@ -23,15 +23,15 @@
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/MCSymbol.h"
-#include "llvm/Target/TargetAsmParser.h"
+#include "llvm/MC/MCTargetAsmParser.h"
#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetRegistry.h"
#include "llvm/ADT/OwningPtr.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/Twine.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/SourceMgr.h"
+#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
@@ -49,7 +49,7 @@ namespace {
static void SrcMgrDiagHandler(const SMDiagnostic &Diag, void *diagInfo) {
SrcMgrDiagInfo *DiagInfo = static_cast<SrcMgrDiagInfo *>(diagInfo);
assert(DiagInfo && "Diagnostic context not passed down?");
-
+
// If the inline asm had metadata associated with it, pull out a location
// cookie corresponding to which line the error occurred on.
unsigned LocCookie = 0;
@@ -57,13 +57,13 @@ static void SrcMgrDiagHandler(const SMDiagnostic &Diag, void *diagInfo) {
unsigned ErrorLine = Diag.getLineNo()-1;
if (ErrorLine >= LocInfo->getNumOperands())
ErrorLine = 0;
-
+
if (LocInfo->getNumOperands() != 0)
if (const ConstantInt *CI =
dyn_cast<ConstantInt>(LocInfo->getOperand(ErrorLine)))
LocCookie = CI->getZExtValue();
}
-
+
DiagInfo->DiagHandler(Diag, DiagInfo->DiagContext, LocCookie);
}
@@ -109,7 +109,7 @@ void AsmPrinter::EmitInlineAsm(StringRef Str, const MDNode *LocMDNode) const {
// Tell SrcMgr about this buffer, it takes ownership of the buffer.
SrcMgr.AddNewSourceBuffer(Buffer, SMLoc());
- OwningPtr<MCAsmParser> Parser(createMCAsmParser(TM.getTarget(), SrcMgr,
+ OwningPtr<MCAsmParser> Parser(createMCAsmParser(SrcMgr,
OutContext, OutStreamer,
*MAI));
@@ -121,7 +121,8 @@ void AsmPrinter::EmitInlineAsm(StringRef Str, const MDNode *LocMDNode) const {
STI(TM.getTarget().createMCSubtargetInfo(TM.getTargetTriple(),
TM.getTargetCPU(),
TM.getTargetFeatureString()));
- OwningPtr<TargetAsmParser> TAP(TM.getTarget().createAsmParser(*STI, *Parser));
+ OwningPtr<MCTargetAsmParser>
+ TAP(TM.getTarget().createMCAsmParser(*STI, *Parser));
if (!TAP)
report_fatal_error("Inline asm not supported by this streamer because"
" we don't have an asm parser for this target\n");
diff --git a/lib/CodeGen/AsmPrinter/CMakeLists.txt b/lib/CodeGen/AsmPrinter/CMakeLists.txt
index 4da7876..67d9273 100644
--- a/lib/CodeGen/AsmPrinter/CMakeLists.txt
+++ b/lib/CodeGen/AsmPrinter/CMakeLists.txt
@@ -12,3 +12,12 @@ add_llvm_library(LLVMAsmPrinter
Win64Exception.cpp
)
+add_llvm_library_dependencies(LLVMAsmPrinter
+ LLVMAnalysis
+ LLVMCodeGen
+ LLVMCore
+ LLVMMC
+ LLVMMCParser
+ LLVMSupport
+ LLVMTarget
+ )
diff --git a/lib/CodeGen/AsmPrinter/DIE.cpp b/lib/CodeGen/AsmPrinter/DIE.cpp
index 21396ca..9c1ce76 100644
--- a/lib/CodeGen/AsmPrinter/DIE.cpp
+++ b/lib/CodeGen/AsmPrinter/DIE.cpp
@@ -69,7 +69,7 @@ void DIEAbbrev::Emit(AsmPrinter *AP) const {
// Emit attribute type.
// FIXME: Doing work even in non-asm-verbose runs.
AP->EmitULEB128(AttrData.getAttribute(),
- dwarf::AttributeString(AttrData.getAttribute()));
+ dwarf::AttributeString(AttrData.getAttribute()));
// Emit form type.
// FIXME: Doing work even in non-asm-verbose runs.
diff --git a/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp b/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp
index 91b7d08..8ed4f4c 100644
--- a/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp
+++ b/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp
@@ -17,7 +17,7 @@
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
-#include "llvm/CodeGen/MachineLocation.h"
+#include "llvm/MC/MachineLocation.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
@@ -77,7 +77,8 @@ void DwarfCFIException::EndModule() {
// This is a temporary hack to keep sections in the same order they
// were before. This lets us produce bit identical outputs while
// transitioning to CFI.
- Asm->OutStreamer.SwitchSection(TLOF.getEHFrameSection());
+ Asm->OutStreamer.SwitchSection(
+ const_cast<TargetLoweringObjectFile&>(TLOF).getEHFrameSection());
}
}
diff --git a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
index 1fe035e..88b7524 100644
--- a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
+++ b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
@@ -16,7 +16,10 @@
#include "DwarfCompileUnit.h"
#include "DwarfDebug.h"
#include "llvm/Constants.h"
+#include "llvm/GlobalVariable.h"
+#include "llvm/Instructions.h"
#include "llvm/Analysis/DIBuilder.h"
+#include "llvm/Target/Mangler.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Target/TargetFrameLowering.h"
#include "llvm/Target/TargetMachine.h"
@@ -132,8 +135,8 @@ void CompileUnit::addSourceLine(DIE *Die, DIGlobalVariable G) {
unsigned Line = G.getLineNumber();
if (Line == 0)
return;
- unsigned FileID = DD->GetOrCreateSourceID(G.getContext().getFilename(),
- G.getContext().getDirectory());
+ unsigned FileID = DD->GetOrCreateSourceID(G.getFilename(),
+ G.getDirectory());
assert(FileID && "Invalid file id");
addUInt(Die, dwarf::DW_AT_decl_file, 0, FileID);
addUInt(Die, dwarf::DW_AT_decl_line, 0, Line);
@@ -439,27 +442,36 @@ void CompileUnit::addBlockByrefAddress(DbgVariable *&DV, DIE *Die,
addBlock(Die, Attribute, 0, Block);
}
+/// isTypeSigned - Return true if the type is signed.
+static bool isTypeSigned(DIType Ty, int *SizeInBits) {
+ if (Ty.isDerivedType())
+ return isTypeSigned(DIDerivedType(Ty).getTypeDerivedFrom(), SizeInBits);
+ if (Ty.isBasicType())
+ if (DIBasicType(Ty).getEncoding() == dwarf::DW_ATE_signed
+ || DIBasicType(Ty).getEncoding() == dwarf::DW_ATE_signed_char) {
+ *SizeInBits = Ty.getSizeInBits();
+ return true;
+ }
+ return false;
+}
+
/// addConstantValue - Add constant value entry in variable DIE.
bool CompileUnit::addConstantValue(DIE *Die, const MachineOperand &MO,
DIType Ty) {
assert (MO.isImm() && "Invalid machine operand!");
DIEBlock *Block = new (DIEValueAllocator) DIEBlock();
- unsigned form = dwarf::DW_FORM_udata;
- switch (Ty.getSizeInBits()) {
- case 8: form = dwarf::DW_FORM_data1; break;
- case 16: form = dwarf::DW_FORM_data2; break;
- case 32: form = dwarf::DW_FORM_data4; break;
- case 64: form = dwarf::DW_FORM_data8; break;
+ int SizeInBits = -1;
+ bool SignedConstant = isTypeSigned(Ty, &SizeInBits);
+ unsigned Form = SignedConstant ? dwarf::DW_FORM_sdata : dwarf::DW_FORM_udata;
+ switch (SizeInBits) {
+ case 8: Form = dwarf::DW_FORM_data1; break;
+ case 16: Form = dwarf::DW_FORM_data2; break;
+ case 32: Form = dwarf::DW_FORM_data4; break;
+ case 64: Form = dwarf::DW_FORM_data8; break;
default: break;
}
-
- DIBasicType BTy(Ty);
- if (BTy.Verify() &&
- (BTy.getEncoding() == dwarf::DW_ATE_signed
- || BTy.getEncoding() == dwarf::DW_ATE_signed_char))
- addSInt(Block, 0, form, MO.getImm());
- else
- addUInt(Block, 0, form, MO.getImm());
+ SignedConstant ? addSInt(Block, 0, Form, MO.getImm())
+ : addUInt(Block, 0, Form, MO.getImm());
addBlock(Die, dwarf::DW_AT_const_value, 0, Block);
return true;
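The switch above picks the smallest fixed-size DWARF data form matching the type's bit width and otherwise falls back to a variable-length LEB128 form, signed (sdata) or unsigned (udata) according to the type's encoding. A minimal sketch of the selection; the form code values are taken from the DWARF specification:

#include <cstdint>

// DWARF form codes (values from the DWARF specification).
enum Form {
  DW_FORM_data2 = 0x05, DW_FORM_data4 = 0x06, DW_FORM_data8 = 0x07,
  DW_FORM_data1 = 0x0b, DW_FORM_sdata = 0x0d, DW_FORM_udata = 0x0f
};

// Sketch: smallest fixed-size form for the width, else LEB128.
static Form chooseConstantForm(int SizeInBits, bool Signed) {
  switch (SizeInBits) {
  case 8:  return DW_FORM_data1;
  case 16: return DW_FORM_data2;
  case 32: return DW_FORM_data4;
  case 64: return DW_FORM_data8;
  default: return Signed ? DW_FORM_sdata : DW_FORM_udata;
  }
}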
@@ -555,7 +567,7 @@ void CompileUnit::addToContextOwner(DIE *Die, DIDescriptor Context) {
DIE *ContextDIE = getOrCreateNameSpace(DINameSpace(Context));
ContextDIE->addChild(Die);
} else if (Context.isSubprogram()) {
- DIE *ContextDIE = DD->createSubprogramDIE(DISubprogram(Context));
+ DIE *ContextDIE = getOrCreateSubprogramDIE(DISubprogram(Context));
ContextDIE->addChild(Die);
} else if (DIE *ContextDIE = getDIE(Context))
ContextDIE->addChild(Die);
@@ -565,7 +577,10 @@ void CompileUnit::addToContextOwner(DIE *Die, DIDescriptor Context) {
/// getOrCreateTypeDIE - Find existing DIE or create new DIE for the
/// given DIType.
-DIE *CompileUnit::getOrCreateTypeDIE(DIType Ty) {
+DIE *CompileUnit::getOrCreateTypeDIE(const MDNode *TyNode) {
+ DIType Ty(TyNode);
+ if (!Ty.Verify())
+ return NULL;
DIE *TyDIE = getDIE(Ty);
if (TyDIE)
return TyDIE;
@@ -617,7 +632,8 @@ void CompileUnit::addType(DIE *Entity, DIType Ty) {
void CompileUnit::addGlobalType(DIType Ty) {
DIDescriptor Context = Ty.getContext();
if (Ty.isCompositeType() && !Ty.getName().empty() && !Ty.isForwardDecl()
- && (Context.isCompileUnit() || Context.isFile() || Context.isNameSpace()))
+ && (!Context || Context.isCompileUnit() || Context.isFile()
+ || Context.isNameSpace()))
if (DIEEntry *Entry = getDIEEntry(Ty))
GlobalTypes[Ty.getName()] = Entry->getEntry();
}
@@ -642,13 +658,20 @@ void CompileUnit::addPubTypes(DISubprogram SP) {
void CompileUnit::constructTypeDIE(DIE &Buffer, DIBasicType BTy) {
// Get core information.
StringRef Name = BTy.getName();
- Buffer.setTag(dwarf::DW_TAG_base_type);
- addUInt(&Buffer, dwarf::DW_AT_encoding, dwarf::DW_FORM_data1,
- BTy.getEncoding());
-
// Add name if not anonymous or intermediate type.
if (!Name.empty())
addString(&Buffer, dwarf::DW_AT_name, dwarf::DW_FORM_string, Name);
+
+ if (BTy.getTag() == dwarf::DW_TAG_unspecified_type) {
+ Buffer.setTag(dwarf::DW_TAG_unspecified_type);
+    // Unspecified types have only a name, nothing else.
+ return;
+ }
+
+ Buffer.setTag(dwarf::DW_TAG_base_type);
+ addUInt(&Buffer, dwarf::DW_AT_encoding, dwarf::DW_FORM_data1,
+ BTy.getEncoding());
+
uint64_t Size = BTy.getSizeInBits() >> 3;
addUInt(&Buffer, dwarf::DW_AT_byte_size, 0, Size);
}
@@ -752,7 +775,7 @@ void CompileUnit::constructTypeDIE(DIE &Buffer, DICompositeType CTy) {
DIE *ElemDie = NULL;
if (Element.isSubprogram()) {
DISubprogram SP(Element);
- ElemDie = DD->createSubprogramDIE(DISubprogram(Element));
+ ElemDie = getOrCreateSubprogramDIE(DISubprogram(Element));
if (SP.isProtected())
addUInt(ElemDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_flag,
dwarf::DW_ACCESS_protected);
@@ -880,6 +903,218 @@ DIE *CompileUnit::getOrCreateNameSpace(DINameSpace NS) {
return NDie;
}
+/// getRealLinkageName - If the linkage name begins with the special LLVM
+/// prefix that tells the asm printer not to emit the usual symbol prefix,
+/// return the linkage name with that prefix stripped.
+static StringRef getRealLinkageName(StringRef LinkageName) {
+ char One = '\1';
+ if (LinkageName.startswith(StringRef(&One, 1)))
+ return LinkageName.substr(1);
+ return LinkageName;
+}
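The '\1' byte is LLVM's marker for a name that must be emitted verbatim, without the target's usual symbol prefix; stripping it yields the name as it will appear in the object file. The same logic on std::string, as a sketch:

#include <string>

// Sketch of the same stripping on std::string.
static std::string realLinkageName(const std::string &Name) {
  if (!Name.empty() && Name[0] == '\1')
    return Name.substr(1);
  return Name;
}
// realLinkageName("\1_foo") == "_foo"; names without the marker pass
// through unchanged.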
+
+/// getOrCreateSubprogramDIE - Create new DIE using SP.
+DIE *CompileUnit::getOrCreateSubprogramDIE(DISubprogram SP) {
+ DIE *SPDie = getDIE(SP);
+ if (SPDie)
+ return SPDie;
+
+ SPDie = new DIE(dwarf::DW_TAG_subprogram);
+
+ // DW_TAG_inlined_subroutine may refer to this DIE.
+ insertDIE(SP, SPDie);
+
+ // Add to context owner.
+ addToContextOwner(SPDie, SP.getContext());
+
+ // Add function template parameters.
+ addTemplateParams(*SPDie, SP.getTemplateParams());
+
+ StringRef LinkageName = SP.getLinkageName();
+ if (!LinkageName.empty())
+ addString(SPDie, dwarf::DW_AT_MIPS_linkage_name,
+ dwarf::DW_FORM_string,
+ getRealLinkageName(LinkageName));
+
+  // If this DIE is going to refer to declaration info using AT_specification
+  // then there is no need to add other attributes.
+ if (SP.getFunctionDeclaration().isSubprogram())
+ return SPDie;
+
+ // Constructors and operators for anonymous aggregates do not have names.
+ if (!SP.getName().empty())
+ addString(SPDie, dwarf::DW_AT_name, dwarf::DW_FORM_string,
+ SP.getName());
+
+ addSourceLine(SPDie, SP);
+
+ if (SP.isPrototyped())
+ addUInt(SPDie, dwarf::DW_AT_prototyped, dwarf::DW_FORM_flag, 1);
+
+ // Add Return Type.
+ DICompositeType SPTy = SP.getType();
+ DIArray Args = SPTy.getTypeArray();
+ unsigned SPTag = SPTy.getTag();
+
+ if (Args.getNumElements() == 0 || SPTag != dwarf::DW_TAG_subroutine_type)
+ addType(SPDie, SPTy);
+ else
+ addType(SPDie, DIType(Args.getElement(0)));
+
+ unsigned VK = SP.getVirtuality();
+ if (VK) {
+ addUInt(SPDie, dwarf::DW_AT_virtuality, dwarf::DW_FORM_flag, VK);
+ DIEBlock *Block = getDIEBlock();
+ addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_constu);
+ addUInt(Block, 0, dwarf::DW_FORM_udata, SP.getVirtualIndex());
+ addBlock(SPDie, dwarf::DW_AT_vtable_elem_location, 0, Block);
+ ContainingTypeMap.insert(std::make_pair(SPDie,
+ SP.getContainingType()));
+ }
+
+ if (!SP.isDefinition()) {
+ addUInt(SPDie, dwarf::DW_AT_declaration, dwarf::DW_FORM_flag, 1);
+
+ // Add arguments. Do not add arguments for subprogram definition. They will
+ // be handled while processing variables.
+ DICompositeType SPTy = SP.getType();
+ DIArray Args = SPTy.getTypeArray();
+ unsigned SPTag = SPTy.getTag();
+
+ if (SPTag == dwarf::DW_TAG_subroutine_type)
+ for (unsigned i = 1, N = Args.getNumElements(); i < N; ++i) {
+ DIE *Arg = new DIE(dwarf::DW_TAG_formal_parameter);
+ DIType ATy = DIType(DIType(Args.getElement(i)));
+ addType(Arg, ATy);
+ if (ATy.isArtificial())
+ addUInt(Arg, dwarf::DW_AT_artificial, dwarf::DW_FORM_flag, 1);
+ SPDie->addChild(Arg);
+ }
+ }
+
+ if (SP.isArtificial())
+ addUInt(SPDie, dwarf::DW_AT_artificial, dwarf::DW_FORM_flag, 1);
+
+ if (!SP.isLocalToUnit())
+ addUInt(SPDie, dwarf::DW_AT_external, dwarf::DW_FORM_flag, 1);
+
+ if (SP.isOptimized())
+ addUInt(SPDie, dwarf::DW_AT_APPLE_optimized, dwarf::DW_FORM_flag, 1);
+
+ if (unsigned isa = Asm->getISAEncoding()) {
+ addUInt(SPDie, dwarf::DW_AT_APPLE_isa, dwarf::DW_FORM_flag, isa);
+ }
+
+ return SPDie;
+}
+
+// Return const expression if value is a GEP to access merged global
+// constant. e.g.
+// i8* getelementptr ({ i8, i8, i8, i8 }* @_MergedGlobals, i32 0, i32 0)
+static const ConstantExpr *getMergedGlobalExpr(const Value *V) {
+ const ConstantExpr *CE = dyn_cast_or_null<ConstantExpr>(V);
+ if (!CE || CE->getNumOperands() != 3 ||
+ CE->getOpcode() != Instruction::GetElementPtr)
+ return NULL;
+
+ // First operand points to a global struct.
+ Value *Ptr = CE->getOperand(0);
+ if (!isa<GlobalValue>(Ptr) ||
+ !isa<StructType>(cast<PointerType>(Ptr->getType())->getElementType()))
+ return NULL;
+
+ // Second operand is zero.
+ const ConstantInt *CI = dyn_cast_or_null<ConstantInt>(CE->getOperand(1));
+ if (!CI || !CI->isZero())
+ return NULL;
+
+ // Third operand is offset.
+ if (!isa<ConstantInt>(CE->getOperand(2)))
+ return NULL;
+
+ return CE;
+}
+
+/// createGlobalVariableDIE - create global variable DIE.
+void CompileUnit::createGlobalVariableDIE(const MDNode *N) {
+ // Check for pre-existence.
+ if (getDIE(N))
+ return;
+
+ DIGlobalVariable GV(N);
+ if (!GV.Verify())
+ return;
+
+ DIE *VariableDIE = new DIE(GV.getTag());
+ // Add to map.
+ insertDIE(N, VariableDIE);
+
+ // Add name.
+ addString(VariableDIE, dwarf::DW_AT_name, dwarf::DW_FORM_string,
+ GV.getDisplayName());
+ StringRef LinkageName = GV.getLinkageName();
+ bool isGlobalVariable = GV.getGlobal() != NULL;
+ if (!LinkageName.empty() && isGlobalVariable)
+ addString(VariableDIE, dwarf::DW_AT_MIPS_linkage_name,
+ dwarf::DW_FORM_string,
+ getRealLinkageName(LinkageName));
+ // Add type.
+ DIType GTy = GV.getType();
+ addType(VariableDIE, GTy);
+
+ // Add scoping info.
+ if (!GV.isLocalToUnit()) {
+ addUInt(VariableDIE, dwarf::DW_AT_external, dwarf::DW_FORM_flag, 1);
+ // Expose as global.
+ addGlobal(GV.getName(), VariableDIE);
+ }
+ // Add line number info.
+ addSourceLine(VariableDIE, GV);
+ // Add to context owner.
+ DIDescriptor GVContext = GV.getContext();
+ addToContextOwner(VariableDIE, GVContext);
+ // Add location.
+ if (isGlobalVariable) {
+ DIEBlock *Block = new (DIEValueAllocator) DIEBlock();
+ addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_addr);
+ addLabel(Block, 0, dwarf::DW_FORM_udata,
+ Asm->Mang->getSymbol(GV.getGlobal()));
+ // Do not create specification DIE if context is either compile unit
+ // or a subprogram.
+ if (GVContext && GV.isDefinition() && !GVContext.isCompileUnit() &&
+ !GVContext.isFile() && !isSubprogramContext(GVContext)) {
+ // Create specification DIE.
+ DIE *VariableSpecDIE = new DIE(dwarf::DW_TAG_variable);
+ addDIEEntry(VariableSpecDIE, dwarf::DW_AT_specification,
+ dwarf::DW_FORM_ref4, VariableDIE);
+ addBlock(VariableSpecDIE, dwarf::DW_AT_location, 0, Block);
+ addUInt(VariableDIE, dwarf::DW_AT_declaration, dwarf::DW_FORM_flag,
+ 1);
+ addDie(VariableSpecDIE);
+ } else {
+ addBlock(VariableDIE, dwarf::DW_AT_location, 0, Block);
+ }
+ } else if (const ConstantInt *CI =
+ dyn_cast_or_null<ConstantInt>(GV.getConstant()))
+ addConstantValue(VariableDIE, CI, GTy.isUnsignedDIType());
+ else if (const ConstantExpr *CE = getMergedGlobalExpr(N->getOperand(11))) {
+ // GV is a merged global.
+ DIEBlock *Block = new (DIEValueAllocator) DIEBlock();
+ Value *Ptr = CE->getOperand(0);
+ addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_addr);
+ addLabel(Block, 0, dwarf::DW_FORM_udata,
+ Asm->Mang->getSymbol(cast<GlobalValue>(Ptr)));
+ addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_constu);
+ SmallVector<Value*, 3> Idx(CE->op_begin()+1, CE->op_end());
+ addUInt(Block, 0, dwarf::DW_FORM_udata,
+ Asm->getTargetData().getIndexedOffset(Ptr->getType(), Idx));
+ addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_plus);
+ addBlock(VariableDIE, dwarf::DW_AT_location, 0, Block);
+ }
+
+ return;
+}
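The merged-global branch above describes the member with a three-step DWARF location expression: push the merged symbol's address, push the member's byte offset (computed via getIndexedOffset), add. Schematically, as a sketch with hypothetical emit plumbing; the opcode values come from the DWARF specification:

#include <cstdint>
#include <vector>

// DWARF expression opcodes (values from the DWARF specification).
enum { DW_OP_addr = 0x03, DW_OP_constu = 0x10, DW_OP_plus = 0x22 };

// Sketch: location of one member of @_MergedGlobals as
//   DW_OP_addr <symbol>  DW_OP_constu <offset>  DW_OP_plus
static void emitMergedGlobalLocation(std::vector<uint64_t> &Expr,
                                     uint64_t MergedSymbol,
                                     uint64_t ByteOffset) {
  Expr.push_back(DW_OP_addr);
  Expr.push_back(MergedSymbol); // address of the merged container
  Expr.push_back(DW_OP_constu);
  Expr.push_back(ByteOffset);   // from getIndexedOffset on the GEP
  Expr.push_back(DW_OP_plus);
}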
+
/// constructSubrangeDIE - Construct subrange DIE from DISubrange.
void CompileUnit::constructSubrangeDIE(DIE &Buffer, DISubrange SR, DIE *IndexTy){
DIE *DW_Subrange = new DIE(dwarf::DW_TAG_subrange_type);
@@ -944,6 +1179,128 @@ DIE *CompileUnit::constructEnumTypeDIE(DIEnumerator ETy) {
return Enumerator;
}
+/// constructContainingTypeDIEs - Construct DIEs for types that contain
+/// vtables.
+void CompileUnit::constructContainingTypeDIEs() {
+ for (DenseMap<DIE *, const MDNode *>::iterator CI = ContainingTypeMap.begin(),
+ CE = ContainingTypeMap.end(); CI != CE; ++CI) {
+ DIE *SPDie = CI->first;
+ const MDNode *N = CI->second;
+ if (!N) continue;
+ DIE *NDie = getDIE(N);
+ if (!NDie) continue;
+ addDIEEntry(SPDie, dwarf::DW_AT_containing_type, dwarf::DW_FORM_ref4, NDie);
+ }
+}
+
+/// constructVariableDIE - Construct a DIE for the given DbgVariable.
+DIE *CompileUnit::constructVariableDIE(DbgVariable *DV, bool isScopeAbstract) {
+ StringRef Name = DV->getName();
+ if (Name.empty())
+ return NULL;
+
+ // Translate tag to proper Dwarf tag.
+ unsigned Tag = DV->getTag();
+
+ // Define variable debug information entry.
+ DIE *VariableDie = new DIE(Tag);
+ DbgVariable *AbsVar = DV->getAbstractVariable();
+ DIE *AbsDIE = AbsVar ? AbsVar->getDIE() : NULL;
+ if (AbsDIE)
+ addDIEEntry(VariableDie, dwarf::DW_AT_abstract_origin,
+ dwarf::DW_FORM_ref4, AbsDIE);
+ else {
+ addString(VariableDie, dwarf::DW_AT_name,
+ dwarf::DW_FORM_string, Name);
+ addSourceLine(VariableDie, DV->getVariable());
+ addType(VariableDie, DV->getType());
+ }
+
+ if (DV->isArtificial())
+ addUInt(VariableDie, dwarf::DW_AT_artificial,
+ dwarf::DW_FORM_flag, 1);
+
+ if (isScopeAbstract) {
+ DV->setDIE(VariableDie);
+ return VariableDie;
+ }
+
+ // Add variable address.
+
+ unsigned Offset = DV->getDotDebugLocOffset();
+ if (Offset != ~0U) {
+ addLabel(VariableDie, dwarf::DW_AT_location,
+ dwarf::DW_FORM_data4,
+ Asm->GetTempSymbol("debug_loc", Offset));
+ DV->setDIE(VariableDie);
+ return VariableDie;
+ }
+
+ // Check if variable is described by a DBG_VALUE instruction.
+ if (const MachineInstr *DVInsn = DV->getMInsn()) {
+ bool updated = false;
+ if (DVInsn->getNumOperands() == 3) {
+ if (DVInsn->getOperand(0).isReg()) {
+ const MachineOperand RegOp = DVInsn->getOperand(0);
+ const TargetRegisterInfo *TRI = Asm->TM.getRegisterInfo();
+ if (DVInsn->getOperand(1).isImm() &&
+ TRI->getFrameRegister(*Asm->MF) == RegOp.getReg()) {
+ unsigned FrameReg = 0;
+ const TargetFrameLowering *TFI = Asm->TM.getFrameLowering();
+ int Offset =
+ TFI->getFrameIndexReference(*Asm->MF,
+ DVInsn->getOperand(1).getImm(),
+ FrameReg);
+ MachineLocation Location(FrameReg, Offset);
+ addVariableAddress(DV, VariableDie, Location);
+
+ } else if (RegOp.getReg())
+ addVariableAddress(DV, VariableDie,
+ MachineLocation(RegOp.getReg()));
+ updated = true;
+ }
+ else if (DVInsn->getOperand(0).isImm())
+ updated =
+ addConstantValue(VariableDie, DVInsn->getOperand(0),
+ DV->getType());
+ else if (DVInsn->getOperand(0).isFPImm())
+ updated =
+ addConstantFPValue(VariableDie, DVInsn->getOperand(0));
+ else if (DVInsn->getOperand(0).isCImm())
+ updated =
+ addConstantValue(VariableDie,
+ DVInsn->getOperand(0).getCImm(),
+ DV->getType().isUnsignedDIType());
+ } else {
+ addVariableAddress(DV, VariableDie,
+ Asm->getDebugValueLocation(DVInsn));
+ updated = true;
+ }
+ if (!updated) {
+      // If VariableDie is not updated then the DBG_VALUE instruction does
+      // not have valid variable info.
+ delete VariableDie;
+ return NULL;
+ }
+ DV->setDIE(VariableDie);
+ return VariableDie;
+ } else {
+ // .. else use frame index.
+ int FI = DV->getFrameIndex();
+ if (FI != ~0) {
+ unsigned FrameReg = 0;
+ const TargetFrameLowering *TFI = Asm->TM.getFrameLowering();
+ int Offset =
+ TFI->getFrameIndexReference(*Asm->MF, FI, FrameReg);
+ MachineLocation Location(FrameReg, Offset);
+ addVariableAddress(DV, VariableDie, Location);
+ }
+ }
+
+ DV->setDIE(VariableDie);
+ return VariableDie;
+}
+
/// createMemberDIE - Create new member DIE.
DIE *CompileUnit::createMemberDIE(DIDerivedType DT) {
DIE *MemberDie = new DIE(DT.getTag());
@@ -1013,7 +1370,7 @@ DIE *CompileUnit::createMemberDIE(DIDerivedType DT) {
addUInt(MemberDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_flag,
dwarf::DW_ACCESS_private);
// Otherwise C++ member and base classes are considered public.
- else if (DT.getCompileUnit().getLanguage() == dwarf::DW_LANG_C_plus_plus)
+ else
addUInt(MemberDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_flag,
dwarf::DW_ACCESS_public);
if (DT.isVirtual())
diff --git a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h
index 213c7fc..7859265 100644
--- a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h
+++ b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h
@@ -67,6 +67,11 @@ class CompileUnit {
/// DIEBlocks - A list of all the DIEBlocks in use.
std::vector<DIEBlock *> DIEBlocks;
+ /// ContainingTypeMap - This map is used to keep track of subprogram DIEs that
+ /// need DW_AT_containing_type attribute. This attribute points to a DIE that
+ /// corresponds to the MDNode mapped with the subprogram DIE.
+ DenseMap<DIE *, const MDNode *> ContainingTypeMap;
+
public:
CompileUnit(unsigned I, DIE *D, AsmPrinter *A, DwarfDebug *DW);
~CompileUnit();
@@ -226,9 +231,12 @@ public:
/// getOrCreateNameSpace - Create a DIE for DINameSpace.
DIE *getOrCreateNameSpace(DINameSpace NS);
+ /// getOrCreateSubprogramDIE - Create new DIE using SP.
+ DIE *getOrCreateSubprogramDIE(DISubprogram SP);
+
/// getOrCreateTypeDIE - Find existing DIE or create new DIE for the
/// given DIType.
- DIE *getOrCreateTypeDIE(DIType Ty);
+ DIE *getOrCreateTypeDIE(const MDNode *N);
/// getOrCreateTemplateTypeParameterDIE - Find existing DIE or create new DIE
/// for the given DITemplateTypeParameter.
@@ -242,6 +250,9 @@ public:
/// information entry.
DIEEntry *createDIEEntry(DIE *Entry);
+ /// createGlobalVariableDIE - create global variable DIE.
+ void createGlobalVariableDIE(const MDNode *N);
+
void addPubTypes(DISubprogram SP);
/// constructTypeDIE - Construct basic type die from DIBasicType.
@@ -266,6 +277,13 @@ public:
/// constructEnumTypeDIE - Construct enum type DIE from DIEnumerator.
DIE *constructEnumTypeDIE(DIEnumerator ETy);
+ /// constructContainingTypeDIEs - Construct DIEs for types that contain
+ /// vtables.
+ void constructContainingTypeDIEs();
+
+ /// constructVariableDIE - Construct a DIE for the given DbgVariable.
+ DIE *constructVariableDIE(DbgVariable *DV, bool isScopeAbstract);
+
/// createMemberDIE - Create new member DIE.
DIE *createMemberDIE(DIDerivedType DT);
diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
index 125e1e8..1b7e370 100644
--- a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
+++ b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
@@ -24,7 +24,6 @@
#include "llvm/MC/MCSection.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSymbol.h"
-#include "llvm/Target/Mangler.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Target/TargetFrameLowering.h"
#include "llvm/Target/TargetLoweringObjectFile.h"
@@ -45,9 +44,6 @@
#include "llvm/Support/Path.h"
using namespace llvm;
-static cl::opt<bool> PrintDbgScope("print-dbgscope", cl::Hidden,
- cl::desc("Print DbgScope information for each machine instruction"));
-
static cl::opt<bool> DisableDebugInfoPrinting("disable-debug-info-print",
cl::Hidden,
cl::desc("Disable debug info printing"));
@@ -69,7 +65,7 @@ static const unsigned InitAbbreviationsSetSize = 9; // log2(512)
namespace llvm {
-DIType DbgVariable::getType() const {
+DIType DbgVariable::getType() const {
DIType Ty = Var.getType();
// FIXME: isBlockByrefVariable should be reformulated in terms of complex
// addresses instead.
@@ -120,141 +116,12 @@ DIType DbgVariable::getType() const {
return Ty;
}
-//===----------------------------------------------------------------------===//
-/// DbgRange - This is used to track range of instructions with identical
-/// debug info scope.
-///
-typedef std::pair<const MachineInstr *, const MachineInstr *> DbgRange;
-
-//===----------------------------------------------------------------------===//
-/// DbgScope - This class is used to track scope information.
-///
-class DbgScope {
- DbgScope *Parent; // Parent to this scope.
- DIDescriptor Desc; // Debug info descriptor for scope.
- // Location at which this scope is inlined.
- AssertingVH<const MDNode> InlinedAtLocation;
- bool AbstractScope; // Abstract Scope
- const MachineInstr *LastInsn; // Last instruction of this scope.
- const MachineInstr *FirstInsn; // First instruction of this scope.
- unsigned DFSIn, DFSOut;
- // Scopes defined in scope. Contents not owned.
- SmallVector<DbgScope *, 4> Scopes;
- // Variables declared in scope. Contents owned.
- SmallVector<DbgVariable *, 8> Variables;
- SmallVector<DbgRange, 4> Ranges;
- // Private state for dump()
- mutable unsigned IndentLevel;
-public:
- DbgScope(DbgScope *P, DIDescriptor D, const MDNode *I = 0)
- : Parent(P), Desc(D), InlinedAtLocation(I), AbstractScope(false),
- LastInsn(0), FirstInsn(0),
- DFSIn(0), DFSOut(0), IndentLevel(0) {}
- virtual ~DbgScope();
-
- // Accessors.
- DbgScope *getParent() const { return Parent; }
- void setParent(DbgScope *P) { Parent = P; }
- DIDescriptor getDesc() const { return Desc; }
- const MDNode *getInlinedAt() const { return InlinedAtLocation; }
- const MDNode *getScopeNode() const { return Desc; }
- const SmallVector<DbgScope *, 4> &getScopes() { return Scopes; }
- const SmallVector<DbgVariable *, 8> &getDbgVariables() { return Variables; }
- const SmallVector<DbgRange, 4> &getRanges() { return Ranges; }
-
- /// openInsnRange - This scope covers instruction range starting from MI.
- void openInsnRange(const MachineInstr *MI) {
- if (!FirstInsn)
- FirstInsn = MI;
-
- if (Parent)
- Parent->openInsnRange(MI);
- }
-
- /// extendInsnRange - Extend the current instruction range covered by
- /// this scope.
- void extendInsnRange(const MachineInstr *MI) {
- assert (FirstInsn && "MI Range is not open!");
- LastInsn = MI;
- if (Parent)
- Parent->extendInsnRange(MI);
- }
-
- /// closeInsnRange - Create a range based on FirstInsn and LastInsn collected
- /// until now. This is used when a new scope is encountered while walking
- /// machine instructions.
- void closeInsnRange(DbgScope *NewScope = NULL) {
- assert (LastInsn && "Last insn missing!");
- Ranges.push_back(DbgRange(FirstInsn, LastInsn));
- FirstInsn = NULL;
- LastInsn = NULL;
- // If Parent dominates NewScope then do not close Parent's instruction
- // range.
- if (Parent && (!NewScope || !Parent->dominates(NewScope)))
- Parent->closeInsnRange(NewScope);
- }
-
- void setAbstractScope() { AbstractScope = true; }
- bool isAbstractScope() const { return AbstractScope; }
-
-  // Depth First Search support to walk and manipulate DbgScope hierarchy.
- unsigned getDFSOut() const { return DFSOut; }
- void setDFSOut(unsigned O) { DFSOut = O; }
- unsigned getDFSIn() const { return DFSIn; }
- void setDFSIn(unsigned I) { DFSIn = I; }
- bool dominates(const DbgScope *S) {
- if (S == this)
- return true;
- if (DFSIn < S->getDFSIn() && DFSOut > S->getDFSOut())
- return true;
- return false;
- }
-
- /// addScope - Add a scope to the scope.
- ///
- void addScope(DbgScope *S) { Scopes.push_back(S); }
-
- /// addVariable - Add a variable to the scope.
- ///
- void addVariable(DbgVariable *V) { Variables.push_back(V); }
-
-#ifndef NDEBUG
- void dump() const;
-#endif
-};
-
} // end llvm namespace
-#ifndef NDEBUG
-void DbgScope::dump() const {
- raw_ostream &err = dbgs();
- err.indent(IndentLevel);
- err << "DFSIn: " << DFSIn << " DFSOut: " << DFSOut << "\n";
- const MDNode *N = Desc;
- N->dump();
- if (AbstractScope)
- err << "Abstract Scope\n";
-
- IndentLevel += 2;
- if (!Scopes.empty())
- err << "Children ...\n";
- for (unsigned i = 0, e = Scopes.size(); i != e; ++i)
- if (Scopes[i] != this)
- Scopes[i]->dump();
-
- IndentLevel -= 2;
-}
-#endif
-
-DbgScope::~DbgScope() {
- for (unsigned j = 0, M = Variables.size(); j < M; ++j)
- delete Variables[j];
-}
-
DwarfDebug::DwarfDebug(AsmPrinter *A, Module *M)
: Asm(A), MMI(Asm->MMI), FirstCU(0),
AbbreviationsSet(InitAbbreviationsSetSize),
- CurrentFnDbgScope(0), PrevLabel(NULL) {
+ PrevLabel(NULL) {
NextStringPoolNumber = 0;
DwarfInfoSectionSym = DwarfAbbrevSectionSym = 0;
@@ -311,147 +178,12 @@ static StringRef getRealLinkageName(StringRef LinkageName) {
return LinkageName;
}
-/// createSubprogramDIE - Create new DIE using SP.
-DIE *DwarfDebug::createSubprogramDIE(DISubprogram SP) {
- CompileUnit *SPCU = getCompileUnit(SP);
- DIE *SPDie = SPCU->getDIE(SP);
- if (SPDie)
- return SPDie;
-
- SPDie = new DIE(dwarf::DW_TAG_subprogram);
-
- // DW_TAG_inlined_subroutine may refer to this DIE.
- SPCU->insertDIE(SP, SPDie);
-
- // Add to context owner.
- SPCU->addToContextOwner(SPDie, SP.getContext());
-
- // Add function template parameters.
- SPCU->addTemplateParams(*SPDie, SP.getTemplateParams());
-
- StringRef LinkageName = SP.getLinkageName();
- if (!LinkageName.empty())
- SPCU->addString(SPDie, dwarf::DW_AT_MIPS_linkage_name, dwarf::DW_FORM_string,
- getRealLinkageName(LinkageName));
-
- // If this DIE is going to refer declaration info using AT_specification
- // then there is no need to add other attributes.
- if (SP.getFunctionDeclaration().isSubprogram())
- return SPDie;
-
- // Constructors and operators for anonymous aggregates do not have names.
- if (!SP.getName().empty())
- SPCU->addString(SPDie, dwarf::DW_AT_name, dwarf::DW_FORM_string,
- SP.getName());
-
- SPCU->addSourceLine(SPDie, SP);
-
- if (SP.isPrototyped())
- SPCU->addUInt(SPDie, dwarf::DW_AT_prototyped, dwarf::DW_FORM_flag, 1);
-
- // Add Return Type.
- DICompositeType SPTy = SP.getType();
- DIArray Args = SPTy.getTypeArray();
- unsigned SPTag = SPTy.getTag();
-
- if (Args.getNumElements() == 0 || SPTag != dwarf::DW_TAG_subroutine_type)
- SPCU->addType(SPDie, SPTy);
- else
- SPCU->addType(SPDie, DIType(Args.getElement(0)));
-
- unsigned VK = SP.getVirtuality();
- if (VK) {
- SPCU->addUInt(SPDie, dwarf::DW_AT_virtuality, dwarf::DW_FORM_flag, VK);
- DIEBlock *Block = SPCU->getDIEBlock();
- SPCU->addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_constu);
- SPCU->addUInt(Block, 0, dwarf::DW_FORM_udata, SP.getVirtualIndex());
- SPCU->addBlock(SPDie, dwarf::DW_AT_vtable_elem_location, 0, Block);
- ContainingTypeMap.insert(std::make_pair(SPDie,
- SP.getContainingType()));
- }
-
- if (!SP.isDefinition()) {
- SPCU->addUInt(SPDie, dwarf::DW_AT_declaration, dwarf::DW_FORM_flag, 1);
-
- // Add arguments. Do not add arguments for subprogram definition. They will
- // be handled while processing variables.
- DICompositeType SPTy = SP.getType();
- DIArray Args = SPTy.getTypeArray();
- unsigned SPTag = SPTy.getTag();
-
- if (SPTag == dwarf::DW_TAG_subroutine_type)
- for (unsigned i = 1, N = Args.getNumElements(); i < N; ++i) {
- DIE *Arg = new DIE(dwarf::DW_TAG_formal_parameter);
- DIType ATy = DIType(DIType(Args.getElement(i)));
- SPCU->addType(Arg, ATy);
- if (ATy.isArtificial())
- SPCU->addUInt(Arg, dwarf::DW_AT_artificial, dwarf::DW_FORM_flag, 1);
- SPDie->addChild(Arg);
- }
- }
-
- if (SP.isArtificial())
- SPCU->addUInt(SPDie, dwarf::DW_AT_artificial, dwarf::DW_FORM_flag, 1);
-
- if (!SP.isLocalToUnit())
- SPCU->addUInt(SPDie, dwarf::DW_AT_external, dwarf::DW_FORM_flag, 1);
-
- if (SP.isOptimized())
- SPCU->addUInt(SPDie, dwarf::DW_AT_APPLE_optimized, dwarf::DW_FORM_flag, 1);
-
- if (unsigned isa = Asm->getISAEncoding()) {
- SPCU->addUInt(SPDie, dwarf::DW_AT_APPLE_isa, dwarf::DW_FORM_flag, isa);
- }
-
- return SPDie;
-}
-
-DbgScope *DwarfDebug::getOrCreateAbstractScope(const MDNode *N) {
- assert(N && "Invalid Scope encoding!");
-
- DbgScope *AScope = AbstractScopes.lookup(N);
- if (AScope)
- return AScope;
-
- DbgScope *Parent = NULL;
-
- DIDescriptor Scope(N);
- if (Scope.isLexicalBlock()) {
- DILexicalBlock DB(N);
- DIDescriptor ParentDesc = DB.getContext();
- Parent = getOrCreateAbstractScope(ParentDesc);
- }
-
- AScope = new DbgScope(Parent, DIDescriptor(N), NULL);
-
- if (Parent)
- Parent->addScope(AScope);
- AScope->setAbstractScope();
- AbstractScopes[N] = AScope;
- if (DIDescriptor(N).isSubprogram())
- AbstractScopesList.push_back(AScope);
- return AScope;
-}
-
-/// isSubprogramContext - Return true if Context is either a subprogram
-/// or another context nested inside a subprogram.
-static bool isSubprogramContext(const MDNode *Context) {
- if (!Context)
- return false;
- DIDescriptor D(Context);
- if (D.isSubprogram())
- return true;
- if (D.isType())
- return isSubprogramContext(DIType(Context).getContext());
- return false;
-}
-
/// updateSubprogramScopeDIE - Find DIE for the given subprogram and
/// attach appropriate DW_AT_low_pc and DW_AT_high_pc attributes.
/// If there are global variables in this scope then create and insert
/// DIEs for these variables.
-DIE *DwarfDebug::updateSubprogramScopeDIE(const MDNode *SPNode) {
- CompileUnit *SPCU = getCompileUnit(SPNode);
+DIE *DwarfDebug::updateSubprogramScopeDIE(CompileUnit *SPCU,
+ const MDNode *SPNode) {
DIE *SPDie = SPCU->getDIE(SPNode);
assert(SPDie && "Unable to find subprogram DIE!");
@@ -461,7 +193,7 @@ DIE *DwarfDebug::updateSubprogramScopeDIE(const MDNode *SPNode) {
if (SPDecl.isSubprogram())
// Refer function declaration directly.
SPCU->addDIEEntry(SPDie, dwarf::DW_AT_specification, dwarf::DW_FORM_ref4,
- createSubprogramDIE(SPDecl));
+ SPCU->getOrCreateSubprogramDIE(SPDecl));
else {
// There is not any need to generate specification DIE for a function
// defined at compile unit level. If a function is defined inside another
@@ -514,25 +246,26 @@ DIE *DwarfDebug::updateSubprogramScopeDIE(const MDNode *SPNode) {
/// constructLexicalScope - Construct new DW_TAG_lexical_block
/// for this scope and attach DW_AT_low_pc/DW_AT_high_pc labels.
-DIE *DwarfDebug::constructLexicalScopeDIE(DbgScope *Scope) {
+DIE *DwarfDebug::constructLexicalScopeDIE(CompileUnit *TheCU,
+ LexicalScope *Scope) {
DIE *ScopeDIE = new DIE(dwarf::DW_TAG_lexical_block);
if (Scope->isAbstractScope())
return ScopeDIE;
- const SmallVector<DbgRange, 4> &Ranges = Scope->getRanges();
+ const SmallVector<InsnRange, 4> &Ranges = Scope->getRanges();
if (Ranges.empty())
return 0;
- CompileUnit *TheCU = getCompileUnit(Scope->getScopeNode());
- SmallVector<DbgRange, 4>::const_iterator RI = Ranges.begin();
+ SmallVector<InsnRange, 4>::const_iterator RI = Ranges.begin();
if (Ranges.size() > 1) {
// .debug_range section has not been laid out yet. Emit offset in
// .debug_range as a uint, size 4, for now. emitDIE will handle
// DW_AT_ranges appropriately.
TheCU->addUInt(ScopeDIE, dwarf::DW_AT_ranges, dwarf::DW_FORM_data4,
- DebugRangeSymbols.size() * Asm->getTargetData().getPointerSize());
- for (SmallVector<DbgRange, 4>::const_iterator RI = Ranges.begin(),
+ DebugRangeSymbols.size()
+ * Asm->getTargetData().getPointerSize());
+ for (SmallVector<InsnRange, 4>::const_iterator RI = Ranges.begin(),
RE = Ranges.end(); RI != RE; ++RI) {
DebugRangeSymbols.push_back(getLabelBeforeInsn(RI->first));
DebugRangeSymbols.push_back(getLabelAfterInsn(RI->second));
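The DW_AT_ranges value written above is a byte offset into the not-yet-emitted .debug_range section: each accumulated label occupies one pointer, so the offset of a fresh range list is simply the current entry count times the target pointer size, and each list is closed with a null pair. As a sketch, where Label is an opaque stand-in for MCSymbol:

#include <cstdint>
#include <utility>
#include <vector>

struct Label;                             // opaque stand-in for MCSymbol
static std::vector<const Label*> DebugRangeSymbols;

// Sketch: append one range list; its DW_AT_ranges value is the byte
// offset at which the list will start in .debug_range.
static uint64_t appendRangeList(
    const std::vector<std::pair<const Label*, const Label*> > &Ranges,
    unsigned PointerSize) {
  uint64_t Offset = DebugRangeSymbols.size() * PointerSize;
  for (size_t i = 0; i != Ranges.size(); ++i) {
    DebugRangeSymbols.push_back(Ranges[i].first);  // label before insn
    DebugRangeSymbols.push_back(Ranges[i].second); // label after insn
  }
  DebugRangeSymbols.push_back(0);                  // end-of-list pair
  DebugRangeSymbols.push_back(0);
  return Offset;
}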
@@ -559,22 +292,29 @@ DIE *DwarfDebug::constructLexicalScopeDIE(DbgScope *Scope) {
/// constructInlinedScopeDIE - This scope represents inlined body of
/// a function. Construct DIE to represent this concrete inlined copy
/// of the function.
-DIE *DwarfDebug::constructInlinedScopeDIE(DbgScope *Scope) {
+DIE *DwarfDebug::constructInlinedScopeDIE(CompileUnit *TheCU,
+ LexicalScope *Scope) {
- const SmallVector<DbgRange, 4> &Ranges = Scope->getRanges();
+ const SmallVector<InsnRange, 4> &Ranges = Scope->getRanges();
assert (Ranges.empty() == false
- && "DbgScope does not have instruction markers!");
+ && "LexicalScope does not have instruction markers!");
- // FIXME : .debug_inlined section specification does not clearly state how
- // to emit inlined scope that is split into multiple instruction ranges.
- // For now, use first instruction range and emit low_pc/high_pc pair and
- // corresponding .debug_inlined section entry for this pair.
- SmallVector<DbgRange, 4>::const_iterator RI = Ranges.begin();
+ if (!Scope->getScopeNode())
+ return NULL;
+ DIScope DS(Scope->getScopeNode());
+ DISubprogram InlinedSP = getDISubprogram(DS);
+ DIE *OriginDIE = TheCU->getDIE(InlinedSP);
+ if (!OriginDIE) {
+ DEBUG(dbgs() << "Unable to find original DIE for inlined subprogram.");
+ return NULL;
+ }
+
+ SmallVector<InsnRange, 4>::const_iterator RI = Ranges.begin();
const MCSymbol *StartLabel = getLabelBeforeInsn(RI->first);
const MCSymbol *EndLabel = getLabelAfterInsn(RI->second);
if (StartLabel == 0 || EndLabel == 0) {
- assert (0 && "Unexpected Start and End labels for a inlined scope!");
+ assert (0 && "Unexpected Start and End labels for a inlined scope!");
return 0;
}
assert(StartLabel->isDefined() &&
@@ -582,26 +322,38 @@ DIE *DwarfDebug::constructInlinedScopeDIE(DbgScope *Scope) {
assert(EndLabel->isDefined() &&
"Invalid end label for an inlined scope!");
- if (!Scope->getScopeNode())
- return NULL;
- DIScope DS(Scope->getScopeNode());
- DISubprogram InlinedSP = getDISubprogram(DS);
- CompileUnit *TheCU = getCompileUnit(InlinedSP);
- DIE *OriginDIE = TheCU->getDIE(InlinedSP);
- if (!OriginDIE) {
- DEBUG(dbgs() << "Unable to find original DIE for inlined subprogram.");
- return NULL;
- }
DIE *ScopeDIE = new DIE(dwarf::DW_TAG_inlined_subroutine);
TheCU->addDIEEntry(ScopeDIE, dwarf::DW_AT_abstract_origin,
dwarf::DW_FORM_ref4, OriginDIE);
- TheCU->addLabel(ScopeDIE, dwarf::DW_AT_low_pc, dwarf::DW_FORM_addr, StartLabel);
- TheCU->addLabel(ScopeDIE, dwarf::DW_AT_high_pc, dwarf::DW_FORM_addr, EndLabel);
+ if (Ranges.size() > 1) {
+ // .debug_range section has not been laid out yet. Emit offset in
+ // .debug_range as a uint, size 4, for now. emitDIE will handle
+ // DW_AT_ranges appropriately.
+ TheCU->addUInt(ScopeDIE, dwarf::DW_AT_ranges, dwarf::DW_FORM_data4,
+ DebugRangeSymbols.size()
+ * Asm->getTargetData().getPointerSize());
+ for (SmallVector<InsnRange, 4>::const_iterator RI = Ranges.begin(),
+ RE = Ranges.end(); RI != RE; ++RI) {
+ DebugRangeSymbols.push_back(getLabelBeforeInsn(RI->first));
+ DebugRangeSymbols.push_back(getLabelAfterInsn(RI->second));
+ }
+ DebugRangeSymbols.push_back(NULL);
+ DebugRangeSymbols.push_back(NULL);
+ } else {
+ TheCU->addLabel(ScopeDIE, dwarf::DW_AT_low_pc, dwarf::DW_FORM_addr,
+ StartLabel);
+ TheCU->addLabel(ScopeDIE, dwarf::DW_AT_high_pc, dwarf::DW_FORM_addr,
+ EndLabel);
+ }
InlinedSubprogramDIEs.insert(OriginDIE);
// Track the start label for this inlined function.
+  // .debug_inlined section specification does not clearly state how
+ // to emit inlined scope that is split into multiple instruction ranges.
+ // For now, use first instruction range and emit low_pc/high_pc pair and
+ // corresponding .debug_inlined section entry for this pair.
DenseMap<const MDNode *, SmallVector<InlineInfoLabels, 4> >::iterator
I = InlineInfo.find(InlinedSP);
@@ -619,200 +371,51 @@ DIE *DwarfDebug::constructInlinedScopeDIE(DbgScope *Scope) {
return ScopeDIE;
}
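
The same single-versus-multiple-range split now applies to inlined scopes. A compact sketch of just that branch, using stub emitters in place of the real CompileUnit::addLabel/addUInt helpers:

#include <cstdio>
#include <utility>
#include <vector>

// Stand-ins only; real code attaches attributes to a DIE via CompileUnit.
struct DIE {};
typedef std::pair<const char *, const char *> Range;

static void addLabel(DIE *, const char *Attr, const char *L) {
  std::printf("%s = %s\n", Attr, L);
}
static void addRangesOffset(DIE *, unsigned Offset) {
  std::printf("DW_AT_ranges = +%u\n", Offset);
}

// A single contiguous range is described exactly by a low_pc/high_pc pair;
// a scope split into several ranges must point into .debug_ranges instead.
void attachPCInfo(DIE *Scope, const std::vector<Range> &Ranges,
                  unsigned RangeSectionOffset) {
  if (Ranges.size() == 1) {
    addLabel(Scope, "DW_AT_low_pc", Ranges.front().first);
    addLabel(Scope, "DW_AT_high_pc", Ranges.front().second);
  } else {
    addRangesOffset(Scope, RangeSectionOffset);
  }
}
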
-/// isUnsignedDIType - Return true if type encoding is unsigned.
-static bool isUnsignedDIType(DIType Ty) {
- DIDerivedType DTy(Ty);
- if (DTy.Verify())
- return isUnsignedDIType(DTy.getTypeDerivedFrom());
-
- DIBasicType BTy(Ty);
- if (BTy.Verify()) {
- unsigned Encoding = BTy.getEncoding();
- if (Encoding == dwarf::DW_ATE_unsigned ||
- Encoding == dwarf::DW_ATE_unsigned_char)
- return true;
- }
- return false;
-}
-
-/// constructVariableDIE - Construct a DIE for the given DbgVariable.
-DIE *DwarfDebug::constructVariableDIE(DbgVariable *DV, DbgScope *Scope) {
- StringRef Name = DV->getName();
- if (Name.empty())
- return NULL;
-
- // Translate tag to proper Dwarf tag. The result variable is dropped for
- // now.
- unsigned Tag;
- switch (DV->getTag()) {
- case dwarf::DW_TAG_return_variable:
- return NULL;
- case dwarf::DW_TAG_arg_variable:
- Tag = dwarf::DW_TAG_formal_parameter;
- break;
- case dwarf::DW_TAG_auto_variable: // fall thru
- default:
- Tag = dwarf::DW_TAG_variable;
- break;
- }
-
- // Define variable debug information entry.
- DIE *VariableDie = new DIE(Tag);
- CompileUnit *VariableCU = getCompileUnit(DV->getVariable());
- DIE *AbsDIE = NULL;
- DenseMap<const DbgVariable *, const DbgVariable *>::iterator
- V2AVI = VarToAbstractVarMap.find(DV);
- if (V2AVI != VarToAbstractVarMap.end())
- AbsDIE = V2AVI->second->getDIE();
-
- if (AbsDIE)
- VariableCU->addDIEEntry(VariableDie, dwarf::DW_AT_abstract_origin,
- dwarf::DW_FORM_ref4, AbsDIE);
- else {
- VariableCU->addString(VariableDie, dwarf::DW_AT_name, dwarf::DW_FORM_string,
- Name);
- VariableCU->addSourceLine(VariableDie, DV->getVariable());
-
- // Add variable type.
- VariableCU->addType(VariableDie, DV->getType());
- }
-
- if (Tag == dwarf::DW_TAG_formal_parameter && DV->getType().isArtificial())
- VariableCU->addUInt(VariableDie, dwarf::DW_AT_artificial,
- dwarf::DW_FORM_flag, 1);
- else if (DIVariable(DV->getVariable()).isArtificial())
- VariableCU->addUInt(VariableDie, dwarf::DW_AT_artificial,
- dwarf::DW_FORM_flag, 1);
-
- if (Scope->isAbstractScope()) {
- DV->setDIE(VariableDie);
- return VariableDie;
- }
-
- // Add variable address.
-
- unsigned Offset = DV->getDotDebugLocOffset();
- if (Offset != ~0U) {
- VariableCU->addLabel(VariableDie, dwarf::DW_AT_location, dwarf::DW_FORM_data4,
- Asm->GetTempSymbol("debug_loc", Offset));
- DV->setDIE(VariableDie);
- UseDotDebugLocEntry.insert(VariableDie);
- return VariableDie;
- }
-
- // Check if variable is described by a DBG_VALUE instruction.
- DenseMap<const DbgVariable *, const MachineInstr *>::iterator DVI =
- DbgVariableToDbgInstMap.find(DV);
- if (DVI != DbgVariableToDbgInstMap.end()) {
- const MachineInstr *DVInsn = DVI->second;
- bool updated = false;
- // FIXME : Handle getNumOperands != 3
- if (DVInsn->getNumOperands() == 3) {
- if (DVInsn->getOperand(0).isReg()) {
- const MachineOperand RegOp = DVInsn->getOperand(0);
- const TargetRegisterInfo *TRI = Asm->TM.getRegisterInfo();
- if (DVInsn->getOperand(1).isImm() &&
- TRI->getFrameRegister(*Asm->MF) == RegOp.getReg()) {
- unsigned FrameReg = 0;
- const TargetFrameLowering *TFI = Asm->TM.getFrameLowering();
- int Offset =
- TFI->getFrameIndexReference(*Asm->MF,
- DVInsn->getOperand(1).getImm(),
- FrameReg);
- MachineLocation Location(FrameReg, Offset);
- VariableCU->addVariableAddress(DV, VariableDie, Location);
-
- } else if (RegOp.getReg())
- VariableCU->addVariableAddress(DV, VariableDie,
- MachineLocation(RegOp.getReg()));
- updated = true;
- }
- else if (DVInsn->getOperand(0).isImm())
- updated =
- VariableCU->addConstantValue(VariableDie, DVInsn->getOperand(0),
- DV->getType());
- else if (DVInsn->getOperand(0).isFPImm())
- updated =
- VariableCU->addConstantFPValue(VariableDie, DVInsn->getOperand(0));
- else if (DVInsn->getOperand(0).isCImm())
- updated =
- VariableCU->addConstantValue(VariableDie,
- DVInsn->getOperand(0).getCImm(),
- isUnsignedDIType(DV->getType()));
- } else {
- VariableCU->addVariableAddress(DV, VariableDie,
- Asm->getDebugValueLocation(DVInsn));
- updated = true;
- }
- if (!updated) {
- // If variableDie is not updated then DBG_VALUE instruction does not
- // have valid variable info.
- delete VariableDie;
- return NULL;
- }
- DV->setDIE(VariableDie);
- return VariableDie;
- }
-
- // .. else use frame index, if available.
- int FI = 0;
- if (findVariableFrameIndex(DV, &FI)) {
- unsigned FrameReg = 0;
- const TargetFrameLowering *TFI = Asm->TM.getFrameLowering();
- int Offset =
- TFI->getFrameIndexReference(*Asm->MF, FI, FrameReg);
- MachineLocation Location(FrameReg, Offset);
- VariableCU->addVariableAddress(DV, VariableDie, Location);
- }
-
- DV->setDIE(VariableDie);
- return VariableDie;
-
-}
-
/// constructScopeDIE - Construct a DIE for this scope.
-DIE *DwarfDebug::constructScopeDIE(DbgScope *Scope) {
+DIE *DwarfDebug::constructScopeDIE(CompileUnit *TheCU, LexicalScope *Scope) {
if (!Scope || !Scope->getScopeNode())
return NULL;
SmallVector <DIE *, 8> Children;
// Collect arguments for current function.
- if (Scope == CurrentFnDbgScope)
+ if (LScopes.isCurrentFunctionScope(Scope))
for (unsigned i = 0, N = CurrentFnArguments.size(); i < N; ++i)
if (DbgVariable *ArgDV = CurrentFnArguments[i])
- if (DIE *Arg = constructVariableDIE(ArgDV, Scope))
+ if (DIE *Arg =
+ TheCU->constructVariableDIE(ArgDV, Scope->isAbstractScope()))
Children.push_back(Arg);
- // Collect lexical scope childrens first.
- const SmallVector<DbgVariable *, 8> &Variables = Scope->getDbgVariables();
+ // Collect lexical scope children first.
+ const SmallVector<DbgVariable *, 8> &Variables = ScopeVariables.lookup(Scope);
for (unsigned i = 0, N = Variables.size(); i < N; ++i)
- if (DIE *Variable = constructVariableDIE(Variables[i], Scope))
+ if (DIE *Variable =
+ TheCU->constructVariableDIE(Variables[i], Scope->isAbstractScope()))
Children.push_back(Variable);
- const SmallVector<DbgScope *, 4> &Scopes = Scope->getScopes();
+ const SmallVector<LexicalScope *, 4> &Scopes = Scope->getChildren();
for (unsigned j = 0, M = Scopes.size(); j < M; ++j)
- if (DIE *Nested = constructScopeDIE(Scopes[j]))
+ if (DIE *Nested = constructScopeDIE(TheCU, Scopes[j]))
Children.push_back(Nested);
DIScope DS(Scope->getScopeNode());
DIE *ScopeDIE = NULL;
if (Scope->getInlinedAt())
- ScopeDIE = constructInlinedScopeDIE(Scope);
+ ScopeDIE = constructInlinedScopeDIE(TheCU, Scope);
else if (DS.isSubprogram()) {
ProcessedSPNodes.insert(DS);
if (Scope->isAbstractScope()) {
- ScopeDIE = getCompileUnit(DS)->getDIE(DS);
+ ScopeDIE = TheCU->getDIE(DS);
// Note down abstract DIE.
if (ScopeDIE)
AbstractSPDies.insert(std::make_pair(DS, ScopeDIE));
}
else
- ScopeDIE = updateSubprogramScopeDIE(DS);
+ ScopeDIE = updateSubprogramScopeDIE(TheCU, DS);
}
else {
// There is no need to emit empty lexical block DIE.
if (Children.empty())
return NULL;
- ScopeDIE = constructLexicalScopeDIE(Scope);
+ ScopeDIE = constructLexicalScopeDIE(TheCU, Scope);
}
if (!ScopeDIE) return NULL;
@@ -823,7 +426,7 @@ DIE *DwarfDebug::constructScopeDIE(DbgScope *Scope) {
ScopeDIE->addChild(*I);
if (DS.isSubprogram())
- getCompileUnit(DS)->addPubTypes(DISubprogram(DS));
+ TheCU->addPubTypes(DISubprogram(DS));
return ScopeDIE;
}
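
constructScopeDIE is the recursion that ties these pieces together: variable DIEs first, nested scopes second, and an empty lexical block is dropped entirely. A simplified model, with stand-in types rather than the real DIE ownership scheme:

#include <cstddef>
#include <memory>
#include <utility>
#include <vector>

struct ScopeNode {
  bool IsLexicalBlock;
  std::vector<int> Variables;            // stand-in for DbgVariable*
  std::vector<ScopeNode> Children;
};

struct DIE {
  std::vector<std::unique_ptr<DIE> > Kids;
};

std::unique_ptr<DIE> constructScopeDIE(const ScopeNode &S) {
  std::vector<std::unique_ptr<DIE> > Kids;
  for (std::size_t i = 0; i != S.Variables.size(); ++i)
    Kids.push_back(std::unique_ptr<DIE>(new DIE()));   // variable DIE
  for (std::size_t i = 0; i != S.Children.size(); ++i)
    if (std::unique_ptr<DIE> Nested = constructScopeDIE(S.Children[i]))
      Kids.push_back(std::move(Nested));               // depth-first recursion
  // An empty lexical block adds no information, so it is pruned -- the
  // same "Children.empty()" early return as in the hunk above.
  if (S.IsLexicalBlock && Kids.empty())
    return std::unique_ptr<DIE>();
  std::unique_ptr<DIE> ScopeDIE(new DIE());
  ScopeDIE->Kids = std::move(Kids);
  return ScopeDIE;
}
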
@@ -862,7 +465,7 @@ unsigned DwarfDebug::GetOrCreateSourceID(StringRef FileName,
/// constructCompileUnit - Create new CompileUnit for the given
/// metadata node with tag DW_TAG_compile_unit.
-void DwarfDebug::constructCompileUnit(const MDNode *N) {
+CompileUnit *DwarfDebug::constructCompileUnit(const MDNode *N) {
DICompileUnit DIUnit(N);
StringRef FN = DIUnit.getFilename();
StringRef Dir = DIUnit.getDirectory();
@@ -893,7 +496,8 @@ void DwarfDebug::constructCompileUnit(const MDNode *N) {
StringRef Flags = DIUnit.getFlags();
if (!Flags.empty())
- NewCU->addString(Die, dwarf::DW_AT_APPLE_flags, dwarf::DW_FORM_string, Flags);
+ NewCU->addString(Die, dwarf::DW_AT_APPLE_flags, dwarf::DW_FORM_string,
+ Flags);
unsigned RVer = DIUnit.getRunTimeVersion();
if (RVer)
@@ -903,159 +507,19 @@ void DwarfDebug::constructCompileUnit(const MDNode *N) {
if (!FirstCU)
FirstCU = NewCU;
CUMap.insert(std::make_pair(N, NewCU));
-}
-
-/// getCompielUnit - Get CompileUnit DIE.
-CompileUnit *DwarfDebug::getCompileUnit(const MDNode *N) const {
- assert (N && "Invalid DwarfDebug::getCompileUnit argument!");
- DIDescriptor D(N);
- const MDNode *CUNode = NULL;
- if (D.isCompileUnit())
- CUNode = N;
- else if (D.isSubprogram())
- CUNode = DISubprogram(N).getCompileUnit();
- else if (D.isType())
- CUNode = DIType(N).getCompileUnit();
- else if (D.isGlobalVariable())
- CUNode = DIGlobalVariable(N).getCompileUnit();
- else if (D.isVariable())
- CUNode = DIVariable(N).getCompileUnit();
- else if (D.isNameSpace())
- CUNode = DINameSpace(N).getCompileUnit();
- else if (D.isFile())
- CUNode = DIFile(N).getCompileUnit();
- else
- return FirstCU;
-
- DenseMap<const MDNode *, CompileUnit *>::const_iterator I
- = CUMap.find(CUNode);
- if (I == CUMap.end())
- return FirstCU;
- return I->second;
-}
-
-// Return const exprssion if value is a GEP to access merged global
-// constant. e.g.
-// i8* getelementptr ({ i8, i8, i8, i8 }* @_MergedGlobals, i32 0, i32 0)
-static const ConstantExpr *getMergedGlobalExpr(const Value *V) {
- const ConstantExpr *CE = dyn_cast_or_null<ConstantExpr>(V);
- if (!CE || CE->getNumOperands() != 3 ||
- CE->getOpcode() != Instruction::GetElementPtr)
- return NULL;
-
- // First operand points to a global value.
- if (!isa<GlobalValue>(CE->getOperand(0)))
- return NULL;
-
- // Second operand is zero.
- const ConstantInt *CI =
- dyn_cast_or_null<ConstantInt>(CE->getOperand(1));
- if (!CI || !CI->isZero())
- return NULL;
-
- // Third operand is offset.
- if (!isa<ConstantInt>(CE->getOperand(2)))
- return NULL;
-
- return CE;
-}
-
-/// constructGlobalVariableDIE - Construct global variable DIE.
-void DwarfDebug::constructGlobalVariableDIE(const MDNode *N) {
- DIGlobalVariable GV(N);
-
- // If debug information is malformed then ignore it.
- if (GV.Verify() == false)
- return;
-
- // Check for pre-existence.
- CompileUnit *TheCU = getCompileUnit(N);
- if (TheCU->getDIE(GV))
- return;
-
- DIType GTy = GV.getType();
- DIE *VariableDIE = new DIE(GV.getTag());
-
- bool isGlobalVariable = GV.getGlobal() != NULL;
-
- // Add name.
- TheCU->addString(VariableDIE, dwarf::DW_AT_name, dwarf::DW_FORM_string,
- GV.getDisplayName());
- StringRef LinkageName = GV.getLinkageName();
- if (!LinkageName.empty() && isGlobalVariable)
- TheCU->addString(VariableDIE, dwarf::DW_AT_MIPS_linkage_name,
- dwarf::DW_FORM_string,
- getRealLinkageName(LinkageName));
- // Add type.
- TheCU->addType(VariableDIE, GTy);
-
- // Add scoping info.
- if (!GV.isLocalToUnit()) {
- TheCU->addUInt(VariableDIE, dwarf::DW_AT_external, dwarf::DW_FORM_flag, 1);
- // Expose as global.
- TheCU->addGlobal(GV.getName(), VariableDIE);
- }
- // Add line number info.
- TheCU->addSourceLine(VariableDIE, GV);
- // Add to map.
- TheCU->insertDIE(N, VariableDIE);
- // Add to context owner.
- DIDescriptor GVContext = GV.getContext();
- TheCU->addToContextOwner(VariableDIE, GVContext);
- // Add location.
- if (isGlobalVariable) {
- DIEBlock *Block = new (DIEValueAllocator) DIEBlock();
- TheCU->addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_addr);
- TheCU->addLabel(Block, 0, dwarf::DW_FORM_udata,
- Asm->Mang->getSymbol(GV.getGlobal()));
- // Do not create specification DIE if context is either compile unit
- // or a subprogram.
- if (GV.isDefinition() && !GVContext.isCompileUnit() &&
- !GVContext.isFile() && !isSubprogramContext(GVContext)) {
- // Create specification DIE.
- DIE *VariableSpecDIE = new DIE(dwarf::DW_TAG_variable);
- TheCU->addDIEEntry(VariableSpecDIE, dwarf::DW_AT_specification,
- dwarf::DW_FORM_ref4, VariableDIE);
- TheCU->addBlock(VariableSpecDIE, dwarf::DW_AT_location, 0, Block);
- TheCU->addUInt(VariableDIE, dwarf::DW_AT_declaration, dwarf::DW_FORM_flag, 1);
- TheCU->addDie(VariableSpecDIE);
- } else {
- TheCU->addBlock(VariableDIE, dwarf::DW_AT_location, 0, Block);
- }
- } else if (const ConstantInt *CI =
- dyn_cast_or_null<ConstantInt>(GV.getConstant()))
- TheCU->addConstantValue(VariableDIE, CI, isUnsignedDIType(GTy));
- else if (const ConstantExpr *CE = getMergedGlobalExpr(N->getOperand(11))) {
- // GV is a merged global.
- DIEBlock *Block = new (DIEValueAllocator) DIEBlock();
- TheCU->addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_addr);
- TheCU->addLabel(Block, 0, dwarf::DW_FORM_udata,
- Asm->Mang->getSymbol(cast<GlobalValue>(CE->getOperand(0))));
- ConstantInt *CII = cast<ConstantInt>(CE->getOperand(2));
- TheCU->addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_constu);
- TheCU->addUInt(Block, 0, dwarf::DW_FORM_udata, CII->getZExtValue());
- TheCU->addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_plus);
- TheCU->addBlock(VariableDIE, dwarf::DW_AT_location, 0, Block);
- }
-
- return;
+ return NewCU;
}
/// construct SubprogramDIE - Construct subprogram DIE.
-void DwarfDebug::constructSubprogramDIE(const MDNode *N) {
+void DwarfDebug::constructSubprogramDIE(CompileUnit *TheCU,
+ const MDNode *N) {
DISubprogram SP(N);
-
- // Check for pre-existence.
- CompileUnit *TheCU = getCompileUnit(N);
- if (TheCU->getDIE(N))
- return;
-
if (!SP.isDefinition())
// This is a method declaration which will be handled while constructing
// class type.
return;
- DIE *SubprogramDie = createSubprogramDIE(SP);
+ DIE *SubprogramDie = TheCU->getOrCreateSubprogramDIE(SP);
// Add to map.
TheCU->insertDIE(N, SubprogramDie);
@@ -1066,71 +530,115 @@ void DwarfDebug::constructSubprogramDIE(const MDNode *N) {
// Expose as global.
TheCU->addGlobal(SP.getName(), SubprogramDie);
+ SPMap[N] = TheCU;
return;
}
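
The new SPMap entry recorded above is what later lets endFunction recover the owning unit without the deleted getCompileUnit() walk. As a sketch, assuming a string key in place of the subprogram MDNode*:

#include <cassert>
#include <map>
#include <string>

// Remember each subprogram's owning compile unit when its DIE is created,
// so later passes can look the unit up directly instead of re-deriving it
// from the debug metadata graph.
struct CompileUnit;
static std::map<std::string, CompileUnit *> SPMap;

void noteSubprogram(const std::string &SP, CompileUnit *CU) {
  SPMap[SP] = CU;                        // mirrors "SPMap[N] = TheCU" above
}

CompileUnit *unitForSubprogram(const std::string &SP) {
  std::map<std::string, CompileUnit *>::const_iterator I = SPMap.find(SP);
  assert(I != SPMap.end() && "Unable to find compile unit!");
  return I->second;
}
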
+/// collectInfoFromNamedMDNodes - Collect debug info from named mdnodes such
+/// as llvm.dbg.enum and llvm.dbg.ty
+void DwarfDebug::collectInfoFromNamedMDNodes(Module *M) {
+ if (NamedMDNode *NMD = M->getNamedMetadata("llvm.dbg.sp"))
+ for (unsigned i = 0, e = NMD->getNumOperands(); i != e; ++i) {
+ const MDNode *N = NMD->getOperand(i);
+ if (CompileUnit *CU = CUMap.lookup(DISubprogram(N).getCompileUnit()))
+ constructSubprogramDIE(CU, N);
+ }
+
+ if (NamedMDNode *NMD = M->getNamedMetadata("llvm.dbg.gv"))
+ for (unsigned i = 0, e = NMD->getNumOperands(); i != e; ++i) {
+ const MDNode *N = NMD->getOperand(i);
+ if (CompileUnit *CU = CUMap.lookup(DIGlobalVariable(N).getCompileUnit()))
+ CU->createGlobalVariableDIE(N);
+ }
+
+ if (NamedMDNode *NMD = M->getNamedMetadata("llvm.dbg.enum"))
+ for (unsigned i = 0, e = NMD->getNumOperands(); i != e; ++i) {
+ DIType Ty(NMD->getOperand(i));
+ if (CompileUnit *CU = CUMap.lookup(Ty.getCompileUnit()))
+ CU->getOrCreateTypeDIE(Ty);
+ }
+
+ if (NamedMDNode *NMD = M->getNamedMetadata("llvm.dbg.ty"))
+ for (unsigned i = 0, e = NMD->getNumOperands(); i != e; ++i) {
+ DIType Ty(NMD->getOperand(i));
+ if (CompileUnit *CU = CUMap.lookup(Ty.getCompileUnit()))
+ CU->getOrCreateTypeDIE(Ty);
+ }
+}
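
collectInfoFromNamedMDNodes repeats one pattern four times: find a named list, and for each entry act only when its compile unit is already in CUMap. Condensed into a sketch with stand-in types:

#include <cstddef>
#include <map>
#include <string>
#include <vector>

struct Entry { std::string CUName; };
typedef std::map<std::string, std::vector<Entry> > NamedLists;
typedef std::map<std::string, int> UnitMap;  // int stands in for CompileUnit*

void collectFromNamedLists(const NamedLists &Module, const UnitMap &CUMap) {
  static const char *const Lists[] = {"llvm.dbg.sp", "llvm.dbg.gv",
                                      "llvm.dbg.enum", "llvm.dbg.ty"};
  for (std::size_t i = 0; i != sizeof(Lists) / sizeof(Lists[0]); ++i) {
    NamedLists::const_iterator LI = Module.find(Lists[i]);
    if (LI == Module.end())
      continue;                     // this anchor is absent from the module
    for (std::size_t j = 0; j != LI->second.size(); ++j) {
      const Entry &E = LI->second[j];
      if (!CUMap.count(E.CUName))
        continue;                   // unit never constructed: skip the entry
      // ... create the subprogram/global/type DIE in that unit ...
    }
  }
}
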
+
+/// collectLegacyDebugInfo - Collect debug info using DebugInfoFinder.
+/// FIXME - Remove this when dragon-egg and llvm-gcc switch to DIBuilder.
+bool DwarfDebug::collectLegacyDebugInfo(Module *M) {
+ DebugInfoFinder DbgFinder;
+ DbgFinder.processModule(*M);
+
+ bool HasDebugInfo = false;
+ // Scan all the compile-units to see if there are any marked as the main
+ // unit. If not, we do not generate debug info.
+ for (DebugInfoFinder::iterator I = DbgFinder.compile_unit_begin(),
+ E = DbgFinder.compile_unit_end(); I != E; ++I) {
+ if (DICompileUnit(*I).isMain()) {
+ HasDebugInfo = true;
+ break;
+ }
+ }
+ if (!HasDebugInfo) return false;
+
+ // Create all the compile unit DIEs.
+ for (DebugInfoFinder::iterator I = DbgFinder.compile_unit_begin(),
+ E = DbgFinder.compile_unit_end(); I != E; ++I)
+ constructCompileUnit(*I);
+
+ // Create DIEs for each global variable.
+ for (DebugInfoFinder::iterator I = DbgFinder.global_variable_begin(),
+ E = DbgFinder.global_variable_end(); I != E; ++I) {
+ const MDNode *N = *I;
+ if (CompileUnit *CU = CUMap.lookup(DIGlobalVariable(N).getCompileUnit()))
+ CU->createGlobalVariableDIE(N);
+ }
+
+ // Create DIEs for each subprogram.
+ for (DebugInfoFinder::iterator I = DbgFinder.subprogram_begin(),
+ E = DbgFinder.subprogram_end(); I != E; ++I) {
+ const MDNode *N = *I;
+ if (CompileUnit *CU = CUMap.lookup(DISubprogram(N).getCompileUnit()))
+ constructSubprogramDIE(CU, N);
+ }
+
+ return HasDebugInfo;
+}
+
/// beginModule - Emit all Dwarf sections that should come prior to the
/// content. Create global DIEs and emit initial debug info sections.
-/// This is inovked by the target AsmPrinter.
+/// This is invoked by the target AsmPrinter.
void DwarfDebug::beginModule(Module *M) {
if (DisableDebugInfoPrinting)
return;
- // If module has named metadata anchors then use them, otherwise scan the module
- // using debug info finder to collect debug info.
+ // If module has named metadata anchors then use them, otherwise scan the
+ // module using debug info finder to collect debug info.
NamedMDNode *CU_Nodes = M->getNamedMetadata("llvm.dbg.cu");
if (CU_Nodes) {
-
- NamedMDNode *GV_Nodes = M->getNamedMetadata("llvm.dbg.gv");
- NamedMDNode *SP_Nodes = M->getNamedMetadata("llvm.dbg.sp");
- if (!GV_Nodes && !SP_Nodes)
- // If there are not any global variables or any functions then
- // there is not any debug info in this module.
- return;
-
- for (unsigned i = 0, e = CU_Nodes->getNumOperands(); i != e; ++i)
- constructCompileUnit(CU_Nodes->getOperand(i));
-
- if (GV_Nodes)
- for (unsigned i = 0, e = GV_Nodes->getNumOperands(); i != e; ++i)
- constructGlobalVariableDIE(GV_Nodes->getOperand(i));
-
- if (SP_Nodes)
- for (unsigned i = 0, e = SP_Nodes->getNumOperands(); i != e; ++i)
- constructSubprogramDIE(SP_Nodes->getOperand(i));
-
- } else {
-
- DebugInfoFinder DbgFinder;
- DbgFinder.processModule(*M);
-
- bool HasDebugInfo = false;
- // Scan all the compile-units to see if there are any marked as the main unit.
- // if not, we do not generate debug info.
- for (DebugInfoFinder::iterator I = DbgFinder.compile_unit_begin(),
- E = DbgFinder.compile_unit_end(); I != E; ++I) {
- if (DICompileUnit(*I).isMain()) {
- HasDebugInfo = true;
- break;
- }
+ for (unsigned i = 0, e = CU_Nodes->getNumOperands(); i != e; ++i) {
+ DICompileUnit CUNode(CU_Nodes->getOperand(i));
+ CompileUnit *CU = constructCompileUnit(CUNode);
+ DIArray GVs = CUNode.getGlobalVariables();
+ for (unsigned i = 0, e = GVs.getNumElements(); i != e; ++i)
+ CU->createGlobalVariableDIE(GVs.getElement(i));
+ DIArray SPs = CUNode.getSubprograms();
+ for (unsigned i = 0, e = SPs.getNumElements(); i != e; ++i)
+ constructSubprogramDIE(CU, SPs.getElement(i));
+ DIArray EnumTypes = CUNode.getEnumTypes();
+ for (unsigned i = 0, e = EnumTypes.getNumElements(); i != e; ++i)
+ CU->getOrCreateTypeDIE(EnumTypes.getElement(i));
+ DIArray RetainedTypes = CUNode.getRetainedTypes();
+ for (unsigned i = 0, e = RetainedTypes.getNumElements(); i != e; ++i)
+ CU->getOrCreateTypeDIE(RetainedTypes.getElement(i));
}
- if (!HasDebugInfo) return;
-
- // Create all the compile unit DIEs.
- for (DebugInfoFinder::iterator I = DbgFinder.compile_unit_begin(),
- E = DbgFinder.compile_unit_end(); I != E; ++I)
- constructCompileUnit(*I);
-
- // Create DIEs for each global variable.
- for (DebugInfoFinder::iterator I = DbgFinder.global_variable_begin(),
- E = DbgFinder.global_variable_end(); I != E; ++I)
- constructGlobalVariableDIE(*I);
-
- // Create DIEs for each subprogram.
- for (DebugInfoFinder::iterator I = DbgFinder.subprogram_begin(),
- E = DbgFinder.subprogram_end(); I != E; ++I)
- constructSubprogramDIE(*I);
- }
+ } else if (!collectLegacyDebugInfo(M))
+ return;
+
+ collectInfoFromNamedMDNodes(M);
// Tell MMI that we have debug info.
MMI->setDebugInfoAvailability(true);
@@ -1138,19 +646,6 @@ void DwarfDebug::beginModule(Module *M) {
// Emit initial sections.
EmitSectionLabels();
- //getOrCreateTypeDIE
- if (NamedMDNode *NMD = M->getNamedMetadata("llvm.dbg.enum"))
- for (unsigned i = 0, e = NMD->getNumOperands(); i != e; ++i) {
- DIType Ty(NMD->getOperand(i));
- getCompileUnit(Ty)->getOrCreateTypeDIE(Ty);
- }
-
- if (NamedMDNode *NMD = M->getNamedMetadata("llvm.dbg.ty"))
- for (unsigned i = 0, e = NMD->getNumOperands(); i != e; ++i) {
- DIType Ty(NMD->getOperand(i));
- getCompileUnit(Ty)->getOrCreateTypeDIE(Ty);
- }
-
// Prime section data.
SectionMap.insert(Asm->getObjFileLowering().getTextSection());
}
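
beginModule now has a three-stage shape: prefer the llvm.dbg.cu anchors, fall back to the legacy DebugInfoFinder scan, and in either successful case finish with the named-list sweep. A stub-driven sketch of that control flow (the real predicates read module-level metadata):

#include <cstdio>

static bool haveCUNodes() { return true; }              // "llvm.dbg.cu" present?
static bool collectLegacyDebugInfo() { return false; }  // DebugInfoFinder path
static void collectFromCUNodes() { std::puts("CU-node path"); }
static void collectInfoFromNamedMDNodes() { std::puts("named-list sweep"); }

void beginModule() {
  if (haveCUNodes())
    collectFromCUNodes();               // preferred: DIBuilder-style anchors
  else if (!collectLegacyDebugInfo())
    return;                             // no debug info found at all
  collectInfoFromNamedMDNodes();        // runs on both successful paths
}
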
@@ -1160,38 +655,38 @@ void DwarfDebug::beginModule(Module *M) {
void DwarfDebug::endModule() {
if (!FirstCU) return;
const Module *M = MMI->getModule();
- DenseMap<const MDNode *, DbgScope *> DeadFnScopeMap;
- if (NamedMDNode *AllSPs = M->getNamedMetadata("llvm.dbg.sp")) {
- for (unsigned SI = 0, SE = AllSPs->getNumOperands(); SI != SE; ++SI) {
- if (ProcessedSPNodes.count(AllSPs->getOperand(SI)) != 0) continue;
- DISubprogram SP(AllSPs->getOperand(SI));
- if (!SP.Verify()) continue;
-
- // Collect info for variables that were optimized out.
- if (!SP.isDefinition()) continue;
- StringRef FName = SP.getLinkageName();
- if (FName.empty())
- FName = SP.getName();
- NamedMDNode *NMD = getFnSpecificMDNode(*(MMI->getModule()), FName);
- if (!NMD) continue;
- unsigned E = NMD->getNumOperands();
- if (!E) continue;
- DbgScope *Scope = new DbgScope(NULL, DIDescriptor(SP), NULL);
- DeadFnScopeMap[SP] = Scope;
- for (unsigned I = 0; I != E; ++I) {
- DIVariable DV(NMD->getOperand(I));
- if (!DV.Verify()) continue;
- Scope->addVariable(new DbgVariable(DV));
- }
+ DenseMap<const MDNode *, LexicalScope *> DeadFnScopeMap;
- // Construct subprogram DIE and add variables DIEs.
- constructSubprogramDIE(SP);
- DIE *ScopeDIE = getCompileUnit(SP)->getDIE(SP);
- const SmallVector<DbgVariable *, 8> &Variables = Scope->getDbgVariables();
- for (unsigned i = 0, N = Variables.size(); i < N; ++i) {
- DIE *VariableDIE = constructVariableDIE(Variables[i], Scope);
- if (VariableDIE)
- ScopeDIE->addChild(VariableDIE);
+ // Collect info for variables that were optimized out.
+ if (NamedMDNode *CU_Nodes = M->getNamedMetadata("llvm.dbg.cu")) {
+ for (unsigned i = 0, e = CU_Nodes->getNumOperands(); i != e; ++i) {
+ DICompileUnit TheCU(CU_Nodes->getOperand(i));
+ DIArray Subprograms = TheCU.getSubprograms();
+ for (unsigned i = 0, e = Subprograms.getNumElements(); i != e; ++i) {
+ DISubprogram SP(Subprograms.getElement(i));
+ if (ProcessedSPNodes.count(SP) != 0) continue;
+ if (!SP.Verify()) continue;
+ if (!SP.isDefinition()) continue;
+ DIArray Variables = SP.getVariables();
+ if (Variables.getNumElements() == 0) continue;
+
+ LexicalScope *Scope =
+ new LexicalScope(NULL, DIDescriptor(SP), NULL, false);
+ DeadFnScopeMap[SP] = Scope;
+
+ // Construct subprogram DIE and add variables DIEs.
+ CompileUnit *SPCU = CUMap.lookup(TheCU);
+ assert (SPCU && "Unable to find Compile Unit!");
+ constructSubprogramDIE(SPCU, SP);
+ DIE *ScopeDIE = SPCU->getDIE(SP);
+ for (unsigned vi = 0, ve = Variables.getNumElements(); vi != ve; ++vi) {
+ DIVariable DV(Variables.getElement(vi));
+ if (!DV.Verify()) continue;
+ DbgVariable *NewVar = new DbgVariable(DV, NULL);
+ if (DIE *VariableDIE =
+ SPCU->constructVariableDIE(NewVar, Scope->isAbstractScope()))
+ ScopeDIE->addChild(VariableDIE);
+ }
}
}
}
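
The rewritten endModule block above recovers variables of functions that never reached codegen. Its essential filter chain, modeled with stand-in types:

#include <cstddef>
#include <set>
#include <string>
#include <vector>

// A subprogram never seen during codegen (e.g. optimized away entirely)
// still gets DIEs for its declared variables, so the debugger can report
// them as optimized out.
struct SPInfo {
  std::string Name;
  bool IsDefinition;
  std::vector<std::string> Variables;
};

void emitDeadFunctionVariables(const std::vector<SPInfo> &Subprograms,
                               const std::set<std::string> &Processed) {
  for (std::size_t i = 0; i != Subprograms.size(); ++i) {
    const SPInfo &SP = Subprograms[i];
    if (Processed.count(SP.Name))
      continue;                         // already emitted with its function
    if (!SP.IsDefinition || SP.Variables.empty())
      continue;                         // nothing to describe
    for (std::size_t v = 0; v != SP.Variables.size(); ++v) {
      // ... construct a variable DIE under a fresh abstract scope ...
    }
  }
}
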
@@ -1203,15 +698,12 @@ void DwarfDebug::endModule() {
FirstCU->addUInt(ISP, dwarf::DW_AT_inline, 0, dwarf::DW_INL_inlined);
}
- for (DenseMap<DIE *, const MDNode *>::iterator CI = ContainingTypeMap.begin(),
- CE = ContainingTypeMap.end(); CI != CE; ++CI) {
- DIE *SPDie = CI->first;
- const MDNode *N = dyn_cast_or_null<MDNode>(CI->second);
- if (!N) continue;
- DIE *NDie = getCompileUnit(N)->getDIE(N);
- if (!NDie) continue;
- getCompileUnit(N)->addDIEEntry(SPDie, dwarf::DW_AT_containing_type,
- dwarf::DW_FORM_ref4, NDie);
+ // Emit DW_AT_containing_type attribute to connect types with their
+ // vtable holding type.
+ for (DenseMap<const MDNode *, CompileUnit *>::iterator CUI = CUMap.begin(),
+ CUE = CUMap.end(); CUI != CUE; ++CUI) {
+ CompileUnit *TheCU = CUI->second;
+ TheCU->constructContainingTypeDIEs();
}
// Standard sections final addresses.
@@ -1261,6 +753,7 @@ void DwarfDebug::endModule() {
// clean up.
DeleteContainerSeconds(DeadFnScopeMap);
+ SPMap.clear();
for (DenseMap<const MDNode *, CompileUnit *>::iterator I = CUMap.begin(),
E = CUMap.end(); I != E; ++I)
delete I->second;
@@ -1268,29 +761,30 @@ void DwarfDebug::endModule() {
}
/// findAbstractVariable - Find abstract variable, if any, associated with Var.
-DbgVariable *DwarfDebug::findAbstractVariable(DIVariable &Var,
+DbgVariable *DwarfDebug::findAbstractVariable(DIVariable &DV,
DebugLoc ScopeLoc) {
-
+ LLVMContext &Ctx = DV->getContext();
+  // More than one inlined variable corresponds to one abstract variable.
+ DIVariable Var = cleanseInlinedVariable(DV, Ctx);
DbgVariable *AbsDbgVariable = AbstractVariables.lookup(Var);
if (AbsDbgVariable)
return AbsDbgVariable;
- LLVMContext &Ctx = Var->getContext();
- DbgScope *Scope = AbstractScopes.lookup(ScopeLoc.getScope(Ctx));
+ LexicalScope *Scope = LScopes.findAbstractScope(ScopeLoc.getScope(Ctx));
if (!Scope)
return NULL;
- AbsDbgVariable = new DbgVariable(Var);
- Scope->addVariable(AbsDbgVariable);
+ AbsDbgVariable = new DbgVariable(Var, NULL);
+ addScopeVariable(Scope, AbsDbgVariable);
AbstractVariables[Var] = AbsDbgVariable;
return AbsDbgVariable;
}
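
findAbstractVariable is a canonicalize-then-cache lookup: reduce the inlined variable to its abstract form, then create the abstract DbgVariable at most once. A sketch in which the '@' suffix convention for inlined keys is purely hypothetical:

#include <map>
#include <string>

struct DbgVar { std::string Key; };
static std::map<std::string, DbgVar *> AbstractVariables;

DbgVar *findAbstractVariable(const std::string &InlinedKey) {
  // Hypothetical canonicalization: drop the inlined-at suffix, so every
  // concrete inlined copy maps to the same abstract key.
  std::string Key = InlinedKey.substr(0, InlinedKey.find('@'));
  std::map<std::string, DbgVar *>::iterator I = AbstractVariables.find(Key);
  if (I != AbstractVariables.end())
    return I->second;                   // cache hit: reuse abstract variable
  DbgVar *AV = new DbgVar();            // create once per abstract variable
  AV->Key = Key;
  AbstractVariables[Key] = AV;
  return AV;
}
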
-/// addCurrentFnArgument - If Var is an current function argument that add
-/// it in CurrentFnArguments list.
+/// addCurrentFnArgument - If Var is a current function argument then add
+/// it to CurrentFnArguments list.
bool DwarfDebug::addCurrentFnArgument(const MachineFunction *MF,
- DbgVariable *Var, DbgScope *Scope) {
- if (Scope != CurrentFnDbgScope)
+ DbgVariable *Var, LexicalScope *Scope) {
+ if (!LScopes.isCurrentFunctionScope(Scope))
return false;
DIVariable DV = Var->getVariable();
if (DV.getTag() != dwarf::DW_TAG_arg_variable)
@@ -1313,7 +807,7 @@ bool DwarfDebug::addCurrentFnArgument(const MachineFunction *MF,
/// collectVariableInfoFromMMITable - Collect variable information from
/// side table maintained by MMI.
void
-DwarfDebug::collectVariableInfoFromMMITable(const MachineFunction * MF,
+DwarfDebug::collectVariableInfoFromMMITable(const MachineFunction *MF,
SmallPtrSet<const MDNode *, 16> &Processed) {
MachineModuleInfo::VariableDbgInfoMapTy &VMap = MMI->getVariableDbgInfo();
for (MachineModuleInfo::VariableDbgInfoMapTy::iterator VI = VMap.begin(),
@@ -1324,21 +818,19 @@ DwarfDebug::collectVariableInfoFromMMITable(const MachineFunction * MF,
DIVariable DV(Var);
const std::pair<unsigned, DebugLoc> &VP = VI->second;
- DbgScope *Scope = findDbgScope(VP.second);
+ LexicalScope *Scope = LScopes.findLexicalScope(VP.second);
// If variable scope is not found then skip this variable.
if (Scope == 0)
continue;
DbgVariable *AbsDbgVariable = findAbstractVariable(DV, VP.second);
- DbgVariable *RegVar = new DbgVariable(DV);
- recordVariableFrameIndex(RegVar, VP.first);
+ DbgVariable *RegVar = new DbgVariable(DV, AbsDbgVariable);
+ RegVar->setFrameIndex(VP.first);
if (!addCurrentFnArgument(MF, RegVar, Scope))
- Scope->addVariable(RegVar);
- if (AbsDbgVariable) {
- recordVariableFrameIndex(AbsDbgVariable, VP.first);
- VarToAbstractVarMap[RegVar] = AbsDbgVariable;
- }
+ addScopeVariable(Scope, RegVar);
+ if (AbsDbgVariable)
+ AbsDbgVariable->setFrameIndex(VP.first);
}
}
@@ -1351,7 +843,7 @@ static bool isDbgValueInDefinedReg(const MachineInstr *MI) {
MI->getOperand(1).isImm() && MI->getOperand(1).getImm() == 0;
}
-/// getDebugLocEntry - Get .debug_loc entry for the instraction range starting
+/// getDebugLocEntry - Get .debug_loc entry for the instruction range starting
/// at MI.
static DotDebugLocEntry getDebugLocEntry(AsmPrinter *Asm,
const MCSymbol *FLabel,
@@ -1379,7 +871,7 @@ static DotDebugLocEntry getDebugLocEntry(AsmPrinter *Asm,
return DotDebugLocEntry();
}
-/// collectVariableInfo - Populate DbgScope entries with variables' info.
+/// collectVariableInfo - Find variables for each lexical scope.
void
DwarfDebug::collectVariableInfo(const MachineFunction *MF,
SmallPtrSet<const MDNode *, 16> &Processed) {
@@ -1402,30 +894,37 @@ DwarfDebug::collectVariableInfo(const MachineFunction *MF,
const MachineInstr *MInsn = History.front();
DIVariable DV(Var);
- DbgScope *Scope = NULL;
+ LexicalScope *Scope = NULL;
if (DV.getTag() == dwarf::DW_TAG_arg_variable &&
DISubprogram(DV.getContext()).describes(MF->getFunction()))
- Scope = CurrentFnDbgScope;
- else
- Scope = findDbgScope(MInsn->getDebugLoc());
+ Scope = LScopes.getCurrentFunctionScope();
+ else {
+ if (DV.getVersion() <= LLVMDebugVersion9)
+ Scope = LScopes.findLexicalScope(MInsn->getDebugLoc());
+ else {
+ if (MDNode *IA = DV.getInlinedAt())
+ Scope = LScopes.findInlinedScope(DebugLoc::getFromDILocation(IA));
+ else
+ Scope = LScopes.findLexicalScope(cast<MDNode>(DV->getOperand(1)));
+ }
+ }
// If variable scope is not found then skip this variable.
if (!Scope)
continue;
Processed.insert(DV);
assert(MInsn->isDebugValue() && "History must begin with debug value");
- DbgVariable *RegVar = new DbgVariable(DV);
+ DbgVariable *AbsVar = findAbstractVariable(DV, MInsn->getDebugLoc());
+ DbgVariable *RegVar = new DbgVariable(DV, AbsVar);
if (!addCurrentFnArgument(MF, RegVar, Scope))
- Scope->addVariable(RegVar);
- if (DbgVariable *AbsVar = findAbstractVariable(DV, MInsn->getDebugLoc())) {
- DbgVariableToDbgInstMap[AbsVar] = MInsn;
- VarToAbstractVarMap[RegVar] = AbsVar;
- }
+ addScopeVariable(Scope, RegVar);
+ if (AbsVar)
+ AbsVar->setMInsn(MInsn);
// Simple ranges that are fully coalesced.
if (History.size() <= 1 || (History.size() == 2 &&
MInsn->isIdenticalTo(History.back()))) {
- DbgVariableToDbgInstMap[RegVar] = MInsn;
+ RegVar->setMInsn(MInsn);
continue;
}
@@ -1471,16 +970,14 @@ DwarfDebug::collectVariableInfo(const MachineFunction *MF,
}
// Collect info for variables that were optimized out.
- const Function *F = MF->getFunction();
- if (NamedMDNode *NMD = getFnSpecificMDNode(*(F->getParent()), F->getName())) {
- for (unsigned i = 0, e = NMD->getNumOperands(); i != e; ++i) {
- DIVariable DV(cast<MDNode>(NMD->getOperand(i)));
- if (!DV || !Processed.insert(DV))
- continue;
- DbgScope *Scope = DbgScopeMap.lookup(DV.getContext());
- if (Scope)
- Scope->addVariable(new DbgVariable(DV));
- }
+ LexicalScope *FnScope = LScopes.getCurrentFunctionScope();
+ DIArray Variables = DISubprogram(FnScope->getScopeNode()).getVariables();
+ for (unsigned i = 0, e = Variables.getNumElements(); i != e; ++i) {
+ DIVariable DV(Variables.getElement(i));
+ if (!DV || !DV.Verify() || !Processed.insert(DV))
+ continue;
+ if (LexicalScope *Scope = LScopes.findLexicalScope(DV.getContext()))
+ addScopeVariable(Scope, new DbgVariable(DV, NULL));
}
}
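
The nested if/else in collectVariableInfo encodes a three-way scope-resolution ladder. Flattened into a sketch with stubbed lookups:

#include <string>

// Arguments of the current function land in the function scope; inlined
// variables resolve through their inlined-at location; everything else
// through the lexical scope of the defining instruction. Stand-ins only.
struct Scope {};
static Scope FnScope, InlineScope, BlockScope;

static Scope *currentFunctionScope() { return &FnScope; }
static Scope *findInlinedScope(const std::string &) { return &InlineScope; }
static Scope *findLexicalScope(const std::string &) { return &BlockScope; }

Scope *resolveVariableScope(bool IsArgOfCurrentFn,
                            const std::string &InlinedAt,
                            const std::string &InsnLoc) {
  if (IsArgOfCurrentFn)
    return currentFunctionScope();
  if (!InlinedAt.empty())
    return findInlinedScope(InlinedAt);
  return findLexicalScope(InsnLoc);     // may fail; variable is then skipped
}
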
@@ -1561,237 +1058,33 @@ void DwarfDebug::endInstruction(const MachineInstr *MI) {
I->second = PrevLabel;
}
-/// getOrCreateDbgScope - Create DbgScope for the scope.
-DbgScope *DwarfDebug::getOrCreateDbgScope(DebugLoc DL) {
- LLVMContext &Ctx = Asm->MF->getFunction()->getContext();
- MDNode *Scope = NULL;
- MDNode *InlinedAt = NULL;
- DL.getScopeAndInlinedAt(Scope, InlinedAt, Ctx);
-
- if (!InlinedAt) {
- DbgScope *WScope = DbgScopeMap.lookup(Scope);
- if (WScope)
- return WScope;
- WScope = new DbgScope(NULL, DIDescriptor(Scope), NULL);
- DbgScopeMap.insert(std::make_pair(Scope, WScope));
- if (DIDescriptor(Scope).isLexicalBlock()) {
- DbgScope *Parent =
- getOrCreateDbgScope(DebugLoc::getFromDILexicalBlock(Scope));
- WScope->setParent(Parent);
- Parent->addScope(WScope);
- } else if (DIDescriptor(Scope).isSubprogram()
- && DISubprogram(Scope).describes(Asm->MF->getFunction()))
- CurrentFnDbgScope = WScope;
-
- return WScope;
- }
-
- getOrCreateAbstractScope(Scope);
- DbgScope *WScope = DbgScopeMap.lookup(InlinedAt);
- if (WScope)
- return WScope;
-
- WScope = new DbgScope(NULL, DIDescriptor(Scope), InlinedAt);
- DbgScopeMap.insert(std::make_pair(InlinedAt, WScope));
- InlinedDbgScopeMap[DebugLoc::getFromDILocation(InlinedAt)] = WScope;
- DbgScope *Parent =
- getOrCreateDbgScope(DebugLoc::getFromDILocation(InlinedAt));
- WScope->setParent(Parent);
- Parent->addScope(WScope);
- return WScope;
-}
-
-/// calculateDominanceGraph - Calculate dominance graph for DbgScope
-/// hierarchy.
-static void calculateDominanceGraph(DbgScope *Scope) {
- assert (Scope && "Unable to calculate scop edominance graph!");
- SmallVector<DbgScope *, 4> WorkStack;
- WorkStack.push_back(Scope);
- unsigned Counter = 0;
- while (!WorkStack.empty()) {
- DbgScope *WS = WorkStack.back();
- const SmallVector<DbgScope *, 4> &Children = WS->getScopes();
- bool visitedChildren = false;
- for (SmallVector<DbgScope *, 4>::const_iterator SI = Children.begin(),
- SE = Children.end(); SI != SE; ++SI) {
- DbgScope *ChildScope = *SI;
- if (!ChildScope->getDFSOut()) {
- WorkStack.push_back(ChildScope);
- visitedChildren = true;
- ChildScope->setDFSIn(++Counter);
- break;
- }
- }
- if (!visitedChildren) {
- WorkStack.pop_back();
- WS->setDFSOut(++Counter);
- }
- }
-}
-
-/// printDbgScopeInfo - Print DbgScope info for each machine instruction.
-static
-void printDbgScopeInfo(const MachineFunction *MF,
- DenseMap<const MachineInstr *, DbgScope *> &MI2ScopeMap)
-{
-#ifndef NDEBUG
- LLVMContext &Ctx = MF->getFunction()->getContext();
- unsigned PrevDFSIn = 0;
- for (MachineFunction::const_iterator I = MF->begin(), E = MF->end();
- I != E; ++I) {
- for (MachineBasicBlock::const_iterator II = I->begin(), IE = I->end();
- II != IE; ++II) {
- const MachineInstr *MInsn = II;
- MDNode *Scope = NULL;
- MDNode *InlinedAt = NULL;
-
- // Check if instruction has valid location information.
- DebugLoc MIDL = MInsn->getDebugLoc();
- if (!MIDL.isUnknown()) {
- MIDL.getScopeAndInlinedAt(Scope, InlinedAt, Ctx);
- dbgs() << " [ ";
- if (InlinedAt)
- dbgs() << "*";
- DenseMap<const MachineInstr *, DbgScope *>::iterator DI =
- MI2ScopeMap.find(MInsn);
- if (DI != MI2ScopeMap.end()) {
- DbgScope *S = DI->second;
- dbgs() << S->getDFSIn();
- PrevDFSIn = S->getDFSIn();
- } else
- dbgs() << PrevDFSIn;
- } else
- dbgs() << " [ x" << PrevDFSIn;
- dbgs() << " ]";
- MInsn->dump();
- }
- dbgs() << "\n";
- }
-#endif
-}
-/// extractScopeInformation - Scan machine instructions in this function
-/// and collect DbgScopes. Return true, if at least one scope was found.
-bool DwarfDebug::extractScopeInformation() {
- // If scope information was extracted using .dbg intrinsics then there is not
- // any need to extract these information by scanning each instruction.
- if (!DbgScopeMap.empty())
- return false;
-
- // Scan each instruction and create scopes. First build working set of scopes.
- SmallVector<DbgRange, 4> MIRanges;
- DenseMap<const MachineInstr *, DbgScope *> MI2ScopeMap;
- DebugLoc PrevDL;
- const MachineInstr *RangeBeginMI = NULL;
- const MachineInstr *PrevMI = NULL;
- for (MachineFunction::const_iterator I = Asm->MF->begin(), E = Asm->MF->end();
- I != E; ++I) {
- for (MachineBasicBlock::const_iterator II = I->begin(), IE = I->end();
- II != IE; ++II) {
- const MachineInstr *MInsn = II;
-
- // Check if instruction has valid location information.
- const DebugLoc MIDL = MInsn->getDebugLoc();
- if (MIDL.isUnknown()) {
- PrevMI = MInsn;
- continue;
- }
-
- // If scope has not changed then skip this instruction.
- if (MIDL == PrevDL) {
- PrevMI = MInsn;
- continue;
- }
-
- // Ignore DBG_VALUE. It does not contribute any instruction in output.
- if (MInsn->isDebugValue())
- continue;
-
- if (RangeBeginMI) {
- // If we have alread seen a beginning of a instruction range and
- // current instruction scope does not match scope of first instruction
- // in this range then create a new instruction range.
- DEBUG(dbgs() << "Creating new instruction range :\n");
- DEBUG(dbgs() << "Begin Range at " << *RangeBeginMI);
- DEBUG(dbgs() << "End Range at " << *PrevMI);
- DEBUG(dbgs() << "Next Range starting at " << *MInsn);
- DEBUG(dbgs() << "------------------------\n");
- DbgRange R(RangeBeginMI, PrevMI);
- MI2ScopeMap[RangeBeginMI] = getOrCreateDbgScope(PrevDL);
- MIRanges.push_back(R);
- }
-
- // This is a beginning of a new instruction range.
- RangeBeginMI = MInsn;
-
- // Reset previous markers.
- PrevMI = MInsn;
- PrevDL = MIDL;
- }
- }
-
- // Create last instruction range.
- if (RangeBeginMI && PrevMI && !PrevDL.isUnknown()) {
- DbgRange R(RangeBeginMI, PrevMI);
- MIRanges.push_back(R);
- MI2ScopeMap[RangeBeginMI] = getOrCreateDbgScope(PrevDL);
- }
-
- if (!CurrentFnDbgScope)
- return false;
-
- calculateDominanceGraph(CurrentFnDbgScope);
- if (PrintDbgScope)
- printDbgScopeInfo(Asm->MF, MI2ScopeMap);
-
- // Find ranges of instructions covered by each DbgScope;
- DbgScope *PrevDbgScope = NULL;
- for (SmallVector<DbgRange, 4>::const_iterator RI = MIRanges.begin(),
- RE = MIRanges.end(); RI != RE; ++RI) {
- const DbgRange &R = *RI;
- DbgScope *S = MI2ScopeMap.lookup(R.first);
- assert (S && "Lost DbgScope for a machine instruction!");
- if (PrevDbgScope && !PrevDbgScope->dominates(S))
- PrevDbgScope->closeInsnRange(S);
- S->openInsnRange(R.first);
- S->extendInsnRange(R.second);
- PrevDbgScope = S;
- }
-
- if (PrevDbgScope)
- PrevDbgScope->closeInsnRange();
-
- identifyScopeMarkers();
-
- return !DbgScopeMap.empty();
-}
-
/// identifyScopeMarkers() -
-/// Each DbgScope has first instruction and last instruction to mark beginning
-/// and end of a scope respectively. Create an inverse map that list scopes
-/// starts (and ends) with an instruction. One instruction may start (or end)
-/// multiple scopes. Ignore scopes that are not reachable.
+/// Each LexicalScope has a first and a last instruction that mark the
+/// beginning and the end of the scope, respectively. Create an inverse map
+/// that lists the scopes starting (and ending) at each instruction. One
+/// instruction may start (or end) multiple scopes. Ignore scopes that are
+/// not reachable.
void DwarfDebug::identifyScopeMarkers() {
- SmallVector<DbgScope *, 4> WorkList;
- WorkList.push_back(CurrentFnDbgScope);
+ SmallVector<LexicalScope *, 4> WorkList;
+ WorkList.push_back(LScopes.getCurrentFunctionScope());
while (!WorkList.empty()) {
- DbgScope *S = WorkList.pop_back_val();
+ LexicalScope *S = WorkList.pop_back_val();
- const SmallVector<DbgScope *, 4> &Children = S->getScopes();
+ const SmallVector<LexicalScope *, 4> &Children = S->getChildren();
if (!Children.empty())
- for (SmallVector<DbgScope *, 4>::const_iterator SI = Children.begin(),
+ for (SmallVector<LexicalScope *, 4>::const_iterator SI = Children.begin(),
SE = Children.end(); SI != SE; ++SI)
WorkList.push_back(*SI);
if (S->isAbstractScope())
continue;
- const SmallVector<DbgRange, 4> &Ranges = S->getRanges();
+ const SmallVector<InsnRange, 4> &Ranges = S->getRanges();
if (Ranges.empty())
continue;
- for (SmallVector<DbgRange, 4>::const_iterator RI = Ranges.begin(),
+ for (SmallVector<InsnRange, 4>::const_iterator RI = Ranges.begin(),
RE = Ranges.end(); RI != RE; ++RI) {
- assert(RI->first && "DbgRange does not have first instruction!");
- assert(RI->second && "DbgRange does not have second instruction!");
+ assert(RI->first && "InsnRange does not have first instruction!");
+ assert(RI->second && "InsnRange does not have second instruction!");
requestLabelBeforeInsn(RI->first);
requestLabelAfterInsn(RI->second);
}
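
identifyScopeMarkers is an explicit-stack traversal that requests a label at each end of every concrete range. A self-contained model:

#include <cstddef>
#include <vector>

struct InsnRange { int First, Last; };
struct LScope {
  bool Abstract;
  std::vector<LScope *> Children;
  std::vector<InsnRange> Ranges;
};

void identifyScopeMarkers(LScope *Root, std::vector<int> &LabelBeforeInsn,
                          std::vector<int> &LabelAfterInsn) {
  std::vector<LScope *> Work;
  Work.push_back(Root);
  while (!Work.empty()) {
    LScope *S = Work.back();
    Work.pop_back();
    for (std::size_t i = 0; i != S->Children.size(); ++i)
      Work.push_back(S->Children[i]);    // visit nested scopes too
    if (S->Abstract)
      continue;                          // abstract scopes own no machine code
    for (std::size_t i = 0; i != S->Ranges.size(); ++i) {
      LabelBeforeInsn.push_back(S->Ranges[i].First);
      LabelAfterInsn.push_back(S->Ranges[i].Last);
    }
  }
}
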
@@ -1819,7 +1112,9 @@ static DebugLoc getFnDebugLoc(DebugLoc DL, const LLVMContext &Ctx) {
/// emitted immediately after the function entry point.
void DwarfDebug::beginFunction(const MachineFunction *MF) {
if (!MMI->hasDebugInfo()) return;
- if (!extractScopeInformation()) return;
+ LScopes.initialize(*MF);
+ if (LScopes.empty()) return;
+ identifyScopeMarkers();
FunctionBeginSym = Asm->GetTempSymbol("func_begin",
Asm->getFunctionNumber());
@@ -1953,7 +1248,8 @@ void DwarfDebug::beginFunction(const MachineFunction *MF) {
const MachineInstr *Prev = History.back();
if (Prev->isDebugValue() && isDbgValueInDefinedReg(Prev)) {
const MachineBasicBlock *PrevMBB = Prev->getParent();
- MachineBasicBlock::const_iterator LastMI = PrevMBB->getLastNonDebugInstr();
+ MachineBasicBlock::const_iterator LastMI =
+ PrevMBB->getLastNonDebugInstr();
if (LastMI == PrevMBB->end())
// Drop DBG_VALUE for empty range.
History.pop_back();
@@ -1985,110 +1281,73 @@ void DwarfDebug::beginFunction(const MachineFunction *MF) {
}
}
+void DwarfDebug::addScopeVariable(LexicalScope *LS, DbgVariable *Var) {
+  ScopeVariables[LS].push_back(Var);
+}
+
/// endFunction - Gather and emit post-function debug information.
///
void DwarfDebug::endFunction(const MachineFunction *MF) {
- if (!MMI->hasDebugInfo() || DbgScopeMap.empty()) return;
-
- if (CurrentFnDbgScope) {
+ if (!MMI->hasDebugInfo() || LScopes.empty()) return;
- // Define end label for subprogram.
- FunctionEndSym = Asm->GetTempSymbol("func_end",
- Asm->getFunctionNumber());
- // Assumes in correct section after the entry point.
- Asm->OutStreamer.EmitLabel(FunctionEndSym);
-
- SmallPtrSet<const MDNode *, 16> ProcessedVars;
- collectVariableInfo(MF, ProcessedVars);
-
- // Construct abstract scopes.
- for (SmallVector<DbgScope *, 4>::iterator AI = AbstractScopesList.begin(),
- AE = AbstractScopesList.end(); AI != AE; ++AI) {
- DISubprogram SP((*AI)->getScopeNode());
- if (SP.Verify()) {
- // Collect info for variables that were optimized out.
- StringRef FName = SP.getLinkageName();
- if (FName.empty())
- FName = SP.getName();
- if (NamedMDNode *NMD =
- getFnSpecificMDNode(*(MF->getFunction()->getParent()), FName)) {
- for (unsigned i = 0, e = NMD->getNumOperands(); i != e; ++i) {
- DIVariable DV(cast<MDNode>(NMD->getOperand(i)));
- if (!DV || !ProcessedVars.insert(DV))
- continue;
- DbgScope *Scope = AbstractScopes.lookup(DV.getContext());
- if (Scope)
- Scope->addVariable(new DbgVariable(DV));
- }
- }
+ // Define end label for subprogram.
+ FunctionEndSym = Asm->GetTempSymbol("func_end",
+ Asm->getFunctionNumber());
+ // Assumes in correct section after the entry point.
+ Asm->OutStreamer.EmitLabel(FunctionEndSym);
+
+ SmallPtrSet<const MDNode *, 16> ProcessedVars;
+ collectVariableInfo(MF, ProcessedVars);
+
+ LexicalScope *FnScope = LScopes.getCurrentFunctionScope();
+ CompileUnit *TheCU = SPMap.lookup(FnScope->getScopeNode());
+ assert (TheCU && "Unable to find compile unit!");
+
+ // Construct abstract scopes.
+ ArrayRef<LexicalScope *> AList = LScopes.getAbstractScopesList();
+ for (unsigned i = 0, e = AList.size(); i != e; ++i) {
+ LexicalScope *AScope = AList[i];
+ DISubprogram SP(AScope->getScopeNode());
+ if (SP.Verify()) {
+ // Collect info for variables that were optimized out.
+ DIArray Variables = SP.getVariables();
+ for (unsigned i = 0, e = Variables.getNumElements(); i != e; ++i) {
+ DIVariable DV(Variables.getElement(i));
+ if (!DV || !DV.Verify() || !ProcessedVars.insert(DV))
+ continue;
+ if (LexicalScope *Scope = LScopes.findAbstractScope(DV.getContext()))
+ addScopeVariable(Scope, new DbgVariable(DV, NULL));
}
- if (ProcessedSPNodes.count((*AI)->getScopeNode()) == 0)
- constructScopeDIE(*AI);
}
-
- DIE *CurFnDIE = constructScopeDIE(CurrentFnDbgScope);
-
- if (!DisableFramePointerElim(*MF))
- getCompileUnit(CurrentFnDbgScope->getScopeNode())->addUInt(CurFnDIE,
- dwarf::DW_AT_APPLE_omit_frame_ptr,
- dwarf::DW_FORM_flag, 1);
-
-
- DebugFrames.push_back(FunctionDebugFrameInfo(Asm->getFunctionNumber(),
- MMI->getFrameMoves()));
+ if (ProcessedSPNodes.count(AScope->getScopeNode()) == 0)
+ constructScopeDIE(TheCU, AScope);
}
+
+ DIE *CurFnDIE = constructScopeDIE(TheCU, FnScope);
+
+ if (!DisableFramePointerElim(*MF))
+ TheCU->addUInt(CurFnDIE, dwarf::DW_AT_APPLE_omit_frame_ptr,
+ dwarf::DW_FORM_flag, 1);
+
+ DebugFrames.push_back(FunctionDebugFrameInfo(Asm->getFunctionNumber(),
+ MMI->getFrameMoves()));
// Clear debug info
- CurrentFnDbgScope = NULL;
+ for (DenseMap<LexicalScope *, SmallVector<DbgVariable *, 8> >::iterator
+ I = ScopeVariables.begin(), E = ScopeVariables.end(); I != E; ++I)
+ DeleteContainerPointers(I->second);
+ ScopeVariables.clear();
DeleteContainerPointers(CurrentFnArguments);
- DbgVariableToFrameIndexMap.clear();
- VarToAbstractVarMap.clear();
- DbgVariableToDbgInstMap.clear();
- InlinedDbgScopeMap.clear();
- DeleteContainerSeconds(DbgScopeMap);
UserVariables.clear();
DbgValues.clear();
- DeleteContainerSeconds(AbstractScopes);
- AbstractScopesList.clear();
AbstractVariables.clear();
LabelsBeforeInsn.clear();
LabelsAfterInsn.clear();
PrevLabel = NULL;
}
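
The new teardown deletes the DbgVariable pointers owned by ScopeVariables before clearing the map, which is all DeleteContainerPointers does per entry. In miniature, with int keys replacing LexicalScope*:

#include <cstddef>
#include <map>
#include <vector>

struct DbgVar {};
typedef std::map<int, std::vector<DbgVar *> > ScopeVarMap;

void clearScopeVariables(ScopeVarMap &ScopeVariables) {
  for (ScopeVarMap::iterator I = ScopeVariables.begin(),
                             E = ScopeVariables.end(); I != E; ++I)
    for (std::size_t i = 0; i != I->second.size(); ++i)
      delete I->second[i];           // per-scope DeleteContainerPointers
  ScopeVariables.clear();
}
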
-/// recordVariableFrameIndex - Record a variable's index.
-void DwarfDebug::recordVariableFrameIndex(const DbgVariable *V, int Index) {
- assert (V && "Invalid DbgVariable!");
- DbgVariableToFrameIndexMap[V] = Index;
-}
-
-/// findVariableFrameIndex - Return true if frame index for the variable
-/// is found. Update FI to hold value of the index.
-bool DwarfDebug::findVariableFrameIndex(const DbgVariable *V, int *FI) {
- assert (V && "Invalid DbgVariable!");
- DenseMap<const DbgVariable *, int>::iterator I =
- DbgVariableToFrameIndexMap.find(V);
- if (I == DbgVariableToFrameIndexMap.end())
- return false;
- *FI = I->second;
- return true;
-}
-
-/// findDbgScope - Find DbgScope for the debug loc.
-DbgScope *DwarfDebug::findDbgScope(DebugLoc DL) {
- if (DL.isUnknown())
- return NULL;
-
- DbgScope *Scope = NULL;
- LLVMContext &Ctx = Asm->MF->getFunction()->getContext();
- if (MDNode *IA = DL.getInlinedAt(Ctx))
- Scope = InlinedDbgScopeMap.lookup(DebugLoc::getFromDILocation(IA));
- else
- Scope = DbgScopeMap.lookup(DL.getScope(Ctx));
- return Scope;
-}
-
-
/// recordSourceLine - Register a source line with debug info. Returns the
/// unique label that was emitted and which provides correspondence to
/// the source line list.
@@ -2112,6 +1371,10 @@ void DwarfDebug::recordSourceLine(unsigned Line, unsigned Col, const MDNode *S,
DISubprogram SP(S);
Fn = SP.getFilename();
Dir = SP.getDirectory();
+ } else if (Scope.isLexicalBlockFile()) {
+ DILexicalBlockFile DBF(S);
+ Fn = DBF.getFilename();
+ Dir = DBF.getDirectory();
} else if (Scope.isLexicalBlock()) {
DILexicalBlock DB(S);
Fn = DB.getFilename();
@@ -2121,8 +1384,7 @@ void DwarfDebug::recordSourceLine(unsigned Line, unsigned Col, const MDNode *S,
Src = GetOrCreateSourceID(Fn, Dir);
}
- Asm->OutStreamer.EmitDwarfLocDirective(Src, Line, Col, Flags,
- 0, 0, Fn);
+ Asm->OutStreamer.EmitDwarfLocDirective(Src, Line, Col, Flags, 0, 0, Fn);
}
//===----------------------------------------------------------------------===//
@@ -2235,7 +1497,7 @@ void DwarfDebug::EmitSectionLabels() {
EmitSectionSym(Asm, TLOF.getDataSection());
}
-/// emitDIE - Recusively Emits a debug information entry.
+/// emitDIE - Recursively emits a debug information entry.
///
void DwarfDebug::emitDIE(DIE *Die) {
// Get the abbreviation for this DIE.
@@ -2290,10 +1552,9 @@ void DwarfDebug::emitDIE(DIE *Die) {
break;
}
case dwarf::DW_AT_location: {
- if (UseDotDebugLocEntry.count(Die) != 0) {
- DIELabel *L = cast<DIELabel>(Values[i]);
+ if (DIELabel *L = dyn_cast<DIELabel>(Values[i]))
Asm->EmitLabelDifference(L->getValue(), DwarfDebugLocSectionSym, 4);
- } else
+ else
Values[i]->EmitValue(Asm, Form);
break;
}
@@ -2464,7 +1725,7 @@ void DwarfDebug::emitDebugPubNames() {
Asm->OutStreamer.AddComment("End Mark");
Asm->EmitInt32(0);
Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("pubnames_end",
- TheCU->getID()));
+ TheCU->getID()));
}
}
@@ -2499,7 +1760,7 @@ void DwarfDebug::emitDebugPubTypes() {
for (StringMap<DIE*>::const_iterator
GI = Globals.begin(), GE = Globals.end(); GI != GE; ++GI) {
const char *Name = GI->getKeyData();
- DIE * Entity = GI->second;
+ DIE *Entity = GI->second;
if (Asm->isVerbose()) Asm->OutStreamer.AddComment("DIE offset");
Asm->EmitInt32(Entity->getOffset());
diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.h b/lib/CodeGen/AsmPrinter/DwarfDebug.h
index b245006..35653be 100644
--- a/lib/CodeGen/AsmPrinter/DwarfDebug.h
+++ b/lib/CodeGen/AsmPrinter/DwarfDebug.h
@@ -15,7 +15,8 @@
#define CODEGEN_ASMPRINTER_DWARFDEBUG_H__
#include "llvm/CodeGen/AsmPrinter.h"
-#include "llvm/CodeGen/MachineLocation.h"
+#include "llvm/CodeGen/LexicalScopes.h"
+#include "llvm/MC/MachineLocation.h"
#include "llvm/Analysis/DebugInfo.h"
#include "DIE.h"
#include "llvm/ADT/DenseMap.h"
@@ -30,7 +31,6 @@ namespace llvm {
class CompileUnit;
class DbgConcreteScope;
-class DbgScope;
class DbgVariable;
class MachineFrameInfo;
class MachineModuleInfo;
@@ -125,9 +125,14 @@ class DbgVariable {
DIVariable Var; // Variable Descriptor.
DIE *TheDIE; // Variable DIE.
unsigned DotDebugLocOffset; // Offset in DotDebugLocEntries.
+ DbgVariable *AbsVar; // Corresponding Abstract variable, if any.
+ const MachineInstr *MInsn; // DBG_VALUE instruction of the variable.
+ int FrameIndex;
public:
// AbsVar may be NULL.
- DbgVariable(DIVariable V) : Var(V), TheDIE(0), DotDebugLocOffset(~0U) {}
+ DbgVariable(DIVariable V, DbgVariable *AV)
+ : Var(V), TheDIE(0), DotDebugLocOffset(~0U), AbsVar(AV), MInsn(0),
+ FrameIndex(~0) {}
// Accessors.
DIVariable getVariable() const { return Var; }
@@ -136,7 +141,27 @@ public:
void setDotDebugLocOffset(unsigned O) { DotDebugLocOffset = O; }
unsigned getDotDebugLocOffset() const { return DotDebugLocOffset; }
StringRef getName() const { return Var.getName(); }
- unsigned getTag() const { return Var.getTag(); }
+ DbgVariable *getAbstractVariable() const { return AbsVar; }
+ const MachineInstr *getMInsn() const { return MInsn; }
+ void setMInsn(const MachineInstr *M) { MInsn = M; }
+ int getFrameIndex() const { return FrameIndex; }
+ void setFrameIndex(int FI) { FrameIndex = FI; }
+ // Translate tag to proper Dwarf tag.
+ unsigned getTag() const {
+ if (Var.getTag() == dwarf::DW_TAG_arg_variable)
+ return dwarf::DW_TAG_formal_parameter;
+
+ return dwarf::DW_TAG_variable;
+ }
+ /// isArtificial - Return true if DbgVariable is artificial.
+ bool isArtificial() const {
+ if (Var.isArtificial())
+ return true;
+ if (Var.getTag() == dwarf::DW_TAG_arg_variable
+ && getType().isArtificial())
+ return true;
+ return false;
+ }
bool variableHasComplexAddress() const {
assert(Var.Verify() && "Invalid complex DbgVariable!");
return Var.hasComplexAddress();
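
The header now folds the old tag-translation switch and the artificiality test into DbgVariable itself. A toy model of those two accessors (the constants are illustrative, not the real DWARF values):

#include <cstdio>

enum { TAG_arg_variable = 1, TAG_auto_variable = 2,
       TAG_formal_parameter = 3, TAG_variable = 4 };

struct Var {
  int DeclTag;
  bool ArtificialFlag, TypeIsArtificial;

  // Declared arguments become formal parameters; everything else,
  // including auto variables, becomes a plain variable.
  int getTag() const {
    return DeclTag == TAG_arg_variable ? TAG_formal_parameter : TAG_variable;
  }
  // Artificial if flagged directly, or an argument of artificial type
  // (e.g. an implicit this pointer).
  bool isArtificial() const {
    return ArtificialFlag ||
           (DeclTag == TAG_arg_variable && TypeIsArtificial);
  }
};

int main() {
  Var ThisPtr = {TAG_arg_variable, false, true};
  std::printf("tag=%d artificial=%d\n", ThisPtr.getTag(),
              ThisPtr.isArtificial());
}
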
@@ -167,8 +192,13 @@ class DwarfDebug {
//
CompileUnit *FirstCU;
+
+ /// Maps MDNode with its corresponding CompileUnit.
DenseMap <const MDNode *, CompileUnit *> CUMap;
+ /// Maps subprogram MDNode with its corresponding CompileUnit.
+ DenseMap <const MDNode *, CompileUnit *> SPMap;
+
/// AbbreviationsSet - Used to uniquely define abbreviations.
///
FoldingSet<DIEAbbrev> AbbreviationsSet;
@@ -192,63 +222,27 @@ class DwarfDebug {
///
UniqueVector<const MCSection*> SectionMap;
- /// CurrentFnDbgScope - Top level scope for the current function.
- ///
- DbgScope *CurrentFnDbgScope;
-
/// CurrentFnArguments - List of Arguments (DbgValues) for current function.
SmallVector<DbgVariable *, 8> CurrentFnArguments;
- /// DbgScopeMap - Tracks the scopes in the current function. Owns the
- /// contained DbgScope*s.
- DenseMap<const MDNode *, DbgScope *> DbgScopeMap;
-
- /// InlinedDbgScopeMap - Tracks inlined function scopes in current function.
- DenseMap<DebugLoc, DbgScope *> InlinedDbgScopeMap;
-
- /// AbstractScopes - Tracks the abstract scopes a module. These scopes are
- /// not included DbgScopeMap. AbstractScopes owns its DbgScope*s.
- DenseMap<const MDNode *, DbgScope *> AbstractScopes;
+ LexicalScopes LScopes;
/// AbstractSPDies - Collection of abstract subprogram DIEs.
DenseMap<const MDNode *, DIE *> AbstractSPDies;
- /// AbstractScopesList - Tracks abstract scopes constructed while processing
- /// a function. This list is cleared during endFunction().
- SmallVector<DbgScope *, 4>AbstractScopesList;
+ /// ScopeVariables - Collection of dbg variables of a scope.
+ DenseMap<LexicalScope *, SmallVector<DbgVariable *, 8> > ScopeVariables;
- /// AbstractVariables - Collection on abstract variables. Owned by the
- /// DbgScopes in AbstractScopes.
+ /// AbstractVariables - Collection on abstract variables.
DenseMap<const MDNode *, DbgVariable *> AbstractVariables;
- /// DbgVariableToFrameIndexMap - Tracks frame index used to find
- /// variable's value.
- DenseMap<const DbgVariable *, int> DbgVariableToFrameIndexMap;
-
- /// DbgVariableToDbgInstMap - Maps DbgVariable to corresponding DBG_VALUE
- /// machine instruction.
- DenseMap<const DbgVariable *, const MachineInstr *> DbgVariableToDbgInstMap;
-
/// DotDebugLocEntries - Collection of DotDebugLocEntry.
SmallVector<DotDebugLocEntry, 4> DotDebugLocEntries;
- /// UseDotDebugLocEntry - DW_AT_location attributes for the DIEs in this set
- /// idetifies corresponding .debug_loc entry offset.
- SmallPtrSet<const DIE *, 4> UseDotDebugLocEntry;
-
- /// VarToAbstractVarMap - Maps DbgVariable with corresponding Abstract
- /// DbgVariable, if any.
- DenseMap<const DbgVariable *, const DbgVariable *> VarToAbstractVarMap;
-
  /// InlinedSubprogramDIEs - Collection of subprogram DIEs that are marked
/// (at the end of the module) as DW_AT_inline.
SmallPtrSet<DIE *, 4> InlinedSubprogramDIEs;
- /// ContainingTypeMap - This map is used to keep track of subprogram DIEs that
- /// need DW_AT_containing_type attribute. This attribute points to a DIE that
- /// corresponds to the MDNode mapped with the subprogram DIE.
- DenseMap<DIE *, const MDNode *> ContainingTypeMap;
-
/// InlineInfo - Keep track of inlined functions and their location. This
/// information is used to populate debug_inlined section.
typedef std::pair<const MCSymbol *, DIE *> InlineInfoLabels;
@@ -316,10 +310,7 @@ private:
///
void assignAbbrevNumber(DIEAbbrev &Abbrev);
- /// getOrCreateDbgScope - Create DbgScope for the scope.
- DbgScope *getOrCreateDbgScope(DebugLoc DL);
-
- DbgScope *getOrCreateAbstractScope(const MDNode *N);
+ void addScopeVariable(LexicalScope *LS, DbgVariable *Var);
/// findAbstractVariable - Find abstract variable associated with Var.
DbgVariable *findAbstractVariable(DIVariable &Var, DebugLoc Loc);
@@ -328,22 +319,22 @@ private:
/// attach appropriate DW_AT_low_pc and DW_AT_high_pc attributes.
/// If there are global variables in this scope then create and insert
/// DIEs for these variables.
- DIE *updateSubprogramScopeDIE(const MDNode *SPNode);
+ DIE *updateSubprogramScopeDIE(CompileUnit *SPCU, const MDNode *SPNode);
/// constructLexicalScope - Construct new DW_TAG_lexical_block
/// for this scope and attach DW_AT_low_pc/DW_AT_high_pc labels.
- DIE *constructLexicalScopeDIE(DbgScope *Scope);
+ DIE *constructLexicalScopeDIE(CompileUnit *TheCU, LexicalScope *Scope);
/// constructInlinedScopeDIE - This scope represents inlined body of
/// a function. Construct DIE to represent this concrete inlined copy
/// of the function.
- DIE *constructInlinedScopeDIE(DbgScope *Scope);
+ DIE *constructInlinedScopeDIE(CompileUnit *TheCU, LexicalScope *Scope);
/// constructVariableDIE - Construct a DIE for the given DbgVariable.
- DIE *constructVariableDIE(DbgVariable *DV, DbgScope *S);
+ DIE *constructVariableDIE(DbgVariable *DV, LexicalScope *S);
/// constructScopeDIE - Construct a DIE for this scope.
- DIE *constructScopeDIE(DbgScope *Scope);
+ DIE *constructScopeDIE(CompileUnit *TheCU, LexicalScope *Scope);
/// EmitSectionLabels - Emit initial Dwarf sections with a label at
/// the start of each one.
@@ -424,16 +415,10 @@ private:
/// constructCompileUnit - Create new CompileUnit for the given
/// metadata node with tag DW_TAG_compile_unit.
- void constructCompileUnit(const MDNode *N);
-
- /// getCompielUnit - Get CompileUnit DIE.
- CompileUnit *getCompileUnit(const MDNode *N) const;
-
- /// constructGlobalVariableDIE - Construct global variable DIE.
- void constructGlobalVariableDIE(const MDNode *N);
+ CompileUnit *constructCompileUnit(const MDNode *N);
/// construct SubprogramDIE - Construct subprogram DIE.
- void constructSubprogramDIE(const MDNode *N);
+ void constructSubprogramDIE(CompileUnit *TheCU, const MDNode *N);
/// recordSourceLine - Register a source line with debug info. Returns the
/// unique label that was emitted and which provides correspondence to
@@ -441,30 +426,16 @@ private:
void recordSourceLine(unsigned Line, unsigned Col, const MDNode *Scope,
unsigned Flags);
- /// recordVariableFrameIndex - Record a variable's index.
- void recordVariableFrameIndex(const DbgVariable *V, int Index);
-
- /// findVariableFrameIndex - Return true if frame index for the variable
- /// is found. Update FI to hold value of the index.
- bool findVariableFrameIndex(const DbgVariable *V, int *FI);
-
- /// findDbgScope - Find DbgScope for the debug loc.
- DbgScope *findDbgScope(DebugLoc DL);
-
/// identifyScopeMarkers() - Identify instructions that mark the
/// beginning or end of a scope.
void identifyScopeMarkers();
- /// extractScopeInformation - Scan machine instructions in this function
- /// and collect DbgScopes. Return true, if atleast one scope was found.
- bool extractScopeInformation();
-
/// addCurrentFnArgument - If Var is a current function argument then add
/// it to the CurrentFnArguments list.
bool addCurrentFnArgument(const MachineFunction *MF,
- DbgVariable *Var, DbgScope *Scope);
+ DbgVariable *Var, LexicalScope *Scope);
- /// collectVariableInfo - Populate DbgScope entries with variables' info.
+ /// collectVariableInfo - Populate LexicalScope entries with variables' info.
void collectVariableInfo(const MachineFunction *,
SmallPtrSet<const MDNode *, 16> &ProcessedVars);
@@ -496,6 +467,14 @@ public:
DwarfDebug(AsmPrinter *A, Module *M);
~DwarfDebug();
+ /// collectInfoFromNamedMDNodes - Collect debug info from named mdnodes such
+ /// as llvm.dbg.enum and llvm.dbg.ty
+ void collectInfoFromNamedMDNodes(Module *M);
+
+ /// collectLegacyDebugInfo - Collect debug info using DebugInfoFinder.
+ /// FIXME - Remove this when dragon-egg and llvm-gcc switch to DIBuilder.
+ bool collectLegacyDebugInfo(Module *M);
+
/// beginModule - Emit all Dwarf sections that should come prior to the
/// content.
void beginModule(Module *M);
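The net effect of this DwarfDebug.h hunk is that a half-dozen hand-maintained DbgScope maps with fuzzy ownership collapse into one LexicalScopes analysis plus a single ScopeVariables side table. A minimal standalone sketch of that ownership pattern, with hypothetical names rather than the real LLVM classes:

#include <map>
#include <memory>
#include <string>
#include <vector>

struct Scope { std::string Name; };
struct Variable { std::string Name; };

// One analysis owns every scope; clients no longer juggle parallel maps of
// raw scope pointers with unclear ownership.
class LexicalScopesSketch {
  std::map<std::string, std::unique_ptr<Scope> > Scopes;
public:
  Scope *getOrCreate(const std::string &N) {
    std::unique_ptr<Scope> &S = Scopes[N];
    if (!S) S.reset(new Scope{N});
    return S.get();
  }
};

int main() {
  LexicalScopesSketch LS;
  // Per-scope data lives in a side table keyed by non-owning pointers,
  // mirroring the ScopeVariables map added above.
  std::map<Scope *, std::vector<Variable> > ScopeVariables;
  Scope *Fn = LS.getOrCreate("function");
  ScopeVariables[Fn].push_back(Variable{"x"});
  return ScopeVariables[Fn].size() == 1 ? 0 : 1;
}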
diff --git a/lib/CodeGen/AsmPrinter/DwarfException.cpp b/lib/CodeGen/AsmPrinter/DwarfException.cpp
index 1f992fa..18b726b 100644
--- a/lib/CodeGen/AsmPrinter/DwarfException.cpp
+++ b/lib/CodeGen/AsmPrinter/DwarfException.cpp
@@ -17,7 +17,6 @@
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
-#include "llvm/CodeGen/MachineLocation.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
@@ -527,29 +526,26 @@ void DwarfException::EmitExceptionTable() {
I = CallSites.begin(), E = CallSites.end(); I != E; ++I, ++idx) {
const CallSiteEntry &S = *I;
+ // Offset of the landing pad, counted in 16-byte bundles relative to the
+ // @LPStart address.
if (VerboseAsm) {
- // Emit comments that decode the call site.
Asm->OutStreamer.AddComment(Twine(">> Call Site ") +
llvm::utostr(idx) + " <<");
Asm->OutStreamer.AddComment(Twine(" On exception at call site ") +
llvm::utostr(idx));
+ }
+ Asm->EmitULEB128(idx);
+ // Offset of the first associated action record, relative to the start of
+ // the action table. This value is biased by 1 (1 indicates the start of
+ // the action table), and 0 indicates that there are no actions.
+ if (VerboseAsm) {
if (S.Action == 0)
Asm->OutStreamer.AddComment(" Action: cleanup");
else
Asm->OutStreamer.AddComment(Twine(" Action: ") +
llvm::utostr((S.Action - 1) / 2 + 1));
-
- Asm->OutStreamer.AddBlankLine();
}
-
- // Offset of the landing pad, counted in 16-byte bundles relative to the
- // @LPStart address.
- Asm->EmitULEB128(idx);
-
- // Offset of the first associated action record, relative to the start of
- // the action table. This value is biased by 1 (1 indicates the start of
- // the action table), and 0 indicates that there are no actions.
Asm->EmitULEB128(S.Action);
}
} else {
@@ -595,46 +591,43 @@ void DwarfException::EmitExceptionTable() {
if (EndLabel == 0)
EndLabel = Asm->GetTempSymbol("eh_func_end", Asm->getFunctionNumber());
- if (VerboseAsm) {
- // Emit comments that decode the call site.
- Asm->OutStreamer.AddComment(Twine(">> Call Site ") +
- llvm::utostr(++Entry) + " <<");
- Asm->OutStreamer.AddComment(Twine(" Call between ") +
- BeginLabel->getName() + " and " +
- EndLabel->getName());
-
- if (!S.PadLabel) {
- Asm->OutStreamer.AddComment(" has no landing pad");
- } else {
- Asm->OutStreamer.AddComment(Twine(" jumps to ") +
- S.PadLabel->getName());
-
- if (S.Action == 0)
- Asm->OutStreamer.AddComment(" On action: cleanup");
- else
- Asm->OutStreamer.AddComment(Twine(" On action: ") +
- llvm::utostr((S.Action - 1) / 2 + 1));
- }
-
- Asm->OutStreamer.AddBlankLine();
- }
// Offset of the call site relative to the previous call site, counted in
// number of 16-byte bundles. The first call site is counted relative to
// the start of the procedure fragment.
+ if (VerboseAsm)
+ Asm->OutStreamer.AddComment(Twine(">> Call Site ") +
+ llvm::utostr(++Entry) + " <<");
Asm->EmitLabelDifference(BeginLabel, EHFuncBeginSym, 4);
+ if (VerboseAsm)
+ Asm->OutStreamer.AddComment(Twine(" Call between ") +
+ BeginLabel->getName() + " and " +
+ EndLabel->getName());
Asm->EmitLabelDifference(EndLabel, BeginLabel, 4);
// Offset of the landing pad, counted in 16-byte bundles relative to the
// @LPStart address.
- if (!S.PadLabel)
+ if (!S.PadLabel) {
+ if (VerboseAsm)
+ Asm->OutStreamer.AddComment(" has no landing pad");
Asm->OutStreamer.EmitIntValue(0, 4/*size*/, 0/*addrspace*/);
- else
+ } else {
+ if (VerboseAsm)
+ Asm->OutStreamer.AddComment(Twine(" jumps to ") +
+ S.PadLabel->getName());
Asm->EmitLabelDifference(S.PadLabel, EHFuncBeginSym, 4);
+ }
// Offset of the first associated action record, relative to the start of
// the action table. This value is biased by 1 (1 indicates the start of
// the action table), and 0 indicates that there are no actions.
+ if (VerboseAsm) {
+ if (S.Action == 0)
+ Asm->OutStreamer.AddComment(" On action: cleanup");
+ else
+ Asm->OutStreamer.AddComment(Twine(" On action: ") +
+ llvm::utostr((S.Action - 1) / 2 + 1));
+ }
Asm->EmitULEB128(S.Action);
}
}
@@ -649,13 +642,29 @@ void DwarfException::EmitExceptionTable() {
// Emit comments that decode the action table.
Asm->OutStreamer.AddComment(Twine(">> Action Record ") +
llvm::utostr(++Entry) + " <<");
- if (Action.ValueForTypeID >= 0)
+ }
+
+ // Type Filter
+ //
+ // Used by the runtime to match the type of the thrown exception to the
+ // type of the catch clauses or the types in the exception specification.
+ if (VerboseAsm) {
+ if (Action.ValueForTypeID > 0)
Asm->OutStreamer.AddComment(Twine(" Catch TypeInfo ") +
llvm::itostr(Action.ValueForTypeID));
- else
+ else if (Action.ValueForTypeID < 0)
Asm->OutStreamer.AddComment(Twine(" Filter TypeInfo ") +
llvm::itostr(Action.ValueForTypeID));
+ else
+ Asm->OutStreamer.AddComment(" Cleanup");
+ }
+ Asm->EmitSLEB128(Action.ValueForTypeID);
+ // Action Record
+ //
+ // Self-relative signed displacement in bytes of the next action record,
+ // or 0 if there is no next action record.
+ if (VerboseAsm) {
if (Action.NextAction == 0) {
Asm->OutStreamer.AddComment(" No further actions");
} else {
@@ -663,20 +672,7 @@ void DwarfException::EmitExceptionTable() {
Asm->OutStreamer.AddComment(Twine(" Continue to action ") +
llvm::utostr(NextAction));
}
-
- Asm->OutStreamer.AddBlankLine();
}
-
- // Type Filter
- //
- // Used by the runtime to match the type of the thrown exception to the
- // type of the catch clauses or the types in the exception specification.
- Asm->EmitSLEB128(Action.ValueForTypeID);
-
- // Action Record
- //
- // Self-relative signed displacement in bytes of the next action record,
- // or 0 if there is no next action record.
Asm->EmitSLEB128(Action.NextAction);
}
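The EmitULEB128/EmitSLEB128 calls reordered above write the standard DWARF variable-length encodings used throughout the exception table. For readers unfamiliar with them, a minimal standalone sketch of both encoders (an illustration, not the MCStreamer API):

#include <cstdint>
#include <cstdio>
#include <vector>

// Unsigned LEB128: 7 value bits per byte, high bit set on all but the last.
static void emitULEB128(uint64_t V, std::vector<uint8_t> &Out) {
  do {
    uint8_t B = V & 0x7f;
    V >>= 7;
    if (V) B |= 0x80;
    Out.push_back(B);
  } while (V);
}

// Signed LEB128: stop once the remaining bits match the sign extension.
static void emitSLEB128(int64_t V, std::vector<uint8_t> &Out) {
  bool More;
  do {
    uint8_t B = V & 0x7f;
    V >>= 7; // arithmetic shift preserves the sign on every common compiler
    More = !((V == 0 && !(B & 0x40)) || (V == -1 && (B & 0x40)));
    if (More) B |= 0x80;
    Out.push_back(B);
  } while (More);
}

int main() {
  std::vector<uint8_t> Out;
  emitULEB128(624485, Out); // the classic DWARF example: 0xE5 0x8E 0x26
  emitSLEB128(-2, Out);     // 0x7E
  for (unsigned i = 0; i != Out.size(); ++i) std::printf("%02x ", Out[i]);
  std::printf("\n");
  return 0;
}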
diff --git a/lib/CodeGen/AsmPrinter/Win64Exception.cpp b/lib/CodeGen/AsmPrinter/Win64Exception.cpp
index c2ad5eb..b83aa5a 100644
--- a/lib/CodeGen/AsmPrinter/Win64Exception.cpp
+++ b/lib/CodeGen/AsmPrinter/Win64Exception.cpp
@@ -17,7 +17,6 @@
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
-#include "llvm/CodeGen/MachineLocation.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
diff --git a/lib/CodeGen/BranchFolding.cpp b/lib/CodeGen/BranchFolding.cpp
index 99090a8..75288b0 100644
--- a/lib/CodeGen/BranchFolding.cpp
+++ b/lib/CodeGen/BranchFolding.cpp
@@ -1624,26 +1624,29 @@ bool BranchFolder::HoistCommonCodeInSuccs(MachineBasicBlock *MBB) {
if (!TIB->isSafeToMove(TII, 0, DontMoveAcrossStore))
break;
+ // Remove kills from LocalDefsSet; these registers had short live ranges.
+ for (unsigned i = 0, e = TIB->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = TIB->getOperand(i);
+ if (!MO.isReg() || !MO.isUse() || !MO.isKill())
+ continue;
+ unsigned Reg = MO.getReg();
+ if (!Reg || !LocalDefsSet.count(Reg))
+ continue;
+ for (const unsigned *OR = TRI->getOverlaps(Reg); *OR; ++OR)
+ LocalDefsSet.erase(*OR);
+ }
+
// Track local defs so we can update liveins.
for (unsigned i = 0, e = TIB->getNumOperands(); i != e; ++i) {
MachineOperand &MO = TIB->getOperand(i);
- if (!MO.isReg())
+ if (!MO.isReg() || !MO.isDef() || MO.isDead())
continue;
unsigned Reg = MO.getReg();
if (!Reg)
continue;
- if (MO.isDef()) {
- if (!MO.isDead()) {
- LocalDefs.push_back(Reg);
- LocalDefsSet.insert(Reg);
- for (const unsigned *SR = TRI->getSubRegisters(Reg); *SR; ++SR)
- LocalDefsSet.insert(*SR);
- }
- } else if (MO.isKill() && LocalDefsSet.count(Reg)) {
- LocalDefsSet.erase(Reg);
- for (const unsigned *SR = TRI->getSubRegisters(Reg); *SR; ++SR)
- LocalDefsSet.erase(*SR);
- }
+ LocalDefs.push_back(Reg);
+ for (const unsigned *OR = TRI->getOverlaps(Reg); *OR; ++OR)
+ LocalDefsSet.insert(*OR);
}
HasDups = true;
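The BranchFolding change swaps getSubRegisters() for getOverlaps(), whose list includes the register itself and its super-registers as well. A small self-contained sketch of why that matters, using a made-up x86-like register file:

#include <cassert>
#include <map>
#include <set>
#include <string>
#include <vector>

typedef std::string Reg;

int main() {
  // Hypothetical aliasing table: AX, AH, AL are parts of EAX. Overlaps of a
  // register include itself plus everything sharing hardware bits with it.
  const std::map<Reg, std::vector<Reg> > Overlaps = {
      {"EAX", {"EAX", "AX", "AH", "AL"}},
      {"AX", {"AX", "EAX", "AH", "AL"}},
      {"AH", {"AH", "AX", "EAX"}},
      {"AL", {"AL", "AX", "EAX"}},
  };

  std::set<Reg> LocalDefsSet;
  // Defining AX must also mark its super-register EAX as locally defined;
  // walking only the sub-register list (AH, AL) would miss EAX.
  for (const Reg &O : Overlaps.at("AX"))
    LocalDefsSet.insert(O);
  assert(LocalDefsSet.count("EAX") && LocalDefsSet.count("AL"));
  return 0;
}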
diff --git a/lib/CodeGen/CMakeLists.txt b/lib/CodeGen/CMakeLists.txt
index 06d2a95..9a5e551 100644
--- a/lib/CodeGen/CMakeLists.txt
+++ b/lib/CodeGen/CMakeLists.txt
@@ -13,7 +13,9 @@ add_llvm_library(LLVMCodeGen
EdgeBundles.cpp
ELFCodeEmitter.cpp
ELFWriter.cpp
+ ExecutionDepsFix.cpp
ExpandISelPseudos.cpp
+ ExpandPostRAPseudos.cpp
GCMetadata.cpp
GCMetadataPrinter.cpp
GCStrategy.cpp
@@ -23,17 +25,18 @@ add_llvm_library(LLVMCodeGen
IntrinsicLowering.cpp
LLVMTargetMachine.cpp
LatencyPriorityQueue.cpp
+ LexicalScopes.cpp
LiveDebugVariables.cpp
LiveInterval.cpp
LiveIntervalAnalysis.cpp
LiveIntervalUnion.cpp
LiveStackAnalysis.cpp
LiveVariables.cpp
+ LiveRangeCalc.cpp
LiveRangeEdit.cpp
LocalStackSlotAllocation.cpp
- LowerSubregs.cpp
MachineBasicBlock.cpp
- MachineBlockFrequency.cpp
+ MachineBlockFrequencyInfo.cpp
MachineBranchProbabilityInfo.cpp
MachineCSE.cpp
MachineDominators.cpp
@@ -97,5 +100,15 @@ add_llvm_library(LLVMCodeGen
VirtRegRewriter.cpp
)
+add_llvm_library_dependencies(LLVMCodeGen
+ LLVMAnalysis
+ LLVMCore
+ LLVMMC
+ LLVMScalarOpts
+ LLVMSupport
+ LLVMTarget
+ LLVMTransformUtils
+ )
+
add_subdirectory(SelectionDAG)
add_subdirectory(AsmPrinter)
diff --git a/lib/CodeGen/CalcSpillWeights.cpp b/lib/CodeGen/CalcSpillWeights.cpp
index e6b3bbc..ea16a25 100644
--- a/lib/CodeGen/CalcSpillWeights.cpp
+++ b/lib/CodeGen/CalcSpillWeights.cpp
@@ -185,35 +185,3 @@ void VirtRegAuxInfo::CalculateWeightAndHint(LiveInterval &li) {
li.weight = normalizeSpillWeight(totalWeight, li.getSize());
}
-
-void VirtRegAuxInfo::CalculateRegClass(unsigned reg) {
- MachineRegisterInfo &MRI = MF.getRegInfo();
- const TargetInstrInfo *TII = MF.getTarget().getInstrInfo();
- const TargetRegisterInfo *TRI = MF.getTarget().getRegisterInfo();
- const TargetRegisterClass *OldRC = MRI.getRegClass(reg);
- const TargetRegisterClass *NewRC = TRI->getLargestLegalSuperClass(OldRC);
-
- // Stop early if there is no room to grow.
- if (NewRC == OldRC)
- return;
-
- // Accumulate constraints from all uses.
- for (MachineRegisterInfo::reg_nodbg_iterator I = MRI.reg_nodbg_begin(reg),
- E = MRI.reg_nodbg_end(); I != E; ++I) {
- // TRI doesn't have accurate enough information to model this yet.
- if (I.getOperand().getSubReg())
- return;
- // Inline asm instuctions don't remember their constraints.
- if (I->isInlineAsm())
- return;
- const TargetRegisterClass *OpRC =
- TII->getRegClass(I->getDesc(), I.getOperandNo(), TRI);
- if (OpRC)
- NewRC = getCommonSubClass(NewRC, OpRC);
- if (!NewRC || NewRC == OldRC)
- return;
- }
- DEBUG(dbgs() << "Inflating " << OldRC->getName() << ':' << PrintReg(reg)
- << " to " << NewRC->getName() <<".\n");
- MRI.setRegClass(reg, NewRC);
-}
diff --git a/lib/CodeGen/CodeGen.cpp b/lib/CodeGen/CodeGen.cpp
index 489746c..424535b 100644
--- a/lib/CodeGen/CodeGen.cpp
+++ b/lib/CodeGen/CodeGen.cpp
@@ -27,6 +27,7 @@ void llvm::initializeCodeGen(PassRegistry &Registry) {
initializeLiveIntervalsPass(Registry);
initializeLiveStacksPass(Registry);
initializeLiveVariablesPass(Registry);
+ initializeMachineBlockFrequencyInfoPass(Registry);
initializeMachineCSEPass(Registry);
initializeMachineDominatorTreePass(Registry);
initializeMachineLICMPass(Registry);
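initializeMachineBlockFrequencyInfoPass() follows LLVM's pattern of registering each pass with the PassRegistry exactly once at startup. A rough standalone sketch of such an initialize-once registry; the names here are hypothetical, not the PassRegistry API:

#include <cassert>
#include <functional>
#include <map>
#include <string>

class RegistrySketch {
  std::map<std::string, bool> Initialized;
public:
  void initializeOnce(const std::string &Name,
                      const std::function<void()> &Init) {
    if (Initialized[Name]) return; // each pass initializes exactly once
    Initialized[Name] = true;
    Init();
  }
};

int main() {
  RegistrySketch R;
  int Calls = 0;
  for (int i = 0; i < 2; ++i)
    R.initializeOnce("MachineBlockFrequencyInfo", [&Calls] { ++Calls; });
  assert(Calls == 1);
  return 0;
}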
diff --git a/lib/CodeGen/DwarfEHPrepare.cpp b/lib/CodeGen/DwarfEHPrepare.cpp
index 03604b0..ed9e409 100644
--- a/lib/CodeGen/DwarfEHPrepare.cpp
+++ b/lib/CodeGen/DwarfEHPrepare.cpp
@@ -63,6 +63,8 @@ namespace {
typedef SmallPtrSet<BasicBlock*, 8> BBSet;
BBSet LandingPads;
+ bool InsertUnwindResumeCalls();
+
bool NormalizeLandingPads();
bool LowerUnwindsAndResumes();
bool MoveExceptionValueCalls();
@@ -658,13 +660,76 @@ Instruction *DwarfEHPrepare::CreateExceptionValueCall(BasicBlock *BB) {
return CallInst::Create(ExceptionValueIntrinsic, "eh.value.call", Start);
}
+/// InsertUnwindResumeCalls - Convert the ResumeInsts that are still present
+/// into calls to the appropriate _Unwind_Resume function.
+bool DwarfEHPrepare::InsertUnwindResumeCalls() {
+ bool UsesNewEH = false;
+ SmallVector<ResumeInst*, 16> Resumes;
+ for (Function::iterator I = F->begin(), E = F->end(); I != E; ++I) {
+ TerminatorInst *TI = I->getTerminator();
+ if (ResumeInst *RI = dyn_cast<ResumeInst>(TI))
+ Resumes.push_back(RI);
+ else if (InvokeInst *II = dyn_cast<InvokeInst>(TI))
+ UsesNewEH = II->getUnwindDest()->isLandingPad();
+ }
+
+ if (Resumes.empty())
+ return UsesNewEH;
+
+ // Find the rewind function if we haven't already.
+ if (!RewindFunction) {
+ LLVMContext &Ctx = Resumes[0]->getContext();
+ FunctionType *FTy = FunctionType::get(Type::getVoidTy(Ctx),
+ Type::getInt8PtrTy(Ctx), false);
+ const char *RewindName = TLI->getLibcallName(RTLIB::UNWIND_RESUME);
+ RewindFunction = F->getParent()->getOrInsertFunction(RewindName, FTy);
+ }
+
+ // Create the basic block where the _Unwind_Resume call will live.
+ LLVMContext &Ctx = F->getContext();
+ BasicBlock *UnwindBB = BasicBlock::Create(Ctx, "unwind_resume", F);
+ PHINode *PN = PHINode::Create(Type::getInt8PtrTy(Ctx), Resumes.size(),
+ "exn.obj", UnwindBB);
+
+ // Extract the exception object from the ResumeInst and add it to the PHI node
+ // that feeds the _Unwind_Resume call.
+ BasicBlock *UnwindBBDom = Resumes[0]->getParent();
+ for (SmallVectorImpl<ResumeInst*>::iterator
+ I = Resumes.begin(), E = Resumes.end(); I != E; ++I) {
+ ResumeInst *RI = *I;
+ BranchInst::Create(UnwindBB, RI->getParent());
+ ExtractValueInst *ExnObj = ExtractValueInst::Create(RI->getOperand(0),
+ 0, "exn.obj", RI);
+ PN->addIncoming(ExnObj, RI->getParent());
+ UnwindBBDom = DT->findNearestCommonDominator(RI->getParent(), UnwindBBDom);
+ RI->eraseFromParent();
+ }
+
+ // Call the function.
+ CallInst *CI = CallInst::Create(RewindFunction, PN, "", UnwindBB);
+ CI->setCallingConv(TLI->getLibcallCallingConv(RTLIB::UNWIND_RESUME));
+
+ // We never expect _Unwind_Resume to return.
+ new UnreachableInst(Ctx, UnwindBB);
+
+ // Now update DominatorTree analysis information.
+ DT->addNewBlock(UnwindBB, UnwindBBDom);
+ return true;
+}
+
bool DwarfEHPrepare::runOnFunction(Function &Fn) {
bool Changed = false;
// Initialize internal state.
- DT = &getAnalysis<DominatorTree>();
+ DT = &getAnalysis<DominatorTree>(); // FIXME: We won't need this with the new EH.
F = &Fn;
+ if (InsertUnwindResumeCalls()) {
+ // FIXME: The rest of this function can go once the new EH is done.
+ LandingPads.clear();
+ return true;
+ }
+
// Ensure that only unwind edges end at landing pads (a landing pad is a
// basic block where an invoke unwind edge ends).
Changed |= NormalizeLandingPads();
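InsertUnwindResumeCalls() above rewrites every resume point to feed its exception object into one PHI and branch to a single shared block, so _Unwind_Resume is materialized once per function. A C-level caricature of the shape of that rewrite, where the goto/label pair plays the role of the shared UnwindBB and its PHI:

#include <cstdio>

static void unwindResume(void *Exn) { std::printf("resume %p\n", Exn); }

// Before: every resume site calls the runtime directly, duplicating the
// call and its setup at each site.
static void before(void *A, void *B, bool TakeFirst) {
  if (TakeFirst) { unwindResume(A); return; }
  unwindResume(B);
}

// After: each site stores into Exn (the "exn.obj" PHI) and jumps to one
// shared resume block, like the UnwindBB built above.
static void after(void *A, void *B, bool TakeFirst) {
  void *Exn;
  if (TakeFirst) { Exn = A; goto unwind_resume; }
  Exn = B;
unwind_resume:
  unwindResume(Exn); // single call site; in the pass, control never returns
}

int main() {
  int X, Y;
  before(&X, &Y, true);
  after(&X, &Y, false);
  return 0;
}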
diff --git a/lib/CodeGen/ELFCodeEmitter.cpp b/lib/CodeGen/ELFCodeEmitter.cpp
index 3fb087c..660424c3 100644
--- a/lib/CodeGen/ELFCodeEmitter.cpp
+++ b/lib/CodeGen/ELFCodeEmitter.cpp
@@ -155,7 +155,7 @@ void ELFCodeEmitter::emitConstantPool(MachineConstantPool *MCP) {
CPSections.push_back(CstPool.SectionIdx);
if (CPE.isMachineConstantPoolEntry())
- assert("CPE.isMachineConstantPoolEntry not supported yet");
+ assert(0 && "CPE.isMachineConstantPoolEntry not supported yet");
// Emit the constant to constant pool section
EW.EmitGlobalConstant(CPE.Val.ConstVal, CstPool);
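The assert() fixes in this file and in ELFCodeEmitter.h below address a classic C pitfall: a string literal decays to a non-null pointer, so assert("...") is always true and can never fire. A two-line demonstration:

#include <cassert>

int main() {
  assert("always true: a string literal is a non-null pointer");
  // assert(0 && "fires: the conjunction is false, and the message prints");
  return 0;
}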
diff --git a/lib/CodeGen/ELFCodeEmitter.h b/lib/CodeGen/ELFCodeEmitter.h
index 2ec1f6e..8671c67 100644
--- a/lib/CodeGen/ELFCodeEmitter.h
+++ b/lib/CodeGen/ELFCodeEmitter.h
@@ -58,13 +58,13 @@ namespace llvm {
/// emitLabel - Emits a label
virtual void emitLabel(MCSymbol *Label) {
- assert("emitLabel not implemented");
+ assert(0 && "emitLabel not implemented");
}
/// getLabelAddress - Return the address of the specified LabelID,
/// only usable after the LabelID has been emitted.
virtual uintptr_t getLabelAddress(MCSymbol *Label) const {
- assert("getLabelAddress not implemented");
+ assert(0 && "getLabelAddress not implemented");
return 0;
}
diff --git a/lib/CodeGen/ELFWriter.cpp b/lib/CodeGen/ELFWriter.cpp
index d977651..f2c2185 100644
--- a/lib/CodeGen/ELFWriter.cpp
+++ b/lib/CodeGen/ELFWriter.cpp
@@ -45,12 +45,12 @@
#include "llvm/MC/MCSectionELF.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/Target/Mangler.h"
-#include "llvm/Target/TargetAsmInfo.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Target/TargetELFWriterInfo.h"
#include "llvm/Target/TargetLowering.h"
#include "llvm/Target/TargetLoweringObjectFile.h"
#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
@@ -65,7 +65,8 @@ char ELFWriter::ID = 0;
ELFWriter::ELFWriter(raw_ostream &o, TargetMachine &tm)
: MachineFunctionPass(ID), O(o), TM(tm),
- OutContext(*new MCContext(*TM.getMCAsmInfo(), new TargetAsmInfo(tm))),
+ OutContext(*new MCContext(*TM.getMCAsmInfo(), *TM.getRegisterInfo(),
+ &TM.getTargetLowering()->getObjFileLowering())),
TLOF(TM.getTargetLowering()->getObjFileLowering()),
is64Bit(TM.getTargetData()->getPointerSizeInBits() == 64),
isLittleEndian(TM.getTargetData()->isLittleEndian()),
@@ -482,7 +483,7 @@ void ELFWriter::EmitGlobalConstant(const Constant *CV, ELFSection &GblS) {
EmitGlobalConstantLargeInt(CI, GblS);
return;
} else if (const ConstantVector *CP = dyn_cast<ConstantVector>(CV)) {
- const VectorType *PTy = CP->getType();
+ VectorType *PTy = CP->getType();
for (unsigned I = 0, E = PTy->getNumElements(); I < E; ++I)
EmitGlobalConstant(CP->getOperand(I), GblS);
return;
@@ -540,8 +541,7 @@ CstExprResTy ELFWriter::ResolveConstantExpr(const Constant *CV) {
case Instruction::GetElementPtr: {
const Constant *ptrVal = CE->getOperand(0);
SmallVector<Value*, 8> idxVec(CE->op_begin()+1, CE->op_end());
- int64_t Offset = TD->getIndexedOffset(ptrVal->getType(), &idxVec[0],
- idxVec.size());
+ int64_t Offset = TD->getIndexedOffset(ptrVal->getType(), idxVec);
return std::make_pair(ptrVal, Offset);
}
case Instruction::IntToPtr: {
@@ -552,7 +552,7 @@ CstExprResTy ELFWriter::ResolveConstantExpr(const Constant *CV) {
}
case Instruction::PtrToInt: {
Constant *Op = CE->getOperand(0);
- const Type *Ty = CE->getType();
+ Type *Ty = CE->getType();
// We can emit the pointer value into this slot if the slot is an
// integer slot greater than or equal to the size of the pointer.
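The getIndexedOffset() change is part of the LLVM 3.0-era migration from (pointer, length) argument pairs to ArrayRef, which converts implicitly from containers. A simplified stand-in showing why call sites get shorter (not the real llvm::ArrayRef):

#include <cstddef>
#include <vector>

// Non-owning (pointer, length) view with implicit container conversion.
template <typename T> class ArrayRefSketch {
  const T *Data;
  std::size_t Length;
public:
  ArrayRefSketch(const std::vector<T> &V) : Data(V.data()), Length(V.size()) {}
  ArrayRefSketch(const T *D, std::size_t N) : Data(D), Length(N) {}
  const T *begin() const { return Data; }
  const T *end() const { return Data + Length; }
};

static long sumIndices(ArrayRefSketch<long> Idx) {
  long Total = 0;
  for (long V : Idx) Total += V;
  return Total;
}

int main() {
  std::vector<long> IdxVec;
  IdxVec.push_back(1);
  IdxVec.push_back(2);
  // Old style: sumIndices(ArrayRefSketch<long>(&IdxVec[0], IdxVec.size()));
  return sumIndices(IdxVec) == 3 ? 0 : 1; // new style: pass the container
}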
diff --git a/lib/Target/X86/SSEDomainFix.cpp b/lib/CodeGen/ExecutionDepsFix.cpp
index 13680c5..01dccdb 100644
--- a/lib/Target/X86/SSEDomainFix.cpp
+++ b/lib/CodeGen/ExecutionDepsFix.cpp
@@ -1,4 +1,4 @@
-//===- SSEDomainFix.cpp - Use proper int/float domain for SSE ---*- C++ -*-===//
+//===- ExecutionDepsFix.cpp - Fix execution dependency issues ---*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -7,21 +7,25 @@
//
//===----------------------------------------------------------------------===//
//
-// This file contains the SSEDomainFix pass.
+// This file contains the execution dependency fix pass.
//
-// Some SSE instructions like mov, and, or, xor are available in different
+// Some X86 SSE instructions like mov, and, or, xor are available in different
// variants for different operand types. These variant instructions are
// equivalent, but on Nehalem and newer cpus there is extra latency
-// transferring data between integer and floating point domains.
+// transferring data between integer and floating point domains. ARM cores
+// have similar issues when they are configured with both VFP and NEON
+// pipelines.
//
// This pass changes the variant instructions to minimize domain crossings.
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "sse-domain-fix"
-#include "X86InstrInfo.h"
+#define DEBUG_TYPE "execution-fix"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
#include "llvm/ADT/DepthFirstIterator.h"
#include "llvm/Support/Allocator.h"
#include "llvm/Support/Debug.h"
@@ -97,25 +101,27 @@ struct DomainValue {
};
}
-static const unsigned NumRegs = 16;
-
namespace {
-class SSEDomainFixPass : public MachineFunctionPass {
+class ExeDepsFix : public MachineFunctionPass {
static char ID;
SpecificBumpPtrAllocator<DomainValue> Allocator;
SmallVector<DomainValue*,16> Avail;
+ const TargetRegisterClass *const RC;
MachineFunction *MF;
- const X86InstrInfo *TII;
+ const TargetInstrInfo *TII;
const TargetRegisterInfo *TRI;
MachineBasicBlock *MBB;
+ std::vector<int> AliasMap;
+ const unsigned NumRegs;
DomainValue **LiveRegs;
typedef DenseMap<MachineBasicBlock*,DomainValue**> LiveOutMap;
LiveOutMap LiveOuts;
unsigned Distance;
public:
- SSEDomainFixPass() : MachineFunctionPass(ID) {}
+ ExeDepsFix(const TargetRegisterClass *rc)
+ : MachineFunctionPass(ID), RC(rc), NumRegs(RC->getNumRegs()) {}
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesAll();
@@ -150,17 +156,16 @@ private:
};
}
-char SSEDomainFixPass::ID = 0;
+char ExeDepsFix::ID = 0;
/// Translate TRI register number to an index into our smaller tables of
/// interesting registers. Return -1 for boring registers.
-int SSEDomainFixPass::RegIndex(unsigned reg) {
- assert(X86::XMM15 == X86::XMM0+NumRegs-1 && "Unexpected sort");
- reg -= X86::XMM0;
- return reg < NumRegs ? (int) reg : -1;
+int ExeDepsFix::RegIndex(unsigned Reg) {
+ assert(Reg < AliasMap.size() && "Invalid register");
+ return AliasMap[Reg];
}
-DomainValue *SSEDomainFixPass::Alloc(int domain) {
+DomainValue *ExeDepsFix::Alloc(int domain) {
DomainValue *dv = Avail.empty() ?
new(Allocator.Allocate()) DomainValue :
Avail.pop_back_val();
@@ -170,14 +175,14 @@ DomainValue *SSEDomainFixPass::Alloc(int domain) {
return dv;
}
-void SSEDomainFixPass::Recycle(DomainValue *dv) {
+void ExeDepsFix::Recycle(DomainValue *dv) {
assert(dv && "Cannot recycle NULL");
dv->clear();
Avail.push_back(dv);
}
/// Set LiveRegs[rx] = dv, updating reference counts.
-void SSEDomainFixPass::SetLiveReg(int rx, DomainValue *dv) {
+void ExeDepsFix::SetLiveReg(int rx, DomainValue *dv) {
assert(unsigned(rx) < NumRegs && "Invalid index");
if (!LiveRegs) {
LiveRegs = new DomainValue*[NumRegs];
@@ -195,7 +200,7 @@ void SSEDomainFixPass::SetLiveReg(int rx, DomainValue *dv) {
}
// Kill register rx, recycle or collapse any DomainValue.
-void SSEDomainFixPass::Kill(int rx) {
+void ExeDepsFix::Kill(int rx) {
assert(unsigned(rx) < NumRegs && "Invalid index");
if (!LiveRegs || !LiveRegs[rx]) return;
@@ -208,7 +213,7 @@ void SSEDomainFixPass::Kill(int rx) {
}
/// Force register rx into domain.
-void SSEDomainFixPass::Force(int rx, unsigned domain) {
+void ExeDepsFix::Force(int rx, unsigned domain) {
assert(unsigned(rx) < NumRegs && "Invalid index");
DomainValue *dv;
if (LiveRegs && (dv = LiveRegs[rx])) {
@@ -217,8 +222,8 @@ void SSEDomainFixPass::Force(int rx, unsigned domain) {
else if (dv->hasDomain(domain))
Collapse(dv, domain);
else {
- // This is an incompatible open DomainValue. Collapse it to whatever and force
- // the new value into domain. This costs a domain crossing.
+ // This is an incompatible open DomainValue. Collapse it to whatever and
+ // force the new value into domain. This costs a domain crossing.
Collapse(dv, dv->getFirstDomain());
assert(LiveRegs[rx] && "Not live after collapse?");
LiveRegs[rx]->addDomain(domain);
@@ -231,12 +236,12 @@ void SSEDomainFixPass::Force(int rx, unsigned domain) {
/// Collapse open DomainValue into given domain. If there are multiple
/// registers using dv, they each get a unique collapsed DomainValue.
-void SSEDomainFixPass::Collapse(DomainValue *dv, unsigned domain) {
+void ExeDepsFix::Collapse(DomainValue *dv, unsigned domain) {
assert(dv->hasDomain(domain) && "Cannot collapse");
// Collapse all the instructions.
while (!dv->Instrs.empty())
- TII->SetSSEDomain(dv->Instrs.pop_back_val(), domain);
+ TII->setExecutionDomain(dv->Instrs.pop_back_val(), domain);
dv->setSingleDomain(domain);
// If there are multiple users, give them new, unique DomainValues.
@@ -248,7 +253,7 @@ void SSEDomainFixPass::Collapse(DomainValue *dv, unsigned domain) {
/// Merge - All instructions and registers in B are moved to A, and B is
/// released.
-bool SSEDomainFixPass::Merge(DomainValue *A, DomainValue *B) {
+bool ExeDepsFix::Merge(DomainValue *A, DomainValue *B) {
assert(!A->isCollapsed() && "Cannot merge into collapsed");
assert(!B->isCollapsed() && "Cannot merge from collapsed");
if (A == B)
@@ -266,7 +271,7 @@ bool SSEDomainFixPass::Merge(DomainValue *A, DomainValue *B) {
return true;
}
-void SSEDomainFixPass::enterBasicBlock() {
+void ExeDepsFix::enterBasicBlock() {
// Try to coalesce live-out registers from predecessors.
for (MachineBasicBlock::livein_iterator i = MBB->livein_begin(),
e = MBB->livein_end(); i != e; ++i) {
@@ -303,7 +308,7 @@ void SSEDomainFixPass::enterBasicBlock() {
// A hard instruction only works in one domain. All input registers will be
// forced into that domain.
-void SSEDomainFixPass::visitHardInstr(MachineInstr *mi, unsigned domain) {
+void ExeDepsFix::visitHardInstr(MachineInstr *mi, unsigned domain) {
// Collapse all uses.
for (unsigned i = mi->getDesc().getNumDefs(),
e = mi->getDesc().getNumOperands(); i != e; ++i) {
@@ -326,7 +331,7 @@ void SSEDomainFixPass::visitHardInstr(MachineInstr *mi, unsigned domain) {
}
// A soft instruction can be changed to work in other domains given by mask.
-void SSEDomainFixPass::visitSoftInstr(MachineInstr *mi, unsigned mask) {
+void ExeDepsFix::visitSoftInstr(MachineInstr *mi, unsigned mask) {
// Bitmask of available domains for this instruction after taking collapsed
// operands into account.
unsigned available = mask;
@@ -362,7 +367,7 @@ void SSEDomainFixPass::visitSoftInstr(MachineInstr *mi, unsigned mask) {
// If the collapsed operands force a single domain, propagate the collapse.
if (isPowerOf2_32(available)) {
unsigned domain = CountTrailingZeros_32(available);
- TII->SetSSEDomain(mi, domain);
+ TII->setExecutionDomain(mi, domain);
visitHardInstr(mi, domain);
return;
}
@@ -431,8 +436,8 @@ void SSEDomainFixPass::visitSoftInstr(MachineInstr *mi, unsigned mask) {
}
}
-void SSEDomainFixPass::visitGenericInstr(MachineInstr *mi) {
- // Process explicit defs, kill any XMM registers redefined.
+void ExeDepsFix::visitGenericInstr(MachineInstr *mi) {
+ // Process explicit defs, kill any relevant registers redefined.
for (unsigned i = 0, e = mi->getDesc().getNumDefs(); i != e; ++i) {
MachineOperand &mo = mi->getOperand(i);
if (!mo.isReg()) continue;
@@ -442,25 +447,36 @@ void SSEDomainFixPass::visitGenericInstr(MachineInstr *mi) {
}
}
-bool SSEDomainFixPass::runOnMachineFunction(MachineFunction &mf) {
+bool ExeDepsFix::runOnMachineFunction(MachineFunction &mf) {
MF = &mf;
- TII = static_cast<const X86InstrInfo*>(MF->getTarget().getInstrInfo());
+ TII = MF->getTarget().getInstrInfo();
TRI = MF->getTarget().getRegisterInfo();
MBB = 0;
LiveRegs = 0;
Distance = 0;
- assert(NumRegs == X86::VR128RegClass.getNumRegs() && "Bad regclass");
+ assert(NumRegs == RC->getNumRegs() && "Bad regclass");
- // If no XMM registers are used in the function, we can skip it completely.
+ // If no relevant registers are used in the function, we can skip it
+ // completely.
bool anyregs = false;
- for (TargetRegisterClass::const_iterator I = X86::VR128RegClass.begin(),
- E = X86::VR128RegClass.end(); I != E; ++I)
+ for (TargetRegisterClass::const_iterator I = RC->begin(), E = RC->end();
+ I != E; ++I)
if (MF->getRegInfo().isPhysRegUsed(*I)) {
anyregs = true;
break;
}
if (!anyregs) return false;
+ // Initialize the AliasMap on the first use.
+ if (AliasMap.empty()) {
+ // Given a PhysReg, AliasMap[PhysReg] is either the relevant index into RC,
+ // or -1.
+ AliasMap.resize(TRI->getNumRegs(), -1);
+ for (unsigned i = 0, e = RC->getNumRegs(); i != e; ++i)
+ for (const unsigned *AI = TRI->getOverlaps(RC->getRegister(i)); *AI; ++AI)
+ AliasMap[*AI] = i;
+ }
+
MachineBasicBlock *Entry = MF->begin();
SmallPtrSet<MachineBasicBlock*, 16> Visited;
for (df_ext_iterator<MachineBasicBlock*, SmallPtrSet<MachineBasicBlock*, 16> >
@@ -473,7 +489,7 @@ bool SSEDomainFixPass::runOnMachineFunction(MachineFunction &mf) {
MachineInstr *mi = I;
if (mi->isDebugValue()) continue;
++Distance;
- std::pair<uint16_t, uint16_t> domp = TII->GetSSEDomain(mi);
+ std::pair<uint16_t, uint16_t> domp = TII->getExecutionDomain(mi);
if (domp.first)
if (domp.second)
visitSoftInstr(mi, domp.second);
@@ -501,6 +517,7 @@ bool SSEDomainFixPass::runOnMachineFunction(MachineFunction &mf) {
return false;
}
-FunctionPass *llvm::createSSEDomainFixPass() {
- return new SSEDomainFixPass();
+FunctionPass *
+llvm::createExecutionDependencyFixPass(const TargetRegisterClass *RC) {
+ return new ExeDepsFix(RC);
}
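The key generalization in ExeDepsFix is the AliasMap: instead of assuming a contiguous XMM0..XMM15 numbering, the pass builds a dense table mapping every physical register number to its index in the tracked register class, or -1. A standalone sketch of that construction, with a made-up register numbering:

#include <cassert>
#include <vector>

int main() {
  const int NumPhysRegs = 8;
  // Tracked class: regs 4 and 6; reg 5 overlaps reg 4 (say, a sub-register).
  const std::vector<int> RC = {4, 6};
  const std::vector<std::vector<int> > Overlaps = {
      {0}, {1}, {2}, {3}, {4, 5}, {5, 4}, {6}, {7}};

  std::vector<int> AliasMap(NumPhysRegs, -1);
  for (int i = 0, e = (int)RC.size(); i != e; ++i)
    for (int A : Overlaps[RC[i]])
      AliasMap[A] = i; // any alias of RC[i] maps to index i

  assert(AliasMap[4] == 0 && AliasMap[5] == 0); // reg 5 aliases reg 4
  assert(AliasMap[6] == 1 && AliasMap[0] == -1);
  return 0;
}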
diff --git a/lib/CodeGen/LowerSubregs.cpp b/lib/CodeGen/ExpandPostRAPseudos.cpp
index 7871ba9..e2a14a8 100644
--- a/lib/CodeGen/LowerSubregs.cpp
+++ b/lib/CodeGen/ExpandPostRAPseudos.cpp
@@ -1,4 +1,4 @@
-//===-- LowerSubregs.cpp - Subregister Lowering instruction pass ----------===//
+//===-- ExpandPostRAPseudos.cpp - Pseudo instruction expansion pass -------===//
//
// The LLVM Compiler Infrastructure
//
@@ -7,14 +7,12 @@
//
//===----------------------------------------------------------------------===//
//
-// This file defines a MachineFunction pass which runs after register
-// allocation that turns subreg insert/extract instructions into register
-// copies, as needed. This ensures correct codegen even if the coalescer
-// isn't able to remove all subreg instructions.
+// This file defines a pass that expands COPY and SUBREG_TO_REG pseudo
+// instructions after register allocation.
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "lowersubregs"
+#define DEBUG_TYPE "postrapseudos"
#include "llvm/CodeGen/Passes.h"
#include "llvm/Function.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
@@ -29,52 +27,51 @@
using namespace llvm;
namespace {
- struct LowerSubregsInstructionPass : public MachineFunctionPass {
- private:
- const TargetRegisterInfo *TRI;
- const TargetInstrInfo *TII;
+struct ExpandPostRA : public MachineFunctionPass {
+private:
+ const TargetRegisterInfo *TRI;
+ const TargetInstrInfo *TII;
- public:
- static char ID; // Pass identification, replacement for typeid
- LowerSubregsInstructionPass() : MachineFunctionPass(ID) {}
+public:
+ static char ID; // Pass identification, replacement for typeid
+ ExpandPostRA() : MachineFunctionPass(ID) {}
- const char *getPassName() const {
- return "Subregister lowering instruction pass";
- }
+ const char *getPassName() const {
+ return "Post-RA pseudo instruction expansion pass";
+ }
- virtual void getAnalysisUsage(AnalysisUsage &AU) const {
- AU.setPreservesCFG();
- AU.addPreservedID(MachineLoopInfoID);
- AU.addPreservedID(MachineDominatorsID);
- MachineFunctionPass::getAnalysisUsage(AU);
- }
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesCFG();
+ AU.addPreservedID(MachineLoopInfoID);
+ AU.addPreservedID(MachineDominatorsID);
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
- /// runOnMachineFunction - pass entry point
- bool runOnMachineFunction(MachineFunction&);
+ /// runOnMachineFunction - pass entry point
+ bool runOnMachineFunction(MachineFunction&);
- private:
- bool LowerSubregToReg(MachineInstr *MI);
- bool LowerCopy(MachineInstr *MI);
+private:
+ bool LowerSubregToReg(MachineInstr *MI);
+ bool LowerCopy(MachineInstr *MI);
- void TransferDeadFlag(MachineInstr *MI, unsigned DstReg,
- const TargetRegisterInfo *TRI);
- void TransferImplicitDefs(MachineInstr *MI);
- };
+ void TransferDeadFlag(MachineInstr *MI, unsigned DstReg,
+ const TargetRegisterInfo *TRI);
+ void TransferImplicitDefs(MachineInstr *MI);
+};
+} // end anonymous namespace
- char LowerSubregsInstructionPass::ID = 0;
-}
+char ExpandPostRA::ID = 0;
-FunctionPass *llvm::createLowerSubregsPass() {
- return new LowerSubregsInstructionPass();
+FunctionPass *llvm::createExpandPostRAPseudosPass() {
+ return new ExpandPostRA();
}
/// TransferDeadFlag - MI is a pseudo-instruction with DstReg dead,
/// and the lowered replacement instructions immediately precede it.
/// Mark the replacement instructions with the dead flag.
void
-LowerSubregsInstructionPass::TransferDeadFlag(MachineInstr *MI,
- unsigned DstReg,
- const TargetRegisterInfo *TRI) {
+ExpandPostRA::TransferDeadFlag(MachineInstr *MI, unsigned DstReg,
+ const TargetRegisterInfo *TRI) {
for (MachineBasicBlock::iterator MII =
prior(MachineBasicBlock::iterator(MI)); ; --MII) {
if (MII->addRegisterDead(DstReg, TRI))
@@ -88,7 +85,7 @@ LowerSubregsInstructionPass::TransferDeadFlag(MachineInstr *MI,
/// replacement instructions immediately precede it. Copy any implicit-def
/// operands from MI to the replacement instruction.
void
-LowerSubregsInstructionPass::TransferImplicitDefs(MachineInstr *MI) {
+ExpandPostRA::TransferImplicitDefs(MachineInstr *MI) {
MachineBasicBlock::iterator CopyMI = MI;
--CopyMI;
@@ -100,7 +97,7 @@ LowerSubregsInstructionPass::TransferImplicitDefs(MachineInstr *MI) {
}
}
-bool LowerSubregsInstructionPass::LowerSubregToReg(MachineInstr *MI) {
+bool ExpandPostRA::LowerSubregToReg(MachineInstr *MI) {
MachineBasicBlock *MBB = MI->getParent();
assert((MI->getOperand(0).isReg() && MI->getOperand(0).isDef()) &&
MI->getOperand(1).isImm() &&
@@ -152,7 +149,7 @@ bool LowerSubregsInstructionPass::LowerSubregToReg(MachineInstr *MI) {
return true;
}
-bool LowerSubregsInstructionPass::LowerCopy(MachineInstr *MI) {
+bool ExpandPostRA::LowerCopy(MachineInstr *MI) {
MachineOperand &DstMO = MI->getOperand(0);
MachineOperand &SrcMO = MI->getOperand(1);
@@ -191,9 +188,9 @@ bool LowerSubregsInstructionPass::LowerCopy(MachineInstr *MI) {
/// runOnMachineFunction - Reduce subregister inserts and extracts to register
/// copies.
///
-bool LowerSubregsInstructionPass::runOnMachineFunction(MachineFunction &MF) {
+bool ExpandPostRA::runOnMachineFunction(MachineFunction &MF) {
DEBUG(dbgs() << "Machine Function\n"
- << "********** LOWERING SUBREG INSTRS **********\n"
+ << "********** EXPANDING POST-RA PSEUDO INSTRS **********\n"
<< "********** Function: "
<< MF.getFunction()->getName() << '\n');
TRI = MF.getTarget().getRegisterInfo();
@@ -205,17 +202,34 @@ bool LowerSubregsInstructionPass::runOnMachineFunction(MachineFunction &MF) {
mbbi != mbbe; ++mbbi) {
for (MachineBasicBlock::iterator mi = mbbi->begin(), me = mbbi->end();
mi != me;) {
- MachineBasicBlock::iterator nmi = llvm::next(mi);
MachineInstr *MI = mi;
- assert(!MI->isInsertSubreg() && "INSERT_SUBREG should no longer appear");
- assert(MI->getOpcode() != TargetOpcode::EXTRACT_SUBREG &&
- "EXTRACT_SUBREG should no longer appear");
- if (MI->isSubregToReg()) {
+ // Advance iterator here because MI may be erased.
+ ++mi;
+
+ // Only expand pseudos.
+ if (!MI->getDesc().isPseudo())
+ continue;
+
+ // Give targets a chance to expand even standard pseudos.
+ if (TII->expandPostRAPseudo(MI)) {
+ MadeChange = true;
+ continue;
+ }
+
+ // Expand standard pseudos.
+ switch (MI->getOpcode()) {
+ case TargetOpcode::SUBREG_TO_REG:
MadeChange |= LowerSubregToReg(MI);
- } else if (MI->isCopy()) {
+ break;
+ case TargetOpcode::COPY:
MadeChange |= LowerCopy(MI);
+ break;
+ case TargetOpcode::DBG_VALUE:
+ continue;
+ case TargetOpcode::INSERT_SUBREG:
+ case TargetOpcode::EXTRACT_SUBREG:
+ llvm_unreachable("Sub-register pseudos should have been eliminated.");
}
- mi = nmi;
}
}
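The rewritten loop advances the iterator before the instruction can be erased, the standard idiom for mutating a list while walking it. The same pattern with a standard container:

#include <cassert>
#include <list>

int main() {
  std::list<int> L = {1, 2, 3, 4};
  for (std::list<int>::iterator I = L.begin(), E = L.end(); I != E;) {
    std::list<int>::iterator Cur = I++; // advance first; erasing Cur is safe
    if (*Cur % 2 == 0)
      L.erase(Cur); // invalidates only Cur, never I
  }
  assert(L.size() == 2);
  return 0;
}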
diff --git a/lib/CodeGen/IfConversion.cpp b/lib/CodeGen/IfConversion.cpp
index 6cb2277..ce7ed29 100644
--- a/lib/CodeGen/IfConversion.cpp
+++ b/lib/CodeGen/IfConversion.cpp
@@ -16,14 +16,13 @@
#include "llvm/Function.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
-#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/MC/MCInstrItineraries.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetLowering.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/Support/BranchProbability.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
@@ -154,7 +153,8 @@ namespace {
const TargetInstrInfo *TII;
const TargetRegisterInfo *TRI;
const InstrItineraryData *InstrItins;
- const MachineLoopInfo *MLI;
+ const MachineBranchProbabilityInfo *MBPI;
+
bool MadeChange;
int FnNum;
public:
@@ -162,9 +162,9 @@ namespace {
IfConverter() : MachineFunctionPass(ID), FnNum(-1) {
initializeIfConverterPass(*PassRegistry::getPassRegistry());
}
-
+
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
- AU.addRequired<MachineLoopInfo>();
+ AU.addRequired<MachineBranchProbabilityInfo>();
MachineFunctionPass::getAnalysisUsage(AU);
}
@@ -252,7 +252,7 @@ namespace {
}
INITIALIZE_PASS_BEGIN(IfConverter, "if-converter", "If Converter", false, false)
-INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
+INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfo)
INITIALIZE_PASS_END(IfConverter, "if-converter", "If Converter", false, false)
FunctionPass *llvm::createIfConverterPass() { return new IfConverter(); }
@@ -261,7 +261,7 @@ bool IfConverter::runOnMachineFunction(MachineFunction &MF) {
TLI = MF.getTarget().getTargetLowering();
TII = MF.getTarget().getInstrInfo();
TRI = MF.getTarget().getRegisterInfo();
- MLI = &getAnalysis<MachineLoopInfo>();
+ MBPI = &getAnalysis<MachineBranchProbabilityInfo>();
InstrItins = MF.getTarget().getInstrItineraryData();
if (!TII) return false;
@@ -790,28 +790,9 @@ IfConverter::BBInfo &IfConverter::AnalyzeBlock(MachineBasicBlock *BB,
bool TNeedSub = TrueBBI.Predicate.size() > 0;
bool FNeedSub = FalseBBI.Predicate.size() > 0;
bool Enqueued = false;
-
- // Try to predict the branch, using loop info to guide us.
- // General heuristics are:
- // - backedge -> 90% taken
- // - early exit -> 20% taken
- // - branch predictor confidence -> 90%
- BranchProbability Prediction(5, 10);
- MachineLoop *Loop = MLI->getLoopFor(BB);
- if (Loop) {
- if (TrueBBI.BB == Loop->getHeader())
- Prediction = BranchProbability(9, 10);
- else if (FalseBBI.BB == Loop->getHeader())
- Prediction = BranchProbability(1, 10);
-
- MachineLoop *TrueLoop = MLI->getLoopFor(TrueBBI.BB);
- MachineLoop *FalseLoop = MLI->getLoopFor(FalseBBI.BB);
- if (!TrueLoop || TrueLoop->getParentLoop() == Loop)
- Prediction = BranchProbability(2, 10);
- else if (!FalseLoop || FalseLoop->getParentLoop() == Loop)
- Prediction = BranchProbability(8, 10);
- }
-
+
+ BranchProbability Prediction = MBPI->getEdgeProbability(BB, TrueBBI.BB);
+
if (CanRevCond && ValidDiamond(TrueBBI, FalseBBI, Dups, Dups2) &&
MeetIfcvtSizeLimit(*TrueBBI.BB, (TrueBBI.NonPredSize - (Dups + Dups2) +
TrueBBI.ExtraCost), TrueBBI.ExtraCost2,
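The deleted block hard-coded fractions such as 9/10 for backedges; the pass now asks MachineBranchProbabilityInfo instead. BranchProbability itself is essentially an exact numerator/denominator pair; a minimal sketch of the cross-multiplication comparison such a type enables without floating point:

#include <cassert>
#include <cstdint>

struct Prob {
  uint32_t N, D; // probability = N / D
};

// Compare a/b vs. c/d exactly: a*d vs. c*b, widened to avoid overflow.
static bool lessThan(Prob A, Prob B) {
  return (uint64_t)A.N * B.D < (uint64_t)B.N * A.D;
}

int main() {
  Prob Backedge = {9, 10}, Coin = {1, 2};
  assert(lessThan(Coin, Backedge)); // 1/2 < 9/10, with no rounding error
  return 0;
}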
diff --git a/lib/CodeGen/InlineSpiller.cpp b/lib/CodeGen/InlineSpiller.cpp
index 5547f73..726af46 100644
--- a/lib/CodeGen/InlineSpiller.cpp
+++ b/lib/CodeGen/InlineSpiller.cpp
@@ -17,6 +17,7 @@
#include "LiveRangeEdit.h"
#include "VirtRegMap.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/TinyPtrVector.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/CodeGen/LiveIntervalAnalysis.h"
#include "llvm/CodeGen/LiveStackAnalysis.h"
@@ -27,22 +28,26 @@
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
STATISTIC(NumSpilledRanges, "Number of spilled live ranges");
-STATISTIC(NumSnippets, "Number of snippets included in spills");
+STATISTIC(NumSnippets, "Number of spilled snippets");
STATISTIC(NumSpills, "Number of spills inserted");
+STATISTIC(NumSpillsRemoved, "Number of spills removed");
STATISTIC(NumReloads, "Number of reloads inserted");
+STATISTIC(NumReloadsRemoved, "Number of reloads removed");
STATISTIC(NumFolded, "Number of folded stack accesses");
STATISTIC(NumFoldedLoads, "Number of folded loads");
STATISTIC(NumRemats, "Number of rematerialized defs for spilling");
-STATISTIC(NumOmitReloadSpill, "Number of omitted spills after reloads");
-STATISTIC(NumHoistLocal, "Number of locally hoisted spills");
-STATISTIC(NumHoistGlobal, "Number of globally hoisted spills");
-STATISTIC(NumRedundantSpills, "Number of redundant spills identified");
+STATISTIC(NumOmitReloadSpill, "Number of omitted spills of reloads");
+STATISTIC(NumHoists, "Number of hoisted spills");
+
+static cl::opt<bool> DisableHoisting("disable-spill-hoist", cl::Hidden,
+ cl::desc("Disable inline spill hoisting"));
namespace {
class InlineSpiller : public Spiller {
@@ -75,26 +80,49 @@ class InlineSpiller : public Spiller {
// Values that failed to remat at some point.
SmallPtrSet<VNInfo*, 8> UsedValues;
+public:
// Information about a value that was defined by a copy from a sibling
// register.
struct SibValueInfo {
// True when all reaching defs were reloads: No spill is necessary.
bool AllDefsAreReloads;
+ // True when value is defined by an original PHI not from splitting.
+ bool DefByOrigPHI;
+
+ // True when the COPY defining this value killed its source.
+ bool KillsSource;
+
// The preferred register to spill.
unsigned SpillReg;
// The value of SpillReg that should be spilled.
VNInfo *SpillVNI;
+ // The block where SpillVNI should be spilled. Currently, this must be the
+ // block containing SpillVNI->def.
+ MachineBasicBlock *SpillMBB;
+
// A defining instruction that is not a sibling copy or a reload, or NULL.
// This can be used as a template for rematerialization.
MachineInstr *DefMI;
+ // List of values that depend on this one. These values are actually the
+ // same, but live range splitting has placed them in different registers,
+ // or the SSA updater needed to insert PHI-defs to preserve SSA form. These
+ // are copies of the current value and phi-kills. Usually only phi-kills
+ // cause more than one dependent value.
+ TinyPtrVector<VNInfo*> Deps;
+
SibValueInfo(unsigned Reg, VNInfo *VNI)
- : AllDefsAreReloads(false), SpillReg(Reg), SpillVNI(VNI), DefMI(0) {}
+ : AllDefsAreReloads(true), DefByOrigPHI(false), KillsSource(false),
+ SpillReg(Reg), SpillVNI(VNI), SpillMBB(0), DefMI(0) {}
+
+ // Returns true when a def has been found.
+ bool hasDef() const { return DefByOrigPHI || DefMI; }
};
+private:
// Values in RegsToSpill defined by sibling copies.
typedef DenseMap<VNInfo*, SibValueInfo> SibValueMap;
SibValueMap SibValues;
@@ -134,6 +162,7 @@ private:
bool isSibling(unsigned Reg);
MachineInstr *traceSiblingValue(unsigned, VNInfo*, VNInfo*);
+ void propagateSiblingValue(SibValueMap::iterator, VNInfo *VNI = 0);
void analyzeSiblingValues();
bool hoistSpill(LiveInterval &SpillLI, MachineInstr *CopyMI);
@@ -282,6 +311,156 @@ bool InlineSpiller::isSibling(unsigned Reg) {
VRM.getOriginal(Reg) == Original;
}
+#ifndef NDEBUG
+static raw_ostream &operator<<(raw_ostream &OS,
+ const InlineSpiller::SibValueInfo &SVI) {
+ OS << "spill " << PrintReg(SVI.SpillReg) << ':'
+ << SVI.SpillVNI->id << '@' << SVI.SpillVNI->def;
+ if (SVI.SpillMBB)
+ OS << " in BB#" << SVI.SpillMBB->getNumber();
+ if (SVI.AllDefsAreReloads)
+ OS << " all-reloads";
+ if (SVI.DefByOrigPHI)
+ OS << " orig-phi";
+ if (SVI.KillsSource)
+ OS << " kill";
+ OS << " deps[";
+ for (unsigned i = 0, e = SVI.Deps.size(); i != e; ++i)
+ OS << ' ' << SVI.Deps[i]->id << '@' << SVI.Deps[i]->def;
+ OS << " ]";
+ if (SVI.DefMI)
+ OS << " def: " << *SVI.DefMI;
+ else
+ OS << '\n';
+ return OS;
+}
+#endif
+
+/// propagateSiblingValue - Propagate the value in SVI to dependents if it is
+/// known. Otherwise remember the dependency for later.
+///
+/// @param SVI SibValues entry to propagate.
+/// @param VNI Dependent value, or NULL to propagate to all saved dependents.
+void InlineSpiller::propagateSiblingValue(SibValueMap::iterator SVI,
+ VNInfo *VNI) {
+ // When VNI is non-NULL, add it to SVI's deps, and only propagate to that.
+ TinyPtrVector<VNInfo*> FirstDeps;
+ if (VNI) {
+ FirstDeps.push_back(VNI);
+ SVI->second.Deps.push_back(VNI);
+ }
+
+ // Has the value been completely determined yet? If not, defer propagation.
+ if (!SVI->second.hasDef())
+ return;
+
+ // Work list of values to propagate. It would be nice to use a SetVector
+ // here, but then we would be forced to use a SmallSet.
+ SmallVector<SibValueMap::iterator, 8> WorkList(1, SVI);
+ SmallPtrSet<VNInfo*, 8> WorkSet;
+
+ do {
+ SVI = WorkList.pop_back_val();
+ WorkSet.erase(SVI->first);
+ TinyPtrVector<VNInfo*> *Deps = VNI ? &FirstDeps : &SVI->second.Deps;
+ VNI = 0;
+
+ SibValueInfo &SV = SVI->second;
+ if (!SV.SpillMBB)
+ SV.SpillMBB = LIS.getMBBFromIndex(SV.SpillVNI->def);
+
+ DEBUG(dbgs() << " prop to " << Deps->size() << ": "
+ << SVI->first->id << '@' << SVI->first->def << ":\t" << SV);
+
+ assert(SV.hasDef() && "Propagating undefined value");
+
+ // Should this value be propagated as a preferred spill candidate? We don't
+ // propagate values of registers that are about to spill.
+ bool PropSpill = !DisableHoisting && !isRegToSpill(SV.SpillReg);
+ unsigned SpillDepth = ~0u;
+
+ for (TinyPtrVector<VNInfo*>::iterator DepI = Deps->begin(),
+ DepE = Deps->end(); DepI != DepE; ++DepI) {
+ SibValueMap::iterator DepSVI = SibValues.find(*DepI);
+ assert(DepSVI != SibValues.end() && "Dependent value not in SibValues");
+ SibValueInfo &DepSV = DepSVI->second;
+ if (!DepSV.SpillMBB)
+ DepSV.SpillMBB = LIS.getMBBFromIndex(DepSV.SpillVNI->def);
+
+ bool Changed = false;
+
+ // Propagate defining instruction.
+ if (!DepSV.hasDef()) {
+ Changed = true;
+ DepSV.DefMI = SV.DefMI;
+ DepSV.DefByOrigPHI = SV.DefByOrigPHI;
+ }
+
+ // Propagate AllDefsAreReloads. For PHI values, this computes an AND of
+ // all predecessors.
+ if (!SV.AllDefsAreReloads && DepSV.AllDefsAreReloads) {
+ Changed = true;
+ DepSV.AllDefsAreReloads = false;
+ }
+
+ // Propagate best spill value.
+ if (PropSpill && SV.SpillVNI != DepSV.SpillVNI) {
+ if (SV.SpillMBB == DepSV.SpillMBB) {
+ // DepSV is in the same block. Hoist when dominated.
+ if (DepSV.KillsSource && SV.SpillVNI->def < DepSV.SpillVNI->def) {
+ // This is an alternative def earlier in the same MBB.
+ // Hoist the spill as far as possible in SpillMBB. This can ease
+ // register pressure:
+ //
+ // x = def
+ // y = use x
+ // s = copy x
+ //
+ // Hoisting the spill of s to immediately after the def removes the
+ // interference between x and y:
+ //
+ // x = def
+ // spill x
+ // y = use x<kill>
+ //
+ // This hoist only helps when the DepSV copy kills its source.
+ Changed = true;
+ DepSV.SpillReg = SV.SpillReg;
+ DepSV.SpillVNI = SV.SpillVNI;
+ DepSV.SpillMBB = SV.SpillMBB;
+ }
+ } else {
+ // DepSV is in a different block.
+ if (SpillDepth == ~0u)
+ SpillDepth = Loops.getLoopDepth(SV.SpillMBB);
+
+ // Also hoist spills to blocks with smaller loop depth, but make sure
+ // that the new value dominates. Non-phi dependents are always
+ // dominated, phis need checking.
+ if ((Loops.getLoopDepth(DepSV.SpillMBB) > SpillDepth) &&
+ (!DepSVI->first->isPHIDef() ||
+ MDT.dominates(SV.SpillMBB, DepSV.SpillMBB))) {
+ Changed = true;
+ DepSV.SpillReg = SV.SpillReg;
+ DepSV.SpillVNI = SV.SpillVNI;
+ DepSV.SpillMBB = SV.SpillMBB;
+ }
+ }
+ }
+
+ if (!Changed)
+ continue;
+
+ // Something changed in DepSVI. Propagate to dependents.
+ if (WorkSet.insert(DepSVI->first))
+ WorkList.push_back(DepSVI);
+
+ DEBUG(dbgs() << " update " << DepSVI->first->id << '@'
+ << DepSVI->first->def << " to:\t" << DepSV);
+ }
+ } while (!WorkList.empty());
+}
+
/// traceSiblingValue - Trace a value that is about to be spilled back to the
/// real defining instructions by looking through sibling copies. Always stay
/// within the range of OrigVNI so the registers are known to carry the same
@@ -294,84 +473,101 @@ bool InlineSpiller::isSibling(unsigned Reg) {
///
MachineInstr *InlineSpiller::traceSiblingValue(unsigned UseReg, VNInfo *UseVNI,
VNInfo *OrigVNI) {
+ // Check if a cached value already exists.
+ SibValueMap::iterator SVI;
+ bool Inserted;
+ tie(SVI, Inserted) =
+ SibValues.insert(std::make_pair(UseVNI, SibValueInfo(UseReg, UseVNI)));
+ if (!Inserted) {
+ DEBUG(dbgs() << "Cached value " << PrintReg(UseReg) << ':'
+ << UseVNI->id << '@' << UseVNI->def << ' ' << SVI->second);
+ return SVI->second.DefMI;
+ }
+
DEBUG(dbgs() << "Tracing value " << PrintReg(UseReg) << ':'
<< UseVNI->id << '@' << UseVNI->def << '\n');
- SmallPtrSet<VNInfo*, 8> Visited;
+
+ // List of (Reg, VNI) that have been inserted into SibValues, but need to be
+ // processed.
SmallVector<std::pair<unsigned, VNInfo*>, 8> WorkList;
WorkList.push_back(std::make_pair(UseReg, UseVNI));
- // Best spill candidate seen so far. This must dominate UseVNI.
- SibValueInfo SVI(UseReg, UseVNI);
- MachineBasicBlock *UseMBB = LIS.getMBBFromIndex(UseVNI->def);
- MachineBasicBlock *SpillMBB = UseMBB;
- unsigned SpillDepth = Loops.getLoopDepth(SpillMBB);
- bool SeenOrigPHI = false; // Original PHI met.
-
do {
unsigned Reg;
VNInfo *VNI;
tie(Reg, VNI) = WorkList.pop_back_val();
- if (!Visited.insert(VNI))
- continue;
+ DEBUG(dbgs() << " " << PrintReg(Reg) << ':' << VNI->id << '@' << VNI->def
+ << ":\t");
- // Is this value a better spill candidate?
- if (!isRegToSpill(Reg)) {
- MachineBasicBlock *MBB = LIS.getMBBFromIndex(VNI->def);
- if (MBB == SpillMBB) {
- // This is an alternative def earlier in the same MBB.
- // Hoist the spill as far as possible in SpillMBB. This can ease
- // register pressure:
- //
- // x = def
- // y = use x
- // s = copy x
- //
- // Hoisting the spill of s to immediately after the def removes the
- // interference between x and y:
- //
- // x = def
- // spill x
- // y = use x<kill>
- //
- if (VNI->def < SVI.SpillVNI->def) {
- DEBUG(dbgs() << " hoist in BB#" << MBB->getNumber() << ": "
- << PrintReg(Reg) << ':' << VNI->id << '@' << VNI->def
- << '\n');
- SVI.SpillReg = Reg;
- SVI.SpillVNI = VNI;
- }
- } else if (MBB != UseMBB && MDT.dominates(MBB, UseMBB)) {
- // This is a valid spill location dominating UseVNI.
- // Prefer to spill at a smaller loop depth.
- unsigned Depth = Loops.getLoopDepth(MBB);
- if (Depth < SpillDepth) {
- DEBUG(dbgs() << " spill depth " << Depth << ": " << PrintReg(Reg)
- << ':' << VNI->id << '@' << VNI->def << '\n');
- SVI.SpillReg = Reg;
- SVI.SpillVNI = VNI;
- SpillMBB = MBB;
- SpillDepth = Depth;
- }
- }
- }
+ // First check if this value has already been computed.
+ SVI = SibValues.find(VNI);
+ assert(SVI != SibValues.end() && "Missing SibValues entry");
// Trace through PHI-defs created by live range splitting.
if (VNI->isPHIDef()) {
+ // Stop at original PHIs. We don't know the value at the predecessors.
if (VNI->def == OrigVNI->def) {
- DEBUG(dbgs() << " orig phi value " << PrintReg(Reg) << ':'
- << VNI->id << '@' << VNI->def << '\n');
- SeenOrigPHI = true;
+ DEBUG(dbgs() << "orig phi value\n");
+ SVI->second.DefByOrigPHI = true;
+ SVI->second.AllDefsAreReloads = false;
+ propagateSiblingValue(SVI);
continue;
}
- // Get values live-out of predecessors.
+
+ // This is a PHI inserted by live range splitting. We could trace the
+ // live-out value from predecessor blocks, but that search can be very
+ // expensive if there are many predecessors and many more PHIs as
+ // generated by tail-dup when it sees an indirectbr. Instead, look at
+ // all the non-PHI defs that have the same value as OrigVNI. They must
+ // jointly dominate VNI->def. This is not optimal since VNI may actually
+ // be jointly dominated by a smaller subset of defs, so there is a chance
+ // we will miss an AllDefsAreReloads optimization.
+
+ // Separate all values dominated by OrigVNI into PHIs and non-PHIs.
+ SmallVector<VNInfo*, 8> PHIs, NonPHIs;
LiveInterval &LI = LIS.getInterval(Reg);
- MachineBasicBlock *MBB = LIS.getMBBFromIndex(VNI->def);
- for (MachineBasicBlock::pred_iterator PI = MBB->pred_begin(),
- PE = MBB->pred_end(); PI != PE; ++PI) {
- VNInfo *PVNI = LI.getVNInfoAt(LIS.getMBBEndIdx(*PI).getPrevSlot());
- if (PVNI)
- WorkList.push_back(std::make_pair(Reg, PVNI));
+ LiveInterval &OrigLI = LIS.getInterval(Original);
+
+ for (LiveInterval::vni_iterator VI = LI.vni_begin(), VE = LI.vni_end();
+ VI != VE; ++VI) {
+ VNInfo *VNI2 = *VI;
+ if (VNI2->isUnused())
+ continue;
+ if (!OrigLI.containsOneValue() &&
+ OrigLI.getVNInfoAt(VNI2->def) != OrigVNI)
+ continue;
+ if (VNI2->isPHIDef() && VNI2->def != OrigVNI->def)
+ PHIs.push_back(VNI2);
+ else
+ NonPHIs.push_back(VNI2);
+ }
+ DEBUG(dbgs() << "split phi value, checking " << PHIs.size()
+ << " phi-defs, and " << NonPHIs.size()
+ << " non-phi/orig defs\n");
+
+ // Create entries for all the PHIs. Don't add them to the worklist, we
+ // are processing all of them in one go here.
+ for (unsigned i = 0, e = PHIs.size(); i != e; ++i)
+ SibValues.insert(std::make_pair(PHIs[i], SibValueInfo(Reg, PHIs[i])));
+
+ // Add every PHI as a dependent of all the non-PHIs.
+ for (unsigned i = 0, e = NonPHIs.size(); i != e; ++i) {
+ VNInfo *NonPHI = NonPHIs[i];
+ // Known value? Try an insertion.
+ tie(SVI, Inserted) =
+ SibValues.insert(std::make_pair(NonPHI, SibValueInfo(Reg, NonPHI)));
+ // Add all the PHIs as dependents of NonPHI.
+ for (unsigned pi = 0, pe = PHIs.size(); pi != pe; ++pi)
+ SVI->second.Deps.push_back(PHIs[pi]);
+ // This is the first time we see NonPHI, add it to the worklist.
+ if (Inserted)
+ WorkList.push_back(std::make_pair(Reg, NonPHI));
+ else
+ // Propagate to all inserted PHIs, not just VNI.
+ propagateSiblingValue(SVI);
}
+
+ // Next work list item.
continue;
}
@@ -382,48 +578,49 @@ MachineInstr *InlineSpiller::traceSiblingValue(unsigned UseReg, VNInfo *UseVNI,
if (unsigned SrcReg = isFullCopyOf(MI, Reg)) {
if (isSibling(SrcReg)) {
LiveInterval &SrcLI = LIS.getInterval(SrcReg);
- VNInfo *SrcVNI = SrcLI.getVNInfoAt(VNI->def.getUseIndex());
- assert(SrcVNI && "Copy from non-existing value");
- DEBUG(dbgs() << " copy of " << PrintReg(SrcReg) << ':'
- << SrcVNI->id << '@' << SrcVNI->def << '\n');
- WorkList.push_back(std::make_pair(SrcReg, SrcVNI));
+ LiveRange *SrcLR = SrcLI.getLiveRangeContaining(VNI->def.getUseIndex());
+ assert(SrcLR && "Copy from non-existing value");
+ // Check if this COPY kills its source.
+ SVI->second.KillsSource = (SrcLR->end == VNI->def);
+ VNInfo *SrcVNI = SrcLR->valno;
+ DEBUG(dbgs() << "copy of " << PrintReg(SrcReg) << ':'
+ << SrcVNI->id << '@' << SrcVNI->def
+ << " kill=" << unsigned(SVI->second.KillsSource) << '\n');
+ // Known sibling source value? Try an insertion.
+ tie(SVI, Inserted) = SibValues.insert(std::make_pair(SrcVNI,
+ SibValueInfo(SrcReg, SrcVNI)));
+ // This is the first time we see Src, add it to the worklist.
+ if (Inserted)
+ WorkList.push_back(std::make_pair(SrcReg, SrcVNI));
+ propagateSiblingValue(SVI, VNI);
+ // Next work list item.
continue;
}
}
// Track reachable reloads.
+ SVI->second.DefMI = MI;
+ SVI->second.SpillMBB = MI->getParent();
int FI;
if (Reg == TII.isLoadFromStackSlot(MI, FI) && FI == StackSlot) {
- DEBUG(dbgs() << " reload " << PrintReg(Reg) << ':'
- << VNI->id << "@" << VNI->def << '\n');
- SVI.AllDefsAreReloads = true;
+ DEBUG(dbgs() << "reload\n");
+ propagateSiblingValue(SVI);
+ // Next work list item.
continue;
}
- // We have an 'original' def. Don't record trivial cases.
- if (VNI == UseVNI) {
- DEBUG(dbgs() << "Not a sibling copy.\n");
- return MI;
- }
-
// Potential remat candidate.
- DEBUG(dbgs() << " def " << PrintReg(Reg) << ':'
- << VNI->id << '@' << VNI->def << '\t' << *MI);
- SVI.DefMI = MI;
+ DEBUG(dbgs() << "def " << *MI);
+ SVI->second.AllDefsAreReloads = false;
+ propagateSiblingValue(SVI);
} while (!WorkList.empty());
- if (SeenOrigPHI || SVI.DefMI)
- SVI.AllDefsAreReloads = false;
-
- DEBUG({
- if (SVI.AllDefsAreReloads)
- dbgs() << "All defs are reloads.\n";
- else
- dbgs() << "Prefer to spill " << PrintReg(SVI.SpillReg) << ':'
- << SVI.SpillVNI->id << '@' << SVI.SpillVNI->def << '\n';
- });
- SibValues.insert(std::make_pair(UseVNI, SVI));
- return SVI.DefMI;
+ // Look up the value we were looking for. We already did this lookup at the
+ // top of the function, but SibValues may have been invalidated.
+ SVI = SibValues.find(UseVNI);
+ assert(SVI != SibValues.end() && "Didn't compute requested info");
+ DEBUG(dbgs() << " traced to:\t" << SVI->second);
+ return SVI->second.DefMI;
}
/// analyzeSiblingValues - Trace values defined by sibling copies back to
@@ -506,6 +703,7 @@ bool InlineSpiller::hoistSpill(LiveInterval &SpillLI, MachineInstr *CopyMI) {
// Already spilled everywhere.
if (SVI.AllDefsAreReloads) {
+ DEBUG(dbgs() << "\tno spill needed: " << SVI);
++NumOmitReloadSpill;
return true;
}
@@ -531,10 +729,8 @@ bool InlineSpiller::hoistSpill(LiveInterval &SpillLI, MachineInstr *CopyMI) {
VRM.addSpillSlotUse(StackSlot, MII);
DEBUG(dbgs() << "\thoisted: " << SVI.SpillVNI->def << '\t' << *MII);
- if (MBB == CopyMI->getParent())
- ++NumHoistLocal;
- else
- ++NumHoistGlobal;
+ ++NumSpills;
+ ++NumHoists;
return true;
}
@@ -589,7 +785,8 @@ void InlineSpiller::eliminateRedundantSpills(LiveInterval &SLI, VNInfo *VNI) {
// eliminateDeadDefs won't normally remove stores, so switch opcode.
MI->setDesc(TII.get(TargetOpcode::KILL));
DeadDefs.push_back(MI);
- ++NumRedundantSpills;
+ ++NumSpillsRemoved;
+ --NumSpills;
}
}
} while (!WorkList.empty());
@@ -637,7 +834,7 @@ void InlineSpiller::markValueUsed(LiveInterval *LI, VNInfo *VNI) {
bool InlineSpiller::reMaterializeFor(LiveInterval &VirtReg,
MachineBasicBlock::iterator MI) {
SlotIndex UseIdx = LIS.getInstructionIndex(MI).getUseIndex();
- VNInfo *ParentVNI = VirtReg.getVNInfoAt(UseIdx);
+ VNInfo *ParentVNI = VirtReg.getVNInfoAt(UseIdx.getBaseIndex());
if (!ParentVNI) {
DEBUG(dbgs() << "\tadding <undef> flags: ");
@@ -787,10 +984,10 @@ void InlineSpiller::reMaterializeAll() {
/// If MI is a load or store of StackSlot, it can be removed.
bool InlineSpiller::coalesceStackAccess(MachineInstr *MI, unsigned Reg) {
int FI = 0;
- unsigned InstrReg;
- if (!(InstrReg = TII.isLoadFromStackSlot(MI, FI)) &&
- !(InstrReg = TII.isStoreToStackSlot(MI, FI)))
- return false;
+ unsigned InstrReg = TII.isLoadFromStackSlot(MI, FI);
+ bool IsLoad = InstrReg;
+ if (!IsLoad)
+ InstrReg = TII.isStoreToStackSlot(MI, FI);
// We have a stack access. Is it the right register and slot?
if (InstrReg != Reg || FI != StackSlot)
@@ -799,6 +996,15 @@ bool InlineSpiller::coalesceStackAccess(MachineInstr *MI, unsigned Reg) {
DEBUG(dbgs() << "Coalescing stack access: " << *MI);
LIS.RemoveMachineInstrFromMaps(MI);
MI->eraseFromParent();
+
+ if (IsLoad) {
+ ++NumReloadsRemoved;
+ --NumReloads;
+ } else {
+ ++NumSpillsRemoved;
+ --NumSpills;
+ }
+
return true;
}
@@ -810,6 +1016,7 @@ bool InlineSpiller::coalesceStackAccess(MachineInstr *MI, unsigned Reg) {
bool InlineSpiller::foldMemoryOperand(MachineBasicBlock::iterator MI,
const SmallVectorImpl<unsigned> &Ops,
MachineInstr *LoadMI) {
+ bool WasCopy = MI->isCopy();
// TargetInstrInfo::foldMemoryOperand only expects explicit, non-tied
// operands.
SmallVector<unsigned, 8> FoldOps;
@@ -839,7 +1046,12 @@ bool InlineSpiller::foldMemoryOperand(MachineBasicBlock::iterator MI,
VRM.addSpillSlotUse(StackSlot, FoldMI);
MI->eraseFromParent();
DEBUG(dbgs() << "\tfolded: " << *FoldMI);
- ++NumFolded;
+ if (!WasCopy)
+ ++NumFolded;
+ else if (Ops.front() == 0)
+ ++NumSpills;
+ else
+ ++NumReloads;
return true;
}
@@ -975,8 +1187,16 @@ void InlineSpiller::spillAroundUses(unsigned Reg) {
DEBUG(dbgs() << "\trewrite: " << Idx << '\t' << *MI);
// FIXME: Use a second vreg if instruction has no tied ops.
- if (Writes && hasLiveDef)
+ if (Writes) {
+ if (hasLiveDef)
insertSpill(NewLI, OldLI, Idx, MI);
+ else {
+ // This instruction defines a dead value. We don't need to spill it,
+ // but do create a live range for the dead value.
+ VNInfo *VNI = NewLI.getNextValue(Idx, 0, LIS.getVNInfoAllocator());
+ NewLI.addRange(LiveRange(Idx, Idx.getNextSlot(), VNI));
+ }
+ }
DEBUG(dbgs() << "\tinterval: " << NewLI << '\n');
}
diff --git a/lib/CodeGen/InterferenceCache.cpp b/lib/CodeGen/InterferenceCache.cpp
index a09bb39..29b47bd 100644
--- a/lib/CodeGen/InterferenceCache.cpp
+++ b/lib/CodeGen/InterferenceCache.cpp
@@ -18,10 +18,13 @@
using namespace llvm;
+// Static member used for null interference cursors.
+InterferenceCache::BlockInterference InterferenceCache::Cursor::NoInterference;
+
void InterferenceCache::init(MachineFunction *mf,
LiveIntervalUnion *liuarray,
SlotIndexes *indexes,
- const TargetRegisterInfo *tri) {
+ const TargetRegisterInfo *tri) {
MF = mf;
LIUArray = liuarray;
TRI = tri;
diff --git a/lib/CodeGen/InterferenceCache.h b/lib/CodeGen/InterferenceCache.h
index 7f0a27a..4df0a9e 100644
--- a/lib/CodeGen/InterferenceCache.h
+++ b/lib/CodeGen/InterferenceCache.h
@@ -138,6 +138,7 @@ public:
class Cursor {
Entry *CacheEntry;
BlockInterference *Current;
+ static BlockInterference NoInterference;
void setEntry(Entry *E) {
Current = 0;
@@ -175,7 +176,7 @@ public:
/// moveTo - Move cursor to basic block MBBNum.
void moveToBlock(unsigned MBBNum) {
- Current = CacheEntry->get(MBBNum);
+ Current = CacheEntry ? CacheEntry->get(MBBNum) : &NoInterference;
}
/// hasInterference - Return true if the current block has any interference.
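
The NoInterference member introduced above is an instance of the null-object pattern: rather than make every caller of moveToBlock test CacheEntry for null, a cursor without a cache entry points at one shared, permanently empty BlockInterference. A self-contained sketch of the pattern (all names hypothetical, not the real InterferenceCache types):

    #include <map>

    struct BlockInterference { /* empty means no interference */ };

    struct Entry {
      std::map<unsigned, BlockInterference> Blocks;
      BlockInterference *get(unsigned N) { return &Blocks[N]; }
    };

    class Cursor {
      Entry *CacheEntry = nullptr;
      BlockInterference *Current = nullptr;
      static BlockInterference NoInterference;  // shared empty sentinel
    public:
      void moveToBlock(unsigned MBBNum) {
        // A null cursor quietly reports no interference instead of
        // dereferencing a null cache entry.
        Current = CacheEntry ? CacheEntry->get(MBBNum) : &NoInterference;
      }
    };

    // The sentinel needs exactly one definition, in a .cpp file.
    BlockInterference Cursor::NoInterference;
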
diff --git a/lib/CodeGen/IntrinsicLowering.cpp b/lib/CodeGen/IntrinsicLowering.cpp
index 611886f..0f92c2d 100644
--- a/lib/CodeGen/IntrinsicLowering.cpp
+++ b/lib/CodeGen/IntrinsicLowering.cpp
@@ -27,7 +27,7 @@ using namespace llvm;
template <class ArgIt>
static void EnsureFunctionExists(Module &M, const char *Name,
ArgIt ArgBegin, ArgIt ArgEnd,
- const Type *RetTy) {
+ Type *RetTy) {
// Insert a correctly-typed definition now.
std::vector<Type *> ParamTys;
for (ArgIt I = ArgBegin; I != ArgEnd; ++I)
@@ -64,7 +64,7 @@ static void EnsureFPIntrinsicsExist(Module &M, Function *Fn,
template <class ArgIt>
static CallInst *ReplaceCallWith(const char *NewFn, CallInst *CI,
ArgIt ArgBegin, ArgIt ArgEnd,
- const Type *RetTy) {
+ Type *RetTy) {
// If we haven't already looked up this function, check to see if the
// program already contains a function with this name.
Module *M = CI->getParent()->getParent()->getParent();
@@ -462,7 +462,7 @@ void IntrinsicLowering::LowerIntrinsicCall(CallInst *CI) {
break; // Strip out annotate intrinsic
case Intrinsic::memcpy: {
- const IntegerType *IntPtr = TD.getIntPtrType(Context);
+ IntegerType *IntPtr = TD.getIntPtrType(Context);
Value *Size = Builder.CreateIntCast(CI->getArgOperand(2), IntPtr,
/* isSigned */ false);
Value *Ops[3];
@@ -473,7 +473,7 @@ void IntrinsicLowering::LowerIntrinsicCall(CallInst *CI) {
break;
}
case Intrinsic::memmove: {
- const IntegerType *IntPtr = TD.getIntPtrType(Context);
+ IntegerType *IntPtr = TD.getIntPtrType(Context);
Value *Size = Builder.CreateIntCast(CI->getArgOperand(2), IntPtr,
/* isSigned */ false);
Value *Ops[3];
@@ -484,7 +484,7 @@ void IntrinsicLowering::LowerIntrinsicCall(CallInst *CI) {
break;
}
case Intrinsic::memset: {
- const IntegerType *IntPtr = TD.getIntPtrType(Context);
+ IntegerType *IntPtr = TD.getIntPtrType(Context);
Value *Size = Builder.CreateIntCast(CI->getArgOperand(2), IntPtr,
/* isSigned */ false);
Value *Ops[3];
diff --git a/lib/CodeGen/LLVMTargetMachine.cpp b/lib/CodeGen/LLVMTargetMachine.cpp
index f985af8..80ecc22 100644
--- a/lib/CodeGen/LLVMTargetMachine.cpp
+++ b/lib/CodeGen/LLVMTargetMachine.cpp
@@ -27,16 +27,18 @@
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSubtargetInfo.h"
-#include "llvm/Target/TargetAsmInfo.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetRegistry.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetLoweringObjectFile.h"
+#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Target/TargetSubtargetInfo.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/ADT/OwningPtr.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/FormattedStream.h"
+#include "llvm/Support/TargetRegistry.h"
using namespace llvm;
namespace llvm {
@@ -55,8 +57,12 @@ static cl::opt<bool> DisableCodePlace("disable-code-place", cl::Hidden,
cl::desc("Disable code placement"));
static cl::opt<bool> DisableSSC("disable-ssc", cl::Hidden,
cl::desc("Disable Stack Slot Coloring"));
+static cl::opt<bool> DisableMachineDCE("disable-machine-dce", cl::Hidden,
+ cl::desc("Disable Machine Dead Code Elimination"));
static cl::opt<bool> DisableMachineLICM("disable-machine-licm", cl::Hidden,
cl::desc("Disable Machine LICM"));
+static cl::opt<bool> DisableMachineCSE("disable-machine-cse", cl::Hidden,
+ cl::desc("Disable Machine Common Subexpression Elimination"));
static cl::opt<bool> DisablePostRAMachineLICM("disable-postra-machine-licm",
cl::Hidden,
cl::desc("Disable Machine LICM"));
@@ -103,20 +109,17 @@ EnableFastISelOption("fast-isel", cl::Hidden,
cl::desc("Enable the \"fast\" instruction selector"));
LLVMTargetMachine::LLVMTargetMachine(const Target &T, StringRef Triple,
- StringRef CPU, StringRef FS)
+ StringRef CPU, StringRef FS,
+ Reloc::Model RM, CodeModel::Model CM)
: TargetMachine(T, Triple, CPU, FS) {
+ CodeGenInfo = T.createMCCodeGenInfo(Triple, RM, CM);
AsmInfo = T.createMCAsmInfo(Triple);
-}
-
-// Set the default code model for the JIT for a generic target.
-// FIXME: Is small right here? or .is64Bit() ? Large : Small?
-void LLVMTargetMachine::setCodeModelForJIT() {
- setCodeModel(CodeModel::Small);
-}
-
-// Set the default code model for static compilation for a generic target.
-void LLVMTargetMachine::setCodeModelForStatic() {
- setCodeModel(CodeModel::Small);
+ // TargetSelect.h moved to a different directory between LLVM 2.9 and 3.0,
+ // and if the old one gets included then MCAsmInfo will be NULL and
+ // we'll crash later.
+ // Provide the user with a useful error message about what's wrong.
+ assert(AsmInfo && "MCAsmInfo not initialized. "
+ "Make sure you include the correct TargetSelect.h!");
}
bool LLVMTargetMachine::addPassesToEmitFile(PassManagerBase &PM,
@@ -134,21 +137,22 @@ bool LLVMTargetMachine::addPassesToEmitFile(PassManagerBase &PM,
Context->setAllowTemporaryLabels(false);
const MCAsmInfo &MAI = *getMCAsmInfo();
+ const MCSubtargetInfo &STI = getSubtarget<MCSubtargetInfo>();
OwningPtr<MCStreamer> AsmStreamer;
switch (FileType) {
default: return true;
case CGFT_AssemblyFile: {
MCInstPrinter *InstPrinter =
- getTarget().createMCInstPrinter(MAI.getAssemblerDialect(), MAI);
+ getTarget().createMCInstPrinter(MAI.getAssemblerDialect(), MAI, STI);
// Create a code emitter if asked to show the encoding.
MCCodeEmitter *MCE = 0;
- TargetAsmBackend *TAB = 0;
+ MCAsmBackend *MAB = 0;
if (ShowMCEncoding) {
const MCSubtargetInfo &STI = getSubtarget<MCSubtargetInfo>();
- MCE = getTarget().createCodeEmitter(*getInstrInfo(), STI, *Context);
- TAB = getTarget().createAsmBackend(getTargetTriple());
+ MCE = getTarget().createMCCodeEmitter(*getInstrInfo(), STI, *Context);
+ MAB = getTarget().createMCAsmBackend(getTargetTriple());
}
MCStreamer *S = getTarget().createAsmStreamer(*Context, Out,
@@ -156,7 +160,7 @@ bool LLVMTargetMachine::addPassesToEmitFile(PassManagerBase &PM,
hasMCUseLoc(),
hasMCUseCFI(),
InstPrinter,
- MCE, TAB,
+ MCE, MAB,
ShowMCInst);
AsmStreamer.reset(S);
break;
@@ -164,17 +168,16 @@ bool LLVMTargetMachine::addPassesToEmitFile(PassManagerBase &PM,
case CGFT_ObjectFile: {
// Create the code emitter for the target if it exists. If not, .o file
// emission fails.
- const MCSubtargetInfo &STI = getSubtarget<MCSubtargetInfo>();
- MCCodeEmitter *MCE = getTarget().createCodeEmitter(*getInstrInfo(), STI,
- *Context);
- TargetAsmBackend *TAB = getTarget().createAsmBackend(getTargetTriple());
- if (MCE == 0 || TAB == 0)
+ MCCodeEmitter *MCE = getTarget().createMCCodeEmitter(*getInstrInfo(), STI,
+ *Context);
+ MCAsmBackend *MAB = getTarget().createMCAsmBackend(getTargetTriple());
+ if (MCE == 0 || MAB == 0)
return true;
- AsmStreamer.reset(getTarget().createObjectStreamer(getTargetTriple(),
- *Context, *TAB, Out, MCE,
- hasMCRelaxAll(),
- hasMCNoExecStack()));
+ AsmStreamer.reset(getTarget().createMCObjectStreamer(getTargetTriple(),
+ *Context, *MAB, Out,
+ MCE, hasMCRelaxAll(),
+ hasMCNoExecStack()));
AsmStreamer.get()->InitSections();
break;
}
@@ -198,8 +201,6 @@ bool LLVMTargetMachine::addPassesToEmitFile(PassManagerBase &PM,
PM.add(Printer);
- // Make sure the code model is set.
- setCodeModelForStatic();
PM.add(createGCInfoDeleter());
return false;
}
@@ -214,9 +215,6 @@ bool LLVMTargetMachine::addPassesToEmitMachineCode(PassManagerBase &PM,
JITCodeEmitter &JCE,
CodeGenOpt::Level OptLevel,
bool DisableVerify) {
- // Make sure the code model is set.
- setCodeModelForJIT();
-
// Add common CodeGen passes.
MCContext *Ctx = 0;
if (addCommonCodeGenPasses(PM, OptLevel, DisableVerify, Ctx))
@@ -248,16 +246,16 @@ bool LLVMTargetMachine::addPassesToEmitMC(PassManagerBase &PM,
// Create the code emitter for the target if it exists. If not, .o file
// emission fails.
const MCSubtargetInfo &STI = getSubtarget<MCSubtargetInfo>();
- MCCodeEmitter *MCE = getTarget().createCodeEmitter(*getInstrInfo(),STI, *Ctx);
- TargetAsmBackend *TAB = getTarget().createAsmBackend(getTargetTriple());
- if (MCE == 0 || TAB == 0)
+ MCCodeEmitter *MCE = getTarget().createMCCodeEmitter(*getInstrInfo(),STI, *Ctx);
+ MCAsmBackend *MAB = getTarget().createMCAsmBackend(getTargetTriple());
+ if (MCE == 0 || MAB == 0)
return true;
OwningPtr<MCStreamer> AsmStreamer;
- AsmStreamer.reset(getTarget().createObjectStreamer(getTargetTriple(), *Ctx,
- *TAB, Out, MCE,
- hasMCRelaxAll(),
- hasMCNoExecStack()));
+ AsmStreamer.reset(getTarget().createMCObjectStreamer(getTargetTriple(), *Ctx,
+ *MAB, Out, MCE,
+ hasMCRelaxAll(),
+ hasMCNoExecStack()));
AsmStreamer.get()->InitSections();
// Create the AsmPrinter, which takes ownership of AsmStreamer if successful.
@@ -270,9 +268,6 @@ bool LLVMTargetMachine::addPassesToEmitMC(PassManagerBase &PM,
PM.add(Printer);
- // Make sure the code model is set.
- setCodeModelForJIT();
-
return false; // success!
}
@@ -369,8 +364,9 @@ bool LLVMTargetMachine::addCommonCodeGenPasses(PassManagerBase &PM,
// Install a MachineModuleInfo class, which is an immutable pass that holds
// all the per-module stuff we're generating, including MCContext.
- TargetAsmInfo *TAI = new TargetAsmInfo(*this);
- MachineModuleInfo *MMI = new MachineModuleInfo(*getMCAsmInfo(), TAI);
+ MachineModuleInfo *MMI = new MachineModuleInfo(*getMCAsmInfo(),
+ *getRegisterInfo(),
+ &getTargetLowering()->getObjFileLowering());
PM.add(MMI);
OutContext = &MMI->getContext(); // Return the MCContext specifically by-ref.
@@ -412,12 +408,14 @@ bool LLVMTargetMachine::addCommonCodeGenPasses(PassManagerBase &PM,
// there is one known exception: lowered code for arguments that are only
// used by tail calls, where the tail calls reuse the incoming stack
// arguments directly (see t11 in test/CodeGen/X86/sibcall.ll).
- PM.add(createDeadMachineInstructionElimPass());
+ if (!DisableMachineDCE)
+ PM.add(createDeadMachineInstructionElimPass());
printAndVerify(PM, "After codegen DCE pass");
if (!DisableMachineLICM)
PM.add(createMachineLICMPass());
- PM.add(createMachineCSEPass());
+ if (!DisableMachineCSE)
+ PM.add(createMachineCSEPass());
if (!DisableMachineSink)
PM.add(createMachineSinkingPass());
printAndVerify(PM, "After Machine LICM, CSE and Sinking passes");
@@ -452,8 +450,8 @@ bool LLVMTargetMachine::addCommonCodeGenPasses(PassManagerBase &PM,
if (addPostRegAlloc(PM, OptLevel))
printAndVerify(PM, "After PostRegAlloc passes");
- PM.add(createLowerSubregsPass());
- printAndVerify(PM, "After LowerSubregs");
+ PM.add(createExpandPostRAPseudosPass());
+ printAndVerify(PM, "After ExpandPostRAPseudos");
// Insert prolog/epilog code. Eliminate abstract frame index references...
PM.add(createPrologEpilogCodeInserter());
diff --git a/lib/CodeGen/LexicalScopes.cpp b/lib/CodeGen/LexicalScopes.cpp
new file mode 100644
index 0000000..a12e1a3
--- /dev/null
+++ b/lib/CodeGen/LexicalScopes.cpp
@@ -0,0 +1,335 @@
+//===- LexicalScopes.cpp - Collecting lexical scope info ------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements LexicalScopes analysis.
+//
+// This pass collects lexical scope information and maps machine instructions
+// to respective lexical scopes.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "lexicalscopes"
+#include "llvm/CodeGen/LexicalScopes.h"
+#include "llvm/Function.h"
+#include "llvm/Analysis/DebugInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/FormattedStream.h"
+using namespace llvm;
+
+LexicalScopes::~LexicalScopes() {
+ releaseMemory();
+}
+
+/// releaseMemory - release memory.
+void LexicalScopes::releaseMemory() {
+ MF = NULL;
+ CurrentFnLexicalScope = NULL;
+ DeleteContainerSeconds(LexicalScopeMap);
+ DeleteContainerSeconds(AbstractScopeMap);
+ InlinedLexicalScopeMap.clear();
+ AbstractScopesList.clear();
+}
+
+/// initialize - Scan machine function and construct lexical scope nest.
+void LexicalScopes::initialize(const MachineFunction &Fn) {
+ releaseMemory();
+ MF = &Fn;
+ SmallVector<InsnRange, 4> MIRanges;
+ DenseMap<const MachineInstr *, LexicalScope *> MI2ScopeMap;
+ extractLexicalScopes(MIRanges, MI2ScopeMap);
+ if (CurrentFnLexicalScope) {
+ constructScopeNest(CurrentFnLexicalScope);
+ assignInstructionRanges(MIRanges, MI2ScopeMap);
+ }
+}
+
+/// extractLexicalScopes - Extract instruction ranges for each lexical scope
+/// for the given machine function.
+void LexicalScopes::
+extractLexicalScopes(SmallVectorImpl<InsnRange> &MIRanges,
+ DenseMap<const MachineInstr *, LexicalScope *> &MI2ScopeMap) {
+
+ // Scan each instruction and create scopes. First build working set of scopes.
+ for (MachineFunction::const_iterator I = MF->begin(), E = MF->end();
+ I != E; ++I) {
+ const MachineInstr *RangeBeginMI = NULL;
+ const MachineInstr *PrevMI = NULL;
+ DebugLoc PrevDL;
+ for (MachineBasicBlock::const_iterator II = I->begin(), IE = I->end();
+ II != IE; ++II) {
+ const MachineInstr *MInsn = II;
+
+ // Check if instruction has valid location information.
+ const DebugLoc MIDL = MInsn->getDebugLoc();
+ if (MIDL.isUnknown()) {
+ PrevMI = MInsn;
+ continue;
+ }
+
+ // If scope has not changed then skip this instruction.
+ if (MIDL == PrevDL) {
+ PrevMI = MInsn;
+ continue;
+ }
+
+ // Ignore DBG_VALUE. It does not contribute to any instruction in output.
+ if (MInsn->isDebugValue())
+ continue;
+
+ if (RangeBeginMI) {
+ // If we have already seen the beginning of an instruction range and the
+ // current instruction's scope does not match the scope of the first
+ // instruction in this range, then create a new instruction range.
+ InsnRange R(RangeBeginMI, PrevMI);
+ MI2ScopeMap[RangeBeginMI] = getOrCreateLexicalScope(PrevDL);
+ MIRanges.push_back(R);
+ }
+
+ // This is a beginning of a new instruction range.
+ RangeBeginMI = MInsn;
+
+ // Reset previous markers.
+ PrevMI = MInsn;
+ PrevDL = MIDL;
+ }
+
+ // Create last instruction range.
+ if (RangeBeginMI && PrevMI && !PrevDL.isUnknown()) {
+ InsnRange R(RangeBeginMI, PrevMI);
+ MIRanges.push_back(R);
+ MI2ScopeMap[RangeBeginMI] = getOrCreateLexicalScope(PrevDL);
+ }
+ }
+}
+
+/// findLexicalScope - Find lexical scope, either regular or inlined, for the
+/// given DebugLoc. Return NULL if not found.
+LexicalScope *LexicalScopes::findLexicalScope(DebugLoc DL) {
+ MDNode *Scope = NULL;
+ MDNode *IA = NULL;
+ DL.getScopeAndInlinedAt(Scope, IA, MF->getFunction()->getContext());
+ if (!Scope) return NULL;
+
+ // The scope that we were created with could have an extra file - which
+ // isn't what we care about in this case.
+ DIDescriptor D = DIDescriptor(Scope);
+ if (D.isLexicalBlockFile())
+ Scope = DILexicalBlockFile(Scope).getScope();
+
+ if (IA)
+ return InlinedLexicalScopeMap.lookup(DebugLoc::getFromDILocation(IA));
+ return LexicalScopeMap.lookup(Scope);
+}
+
+/// getOrCreateLexicalScope - Find lexical scope for the given DebugLoc. If
+/// not available then create new lexical scope.
+LexicalScope *LexicalScopes::getOrCreateLexicalScope(DebugLoc DL) {
+ MDNode *Scope = NULL;
+ MDNode *InlinedAt = NULL;
+ DL.getScopeAndInlinedAt(Scope, InlinedAt, MF->getFunction()->getContext());
+
+ if (InlinedAt) {
+ // Create an abstract scope for inlined function.
+ getOrCreateAbstractScope(Scope);
+ // Create an inlined scope for inlined function.
+ return getOrCreateInlinedScope(Scope, InlinedAt);
+ }
+
+ return getOrCreateRegularScope(Scope);
+}
+
+/// getOrCreateRegularScope - Find or create a regular lexical scope.
+LexicalScope *LexicalScopes::getOrCreateRegularScope(MDNode *Scope) {
+ DIDescriptor D = DIDescriptor(Scope);
+ if (D.isLexicalBlockFile()) {
+ Scope = DILexicalBlockFile(Scope).getScope();
+ D = DIDescriptor(Scope);
+ }
+
+ LexicalScope *WScope = LexicalScopeMap.lookup(Scope);
+ if (WScope)
+ return WScope;
+
+ LexicalScope *Parent = NULL;
+ if (D.isLexicalBlock())
+ Parent = getOrCreateLexicalScope(DebugLoc::getFromDILexicalBlock(Scope));
+ WScope = new LexicalScope(Parent, DIDescriptor(Scope), NULL, false);
+ LexicalScopeMap.insert(std::make_pair(Scope, WScope));
+ if (!Parent && DIDescriptor(Scope).isSubprogram()
+ && DISubprogram(Scope).describes(MF->getFunction()))
+ CurrentFnLexicalScope = WScope;
+
+ return WScope;
+}
+
+/// getOrCreateInlinedScope - Find or create an inlined lexical scope.
+LexicalScope *LexicalScopes::getOrCreateInlinedScope(MDNode *Scope,
+ MDNode *InlinedAt) {
+ LexicalScope *InlinedScope = LexicalScopeMap.lookup(InlinedAt);
+ if (InlinedScope)
+ return InlinedScope;
+
+ DebugLoc InlinedLoc = DebugLoc::getFromDILocation(InlinedAt);
+ InlinedScope = new LexicalScope(getOrCreateLexicalScope(InlinedLoc),
+ DIDescriptor(Scope), InlinedAt, false);
+ InlinedLexicalScopeMap[InlinedLoc] = InlinedScope;
+ LexicalScopeMap[InlinedAt] = InlinedScope;
+ return InlinedScope;
+}
+
+/// getOrCreateAbstractScope - Find or create an abstract lexical scope.
+LexicalScope *LexicalScopes::getOrCreateAbstractScope(const MDNode *N) {
+ assert(N && "Invalid Scope encoding!");
+
+ DIDescriptor Scope(N);
+ if (Scope.isLexicalBlockFile())
+ Scope = DILexicalBlockFile(Scope).getScope();
+ LexicalScope *AScope = AbstractScopeMap.lookup(N);
+ if (AScope)
+ return AScope;
+
+ LexicalScope *Parent = NULL;
+ if (Scope.isLexicalBlock()) {
+ DILexicalBlock DB(N);
+ DIDescriptor ParentDesc = DB.getContext();
+ Parent = getOrCreateAbstractScope(ParentDesc);
+ }
+ AScope = new LexicalScope(Parent, DIDescriptor(N), NULL, true);
+ AbstractScopeMap[N] = AScope;
+ if (DIDescriptor(N).isSubprogram())
+ AbstractScopesList.push_back(AScope);
+ return AScope;
+}
+
+/// constructScopeNest - Assign DFS-in and DFS-out numbers to each scope with
+/// an iterative depth-first walk, so scope dominance becomes an interval test.
+void LexicalScopes::constructScopeNest(LexicalScope *Scope) {
+ assert(Scope && "Unable to calculate scope dominance graph!");
+ SmallVector<LexicalScope *, 4> WorkStack;
+ WorkStack.push_back(Scope);
+ unsigned Counter = 0;
+ while (!WorkStack.empty()) {
+ LexicalScope *WS = WorkStack.back();
+ const SmallVector<LexicalScope *, 4> &Children = WS->getChildren();
+ bool visitedChildren = false;
+ for (SmallVector<LexicalScope *, 4>::const_iterator SI = Children.begin(),
+ SE = Children.end(); SI != SE; ++SI) {
+ LexicalScope *ChildScope = *SI;
+ if (!ChildScope->getDFSOut()) {
+ WorkStack.push_back(ChildScope);
+ visitedChildren = true;
+ ChildScope->setDFSIn(++Counter);
+ break;
+ }
+ }
+ if (!visitedChildren) {
+ WorkStack.pop_back();
+ WS->setDFSOut(++Counter);
+ }
+ }
+}
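
The explicit-stack DFS above exists to stamp every scope with DFSIn/DFSOut numbers; the payoff is that LexicalScope::dominates reduces to a constant-time interval test. A standalone sketch of the same numbering and the nesting check it enables (toy types, not LLVM's):

    #include <cassert>
    #include <vector>

    struct Scope {
      std::vector<Scope*> Children;
      unsigned DFSIn = 0, DFSOut = 0;
    };

    // Same shape as constructScopeNest: a child gets DFSIn when first
    // pushed; a node gets DFSOut once all of its children are finished.
    void number(Scope *Root) {
      std::vector<Scope*> Stack(1, Root);
      unsigned Counter = 0;
      Root->DFSIn = ++Counter;
      while (!Stack.empty()) {
        Scope *S = Stack.back();
        bool Descended = false;
        for (Scope *C : S->Children)
          if (!C->DFSOut) {           // child not finished yet
            C->DFSIn = ++Counter;
            Stack.push_back(C);
            Descended = true;
            break;
          }
        if (!Descended) {
          Stack.pop_back();
          S->DFSOut = ++Counter;
        }
      }
    }

    // A dominates B exactly when B's DFS interval nests inside A's.
    bool dominates(const Scope *A, const Scope *B) {
      return A->DFSIn <= B->DFSIn && B->DFSOut <= A->DFSOut;
    }

    int main() {
      Scope Root, Kid, GrandKid;
      Root.Children.push_back(&Kid);
      Kid.Children.push_back(&GrandKid);
      number(&Root);
      assert(dominates(&Root, &GrandKid));
      assert(!dominates(&GrandKid, &Kid));
    }
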
+
+/// assignInstructionRanges - Find ranges of instructions covered by each
+/// lexical scope.
+void LexicalScopes::
+assignInstructionRanges(SmallVectorImpl<InsnRange> &MIRanges,
+ DenseMap<const MachineInstr *, LexicalScope *> &MI2ScopeMap)
+{
+
+ LexicalScope *PrevLexicalScope = NULL;
+ for (SmallVectorImpl<InsnRange>::const_iterator RI = MIRanges.begin(),
+ RE = MIRanges.end(); RI != RE; ++RI) {
+ const InsnRange &R = *RI;
+ LexicalScope *S = MI2ScopeMap.lookup(R.first);
+ assert (S && "Lost LexicalScope for a machine instruction!");
+ if (PrevLexicalScope && !PrevLexicalScope->dominates(S))
+ PrevLexicalScope->closeInsnRange(S);
+ S->openInsnRange(R.first);
+ S->extendInsnRange(R.second);
+ PrevLexicalScope = S;
+ }
+
+ if (PrevLexicalScope)
+ PrevLexicalScope->closeInsnRange();
+}
+
+/// getMachineBasicBlocks - Populate the given set with the machine basic
+/// blocks that contain machine instructions belonging to the lexical scope
+/// identified by DebugLoc.
+void LexicalScopes::
+getMachineBasicBlocks(DebugLoc DL,
+ SmallPtrSet<const MachineBasicBlock*, 4> &MBBs) {
+ MBBs.clear();
+ LexicalScope *Scope = getOrCreateLexicalScope(DL);
+ if (!Scope)
+ return;
+
+ if (Scope == CurrentFnLexicalScope) {
+ for (MachineFunction::const_iterator I = MF->begin(), E = MF->end();
+ I != E; ++I)
+ MBBs.insert(I);
+ return;
+ }
+
+ SmallVector<InsnRange, 4> &InsnRanges = Scope->getRanges();
+ for (SmallVector<InsnRange, 4>::iterator I = InsnRanges.begin(),
+ E = InsnRanges.end(); I != E; ++I) {
+ InsnRange &R = *I;
+ MBBs.insert(R.first->getParent());
+ }
+}
+
+/// dominates - Return true if DebugLoc's lexical scope dominates at least one
+/// machine instruction's lexical scope in a given machine basic block.
+bool LexicalScopes::dominates(DebugLoc DL, MachineBasicBlock *MBB) {
+ LexicalScope *Scope = getOrCreateLexicalScope(DL);
+ if (!Scope)
+ return false;
+
+ // Current function scope covers all basic blocks in the function.
+ if (Scope == CurrentFnLexicalScope && MBB->getParent() == MF)
+ return true;
+
+ for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end();
+ I != E; ++I) {
+ DebugLoc IDL = I->getDebugLoc();
+ if (IDL.isUnknown())
+ continue;
+ if (LexicalScope *IScope = getOrCreateLexicalScope(IDL))
+ if (Scope->dominates(IScope))
+ return true;
+ }
+ return false;
+}
+
+/// dump - Print data structures.
+void LexicalScope::dump() const {
+#ifndef NDEBUG
+ raw_ostream &err = dbgs();
+ err.indent(IndentLevel);
+ err << "DFSIn: " << DFSIn << " DFSOut: " << DFSOut << "\n";
+ const MDNode *N = Desc;
+ N->dump();
+ if (AbstractScope)
+ err << "Abstract Scope\n";
+
+ IndentLevel += 2;
+ if (!Children.empty())
+ err << "Children ...\n";
+ for (unsigned i = 0, e = Children.size(); i != e; ++i)
+ if (Children[i] != this)
+ Children[i]->dump();
+
+ IndentLevel -= 2;
+#endif
+}
+
diff --git a/lib/CodeGen/LiveDebugVariables.cpp b/lib/CodeGen/LiveDebugVariables.cpp
index 5d38c83..3dfe4c0 100644
--- a/lib/CodeGen/LiveDebugVariables.cpp
+++ b/lib/CodeGen/LiveDebugVariables.cpp
@@ -25,7 +25,10 @@
#include "llvm/Constants.h"
#include "llvm/Metadata.h"
#include "llvm/Value.h"
+#include "llvm/Analysis/DebugInfo.h"
#include "llvm/ADT/IntervalMap.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/LexicalScopes.h"
#include "llvm/CodeGen/LiveIntervalAnalysis.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFunction.h"
@@ -44,6 +47,7 @@ static cl::opt<bool>
EnableLDV("live-debug-variables", cl::init(true),
cl::desc("Enable the live debug variables pass"), cl::Hidden);
+STATISTIC(NumInsertedDebugValues, "Number of DBG_VALUEs inserted");
char LiveDebugVariables::ID = 0;
INITIALIZE_PASS_BEGIN(LiveDebugVariables, "livedebugvars",
@@ -67,6 +71,29 @@ LiveDebugVariables::LiveDebugVariables() : MachineFunctionPass(ID), pImpl(0) {
/// LocMap - Map of where a user value is live, and its location.
typedef IntervalMap<SlotIndex, unsigned, 4> LocMap;
+namespace {
+/// UserValueScopes - Keeps track of lexical scopes associated with a
+/// user value's source location.
+class UserValueScopes {
+ DebugLoc DL;
+ LexicalScopes &LS;
+ SmallPtrSet<const MachineBasicBlock *, 4> LBlocks;
+
+public:
+ UserValueScopes(DebugLoc D, LexicalScopes &L) : DL(D), LS(L) {}
+
+ /// dominates - Return true if current scope dominates at least one machine
+ /// instruction in a given machine basic block.
+ bool dominates(MachineBasicBlock *MBB) {
+ if (LBlocks.empty())
+ LS.getMachineBasicBlocks(DL, LBlocks);
+ if (LBlocks.count(MBB) != 0 || LS.dominates(DL, MBB))
+ return true;
+ return false;
+ }
+};
+} // end anonymous namespace
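
UserValueScopes is a small memoization wrapper: the first dominance query computes the potentially expensive block set via LexicalScopes, and every later query for the same user value is answered from the cache (the real class keys off LBlocks.empty() and also falls back to LS.dominates for blocks outside the cached set). The caching shape in isolation, with toy types standing in for the LLVM ones:

    #include <set>

    struct ScopeCache {
      std::set<int> Blocks;  // stand-in for SmallPtrSet<MachineBasicBlock*>
      bool Filled = false;

      // The expensive LexicalScopes walk, stubbed out here.
      std::set<int> computeBlocks() { return {1, 2, 3}; }

      bool dominates(int MBB) {
        if (!Filled) {       // lazy: at most one expensive computation
          Blocks = computeBlocks();
          Filled = true;
        }
        return Blocks.count(MBB) != 0;
      }
    };
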
+
/// UserValue - A user value is a part of a debug info user variable.
///
/// A DBG_VALUE instruction notes that (a sub-register of) a virtual register
@@ -179,6 +206,9 @@ public:
LocMap::iterator I = locInts.find(Idx);
if (!I.valid() || I.start() != Idx)
I.insert(Idx, Idx.getNextSlot(), getLocationNo(LocMO));
+ else
+ // A later DBG_VALUE at the same SlotIndex overrides the old location.
+ I.setValue(getLocationNo(LocMO));
}
/// extendDef - Extend the current definition as far as possible down the
@@ -195,7 +225,8 @@ public:
void extendDef(SlotIndex Idx, unsigned LocNo,
LiveInterval *LI, const VNInfo *VNI,
SmallVectorImpl<SlotIndex> *Kills,
- LiveIntervals &LIS, MachineDominatorTree &MDT);
+ LiveIntervals &LIS, MachineDominatorTree &MDT,
+ UserValueScopes &UVS);
/// addDefsFromCopies - The value in LI/LocNo may be copies to other
/// registers. Determine if any of the copies are available at the kill
@@ -213,7 +244,8 @@ public:
/// computeIntervals - Compute the live intervals of all locations after
/// collecting all their def points.
void computeIntervals(MachineRegisterInfo &MRI,
- LiveIntervals &LIS, MachineDominatorTree &MDT);
+ LiveIntervals &LIS, MachineDominatorTree &MDT,
+ UserValueScopes &UVS);
/// renameRegister - Update locations to rewrite OldReg as NewReg:SubIdx.
void renameRegister(unsigned OldReg, unsigned NewReg, unsigned SubIdx,
@@ -236,6 +268,9 @@ public:
/// Only first one needs DebugLoc to identify variable's lexical scope
/// in source file.
DebugLoc findDebugLoc();
+
+ /// getDebugLoc - Return DebugLoc of this UserValue.
+ DebugLoc getDebugLoc() { return dl; }
void print(raw_ostream&, const TargetMachine*);
};
} // namespace
@@ -247,6 +282,7 @@ class LDVImpl {
LocMap::Allocator allocator;
MachineFunction *MF;
LiveIntervals *LIS;
+ LexicalScopes LS;
MachineDominatorTree *MDT;
const TargetRegisterInfo *TRI;
@@ -312,8 +348,10 @@ public:
} // namespace
void UserValue::print(raw_ostream &OS, const TargetMachine *TM) {
- if (const MDString *MDS = dyn_cast<MDString>(variable->getOperand(2)))
- OS << "!\"" << MDS->getString() << "\"\t";
+ DIVariable DV(variable);
+ OS << "!\"";
+ DV.printExtendedName(OS);
+ OS << "\"\t";
if (offset)
OS << '+' << offset;
for (LocMap::const_iterator I = locInts.begin(); I.valid(); ++I) {
@@ -447,10 +485,10 @@ bool LDVImpl::collectDebugValues(MachineFunction &mf) {
void UserValue::extendDef(SlotIndex Idx, unsigned LocNo,
LiveInterval *LI, const VNInfo *VNI,
SmallVectorImpl<SlotIndex> *Kills,
- LiveIntervals &LIS, MachineDominatorTree &MDT) {
+ LiveIntervals &LIS, MachineDominatorTree &MDT,
+ UserValueScopes &UVS) {
SmallVector<SlotIndex, 16> Todo;
Todo.push_back(Idx);
-
do {
SlotIndex Start = Todo.pop_back_val();
MachineBasicBlock *MBB = LIS.getMBBFromIndex(Start);
@@ -497,8 +535,11 @@ void UserValue::extendDef(SlotIndex Idx, unsigned LocNo,
continue;
const std::vector<MachineDomTreeNode*> &Children =
MDT.getNode(MBB)->getChildren();
- for (unsigned i = 0, e = Children.size(); i != e; ++i)
- Todo.push_back(LIS.getMBBStartIdx(Children[i]->getBlock()));
+ for (unsigned i = 0, e = Children.size(); i != e; ++i) {
+ MachineBasicBlock *MBB = Children[i]->getBlock();
+ if (UVS.dominates(MBB))
+ Todo.push_back(LIS.getMBBStartIdx(MBB));
+ }
} while (!Todo.empty());
}
@@ -578,7 +619,8 @@ UserValue::addDefsFromCopies(LiveInterval *LI, unsigned LocNo,
void
UserValue::computeIntervals(MachineRegisterInfo &MRI,
LiveIntervals &LIS,
- MachineDominatorTree &MDT) {
+ MachineDominatorTree &MDT,
+ UserValueScopes &UVS) {
SmallVector<std::pair<SlotIndex, unsigned>, 16> Defs;
// Collect all defs to be extended (Skipping undefs).
@@ -597,10 +639,10 @@ UserValue::computeIntervals(MachineRegisterInfo &MRI,
LiveInterval *LI = &LIS.getInterval(Loc.getReg());
const VNInfo *VNI = LI->getVNInfoAt(Idx);
SmallVector<SlotIndex, 16> Kills;
- extendDef(Idx, LocNo, LI, VNI, &Kills, LIS, MDT);
+ extendDef(Idx, LocNo, LI, VNI, &Kills, LIS, MDT, UVS);
addDefsFromCopies(LI, LocNo, Kills, Defs, MRI, LIS);
} else
- extendDef(Idx, LocNo, 0, 0, 0, LIS, MDT);
+ extendDef(Idx, LocNo, 0, 0, 0, LIS, MDT, UVS);
}
// Finally, erase all the undefs.
@@ -613,7 +655,8 @@ UserValue::computeIntervals(MachineRegisterInfo &MRI,
void LDVImpl::computeIntervals() {
for (unsigned i = 0, e = userValues.size(); i != e; ++i) {
- userValues[i]->computeIntervals(MF->getRegInfo(), *LIS, *MDT);
+ UserValueScopes UVS(userValues[i]->getDebugLoc(), LS);
+ userValues[i]->computeIntervals(MF->getRegInfo(), *LIS, *MDT, UVS);
userValues[i]->mapVirtRegs(this);
}
}
@@ -624,6 +667,7 @@ bool LDVImpl::runOnMachineFunction(MachineFunction &mf) {
MDT = &pass.getAnalysis<MachineDominatorTree>();
TRI = mf.getTarget().getRegisterInfo();
clear();
+ LS.initialize(mf);
DEBUG(dbgs() << "********** COMPUTING LIVE DEBUG VARIABLES: "
<< ((Value*)mf.getFunction())->getName()
<< " **********\n");
@@ -631,6 +675,7 @@ bool LDVImpl::runOnMachineFunction(MachineFunction &mf) {
bool Changed = collectDebugValues(mf);
computeIntervals();
DEBUG(print(dbgs()));
+ LS.releaseMemory();
return Changed;
}
@@ -891,6 +936,7 @@ void UserValue::insertDebugValue(MachineBasicBlock *MBB, SlotIndex Idx,
const TargetInstrInfo &TII) {
MachineBasicBlock::iterator I = findInsertLocation(MBB, Idx, LIS);
MachineOperand &Loc = locations[LocNo];
+ ++NumInsertedDebugValues;
// Frame index locations may require a target callback.
if (Loc.isFI()) {
@@ -921,7 +967,6 @@ void UserValue::emitDebugValues(VirtRegMap *VRM, LiveIntervals &LIS,
DEBUG(dbgs() << " BB#" << MBB->getNumber() << '-' << MBBEnd);
insertDebugValue(MBB, Start, LocNo, LIS, TII);
-
// This interval may span multiple basic blocks.
// Insert a DBG_VALUE into each one.
while(Stop > MBBEnd) {
diff --git a/lib/CodeGen/LiveInterval.cpp b/lib/CodeGen/LiveInterval.cpp
index cfade24..b69945a 100644
--- a/lib/CodeGen/LiveInterval.cpp
+++ b/lib/CodeGen/LiveInterval.cpp
@@ -148,7 +148,6 @@ void LiveInterval::markValNoForDeletion(VNInfo *ValNo) {
/// remaining unused values.
void LiveInterval::RenumberValues(LiveIntervals &lis) {
SmallPtrSet<VNInfo*, 8> Seen;
- bool seenPHIDef = false;
valnos.clear();
for (const_iterator I = begin(), E = end(); I != E; ++I) {
VNInfo *VNI = I->valno;
@@ -157,26 +156,6 @@ void LiveInterval::RenumberValues(LiveIntervals &lis) {
assert(!VNI->isUnused() && "Unused valno used by live range");
VNI->id = (unsigned)valnos.size();
valnos.push_back(VNI);
- VNI->setHasPHIKill(false);
- if (VNI->isPHIDef())
- seenPHIDef = true;
- }
-
- // Recompute phi kill flags.
- if (!seenPHIDef)
- return;
- for (const_vni_iterator I = vni_begin(), E = vni_end(); I != E; ++I) {
- VNInfo *VNI = *I;
- if (!VNI->isPHIDef())
- continue;
- const MachineBasicBlock *PHIBB = lis.getMBBFromIndex(VNI->def);
- assert(PHIBB && "No basic block for phi-def");
- for (MachineBasicBlock::const_pred_iterator PI = PHIBB->pred_begin(),
- PE = PHIBB->pred_end(); PI != PE; ++PI) {
- VNInfo *KVNI = getVNInfoAt(lis.getMBBEndIdx(*PI).getPrevSlot());
- if (KVNI)
- KVNI->setHasPHIKill(true);
- }
}
}
@@ -294,20 +273,20 @@ LiveInterval::addRangeFrom(LiveRange LR, iterator From) {
return ranges.insert(it, LR);
}
-/// extendInBlock - If this interval is live before UseIdx in the basic
-/// block that starts at StartIdx, extend it to be live at UseIdx and return
-/// the value. If there is no live range before UseIdx, return NULL.
-VNInfo *LiveInterval::extendInBlock(SlotIndex StartIdx, SlotIndex UseIdx) {
+/// extendInBlock - If this interval is live before Kill in the basic
+/// block that starts at StartIdx, extend it to be live up to Kill and return
+/// the value. If there is no live range before Kill, return NULL.
+VNInfo *LiveInterval::extendInBlock(SlotIndex StartIdx, SlotIndex Kill) {
if (empty())
return 0;
- iterator I = std::upper_bound(begin(), end(), UseIdx);
+ iterator I = std::upper_bound(begin(), end(), Kill.getPrevSlot());
if (I == begin())
return 0;
--I;
if (I->end <= StartIdx)
return 0;
- if (I->end <= UseIdx)
- extendIntervalEndTo(I, UseIdx.getNextSlot());
+ if (I->end < Kill)
+ extendIntervalEndTo(I, Kill);
return I->valno;
}
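
The change above moves extendInBlock from an inclusive UseIdx to an exclusive Kill slot: the range is extended up to, but not including, Kill, which is why the lookup now uses Kill.getPrevSlot() and the comparison became a strict I->end < Kill. A toy illustration of the half-open convention, with plain integers in place of SlotIndex:

    #include <cassert>

    struct Range { int start, end; };  // half-open: [start, end)

    // Mirror of the new contract: make R live up to Kill (exclusive).
    void extendTo(Range &R, int Kill) {
      if (R.end < Kill)
        R.end = Kill;
    }

    int main() {
      Range R{4, 8};           // live on slots 4..7
      extendTo(R, 12);
      assert(R.end == 12);     // now live on 4..11, not at 12 itself
      extendTo(R, 10);         // no-op: already live past 10
      assert(R.end == 12);
    }
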
diff --git a/lib/CodeGen/LiveIntervalAnalysis.cpp b/lib/CodeGen/LiveIntervalAnalysis.cpp
index 9257191..b1e202a 100644
--- a/lib/CodeGen/LiveIntervalAnalysis.cpp
+++ b/lib/CodeGen/LiveIntervalAnalysis.cpp
@@ -304,8 +304,19 @@ void LiveIntervals::handleVirtualRegisterDef(MachineBasicBlock *mbb,
// Make sure the first definition is not a partial redefinition. Add an
// <imp-def> of the full register.
- if (MO.getSubReg())
+ // FIXME: LiveIntervals shouldn't modify the code like this. Whoever
+ // created the machine instruction should annotate it with <undef> flags
+ // as needed. Then we can simply assert here. The REG_SEQUENCE lowering
+ // is the main suspect.
+ if (MO.getSubReg()) {
mi->addRegisterDefined(interval.reg);
+ // Mark all defs of interval.reg on this instruction as reading <undef>.
+ for (unsigned i = MOIdx, e = mi->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO2 = mi->getOperand(i);
+ if (MO2.isReg() && MO2.getReg() == interval.reg && MO2.getSubReg())
+ MO2.setIsUndef();
+ }
+ }
MachineInstr *CopyMI = NULL;
if (mi->isCopyLike()) {
@@ -747,6 +758,9 @@ bool LiveIntervals::shrinkToUses(LiveInterval *li,
// Find all the values used, including PHI kills.
SmallVector<std::pair<SlotIndex, VNInfo*>, 16> WorkList;
+ // Blocks that have already been added to WorkList as live-out.
+ SmallPtrSet<MachineBasicBlock*, 16> LiveOut;
+
// Visit all instructions reading li->reg.
for (MachineRegisterInfo::reg_iterator I = mri_->reg_begin(li->reg);
MachineInstr *UseMI = I.skipInstruction();) {
@@ -780,8 +794,6 @@ bool LiveIntervals::shrinkToUses(LiveInterval *li,
VNInfo *VNI = *I;
if (VNI->isUnused())
continue;
- // We may eliminate PHI values, so recompute PHIKill flags.
- VNI->setHasPHIKill(false);
NewLI.addRange(LiveRange(VNI->def, VNI->def.getNextSlot(), VNI));
// A use tied to an early-clobber def ends at the load slot and isn't caught
@@ -804,7 +816,7 @@ bool LiveIntervals::shrinkToUses(LiveInterval *li,
SlotIndex BlockStart = getMBBStartIdx(MBB);
// Extend the live range for VNI to be live at Idx.
- if (VNInfo *ExtVNI = NewLI.extendInBlock(BlockStart, Idx)) {
+ if (VNInfo *ExtVNI = NewLI.extendInBlock(BlockStart, Idx.getNextSlot())) {
(void)ExtVNI;
assert(ExtVNI == VNI && "Unexpected existing value number");
// Is this a PHIDef we haven't seen before?
@@ -813,13 +825,12 @@ bool LiveIntervals::shrinkToUses(LiveInterval *li,
// The PHI is live, make sure the predecessors are live-out.
for (MachineBasicBlock::const_pred_iterator PI = MBB->pred_begin(),
PE = MBB->pred_end(); PI != PE; ++PI) {
+ if (!LiveOut.insert(*PI))
+ continue;
SlotIndex Stop = getMBBEndIdx(*PI).getPrevSlot();
- VNInfo *PVNI = li->getVNInfoAt(Stop);
// A predecessor is not required to have a live-out value for a PHI.
- if (PVNI) {
- PVNI->setHasPHIKill(true);
+ if (VNInfo *PVNI = li->getVNInfoAt(Stop))
WorkList.push_back(std::make_pair(Stop, PVNI));
- }
}
continue;
}
@@ -831,6 +842,8 @@ bool LiveIntervals::shrinkToUses(LiveInterval *li,
// Make sure VNI is live-out from the predecessors.
for (MachineBasicBlock::const_pred_iterator PI = MBB->pred_begin(),
PE = MBB->pred_end(); PI != PE; ++PI) {
+ if (!LiveOut.insert(*PI))
+ continue;
SlotIndex Stop = getMBBEndIdx(*PI).getPrevSlot();
assert(li->getVNInfoAt(Stop) == VNI && "Wrong value out of predecessor");
WorkList.push_back(std::make_pair(Stop, VNI));
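
The LiveOut set threaded through both predecessor loops above is a standard visited set: each block is enqueued as live-out at most once, so the worklist loop stays bounded even when many uses funnel into the same predecessors. The shape in miniature, with ints standing in for basic blocks:

    #include <set>
    #include <vector>

    // insert().second is the same "first time seen" test that
    // `if (!LiveOut.insert(*PI)) continue;` performs above.
    void walkLiveOut(int Start, const std::vector<std::vector<int>> &Preds) {
      std::set<int> LiveOut;
      std::vector<int> WorkList(1, Start);
      while (!WorkList.empty()) {
        int MBB = WorkList.back();
        WorkList.pop_back();
        for (int Pred : Preds[MBB])
          if (LiveOut.insert(Pred).second)  // enqueue each block once
            WorkList.push_back(Pred);
      }
    }
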
diff --git a/lib/CodeGen/LiveIntervalUnion.cpp b/lib/CodeGen/LiveIntervalUnion.cpp
index 70003e7..110fe1e 100644
--- a/lib/CodeGen/LiveIntervalUnion.cpp
+++ b/lib/CodeGen/LiveIntervalUnion.cpp
@@ -91,25 +91,6 @@ LiveIntervalUnion::print(raw_ostream &OS, const TargetRegisterInfo *TRI) const {
OS << '\n';
}
-void LiveIntervalUnion::InterferenceResult::print(raw_ostream &OS,
- const TargetRegisterInfo *TRI) const {
- OS << '[' << start() << ';' << stop() << "):"
- << PrintReg(interference()->reg, TRI);
-}
-
-void LiveIntervalUnion::Query::print(raw_ostream &OS,
- const TargetRegisterInfo *TRI) {
- OS << "Interferences with ";
- LiveUnion->print(OS, TRI);
- InterferenceResult IR = firstInterference();
- while (isInterference(IR)) {
- OS << " ";
- IR.print(OS, TRI);
- OS << '\n';
- nextInterference(IR);
- }
-}
-
#ifndef NDEBUG
// Verify the live intervals in this union and add them to the visited set.
void LiveIntervalUnion::verify(LiveVirtRegBitSet& VisitedVRegs) {
@@ -118,114 +99,6 @@ void LiveIntervalUnion::verify(LiveVirtRegBitSet& VisitedVRegs) {
}
#endif //!NDEBUG
-// Private interface accessed by Query.
-//
-// Find a pair of segments that intersect, one in the live virtual register
-// (LiveInterval), and the other in this LiveIntervalUnion. The caller (Query)
-// is responsible for advancing the LiveIntervalUnion segments to find a
-// "notable" intersection, which requires query-specific logic.
-//
-// This design assumes only a fast mechanism for intersecting a single live
-// virtual register segment with a set of LiveIntervalUnion segments. This may
-// be ok since most virtual registers have very few segments. If we had a data
-// structure that optimizd MxN intersection of segments, then we would bypass
-// the loop that advances within the LiveInterval.
-//
-// If no intersection exists, set VirtRegI = VirtRegEnd, and set SI to the first
-// segment whose start point is greater than LiveInterval's end point.
-//
-// Assumes that segments are sorted by start position in both
-// LiveInterval and LiveSegments.
-void LiveIntervalUnion::Query::findIntersection(InterferenceResult &IR) const {
- // Search until reaching the end of the LiveUnion segments.
- LiveInterval::iterator VirtRegEnd = VirtReg->end();
- if (IR.VirtRegI == VirtRegEnd)
- return;
- while (IR.LiveUnionI.valid()) {
- // Slowly advance the live virtual reg iterator until we surpass the next
- // segment in LiveUnion.
- //
- // Note: If this is ever used for coalescing of fixed registers and we have
- // a live vreg with thousands of segments, then change this code to use
- // upperBound instead.
- IR.VirtRegI = VirtReg->advanceTo(IR.VirtRegI, IR.LiveUnionI.start());
- if (IR.VirtRegI == VirtRegEnd)
- break; // Retain current (nonoverlapping) LiveUnionI
-
- // VirtRegI may have advanced far beyond LiveUnionI, catch up.
- IR.LiveUnionI.advanceTo(IR.VirtRegI->start);
-
- // Check if no LiveUnionI exists with VirtRegI->Start < LiveUnionI.end
- if (!IR.LiveUnionI.valid())
- break;
- if (IR.LiveUnionI.start() < IR.VirtRegI->end) {
- assert(overlap(*IR.VirtRegI, IR.LiveUnionI) &&
- "upperBound postcondition");
- break;
- }
- }
- if (!IR.LiveUnionI.valid())
- IR.VirtRegI = VirtRegEnd;
-}
-
-// Find the first intersection, and cache interference info
-// (retain segment iterators into both VirtReg and LiveUnion).
-const LiveIntervalUnion::InterferenceResult &
-LiveIntervalUnion::Query::firstInterference() {
- if (CheckedFirstInterference)
- return FirstInterference;
- CheckedFirstInterference = true;
- InterferenceResult &IR = FirstInterference;
- IR.LiveUnionI.setMap(LiveUnion->getMap());
-
- // Quickly skip interference check for empty sets.
- if (VirtReg->empty() || LiveUnion->empty()) {
- IR.VirtRegI = VirtReg->end();
- } else if (VirtReg->beginIndex() < LiveUnion->startIndex()) {
- // VirtReg starts first, perform double binary search.
- IR.VirtRegI = VirtReg->find(LiveUnion->startIndex());
- if (IR.VirtRegI != VirtReg->end())
- IR.LiveUnionI.find(IR.VirtRegI->start);
- } else {
- // LiveUnion starts first, perform double binary search.
- IR.LiveUnionI.find(VirtReg->beginIndex());
- if (IR.LiveUnionI.valid())
- IR.VirtRegI = VirtReg->find(IR.LiveUnionI.start());
- else
- IR.VirtRegI = VirtReg->end();
- }
- findIntersection(FirstInterference);
- assert((IR.VirtRegI == VirtReg->end() || IR.LiveUnionI.valid())
- && "Uninitialized iterator");
- return FirstInterference;
-}
-
-// Treat the result as an iterator and advance to the next interfering pair
-// of segments. This is a plain iterator with no filter.
-bool LiveIntervalUnion::Query::nextInterference(InterferenceResult &IR) const {
- assert(isInterference(IR) && "iteration past end of interferences");
-
- // Advance either the VirtReg or LiveUnion segment to ensure that we visit all
- // unique overlapping pairs.
- if (IR.VirtRegI->end < IR.LiveUnionI.stop()) {
- if (++IR.VirtRegI == VirtReg->end())
- return false;
- }
- else {
- if (!(++IR.LiveUnionI).valid()) {
- IR.VirtRegI = VirtReg->end();
- return false;
- }
- }
- // Short-circuit findIntersection() if possible.
- if (overlap(*IR.VirtRegI, IR.LiveUnionI))
- return true;
-
- // Find the next intersection.
- findIntersection(IR);
- return isInterference(IR);
-}
-
// Scan the vector of interfering virtual registers in this union. Assume it's
// quite small.
bool LiveIntervalUnion::Query::isSeenInterference(LiveInterval *VirtReg) const {
@@ -234,64 +107,75 @@ bool LiveIntervalUnion::Query::isSeenInterference(LiveInterval *VirtReg) const {
return I != InterferingVRegs.end();
}
-// Count the number of virtual registers in this union that interfere with this
+// Collect virtual registers in this union that interfere with this
// query's live virtual register.
//
-// The number of times that we either advance IR.VirtRegI or call
-// LiveUnion.upperBound() will be no more than the number of holes in
-// VirtReg. So each invocation of collectInterferingVRegs() takes
-// time proportional to |VirtReg Holes| * time(LiveUnion.upperBound()).
+// The query state is one of:
+//
+// 1. CheckedFirstInterference == false: Iterators are uninitialized.
+// 2. SeenAllInterferences == true: InterferingVRegs complete, iterators unused.
+// 3. Iterators left at the last seen intersection.
//
-// For comments on how to speed it up, see Query::findIntersection().
unsigned LiveIntervalUnion::Query::
collectInterferingVRegs(unsigned MaxInterferingRegs) {
- InterferenceResult IR = firstInterference();
- LiveInterval::iterator VirtRegEnd = VirtReg->end();
- LiveInterval *RecentInterferingVReg = NULL;
- if (IR.VirtRegI != VirtRegEnd) while (IR.LiveUnionI.valid()) {
- // Advance the union's iterator to reach an unseen interfering vreg.
- do {
- if (IR.LiveUnionI.value() == RecentInterferingVReg)
- continue;
+ // Fast path return if we already have the desired information.
+ if (SeenAllInterferences || InterferingVRegs.size() >= MaxInterferingRegs)
+ return InterferingVRegs.size();
+
+ // Set up iterators on the first call.
+ if (!CheckedFirstInterference) {
+ CheckedFirstInterference = true;
+
+ // Quickly skip interference check for empty sets.
+ if (VirtReg->empty() || LiveUnion->empty()) {
+ SeenAllInterferences = true;
+ return 0;
+ }
- if (!isSeenInterference(IR.LiveUnionI.value()))
- break;
+ // In most cases, the union will start before VirtReg.
+ VirtRegI = VirtReg->begin();
+ LiveUnionI.setMap(LiveUnion->getMap());
+ LiveUnionI.find(VirtRegI->start);
+ }
- // Cache the most recent interfering vreg to bypass isSeenInterference.
- RecentInterferingVReg = IR.LiveUnionI.value();
+ LiveInterval::iterator VirtRegEnd = VirtReg->end();
+ LiveInterval *RecentReg = 0;
+ while (LiveUnionI.valid()) {
+ assert(VirtRegI != VirtRegEnd && "Reached end of VirtReg");
+
+ // Check for overlapping interference.
+ while (VirtRegI->start < LiveUnionI.stop() &&
+ VirtRegI->end > LiveUnionI.start()) {
+ // This is an overlap, record the interfering register.
+ LiveInterval *VReg = LiveUnionI.value();
+ if (VReg != RecentReg && !isSeenInterference(VReg)) {
+ RecentReg = VReg;
+ InterferingVRegs.push_back(VReg);
+ if (InterferingVRegs.size() >= MaxInterferingRegs)
+ return InterferingVRegs.size();
+ }
+ // This LiveUnion segment is no longer interesting.
+ if (!(++LiveUnionI).valid()) {
+ SeenAllInterferences = true;
+ return InterferingVRegs.size();
+ }
+ }
- } while ((++IR.LiveUnionI).valid());
- if (!IR.LiveUnionI.valid())
- break;
+ // The iterators no longer overlap; LiveUnionI has been advanced
+ // beyond VirtRegI.
+ assert(VirtRegI->end <= LiveUnionI.start() && "Expected non-overlap");
- // Advance the VirtReg iterator until surpassing the next segment in
- // LiveUnion.
- IR.VirtRegI = VirtReg->advanceTo(IR.VirtRegI, IR.LiveUnionI.start());
- if (IR.VirtRegI == VirtRegEnd)
+ // Advance the iterator that ends first.
+ VirtRegI = VirtReg->advanceTo(VirtRegI, LiveUnionI.start());
+ if (VirtRegI == VirtRegEnd)
break;
- // Check for intersection with the union's segment.
- if (overlap(*IR.VirtRegI, IR.LiveUnionI)) {
-
- if (!IR.LiveUnionI.value()->isSpillable())
- SeenUnspillableVReg = true;
-
- if (InterferingVRegs.size() == MaxInterferingRegs)
- // Leave SeenAllInterferences set to false to indicate that at least one
- // interference exists beyond those we collected.
- return MaxInterferingRegs;
-
- InterferingVRegs.push_back(IR.LiveUnionI.value());
-
- // Cache the most recent interfering vreg to bypass isSeenInterference.
- RecentInterferingVReg = IR.LiveUnionI.value();
- ++IR.LiveUnionI;
-
+ // Detect overlap; it is handled at the top of the loop.
+ if (VirtRegI->start < LiveUnionI.stop())
continue;
- }
- // VirtRegI may have advanced far beyond LiveUnionI,
- // do a fast intersection test to "catch up"
- IR.LiveUnionI.advanceTo(IR.VirtRegI->start);
+
+ // Still not overlapping. Catch up LiveUnionI.
+ LiveUnionI.advanceTo(VirtRegI->start);
}
SeenAllInterferences = true;
return InterferingVRegs.size();
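
The rewritten loop above is a two-pointer sweep over two sorted segment lists: report each overlapping pair, then advance whichever iterator's segment ends first. The core of that scan, extracted into a self-contained form:

    #include <vector>

    struct Seg { int start, end; };  // half-open, sorted, non-overlapping

    template <typename F>
    void forEachOverlap(const std::vector<Seg> &A,
                        const std::vector<Seg> &B, F Report) {
      size_t i = 0, j = 0;
      while (i < A.size() && j < B.size()) {
        // Same overlap test as VirtRegI->start < LiveUnionI.stop() &&
        // VirtRegI->end > LiveUnionI.start() above.
        if (A[i].start < B[j].end && B[j].start < A[i].end)
          Report(A[i], B[j]);
        // Advance whichever segment ends first; the other one may still
        // overlap later segments on the opposite side.
        if (A[i].end < B[j].end)
          ++i;
        else
          ++j;
      }
    }
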
diff --git a/lib/CodeGen/LiveIntervalUnion.h b/lib/CodeGen/LiveIntervalUnion.h
index 5e78d5e..5d64d28 100644
--- a/lib/CodeGen/LiveIntervalUnion.h
+++ b/lib/CodeGen/LiveIntervalUnion.h
@@ -59,7 +59,6 @@ public:
// LiveIntervalUnions share an external allocator.
typedef LiveSegments::Allocator Allocator;
- class InterferenceResult;
class Query;
private:
@@ -106,62 +105,13 @@ public:
void verify(LiveVirtRegBitSet& VisitedVRegs);
#endif
- /// Cache a single interference test result in the form of two intersecting
- /// segments. This allows efficiently iterating over the interferences. The
- /// iteration logic is handled by LiveIntervalUnion::Query which may
- /// filter interferences depending on the type of query.
- class InterferenceResult {
- friend class Query;
-
- LiveInterval::iterator VirtRegI; // current position in VirtReg
- SegmentIter LiveUnionI; // current position in LiveUnion
-
- // Internal ctor.
- InterferenceResult(LiveInterval::iterator VRegI, SegmentIter UnionI)
- : VirtRegI(VRegI), LiveUnionI(UnionI) {}
-
- public:
- // Public default ctor.
- InterferenceResult(): VirtRegI(), LiveUnionI() {}
-
- /// start - Return the start of the current overlap.
- SlotIndex start() const {
- return std::max(VirtRegI->start, LiveUnionI.start());
- }
-
- /// stop - Return the end of the current overlap.
- SlotIndex stop() const {
- return std::min(VirtRegI->end, LiveUnionI.stop());
- }
-
- /// interference - Return the register that is interfering here.
- LiveInterval *interference() const { return LiveUnionI.value(); }
-
- // Note: this interface provides raw access to the iterators because the
- // result has no way to tell if it's valid to dereference them.
-
- // Access the VirtReg segment.
- LiveInterval::iterator virtRegPos() const { return VirtRegI; }
-
- // Access the LiveUnion segment.
- const SegmentIter &liveUnionPos() const { return LiveUnionI; }
-
- bool operator==(const InterferenceResult &IR) const {
- return VirtRegI == IR.VirtRegI && LiveUnionI == IR.LiveUnionI;
- }
- bool operator!=(const InterferenceResult &IR) const {
- return !operator==(IR);
- }
-
- void print(raw_ostream &OS, const TargetRegisterInfo *TRI) const;
- };
-
/// Query interferences between a single live virtual register and a live
/// interval union.
class Query {
LiveIntervalUnion *LiveUnion;
LiveInterval *VirtReg;
- InterferenceResult FirstInterference;
+ LiveInterval::iterator VirtRegI; // current position in VirtReg
+ SegmentIter LiveUnionI; // current position in LiveUnion
SmallVector<LiveInterval*,4> InterferingVRegs;
bool CheckedFirstInterference;
bool SeenAllInterferences;
@@ -206,26 +156,8 @@ public:
return *VirtReg;
}
- bool isInterference(const InterferenceResult &IR) const {
- if (IR.VirtRegI != VirtReg->end()) {
- assert(overlap(*IR.VirtRegI, IR.LiveUnionI) &&
- "invalid segment iterators");
- return true;
- }
- return false;
- }
-
// Does this live virtual register interfere with the union?
- bool checkInterference() { return isInterference(firstInterference()); }
-
- // Get the first pair of interfering segments, or a noninterfering result.
- // This initializes the firstInterference_ cache.
- const InterferenceResult &firstInterference();
-
- // Treat the result as an iterator and advance to the next interfering pair
- // of segments. Visiting each unique interfering pairs means that the same
- // VirtReg or LiveUnion segment may be visited multiple times.
- bool nextInterference(InterferenceResult &IR) const;
+ bool checkInterference() { return collectInterferingVRegs(1); }
// Count the virtual registers in this union that interfere with this
// query's live virtual register, up to maxInterferingRegs.
@@ -249,13 +181,9 @@ public:
/// Loop.
bool checkLoopInterference(MachineLoopRange*);
- void print(raw_ostream &OS, const TargetRegisterInfo *TRI);
private:
Query(const Query&); // DO NOT IMPLEMENT
void operator=(const Query&); // DO NOT IMPLEMENT
-
- // Private interface for queries
- void findIntersection(InterferenceResult &IR) const;
};
};
diff --git a/lib/CodeGen/LiveRangeCalc.cpp b/lib/CodeGen/LiveRangeCalc.cpp
new file mode 100644
index 0000000..a7d5af5
--- /dev/null
+++ b/lib/CodeGen/LiveRangeCalc.cpp
@@ -0,0 +1,270 @@
+//===---- LiveRangeCalc.cpp - Calculate live ranges -----------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Implementation of the LiveRangeCalc class.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "regalloc"
+#include "LiveRangeCalc.h"
+#include "llvm/CodeGen/MachineDominators.h"
+
+using namespace llvm;
+
+void LiveRangeCalc::reset(const MachineFunction *MF) {
+ unsigned N = MF->getNumBlockIDs();
+ Seen.clear();
+ Seen.resize(N);
+ LiveOut.resize(N);
+ LiveIn.clear();
+}
+
+
+// Transfer information from the LiveIn vector to the live ranges.
+void LiveRangeCalc::updateLiveIns(VNInfo *OverrideVNI, SlotIndexes *Indexes) {
+ for (SmallVectorImpl<LiveInBlock>::iterator I = LiveIn.begin(),
+ E = LiveIn.end(); I != E; ++I) {
+ if (!I->DomNode)
+ continue;
+ MachineBasicBlock *MBB = I->DomNode->getBlock();
+
+ VNInfo *VNI = OverrideVNI ? OverrideVNI : I->Value;
+ assert(VNI && "No live-in value found");
+
+ SlotIndex Start, End;
+ tie(Start, End) = Indexes->getMBBRange(MBB);
+
+ if (I->Kill.isValid())
+ I->LI->addRange(LiveRange(Start, I->Kill, VNI));
+ else {
+ I->LI->addRange(LiveRange(Start, End, VNI));
+ // The value is live-through; update LiveOut as well. Defer the Domtree
+ // lookup until it is needed.
+ assert(Seen.test(MBB->getNumber()));
+ LiveOut[MBB] = LiveOutPair(VNI, (MachineDomTreeNode *)0);
+ }
+ }
+ LiveIn.clear();
+}
+
+
+void LiveRangeCalc::extend(LiveInterval *LI,
+ SlotIndex Kill,
+ SlotIndexes *Indexes,
+ MachineDominatorTree *DomTree,
+ VNInfo::Allocator *Alloc) {
+ assert(LI && "Missing live range");
+ assert(Kill.isValid() && "Invalid SlotIndex");
+ assert(Indexes && "Missing SlotIndexes");
+ assert(DomTree && "Missing dominator tree");
+
+ MachineBasicBlock *KillMBB = Indexes->getMBBFromIndex(Kill.getPrevSlot());
+ assert(KillMBB && "No MBB at Kill");
+
+ // Is there a def in the same MBB we can extend?
+ if (LI->extendInBlock(Indexes->getMBBStartIdx(KillMBB), Kill))
+ return;
+
+ // Find the single reaching def, or determine that Kill is jointly dominated
+ // by multiple values, in which case we may need to create phi-defs to
+ // preserve VNInfo SSA form. Perform a search for all predecessor blocks
+ // where we know the dominating VNInfo.
+ VNInfo *VNI = findReachingDefs(LI, KillMBB, Kill, Indexes, DomTree);
+
+ // When there were multiple different values, we may need new PHIs.
+ if (!VNI)
+ updateSSA(Indexes, DomTree, Alloc);
+
+ updateLiveIns(VNI, Indexes);
+}
+
+
+// This function is called by a client after using the low-level API to add
+// live-out and live-in blocks. The unique value optimization is not
+// available; SplitEditor::transferValues handles that case directly anyway.
+void LiveRangeCalc::calculateValues(SlotIndexes *Indexes,
+ MachineDominatorTree *DomTree,
+ VNInfo::Allocator *Alloc) {
+ assert(Indexes && "Missing SlotIndexes");
+ assert(DomTree && "Missing dominator tree");
+ updateSSA(Indexes, DomTree, Alloc);
+ updateLiveIns(0, Indexes);
+}
+
+
+VNInfo *LiveRangeCalc::findReachingDefs(LiveInterval *LI,
+ MachineBasicBlock *KillMBB,
+ SlotIndex Kill,
+ SlotIndexes *Indexes,
+ MachineDominatorTree *DomTree) {
+ // Blocks where LI should be live-in.
+ SmallVector<MachineBasicBlock*, 16> WorkList(1, KillMBB);
+
+ // Remember if we have seen more than one value.
+ bool UniqueVNI = true;
+ VNInfo *TheVNI = 0;
+
+ // Using Seen as a visited set, perform a BFS for all reaching defs.
+ for (unsigned i = 0; i != WorkList.size(); ++i) {
+ MachineBasicBlock *MBB = WorkList[i];
+ assert(!MBB->pred_empty() && "Value live-in to entry block?");
+ for (MachineBasicBlock::pred_iterator PI = MBB->pred_begin(),
+ PE = MBB->pred_end(); PI != PE; ++PI) {
+ MachineBasicBlock *Pred = *PI;
+
+ // Is this a known live-out block?
+ if (Seen.test(Pred->getNumber())) {
+ if (VNInfo *VNI = LiveOut[Pred].first) {
+ if (TheVNI && TheVNI != VNI)
+ UniqueVNI = false;
+ TheVNI = VNI;
+ }
+ continue;
+ }
+
+ SlotIndex Start, End;
+ tie(Start, End) = Indexes->getMBBRange(Pred);
+
+ // First time we see Pred. Try to determine the live-out value, but set
+ // it to null if Pred is live-through with an unknown value.
+ VNInfo *VNI = LI->extendInBlock(Start, End);
+ setLiveOutValue(Pred, VNI);
+ if (VNI) {
+ if (TheVNI && TheVNI != VNI)
+ UniqueVNI = false;
+ TheVNI = VNI;
+ continue;
+ }
+
+ // No, we need a live-in value for Pred as well
+ if (Pred != KillMBB)
+ WorkList.push_back(Pred);
+ else
+ // Loop back to KillMBB, so the value is really live-through.
+ Kill = SlotIndex();
+ }
+ }
+
+ // Transfer WorkList to LiveInBlocks in reverse order.
+ // This ordering works best with updateSSA().
+ LiveIn.clear();
+ LiveIn.reserve(WorkList.size());
+ while (!WorkList.empty())
+ addLiveInBlock(LI, DomTree->getNode(WorkList.pop_back_val()));
+
+ // The kill block may not be live-through.
+ assert(LiveIn.back().DomNode->getBlock() == KillMBB);
+ LiveIn.back().Kill = Kill;
+
+ return UniqueVNI ? TheVNI : 0;
+}
+
+
+// This is essentially the same iterative algorithm that SSAUpdater uses,
+// except we already have a dominator tree, so we don't have to recompute it.
+void LiveRangeCalc::updateSSA(SlotIndexes *Indexes,
+ MachineDominatorTree *DomTree,
+ VNInfo::Allocator *Alloc) {
+ assert(Indexes && "Missing SlotIndexes");
+ assert(DomTree && "Missing dominator tree");
+
+ // Iterate until convergence.
+ unsigned Changes;
+ do {
+ Changes = 0;
+ // Propagate live-out values down the dominator tree, inserting phi-defs
+ // when necessary.
+ for (SmallVectorImpl<LiveInBlock>::iterator I = LiveIn.begin(),
+ E = LiveIn.end(); I != E; ++I) {
+ MachineDomTreeNode *Node = I->DomNode;
+ // Skip block if the live-in value has already been determined.
+ if (!Node)
+ continue;
+ MachineBasicBlock *MBB = Node->getBlock();
+ MachineDomTreeNode *IDom = Node->getIDom();
+ LiveOutPair IDomValue;
+
+ // Do we need a live-in value for a block with no immediate dominator?
+ // If so, it is probably an unreachable block that has survived somehow.
+ bool needPHI = !IDom || !Seen.test(IDom->getBlock()->getNumber());
+
+ // IDom dominates all of our predecessors, but it may not be their
+ // immediate dominator. Check if any of them have live-out values that are
+ // properly dominated by IDom. If so, we need a phi-def here.
+ if (!needPHI) {
+ IDomValue = LiveOut[IDom->getBlock()];
+
+ // Cache the DomTree node that defined the value.
+ if (IDomValue.first && !IDomValue.second)
+ LiveOut[IDom->getBlock()].second = IDomValue.second =
+ DomTree->getNode(Indexes->getMBBFromIndex(IDomValue.first->def));
+
+ for (MachineBasicBlock::pred_iterator PI = MBB->pred_begin(),
+ PE = MBB->pred_end(); PI != PE; ++PI) {
+ LiveOutPair &Value = LiveOut[*PI];
+ if (!Value.first || Value.first == IDomValue.first)
+ continue;
+
+ // Cache the DomTree node that defined the value.
+ if (!Value.second)
+ Value.second =
+ DomTree->getNode(Indexes->getMBBFromIndex(Value.first->def));
+
+ // This predecessor is carrying something other than IDomValue.
+ // It could be because IDomValue hasn't propagated yet, or it could be
+ // because MBB is in the dominance frontier of that value.
+ if (DomTree->dominates(IDom, Value.second)) {
+ needPHI = true;
+ break;
+ }
+ }
+ }
+
+ // The value may be live-through even if Kill is set, as can happen when
+ // we are called from extend(). In that case the Seen bit is set, and
+ // LiveOut indicates a foreign or missing value.
+ LiveOutPair &LOP = LiveOut[MBB];
+
+ // Create a phi-def if required.
+ if (needPHI) {
+ ++Changes;
+ assert(Alloc && "Need VNInfo allocator to create PHI-defs");
+ SlotIndex Start, End;
+ tie(Start, End) = Indexes->getMBBRange(MBB);
+ VNInfo *VNI = I->LI->getNextValue(Start, 0, *Alloc);
+ VNI->setIsPHIDef(true);
+ I->Value = VNI;
+ // This block is done, we know the final value.
+ I->DomNode = 0;
+
+ // Add liveness since updateLiveIns now skips this node.
+ if (I->Kill.isValid())
+ I->LI->addRange(LiveRange(Start, I->Kill, VNI));
+ else {
+ I->LI->addRange(LiveRange(Start, End, VNI));
+ LOP = LiveOutPair(VNI, Node);
+ }
+ } else if (IDomValue.first) {
+ // No phi-def here. Remember incoming value.
+ I->Value = IDomValue.first;
+
+ // If the IDomValue is killed in the block, don't propagate through.
+ if (I->Kill.isValid())
+ continue;
+
+ // Propagate IDomValue if it isn't killed:
+ // MBB is live-out and doesn't define its own value.
+ if (LOP.first == IDomValue.first)
+ continue;
+ ++Changes;
+ LOP = IDomValue;
+ }
+ }
+ } while (Changes);
+}
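
The extend() entry point above strings the three phases together:
LiveInterval::extendInBlock, findReachingDefs(), and the updateSSA() fixed
point. A hedged usage sketch follows; every object is assumed to be owned by
the caller (e.g. a live interval analysis), and the wrapper itself is
illustrative, not part of the patch.

  // Make LI live up to Kill, inserting PHI-defs if several values reach it.
  void extendToKill(LiveRangeCalc &LRCalc, LiveInterval *LI, SlotIndex Kill,
                    SlotIndexes *Indexes, MachineDominatorTree *DomTree,
                    VNInfo::Allocator *Alloc) {
    // Per the header docs, Alloc may be null when Kill is known to be
    // dominated by a single existing value; new PHI-defs need the allocator.
    LRCalc.extend(LI, Kill, Indexes, DomTree, Alloc);
  }
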
diff --git a/lib/CodeGen/LiveRangeCalc.h b/lib/CodeGen/LiveRangeCalc.h
new file mode 100644
index 0000000..b8c8585
--- /dev/null
+++ b/lib/CodeGen/LiveRangeCalc.h
@@ -0,0 +1,226 @@
+//===---- LiveRangeCalc.h - Calculate live ranges ---------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// The LiveRangeCalc class can be used to compute live ranges from scratch. It
+// caches information about values in the CFG to speed up repeated operations
+// on the same live range. The cache can be shared by non-overlapping live
+// ranges. SplitKit uses that when computing the live range of split products.
+//
+// A low-level interface is available to clients that know where a variable is
+// live, but don't know which value it has at every point. LiveRangeCalc will
+// propagate values down the dominator tree, and even insert PHI-defs where
+// needed. SplitKit uses this faster interface when possible.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_LIVERANGECALC_H
+#define LLVM_CODEGEN_LIVERANGECALC_H
+
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/IndexedMap.h"
+#include "llvm/CodeGen/LiveInterval.h"
+
+namespace llvm {
+
+/// Forward declarations for MachineDominators.h:
+class MachineDominatorTree;
+template <class NodeT> class DomTreeNodeBase;
+typedef DomTreeNodeBase<MachineBasicBlock> MachineDomTreeNode;
+
+class LiveRangeCalc {
+ /// Seen - Bit vector of active entries in LiveOut, also used as a visited
+ /// set by findReachingDefs. One entry per basic block, indexed by block
+ /// number. This is kept as a separate bit vector because it can be cleared
+ /// quickly when switching live ranges.
+ BitVector Seen;
+
+ /// LiveOutPair - A value and the block that defined it. The domtree node is
+ /// redundant, it can be computed as: MDT[Indexes.getMBBFromIndex(VNI->def)].
+ typedef std::pair<VNInfo*, MachineDomTreeNode*> LiveOutPair;
+
+ /// LiveOutMap - Map basic blocks to the value leaving the block.
+ typedef IndexedMap<LiveOutPair, MBB2NumberFunctor> LiveOutMap;
+
+ /// LiveOut - Map each basic block where a live range is live out to the
+ /// live-out value and its defining block.
+ ///
+ /// For every basic block, MBB, one of these conditions shall be true:
+ ///
+ /// 1. !Seen.test(MBB->getNumber())
+ /// Blocks without a Seen bit are ignored.
+ /// 2. LiveOut[MBB].second.getNode() == MBB
+ /// The live-out value is defined in MBB.
+ /// 3. forall P in preds(MBB): LiveOut[P] == LiveOut[MBB]
+ /// The live-out value passes through MBB. All predecessors must carry
+ /// the same value.
+ ///
+ /// The domtree node may be null; it can be recomputed when needed.
+ ///
+ /// The map can be shared by multiple live ranges as long as no two are
+ /// live-out of the same block.
+ LiveOutMap LiveOut;
+
+ /// LiveInBlock - Information about a basic block where a live range is known
+ /// to be live-in, but the value has not yet been determined.
+ struct LiveInBlock {
+ // LI - The live range that is live-in to this block. The algorithms can
+ // handle multiple non-overlapping live ranges simultaneously.
+ LiveInterval *LI;
+
+ // DomNode - Dominator tree node for the block.
+ // Cleared when the final value has been determined and LI has been updated.
+ MachineDomTreeNode *DomNode;
+
+ // Position in block where the live-in range ends, or SlotIndex() if the
+ // range passes through the block. When the final value has been
+ // determined, the range from the block start to Kill will be added to LI.
+ SlotIndex Kill;
+
+ // Live-in value filled in by updateSSA once it is known.
+ VNInfo *Value;
+
+ LiveInBlock(LiveInterval *li, MachineDomTreeNode *node, SlotIndex kill)
+ : LI(li), DomNode(node), Kill(kill), Value(0) {}
+ };
+
+ /// LiveIn - Work list of blocks where the live-in value has yet to be
+ /// determined. This list is typically computed by findReachingDefs() and
+ /// used as a work list by updateSSA(). The low-level interface may also be
+ /// used to add entries directly.
+ SmallVector<LiveInBlock, 16> LiveIn;
+
+ /// findReachingDefs - Assuming that LI is live-in to KillMBB and killed at
+ /// Kill, search for values that can reach KillMBB. All blocks that need LI
+ /// to be live-in are added to LiveIn. If a unique reaching def is found,
+ /// its value is returned; if Kill is jointly dominated by multiple values,
+ /// NULL is returned.
+ VNInfo *findReachingDefs(LiveInterval *LI,
+ MachineBasicBlock *KillMBB,
+ SlotIndex Kill,
+ SlotIndexes *Indexes,
+ MachineDominatorTree *DomTree);
+
+ /// updateSSA - Compute the values that will be live in to all requested
+ /// blocks in LiveIn. Create PHI-def values as required to preserve SSA form.
+ ///
+ /// Every live-in block must be jointly dominated by the added live-out
+ /// blocks. No values are read from the live ranges.
+ void updateSSA(SlotIndexes *Indexes,
+ MachineDominatorTree *DomTree,
+ VNInfo::Allocator *Alloc);
+
+ /// updateLiveIns - Add liveness as specified in the LiveIn vector, using VNI
+ /// as a wildcard value for LiveIn entries without a value.
+ void updateLiveIns(VNInfo *VNI, SlotIndexes*);
+
+public:
+ //===--------------------------------------------------------------------===//
+ // High-level interface.
+ //===--------------------------------------------------------------------===//
+ //
+ // Calculate live ranges from scratch.
+ //
+
+ /// reset - Prepare caches for a new set of non-overlapping live ranges. The
+ /// caches must be reset before attempting calculations with a live range
+ /// that may overlap a previously computed live range, and before the first
+ /// live range in a function. If live ranges are not known to be
+ /// non-overlapping, call reset before each.
+ void reset(const MachineFunction *MF);
+
+ /// calculate - Calculate the live range of a virtual register from its defs
+ /// and uses. LI must be empty with no values.
+ void calculate(LiveInterval *LI,
+ MachineRegisterInfo *MRI,
+ SlotIndexes *Indexes,
+ VNInfo::Allocator *Alloc);
+
+ //===--------------------------------------------------------------------===//
+ // Mid-level interface.
+ //===--------------------------------------------------------------------===//
+ //
+ // Modify existing live ranges.
+ //
+
+ /// extend - Extend the live range of LI to reach Kill.
+ ///
+ /// The existing values in LI must be live so they jointly dominate Kill. If
+ /// Kill is not dominated by a single existing value, PHI-defs are inserted
+ /// as required to preserve SSA form. If Kill is known to be dominated by a
+ /// single existing value, Alloc may be null.
+ void extend(LiveInterval *LI,
+ SlotIndex Kill,
+ SlotIndexes *Indexes,
+ MachineDominatorTree *DomTree,
+ VNInfo::Allocator *Alloc);
+
+ /// extendToUses - Extend the live range of LI to reach all uses.
+ ///
+ /// All uses must be jointly dominated by existing liveness. PHI-defs are
+ /// inserted as needed to preserve SSA form.
+ void extendToUses(LiveInterval *LI,
+ MachineRegisterInfo *MRI,
+ SlotIndexes *Indexes,
+ MachineDominatorTree *DomTree,
+ VNInfo::Allocator *Alloc);
+
+ //===--------------------------------------------------------------------===//
+ // Low-level interface.
+ //===--------------------------------------------------------------------===//
+ //
+ // These functions can be used to compute live ranges where the live-in and
+ // live-out blocks are already known, but the SSA value in each block is
+ // unknown.
+ //
+ // After calling reset(), add known live-out values and known live-in blocks.
+ // Then call calculateValues() to compute the actual value that is
+ // live-in to each block, and add liveness to the live ranges.
+ //
+
+ /// setLiveOutValue - Indicate that VNI is live out from MBB. The
+ /// calculateValues() function will not add liveness for MBB; the caller
+ /// should take care of that.
+ ///
+ /// VNI may be null only if MBB is a live-through block also passed to
+ /// addLiveInBlock().
+ void setLiveOutValue(MachineBasicBlock *MBB, VNInfo *VNI) {
+ Seen.set(MBB->getNumber());
+ LiveOut[MBB] = LiveOutPair(VNI, (MachineDomTreeNode *)0);
+ }
+
+ /// addLiveInBlock - Add a block with an unknown live-in value. This
+ /// function can only be called once per basic block. Once the live-in value
+ /// has been determined, calculateValues() will add liveness to LI.
+ ///
+ /// @param LI The live range that is live-in to the block.
+ /// @param DomNode The domtree node for the block.
+ /// @param Kill Index in block where LI is killed. If the value is
+ /// live-through, set Kill = SlotIndex() and also call
+ /// setLiveOutValue(MBB, 0).
+ void addLiveInBlock(LiveInterval *LI,
+ MachineDomTreeNode *DomNode,
+ SlotIndex Kill = SlotIndex()) {
+ LiveIn.push_back(LiveInBlock(LI, DomNode, Kill));
+ }
+
+ /// calculateValues - Calculate the value that will be live-in to each block
+ /// added with addLiveInBlock. Add PHI-def values as needed to preserve SSA
+ /// form. Add liveness to all live-in blocks up to the Kill point, or the
+ /// whole block for live-through blocks.
+ ///
+ /// Every predecessor of a live-in block must have been given a value with
+ /// setLiveOutValue; the value may be null for live-through blocks.
+ void calculateValues(SlotIndexes *Indexes,
+ MachineDominatorTree *DomTree,
+ VNInfo::Allocator *Alloc);
+};
+
+} // end namespace llvm
+
+#endif
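
To make the three-step low-level protocol described above concrete, here is a
hedged sketch of a client in the SplitEditor::transferValues mold. Only the
setLiveOutValue/addLiveInBlock/calculateValues calls come from this header;
the parameter types and the origin of the block/value lists are assumptions.

  // Compute liveness when the live-out values and live-in blocks are known.
  // reset(MF) is assumed to have been called earlier.
  void computeKnownLiveness(LiveRangeCalc &LRCalc, LiveInterval *LI,
                            ArrayRef<std::pair<MachineBasicBlock*, VNInfo*> > Outs,
                            ArrayRef<MachineDomTreeNode*> Ins,
                            SlotIndexes *Indexes,
                            MachineDominatorTree *DomTree,
                            VNInfo::Allocator *Alloc) {
    for (unsigned i = 0, e = Outs.size(); i != e; ++i)
      LRCalc.setLiveOutValue(Outs[i].first, Outs[i].second);
    for (unsigned i = 0, e = Ins.size(); i != e; ++i)
      LRCalc.addLiveInBlock(LI, Ins[i]);   // default Kill: live-through
    // Fill in the live-in values, inserting PHI-defs where needed.
    LRCalc.calculateValues(Indexes, DomTree, Alloc);
  }
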
diff --git a/lib/CodeGen/LiveRangeEdit.cpp b/lib/CodeGen/LiveRangeEdit.cpp
index b385fb3..b23f851 100644
--- a/lib/CodeGen/LiveRangeEdit.cpp
+++ b/lib/CodeGen/LiveRangeEdit.cpp
@@ -319,9 +319,12 @@ void LiveRangeEdit::calculateRegClassAndHint(MachineFunction &MF,
LiveIntervals &LIS,
const MachineLoopInfo &Loops) {
VirtRegAuxInfo VRAI(MF, LIS, Loops);
+ MachineRegisterInfo &MRI = MF.getRegInfo();
for (iterator I = begin(), E = end(); I != E; ++I) {
LiveInterval &LI = **I;
- VRAI.CalculateRegClass(LI.reg);
+ if (MRI.recomputeRegClass(LI.reg, MF.getTarget()))
+ DEBUG(dbgs() << "Inflated " << PrintReg(LI.reg) << " to "
+ << MRI.getRegClass(LI.reg)->getName() << '\n');
VRAI.CalculateWeightAndHint(LI);
}
}
diff --git a/lib/CodeGen/LiveRangeEdit.h b/lib/CodeGen/LiveRangeEdit.h
index db6740c..9b0a671 100644
--- a/lib/CodeGen/LiveRangeEdit.h
+++ b/lib/CodeGen/LiveRangeEdit.h
@@ -115,7 +115,7 @@ public:
LiveInterval *get(unsigned idx) const { return newRegs_[idx+firstNew_]; }
ArrayRef<LiveInterval*> regs() const {
- return ArrayRef<LiveInterval*>(newRegs_).slice(firstNew_);
+ return makeArrayRef(newRegs_).slice(firstNew_);
}
/// FIXME: Temporary accessors until we can get rid of
diff --git a/lib/CodeGen/LiveStackAnalysis.cpp b/lib/CodeGen/LiveStackAnalysis.cpp
index c75196a..939e795 100644
--- a/lib/CodeGen/LiveStackAnalysis.cpp
+++ b/lib/CodeGen/LiveStackAnalysis.cpp
@@ -44,7 +44,8 @@ void LiveStacks::releaseMemory() {
S2RCMap.clear();
}
-bool LiveStacks::runOnMachineFunction(MachineFunction &) {
+bool LiveStacks::runOnMachineFunction(MachineFunction &MF) {
+ TRI = MF.getTarget().getRegisterInfo();
// FIXME: No analysis is being done right now. We are relying on the
// register allocators to provide the information.
return false;
@@ -61,7 +62,7 @@ LiveStacks::getOrCreateInterval(int Slot, const TargetRegisterClass *RC) {
} else {
// Use the largest common subclass register class.
const TargetRegisterClass *OldRC = S2RCMap[Slot];
- S2RCMap[Slot] = getCommonSubClass(OldRC, RC);
+ S2RCMap[Slot] = TRI->getCommonSubClass(OldRC, RC);
}
return I->second;
}
diff --git a/lib/CodeGen/LiveVariables.cpp b/lib/CodeGen/LiveVariables.cpp
index 20bad60..2ca90f9 100644
--- a/lib/CodeGen/LiveVariables.cpp
+++ b/lib/CodeGen/LiveVariables.cpp
@@ -662,7 +662,7 @@ void LiveVariables::removeVirtualRegistersKilled(MachineInstr *MI) {
if (TargetRegisterInfo::isVirtualRegister(Reg)) {
bool removed = getVarInfo(Reg).removeKill(MI);
assert(removed && "kill not in register's VarInfo?");
- removed = true;
+ (void)removed;
}
}
}
diff --git a/lib/CodeGen/MachineBasicBlock.cpp b/lib/CodeGen/MachineBasicBlock.cpp
index 8f0fb46..4c5fe4c 100644
--- a/lib/CodeGen/MachineBasicBlock.cpp
+++ b/lib/CodeGen/MachineBasicBlock.cpp
@@ -571,6 +571,11 @@ MachineBasicBlock::SplitCriticalEdge(MachineBasicBlock *Succ, Pass *P) {
if (i->getOperand(ni+1).getMBB() == this)
i->getOperand(ni+1).setMBB(NMBB);
+ // Inherit live-ins from the successor
+ for (MachineBasicBlock::livein_iterator I = Succ->livein_begin(),
+ E = Succ->livein_end(); I != E; ++I)
+ NMBB->addLiveIn(*I);
+
// Update LiveVariables.
if (LV) {
// Restore kills of virtual registers that were killed by the terminators.
diff --git a/lib/CodeGen/MachineBlockFrequency.cpp b/lib/CodeGen/MachineBlockFrequencyInfo.cpp
index 893a320..b92cda9 100644
--- a/lib/CodeGen/MachineBlockFrequency.cpp
+++ b/lib/CodeGen/MachineBlockFrequencyInfo.cpp
@@ -1,4 +1,4 @@
-//====----- MachineBlockFrequency.cpp - Machine Block Frequency Analysis ----====//
+//====----- MachineBlockFrequencyInfo.cpp - Machine Block Frequency Analysis ----====//
//
// The LLVM Compiler Infrastructure
//
@@ -13,47 +13,49 @@
#include "llvm/InitializePasses.h"
#include "llvm/Analysis/BlockFrequencyImpl.h"
-#include "llvm/CodeGen/MachineBlockFrequency.h"
+#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
using namespace llvm;
-INITIALIZE_PASS_BEGIN(MachineBlockFrequency, "machine-block-freq",
+INITIALIZE_PASS_BEGIN(MachineBlockFrequencyInfo, "machine-block-freq",
"Machine Block Frequency Analysis", true, true)
INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfo)
-INITIALIZE_PASS_END(MachineBlockFrequency, "machine-block-freq",
+INITIALIZE_PASS_END(MachineBlockFrequencyInfo, "machine-block-freq",
"Machine Block Frequency Analysis", true, true)
-char MachineBlockFrequency::ID = 0;
+char MachineBlockFrequencyInfo::ID = 0;
-MachineBlockFrequency::MachineBlockFrequency() : MachineFunctionPass(ID) {
- initializeMachineBlockFrequencyPass(*PassRegistry::getPassRegistry());
+MachineBlockFrequencyInfo::MachineBlockFrequencyInfo() : MachineFunctionPass(ID) {
+ initializeMachineBlockFrequencyInfoPass(*PassRegistry::getPassRegistry());
MBFI = new BlockFrequencyImpl<MachineBasicBlock, MachineFunction,
MachineBranchProbabilityInfo>();
}
-MachineBlockFrequency::~MachineBlockFrequency() {
+MachineBlockFrequencyInfo::~MachineBlockFrequencyInfo() {
delete MBFI;
}
-void MachineBlockFrequency::getAnalysisUsage(AnalysisUsage &AU) const {
+void MachineBlockFrequencyInfo::getAnalysisUsage(AnalysisUsage &AU) const {
AU.addRequired<MachineBranchProbabilityInfo>();
AU.setPreservesAll();
+ MachineFunctionPass::getAnalysisUsage(AU);
}
-bool MachineBlockFrequency::runOnMachineFunction(MachineFunction &F) {
+bool MachineBlockFrequencyInfo::runOnMachineFunction(MachineFunction &F) {
MachineBranchProbabilityInfo &MBPI = getAnalysis<MachineBranchProbabilityInfo>();
MBFI->doFunction(&F, &MBPI);
return false;
}
-/// getblockFreq - Return block frequency. Never return 0, value must be
-/// positive. Please note that initial frequency is equal to 1024. It means that
-/// we should not rely on the value itself, but only on the comparison to the
-/// other block frequencies. We do this to avoid using of floating points.
+/// getBlockFreq - Return the block frequency, or 0 if we don't have the
+/// information. Note that the initial frequency is 1024: the value is only
+/// meaningful relative to the other block frequencies, never by itself.
+/// We use this fixed-point scale to avoid floating point arithmetic.
///
-uint32_t MachineBlockFrequency::getBlockFreq(MachineBasicBlock *MBB) {
+BlockFrequency MachineBlockFrequencyInfo::
+getBlockFreq(MachineBasicBlock *MBB) const {
return MBFI->getBlockFreq(MBB);
}
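
Since the frequencies are scaled against an entry frequency of 1024 rather
than being absolute counts, consumers should only ever compare them. A hedged
sketch of such a consumer; the helper itself is illustrative, and MBFI is
assumed to be the analysis result fetched by the enclosing pass:

  // True when MBB is expected to execute less often than the entry block.
  bool isColderThanEntry(MachineBlockFrequencyInfo &MBFI, MachineFunction &MF,
                         MachineBasicBlock *MBB) {
    MachineBasicBlock *Entry = MF.begin();
    return MBFI.getBlockFreq(MBB).getFrequency() <
           MBFI.getBlockFreq(Entry).getFrequency();
  }
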
diff --git a/lib/CodeGen/MachineCSE.cpp b/lib/CodeGen/MachineCSE.cpp
index 3a60a37..7eda8c1 100644
--- a/lib/CodeGen/MachineCSE.cpp
+++ b/lib/CodeGen/MachineCSE.cpp
@@ -430,13 +430,24 @@ bool MachineCSE::ProcessBlock(MachineBasicBlock *MBB) {
unsigned NewReg = CSMI->getOperand(i).getReg();
if (OldReg == NewReg)
continue;
+
assert(TargetRegisterInfo::isVirtualRegister(OldReg) &&
TargetRegisterInfo::isVirtualRegister(NewReg) &&
"Do not CSE physical register defs!");
+
if (!isProfitableToCSE(NewReg, OldReg, CSMI, MI)) {
DoCSE = false;
break;
}
+
+ // Don't perform CSE if the result of the old instruction cannot exist
+ // within the register class of the new instruction.
+ const TargetRegisterClass *OldRC = MRI->getRegClass(OldReg);
+ if (!MRI->constrainRegClass(NewReg, OldRC)) {
+ DoCSE = false;
+ break;
+ }
+
CSEPairs.push_back(std::make_pair(OldReg, NewReg));
--NumDefs;
}
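
A note on the constrainRegClass() guard just added (hedged; the semantics are
summarized from MachineRegisterInfo rather than shown in this hunk):
constrainRegClass narrows NewReg's register class to a common subclass of its
current class and OldRC, and returns null, leaving the class unchanged, when
no common subclass exists. That is exactly the substitution MachineCSE must
reject, since every use of OldReg is about to be rewritten to NewReg.

  // Can uses of OldReg (class OldRC) legally be rewritten to use NewReg?
  bool canSubstitute(MachineRegisterInfo *MRI, unsigned NewReg,
                     const TargetRegisterClass *OldRC) {
    return MRI->constrainRegClass(NewReg, OldRC) != 0;
  }
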
diff --git a/lib/CodeGen/MachineFunction.cpp b/lib/CodeGen/MachineFunction.cpp
index cd25156..20066a0 100644
--- a/lib/CodeGen/MachineFunction.cpp
+++ b/lib/CodeGen/MachineFunction.cpp
@@ -619,7 +619,7 @@ void MachineJumpTableInfo::dump() const { print(dbgs()); }
// MachineConstantPool implementation
//===----------------------------------------------------------------------===//
-const Type *MachineConstantPoolEntry::getType() const {
+Type *MachineConstantPoolEntry::getType() const {
if (isMachineConstantPoolEntry())
return Val.MachineCPVal->getType();
return Val.ConstVal->getType();
diff --git a/lib/CodeGen/MachineInstr.cpp b/lib/CodeGen/MachineInstr.cpp
index 143a29b..a240667 100644
--- a/lib/CodeGen/MachineInstr.cpp
+++ b/lib/CodeGen/MachineInstr.cpp
@@ -51,7 +51,7 @@ using namespace llvm;
/// explicitly nulled out.
void MachineOperand::AddRegOperandToRegInfo(MachineRegisterInfo *RegInfo) {
assert(isReg() && "Can only add reg operand to use lists");
-
+
// If the reginfo pointer is null, just explicitly null out or next/prev
// pointers, to ensure they are not garbage.
if (RegInfo == 0) {
@@ -59,23 +59,23 @@ void MachineOperand::AddRegOperandToRegInfo(MachineRegisterInfo *RegInfo) {
Contents.Reg.Next = 0;
return;
}
-
+
// Otherwise, add this operand to the head of the registers use/def list.
MachineOperand **Head = &RegInfo->getRegUseDefListHead(getReg());
-
+
// For SSA values, we prefer to keep the definition at the start of the list.
// we do this by skipping over the definition if it is at the head of the
// list.
if (*Head && (*Head)->isDef())
Head = &(*Head)->Contents.Reg.Next;
-
+
Contents.Reg.Next = *Head;
if (Contents.Reg.Next) {
assert(getReg() == Contents.Reg.Next->getReg() &&
"Different regs on the same list!");
Contents.Reg.Next->Contents.Reg.Prev = &Contents.Reg.Next;
}
-
+
Contents.Reg.Prev = Head;
*Head = this;
}
@@ -86,7 +86,7 @@ void MachineOperand::RemoveRegOperandFromRegInfo() {
assert(isOnRegUseList() && "Reg operand is not on a use list");
// Unlink this from the doubly linked list of operands.
MachineOperand *NextOp = Contents.Reg.Next;
- *Contents.Reg.Prev = NextOp;
+ *Contents.Reg.Prev = NextOp;
if (NextOp) {
assert(NextOp->getReg() == getReg() && "Corrupt reg use/def chain!");
NextOp->Contents.Reg.Prev = Contents.Reg.Prev;
@@ -97,7 +97,7 @@ void MachineOperand::RemoveRegOperandFromRegInfo() {
void MachineOperand::setReg(unsigned Reg) {
if (getReg() == Reg) return; // No change.
-
+
// Otherwise, we have to change the register. If this operand is embedded
// into a machine function, we need to update the old and new register's
// use/def lists.
@@ -109,7 +109,7 @@ void MachineOperand::setReg(unsigned Reg) {
AddRegOperandToRegInfo(&MF->getRegInfo());
return;
}
-
+
// Otherwise, just change the register, no problem. :)
SmallContents.RegNo = Reg;
}
@@ -144,7 +144,7 @@ void MachineOperand::ChangeToImmediate(int64_t ImmVal) {
if (isReg() && getParent() && getParent()->getParent() &&
getParent()->getParent()->getParent())
RemoveRegOperandFromRegInfo();
-
+
OpKind = MO_Immediate;
Contents.ImmVal = ImmVal;
}
@@ -155,7 +155,7 @@ void MachineOperand::ChangeToImmediate(int64_t ImmVal) {
void MachineOperand::ChangeToRegister(unsigned Reg, bool isDef, bool isImp,
bool isKill, bool isDead, bool isUndef,
bool isDebug) {
- // If this operand is already a register operand, use setReg to update the
+ // If this operand is already a register operand, use setReg to update the
// register's use/def lists.
if (isReg()) {
assert(!isEarlyClobber());
@@ -189,7 +189,7 @@ bool MachineOperand::isIdenticalTo(const MachineOperand &Other) const {
if (getType() != Other.getType() ||
getTargetFlags() != Other.getTargetFlags())
return false;
-
+
switch (getType()) {
default: llvm_unreachable("Unrecognized operand type");
case MachineOperand::MO_Register:
@@ -322,7 +322,7 @@ void MachineOperand::print(raw_ostream &OS, const TargetMachine *TM) const {
default:
llvm_unreachable("Unrecognized operand type");
}
-
+
if (unsigned TF = getTargetFlags())
OS << "[TF=" << TF << ']';
}
@@ -408,7 +408,7 @@ uint64_t MachineMemOperand::getAlignment() const {
raw_ostream &llvm::operator<<(raw_ostream &OS, const MachineMemOperand &MMO) {
assert((MMO.isLoad() || MMO.isStore()) &&
"SV has to be a load, store or both.");
-
+
if (MMO.isVolatile())
OS << "Volatile ";
@@ -417,7 +417,7 @@ raw_ostream &llvm::operator<<(raw_ostream &OS, const MachineMemOperand &MMO) {
if (MMO.isStore())
OS << "ST";
OS << MMO.getSize();
-
+
// Print the address information.
OS << "[";
if (!MMO.getValue())
@@ -464,7 +464,7 @@ raw_ostream &llvm::operator<<(raw_ostream &OS, const MachineMemOperand &MMO) {
/// MachineInstr ctor - This constructor creates a dummy MachineInstr with
/// MCID NULL and no operands.
MachineInstr::MachineInstr()
- : MCID(0), NumImplicitOps(0), Flags(0), AsmPrinterFlags(0),
+ : MCID(0), Flags(0), AsmPrinterFlags(0),
MemRefs(0), MemRefsEnd(0),
Parent(0) {
// Make sure that we get added to a machine basicblock
@@ -484,8 +484,9 @@ void MachineInstr::addImplicitDefUseOperands() {
/// implicit operands. It reserves space for the number of operands specified by
/// the MCInstrDesc.
MachineInstr::MachineInstr(const MCInstrDesc &tid, bool NoImp)
- : MCID(&tid), NumImplicitOps(0), Flags(0), AsmPrinterFlags(0),
+ : MCID(&tid), Flags(0), AsmPrinterFlags(0),
MemRefs(0), MemRefsEnd(0), Parent(0) {
+ unsigned NumImplicitOps = 0;
if (!NoImp)
NumImplicitOps = MCID->getNumImplicitDefs() + MCID->getNumImplicitUses();
Operands.reserve(NumImplicitOps + MCID->getNumOperands());
@@ -498,8 +499,9 @@ MachineInstr::MachineInstr(const MCInstrDesc &tid, bool NoImp)
/// MachineInstr ctor - As above, but with a DebugLoc.
MachineInstr::MachineInstr(const MCInstrDesc &tid, const DebugLoc dl,
bool NoImp)
- : MCID(&tid), NumImplicitOps(0), Flags(0), AsmPrinterFlags(0),
+ : MCID(&tid), Flags(0), AsmPrinterFlags(0),
MemRefs(0), MemRefsEnd(0), Parent(0), debugLoc(dl) {
+ unsigned NumImplicitOps = 0;
if (!NoImp)
NumImplicitOps = MCID->getNumImplicitDefs() + MCID->getNumImplicitUses();
Operands.reserve(NumImplicitOps + MCID->getNumOperands());
@@ -510,13 +512,14 @@ MachineInstr::MachineInstr(const MCInstrDesc &tid, const DebugLoc dl,
}
/// MachineInstr ctor - Work exactly the same as the ctor two above, except
-/// that the MachineInstr is created and added to the end of the specified
+/// that the MachineInstr is created and added to the end of the specified
/// basic block.
MachineInstr::MachineInstr(MachineBasicBlock *MBB, const MCInstrDesc &tid)
- : MCID(&tid), NumImplicitOps(0), Flags(0), AsmPrinterFlags(0),
+ : MCID(&tid), Flags(0), AsmPrinterFlags(0),
MemRefs(0), MemRefsEnd(0), Parent(0) {
assert(MBB && "Cannot use inserting ctor with null basic block!");
- NumImplicitOps = MCID->getNumImplicitDefs() + MCID->getNumImplicitUses();
+ unsigned NumImplicitOps =
+ MCID->getNumImplicitDefs() + MCID->getNumImplicitUses();
Operands.reserve(NumImplicitOps + MCID->getNumOperands());
addImplicitDefUseOperands();
// Make sure that we get added to a machine basicblock
@@ -528,10 +531,11 @@ MachineInstr::MachineInstr(MachineBasicBlock *MBB, const MCInstrDesc &tid)
///
MachineInstr::MachineInstr(MachineBasicBlock *MBB, const DebugLoc dl,
const MCInstrDesc &tid)
- : MCID(&tid), NumImplicitOps(0), Flags(0), AsmPrinterFlags(0),
+ : MCID(&tid), Flags(0), AsmPrinterFlags(0),
MemRefs(0), MemRefsEnd(0), Parent(0), debugLoc(dl) {
assert(MBB && "Cannot use inserting ctor with null basic block!");
- NumImplicitOps = MCID->getNumImplicitDefs() + MCID->getNumImplicitUses();
+ unsigned NumImplicitOps =
+ MCID->getNumImplicitDefs() + MCID->getNumImplicitUses();
Operands.reserve(NumImplicitOps + MCID->getNumOperands());
addImplicitDefUseOperands();
// Make sure that we get added to a machine basicblock
@@ -542,7 +546,7 @@ MachineInstr::MachineInstr(MachineBasicBlock *MBB, const DebugLoc dl,
/// MachineInstr ctor - Copies MachineInstr arg exactly
///
MachineInstr::MachineInstr(MachineFunction &MF, const MachineInstr &MI)
- : MCID(&MI.getDesc()), NumImplicitOps(0), Flags(0), AsmPrinterFlags(0),
+ : MCID(&MI.getDesc()), Flags(0), AsmPrinterFlags(0),
MemRefs(MI.MemRefs), MemRefsEnd(MI.MemRefsEnd),
Parent(0), debugLoc(MI.getDebugLoc()) {
Operands.reserve(MI.getNumOperands());
@@ -550,7 +554,6 @@ MachineInstr::MachineInstr(MachineFunction &MF, const MachineInstr &MI)
// Add operands
for (unsigned i = 0; i != MI.getNumOperands(); ++i)
addOperand(MI.getOperand(i));
- NumImplicitOps = MI.NumImplicitOps;
// Copy all the flags.
Flags = MI.Flags;
@@ -605,102 +608,74 @@ void MachineInstr::AddRegOperandsToUseLists(MachineRegisterInfo &RegInfo) {
/// addOperand - Add the specified operand to the instruction. If it is an
/// implicit operand, it is added to the end of the operand list. If it is
/// an explicit operand it is added at the end of the explicit operand list
-/// (before the first implicit operand).
+/// (before the first implicit operand).
void MachineInstr::addOperand(const MachineOperand &Op) {
+ assert(MCID && "Cannot add operands before providing an instr descriptor");
bool isImpReg = Op.isReg() && Op.isImplicit();
- assert((isImpReg || !OperandsComplete()) &&
- "Trying to add an operand to a machine instr that is already done!");
-
MachineRegisterInfo *RegInfo = getRegInfo();
- // If we are adding the operand to the end of the list, our job is simpler.
- // This is true most of the time, so this is a reasonable optimization.
- if (isImpReg || NumImplicitOps == 0) {
- // We can only do this optimization if we know that the operand list won't
- // reallocate.
- if (Operands.empty() || Operands.size()+1 <= Operands.capacity()) {
- Operands.push_back(Op);
-
- // Set the parent of the operand.
- Operands.back().ParentMI = this;
-
- // If the operand is a register, update the operand's use list.
- if (Op.isReg()) {
- Operands.back().AddRegOperandToRegInfo(RegInfo);
- // If the register operand is flagged as early, mark the operand as such
- unsigned OpNo = Operands.size() - 1;
- if (MCID->getOperandConstraint(OpNo, MCOI::EARLY_CLOBBER) != -1)
- Operands[OpNo].setIsEarlyClobber(true);
- }
- return;
+ // If the Operands backing store is reallocated, all register operands must
+ // be removed and re-added to RegInfo, which keeps pointers to the operands.
+ bool Reallocate = RegInfo &&
+ !Operands.empty() && Operands.size() == Operands.capacity();
+
+ // Find the insert location for the new operand. Implicit registers go at
+ // the end; everything else goes before the implicit regs.
+ unsigned OpNo = Operands.size();
+
+ // Remove all the implicit operands from RegInfo if they need to be shifted.
+ // FIXME: Allow mixed explicit and implicit operands on inline asm.
+ // InstrEmitter::EmitSpecialNode() is marking inline asm clobbers as
+ // implicit-defs, but they must not be moved around. See the FIXME in
+ // InstrEmitter.cpp.
+ if (!isImpReg && !isInlineAsm()) {
+ while (OpNo && Operands[OpNo-1].isReg() && Operands[OpNo-1].isImplicit()) {
+ --OpNo;
+ if (RegInfo)
+ Operands[OpNo].RemoveRegOperandFromRegInfo();
}
}
-
- // Otherwise, we have to insert a real operand before any implicit ones.
- unsigned OpNo = Operands.size()-NumImplicitOps;
- // If this instruction isn't embedded into a function, then we don't need to
- // update any operand lists.
- if (RegInfo == 0) {
- // Simple insertion, no reginfo update needed for other register operands.
- Operands.insert(Operands.begin()+OpNo, Op);
- Operands[OpNo].ParentMI = this;
-
- // Do explicitly set the reginfo for this operand though, to ensure the
- // next/prev fields are properly nulled out.
- if (Operands[OpNo].isReg()) {
- Operands[OpNo].AddRegOperandToRegInfo(0);
- // If the register operand is flagged as early, mark the operand as such
- if (MCID->getOperandConstraint(OpNo, MCOI::EARLY_CLOBBER) != -1)
- Operands[OpNo].setIsEarlyClobber(true);
- }
+ // OpNo now points at the desired insertion point. Unless this is a variadic
+ // instruction, only implicit regs are allowed beyond MCID->getNumOperands().
+ assert((isImpReg || MCID->isVariadic() || OpNo < MCID->getNumOperands()) &&
+ "Trying to add an operand to a machine instr that is already done!");
- } else if (Operands.size()+1 <= Operands.capacity()) {
- // Otherwise, we have to remove register operands from their register use
- // list, add the operand, then add the register operands back to their use
- // list. This also must handle the case when the operand list reallocates
- // to somewhere else.
-
- // If insertion of this operand won't cause reallocation of the operand
- // list, just remove the implicit operands, add the operand, then re-add all
- // the rest of the operands.
- for (unsigned i = OpNo, e = Operands.size(); i != e; ++i) {
- assert(Operands[i].isReg() && "Should only be an implicit reg!");
- Operands[i].RemoveRegOperandFromRegInfo();
- }
-
- // Add the operand. If it is a register, add it to the reg list.
- Operands.insert(Operands.begin()+OpNo, Op);
- Operands[OpNo].ParentMI = this;
-
- if (Operands[OpNo].isReg()) {
- Operands[OpNo].AddRegOperandToRegInfo(RegInfo);
- // If the register operand is flagged as early, mark the operand as such
- if (MCID->getOperandConstraint(OpNo, MCOI::EARLY_CLOBBER) != -1)
- Operands[OpNo].setIsEarlyClobber(true);
- }
-
- // Re-add all the implicit ops.
- for (unsigned i = OpNo+1, e = Operands.size(); i != e; ++i) {
+ // All operands from OpNo have been removed from RegInfo. If the Operands
+ // backing store needs to be reallocated, we also need to remove any other
+ // register operands.
+ if (Reallocate)
+ for (unsigned i = 0; i != OpNo; ++i)
+ if (Operands[i].isReg())
+ Operands[i].RemoveRegOperandFromRegInfo();
+
+ // Insert the new operand at OpNo.
+ Operands.insert(Operands.begin() + OpNo, Op);
+ Operands[OpNo].ParentMI = this;
+
+ // If the Operands backing store was reallocated above, re-add the register
+ // operands that come before OpNo.
+ if (Reallocate)
+ for (unsigned i = 0; i != OpNo; ++i)
+ if (Operands[i].isReg())
+ Operands[i].AddRegOperandToRegInfo(RegInfo);
+
+ // When adding a register operand, tell RegInfo about it.
+ if (Operands[OpNo].isReg()) {
+ // Add the new operand to RegInfo, even when RegInfo is NULL.
+ // This will initialize the linked list pointers.
+ Operands[OpNo].AddRegOperandToRegInfo(RegInfo);
+ // If the register operand is flagged as early, mark the operand as such.
+ if (MCID->getOperandConstraint(OpNo, MCOI::EARLY_CLOBBER) != -1)
+ Operands[OpNo].setIsEarlyClobber(true);
+ }
+
+ // Re-add all the implicit ops.
+ if (RegInfo) {
+ for (unsigned i = OpNo + 1, e = Operands.size(); i != e; ++i) {
assert(Operands[i].isReg() && "Should only be an implicit reg!");
Operands[i].AddRegOperandToRegInfo(RegInfo);
}
- } else {
- // Otherwise, we will be reallocating the operand list. Remove all reg
- // operands from their list, then readd them after the operand list is
- // reallocated.
- RemoveRegOperandsFromUseLists();
-
- Operands.insert(Operands.begin()+OpNo, Op);
- Operands[OpNo].ParentMI = this;
-
- // Re-add all the operands.
- AddRegOperandsToUseLists(*RegInfo);
-
- // If the register operand is flagged as early, mark the operand as such
- if (Operands[OpNo].isReg()
- && MCID->getOperandConstraint(OpNo, MCOI::EARLY_CLOBBER) != -1)
- Operands[OpNo].setIsEarlyClobber(true);
}
}
@@ -709,13 +684,13 @@ void MachineInstr::addOperand(const MachineOperand &Op) {
///
void MachineInstr::RemoveOperand(unsigned OpNo) {
assert(OpNo < Operands.size() && "Invalid operand number");
-
+
// Special case removing the last one.
if (OpNo == Operands.size()-1) {
// If needed, remove from the reg def/use list.
if (Operands.back().isReg() && Operands.back().isOnRegUseList())
Operands.back().RemoveRegOperandFromRegInfo();
-
+
Operands.pop_back();
return;
}
@@ -730,7 +705,7 @@ void MachineInstr::RemoveOperand(unsigned OpNo) {
Operands[i].RemoveRegOperandFromRegInfo();
}
}
-
+
Operands.erase(Operands.begin()+OpNo);
if (RegInfo) {
@@ -827,15 +802,6 @@ void MachineInstr::eraseFromParent() {
}
-/// OperandComplete - Return true if it's illegal to add a new operand
-///
-bool MachineInstr::OperandsComplete() const {
- unsigned short NumOperands = MCID->getNumOperands();
- if (!MCID->isVariadic() && getNumOperands()-NumImplicitOps >= NumOperands)
- return true; // Broken: we have all the operands of this instruction!
- return false;
-}
-
/// getNumExplicitOperands - Returns the number of non-implicit operands.
///
unsigned MachineInstr::getNumExplicitOperands() const {
@@ -860,6 +826,67 @@ bool MachineInstr::isStackAligningInlineAsm() const {
return false;
}
+int MachineInstr::findInlineAsmFlagIdx(unsigned OpIdx,
+ unsigned *GroupNo) const {
+ assert(isInlineAsm() && "Expected an inline asm instruction");
+ assert(OpIdx < getNumOperands() && "OpIdx out of range");
+
+ // Ignore queries about the initial operands.
+ if (OpIdx < InlineAsm::MIOp_FirstOperand)
+ return -1;
+
+ unsigned Group = 0;
+ unsigned NumOps;
+ for (unsigned i = InlineAsm::MIOp_FirstOperand, e = getNumOperands(); i < e;
+ i += NumOps) {
+ const MachineOperand &FlagMO = getOperand(i);
+ // If we reach the implicit register operands, stop looking.
+ if (!FlagMO.isImm())
+ return -1;
+ NumOps = 1 + InlineAsm::getNumOperandRegisters(FlagMO.getImm());
+ if (i + NumOps > OpIdx) {
+ if (GroupNo)
+ *GroupNo = Group;
+ return i;
+ }
+ ++Group;
+ }
+ return -1;
+}
+
+const TargetRegisterClass*
+MachineInstr::getRegClassConstraint(unsigned OpIdx,
+ const TargetInstrInfo *TII,
+ const TargetRegisterInfo *TRI) const {
+ // Most opcodes have fixed constraints in their MCInstrDesc.
+ if (!isInlineAsm())
+ return TII->getRegClass(getDesc(), OpIdx, TRI);
+
+ if (!getOperand(OpIdx).isReg())
+ return NULL;
+
+ // For tied uses on inline asm, get the constraint from the def.
+ unsigned DefIdx;
+ if (getOperand(OpIdx).isUse() && isRegTiedToDefOperand(OpIdx, &DefIdx))
+ OpIdx = DefIdx;
+
+ // Inline asm stores register class constraints in the flag word.
+ int FlagIdx = findInlineAsmFlagIdx(OpIdx);
+ if (FlagIdx < 0)
+ return NULL;
+
+ unsigned Flag = getOperand(FlagIdx).getImm();
+ unsigned RCID;
+ if (InlineAsm::hasRegClassConstraint(Flag, RCID))
+ return TRI->getRegClass(RCID);
+
+ // Assume that all registers in a memory operand are pointers.
+ if (InlineAsm::getKind(Flag) == InlineAsm::Kind_Mem)
+ return TRI->getPointerRegClass();
+
+ return NULL;
+}
+
/// findRegisterUseOperandIdx() - Returns the MachineOperand that is a use of
/// the specific register or -1 if it is not found. It further tightens
/// the search criteria to a use that kills the register if isKill is true.
@@ -901,7 +928,8 @@ MachineInstr::readsWritesVirtualRegister(unsigned Reg,
Ops->push_back(i);
if (MO.isUse())
Use |= !MO.isUndef();
- else if (MO.getSubReg())
+ else if (MO.getSubReg() && !MO.isUndef())
+ // A partial <def,undef> doesn't count as reading the register.
PartDef = true;
else
FullDef = true;
@@ -941,6 +969,10 @@ MachineInstr::findRegisterDefOperandIdx(unsigned Reg, bool isDead, bool Overlap,
/// operand list that is used to represent the predicate. It returns -1 if
/// none is found.
int MachineInstr::findFirstPredOperandIdx() const {
+ // Don't call MCID.findFirstPredOperandIdx() because this variant
+ // is sometimes called on an instruction that's not yet complete, and
+ // so the number of operands is less than the MCID indicates. In
+ // particular, the PTX target does this.
const MCInstrDesc &MCID = getDesc();
if (MCID.isPredicable()) {
for (unsigned i = 0, e = getNumOperands(); i != e; ++i)
@@ -950,7 +982,7 @@ int MachineInstr::findFirstPredOperandIdx() const {
return -1;
}
-
+
/// isRegTiedToUseOperand - Given the index of a register def operand,
/// check if the register def is tied to a source operand, due to either
/// two-address elimination or inline assembly constraints. Returns the
@@ -964,23 +996,13 @@ isRegTiedToUseOperand(unsigned DefOpIdx, unsigned *UseOpIdx) const {
return false;
// Determine the actual operand index that corresponds to this index.
unsigned DefNo = 0;
- unsigned DefPart = 0;
- for (unsigned i = InlineAsm::MIOp_FirstOperand, e = getNumOperands();
- i < e; ) {
- const MachineOperand &FMO = getOperand(i);
- // After the normal asm operands there may be additional imp-def regs.
- if (!FMO.isImm())
- return false;
- // Skip over this def.
- unsigned NumOps = InlineAsm::getNumOperandRegisters(FMO.getImm());
- unsigned PrevDef = i + 1;
- i = PrevDef + NumOps;
- if (i > DefOpIdx) {
- DefPart = DefOpIdx - PrevDef;
- break;
- }
- ++DefNo;
- }
+ int FlagIdx = findInlineAsmFlagIdx(DefOpIdx, &DefNo);
+ if (FlagIdx < 0)
+ return false;
+
+ // Which part of the group is DefOpIdx?
+ unsigned DefPart = DefOpIdx - (FlagIdx + 1);
+
for (unsigned i = InlineAsm::MIOp_FirstOperand, e = getNumOperands();
i != e; ++i) {
const MachineOperand &FMO = getOperand(i);
@@ -1024,20 +1046,10 @@ isRegTiedToDefOperand(unsigned UseOpIdx, unsigned *DefOpIdx) const {
return false;
// Find the flag operand corresponding to UseOpIdx
- unsigned FlagIdx, NumOps=0;
- for (FlagIdx = InlineAsm::MIOp_FirstOperand;
- FlagIdx < UseOpIdx; FlagIdx += NumOps+1) {
- const MachineOperand &UFMO = getOperand(FlagIdx);
- // After the normal asm operands there may be additional imp-def regs.
- if (!UFMO.isImm())
- return false;
- NumOps = InlineAsm::getNumOperandRegisters(UFMO.getImm());
- assert(NumOps < getNumOperands() && "Invalid inline asm flag");
- if (UseOpIdx < FlagIdx+NumOps+1)
- break;
- }
- if (FlagIdx >= UseOpIdx)
+ int FlagIdx = findInlineAsmFlagIdx(UseOpIdx);
+ if (FlagIdx < 0)
return false;
+
const MachineOperand &UFMO = getOperand(FlagIdx);
unsigned DefNo;
if (InlineAsm::isUseOperandTiedToDef(UFMO.getImm(), DefNo)) {
@@ -1211,7 +1223,7 @@ bool MachineInstr::hasVolatileMemoryRef() const {
// conservatively assume it wasn't preserved.
if (memoperands_empty())
return true;
-
+
// Check the memory reference information for volatile references.
for (mmo_iterator I = memoperands_begin(), E = memoperands_end(); I != E; ++I)
if ((*I)->isVolatile())
@@ -1318,7 +1330,7 @@ void MachineInstr::dump() const {
dbgs() << " " << *this;
}
-static void printDebugLoc(DebugLoc DL, const MachineFunction *MF,
+static void printDebugLoc(DebugLoc DL, const MachineFunction *MF,
raw_ostream &CommentOS) {
const LLVMContext &Ctx = MF->getFunction()->getContext();
if (!DL.isUnknown()) { // Print source line info.
@@ -1380,7 +1392,7 @@ void MachineInstr::print(raw_ostream &OS, const TargetMachine *TM) const {
unsigned AsmDescOp = ~0u;
unsigned AsmOpCount = 0;
- if (isInlineAsm()) {
+ if (isInlineAsm() && e >= InlineAsm::MIOp_FirstOperand) {
// Print asm string.
OS << " ";
getOperand(InlineAsm::MIOp_AsmString).print(OS, TM);
@@ -1451,18 +1463,28 @@ void MachineInstr::print(raw_ostream &OS, const TargetMachine *TM) const {
OS << '$' << AsmOpCount++;
unsigned Flag = MO.getImm();
switch (InlineAsm::getKind(Flag)) {
- case InlineAsm::Kind_RegUse: OS << ":[reguse]"; break;
- case InlineAsm::Kind_RegDef: OS << ":[regdef]"; break;
- case InlineAsm::Kind_RegDefEarlyClobber: OS << ":[regdef-ec]"; break;
- case InlineAsm::Kind_Clobber: OS << ":[clobber]"; break;
- case InlineAsm::Kind_Imm: OS << ":[imm]"; break;
- case InlineAsm::Kind_Mem: OS << ":[mem]"; break;
- default: OS << ":[??" << InlineAsm::getKind(Flag) << ']'; break;
+ case InlineAsm::Kind_RegUse: OS << ":[reguse"; break;
+ case InlineAsm::Kind_RegDef: OS << ":[regdef"; break;
+ case InlineAsm::Kind_RegDefEarlyClobber: OS << ":[regdef-ec"; break;
+ case InlineAsm::Kind_Clobber: OS << ":[clobber"; break;
+ case InlineAsm::Kind_Imm: OS << ":[imm"; break;
+ case InlineAsm::Kind_Mem: OS << ":[mem"; break;
+ default: OS << ":[??" << InlineAsm::getKind(Flag); break;
+ }
+
+ unsigned RCID = 0;
+ if (InlineAsm::hasRegClassConstraint(Flag, RCID)) {
+ if (TM)
+ OS << ':' << TM->getRegisterInfo()->getRegClass(RCID)->getName();
+ else
+ OS << ":RC" << RCID;
}
unsigned TiedTo = 0;
if (InlineAsm::isUseOperandTiedToDef(Flag, TiedTo))
- OS << " [tiedto:$" << TiedTo << ']';
+ OS << " tiedto:$" << TiedTo;
+
+ OS << ']';
// Compute the index of the next operand descriptor.
AsmDescOp += 1 + InlineAsm::getNumOperandRegisters(Flag);
@@ -1516,7 +1538,19 @@ void MachineInstr::print(raw_ostream &OS, const TargetMachine *TM) const {
}
// Print debug location information.
- if (!debugLoc.isUnknown() && MF) {
+ if (isDebugValue() && getOperand(e - 1).isMetadata()) {
+ if (!HaveSemi) OS << ";"; HaveSemi = true;
+ DIVariable DV(getOperand(e - 1).getMetadata());
+ OS << " line no:" << DV.getLineNumber();
+ if (MDNode *InlinedAt = DV.getInlinedAt()) {
+ DebugLoc InlinedAtDL = DebugLoc::getFromDILocation(InlinedAt);
+ if (!InlinedAtDL.isUnknown()) {
+ OS << " inlined @[ ";
+ printDebugLoc(InlinedAtDL, MF, OS);
+ OS << " ]";
+ }
+ }
+ } else if (!debugLoc.isUnknown() && MF) {
if (!HaveSemi) OS << ";"; HaveSemi = true;
OS << " dbg:";
printDebugLoc(debugLoc, MF, OS);
@@ -1627,7 +1661,7 @@ bool MachineInstr::addRegisterDead(unsigned IncomingReg,
// new implicit operand if required.
if (Found || !AddIfNotFound)
return Found;
-
+
addOperand(MachineOperand::CreateReg(IncomingReg,
true /*IsDef*/,
true /*IsImp*/,
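
The findInlineAsmFlagIdx()/getRegClassConstraint() pair added above share one
convention worth spelling out: an INLINEASM MachineInstr carries an immediate
flag word in front of each group of operands, with the group size encoded in
the flag. A hedged standalone walk, mirroring what the new helper does (MI is
assumed to be an inline asm instruction provided by the caller):

  // Visit each inline asm operand group, as findInlineAsmFlagIdx() does.
  void walkAsmGroups(const MachineInstr &MI) {
    for (unsigned i = InlineAsm::MIOp_FirstOperand, e = MI.getNumOperands();
         i < e; ) {
      const MachineOperand &FlagMO = MI.getOperand(i);
      if (!FlagMO.isImm())
        break;  // trailing implicit register operands; no more groups
      unsigned Flag = FlagMO.getImm();
      // Operands i+1 .. i+NumOps form this group. The flag also encodes the
      // kind (reguse, regdef, mem, ...) and, after this patch, an optional
      // register class ID readable via InlineAsm::hasRegClassConstraint().
      unsigned NumOps = InlineAsm::getNumOperandRegisters(Flag);
      i += 1 + NumOps;
    }
  }
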
diff --git a/lib/CodeGen/MachineLICM.cpp b/lib/CodeGen/MachineLICM.cpp
index 722ceb2..a1f80d5 100644
--- a/lib/CodeGen/MachineLICM.cpp
+++ b/lib/CodeGen/MachineLICM.cpp
@@ -37,10 +37,16 @@
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
+static cl::opt<bool>
+AvoidSpeculation("avoid-speculation",
+ cl::desc("MachineLICM should avoid speculation"),
+ cl::init(false), cl::Hidden);
+
STATISTIC(NumHoisted,
"Number of machine instructions hoisted out of loops");
STATISTIC(NumLowRP,
@@ -91,6 +97,17 @@ namespace {
// For each opcode, keep a list of potential CSE instructions.
DenseMap<unsigned, std::vector<const MachineInstr*> > CSEMap;
+ enum {
+ SpeculateFalse = 0,
+ SpeculateTrue = 1,
+ SpeculateUnknown = 2
+ };
+
+ // If an MBB does not dominate all of the loop's exiting blocks, then it
+ // may not be safe to hoist loads from it.
+ // Tri-state: 0 - false, 1 - true, 2 - unknown
+ unsigned SpeculationState;
+
public:
static char ID; // Pass identification, replacement for typeid
MachineLICM() :
@@ -194,6 +211,10 @@ namespace {
/// hoist the given loop invariant.
bool IsProfitableToHoist(MachineInstr &MI);
+ /// IsGuaranteedToExecute - Check if this mbb is guaranteed to execute.
+ /// If not, then a load from this mbb may not be safe to hoist.
+ bool IsGuaranteedToExecute(MachineBasicBlock *BB);
+
/// HoistRegion - Walk the specified region of the CFG (defined by all
/// blocks dominated by the specified block, and that are in the current
/// loop) in depth first order w.r.t the DominatorTree. This allows us to
@@ -202,6 +223,13 @@ namespace {
///
void HoistRegion(MachineDomTreeNode *N, bool IsHeader = false);
+ /// getRegisterClassIDAndCost - For a given MI, register, and the operand
+ /// index, return the ID and cost of its representative register class by
+ /// reference.
+ void getRegisterClassIDAndCost(const MachineInstr *MI,
+ unsigned Reg, unsigned OpIdx,
+ unsigned &RCId, unsigned &RCCost) const;
+
/// InitRegPressure - Find all virtual register references that are liveout
/// of the preheader to initialize the starting "register pressure". Note
/// this does not count live through (livein but not used) registers.
@@ -229,6 +257,10 @@ namespace {
bool EliminateCSE(MachineInstr *MI,
DenseMap<unsigned, std::vector<const MachineInstr*> >::iterator &CI);
+ /// MayCSE - Return true if the given instruction will be CSE'd if it's
+ /// hoisted out of the loop.
+ bool MayCSE(MachineInstr *MI);
+
/// Hoist - When an instruction is found to only use loop invariant operands
/// that is safe to hoist, this instruction is called to do the dirty work.
/// It returns true if the instruction is hoisted.
@@ -441,6 +473,12 @@ void MachineLICM::HoistRegionPostRA() {
const std::vector<MachineBasicBlock*> Blocks = CurLoop->getBlocks();
for (unsigned i = 0, e = Blocks.size(); i != e; ++i) {
MachineBasicBlock *BB = Blocks[i];
+
+ // If the header of the loop containing this basic block is a landing pad,
+ // then don't try to hoist instructions out of this loop.
+ const MachineLoop *ML = MLI->getLoopFor(BB);
+ if (ML && ML->getHeader()->isLandingPad()) continue;
+
// Conservatively treat live-in's as an external def.
// FIXME: That means a reload that's reused in successor block(s) will not
// be LICM'ed.
@@ -452,6 +490,7 @@ void MachineLICM::HoistRegionPostRA() {
++PhysRegDefs[*AS];
}
+ SpeculationState = SpeculateUnknown;
for (MachineBasicBlock::iterator
MII = BB->begin(), E = BB->end(); MII != E; ++MII) {
MachineInstr *MI = &*MII;
@@ -545,6 +584,27 @@ void MachineLICM::HoistPostRA(MachineInstr *MI, unsigned Def) {
Changed = true;
}
+// IsGuaranteedToExecute - Check if this mbb is guaranteed to execute.
+// If not, then a load from this mbb may not be safe to hoist.
+bool MachineLICM::IsGuaranteedToExecute(MachineBasicBlock *BB) {
+ if (SpeculationState != SpeculateUnknown)
+ return SpeculationState == SpeculateFalse;
+
+ if (BB != CurLoop->getHeader()) {
+ // Check loop exiting blocks.
+ SmallVector<MachineBasicBlock*, 8> CurrentLoopExitingBlocks;
+ CurLoop->getExitingBlocks(CurrentLoopExitingBlocks);
+ for (unsigned i = 0, e = CurrentLoopExitingBlocks.size(); i != e; ++i)
+ if (!DT->dominates(BB, CurrentLoopExitingBlocks[i])) {
+ SpeculationState = SpeculateTrue;
+ return false;
+ }
+ }
+
+ SpeculationState = SpeculateFalse;
+ return true;
+}
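+// Illustrative example (editor's sketch, not part of the original patch):
+// in a loop shaped like
+//
+//   preheader -> header -> if.then -> latch -> header (back edge)
+//                header ------------> latch -> exit
+//
+// if.then does not dominate the exiting block latch, so iterations taking
+// the header->latch edge never execute it. Hoisting a load from if.then to
+// the preheader would speculate that load, and this function accordingly
+// returns false for if.then.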
+
/// HoistRegion - Walk the specified region of the CFG (defined by all blocks
/// dominated by the specified block, and that are in the current loop) in depth
/// first order w.r.t the DominatorTree. This allows us to visit definitions
@@ -554,6 +614,11 @@ void MachineLICM::HoistRegion(MachineDomTreeNode *N, bool IsHeader) {
assert(N != 0 && "Null dominator tree node?");
MachineBasicBlock *BB = N->getBlock();
+ // If the header of the loop containing this basic block is a landing pad,
+ // then don't try to hoist instructions out of this loop.
+ const MachineLoop *ML = MLI->getLoopFor(BB);
+ if (ML && ML->getHeader()->isLandingPad()) return;
+
// If this subregion is not in the top level loop at all, exit.
if (!CurLoop->contains(BB)) return;
@@ -571,6 +636,7 @@ void MachineLICM::HoistRegion(MachineDomTreeNode *N, bool IsHeader) {
// Remember livein register pressure.
BackTrace.push_back(RegPressure);
+ SpeculationState = SpeculateUnknown;
for (MachineBasicBlock::iterator
MII = BB->begin(), E = BB->end(); MII != E; ) {
MachineBasicBlock::iterator NextMII = MII; ++NextMII;
@@ -596,6 +662,23 @@ static bool isOperandKill(const MachineOperand &MO, MachineRegisterInfo *MRI) {
return MO.isKill() || MRI->hasOneNonDBGUse(MO.getReg());
}
+/// getRegisterClassIDAndCost - For a given MI, register, and the operand
+/// index, return the ID and cost of its representative register class.
+void
+MachineLICM::getRegisterClassIDAndCost(const MachineInstr *MI,
+ unsigned Reg, unsigned OpIdx,
+ unsigned &RCId, unsigned &RCCost) const {
+ const TargetRegisterClass *RC = MRI->getRegClass(Reg);
+ EVT VT = *RC->vt_begin();
+ if (VT == MVT::untyped) {
+ RCId = RC->getID();
+ RCCost = 1;
+ } else {
+ RCId = TLI->getRepRegClassFor(VT)->getID();
+ RCCost = TLI->getRepRegClassCostFor(VT);
+ }
+}
+
/// InitRegPressure - Find all virtual register references that are liveout of
/// the preheader to initialize the starting "register pressure". Note this
/// does not count live through (livein but not used) registers.
@@ -625,18 +708,17 @@ void MachineLICM::InitRegPressure(MachineBasicBlock *BB) {
continue;
bool isNew = RegSeen.insert(Reg);
- const TargetRegisterClass *RC = MRI->getRegClass(Reg);
- EVT VT = *RC->vt_begin();
- unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
+ unsigned RCId, RCCost;
+ getRegisterClassIDAndCost(MI, Reg, i, RCId, RCCost);
if (MO.isDef())
- RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
+ RegPressure[RCId] += RCCost;
else {
bool isKill = isOperandKill(MO, MRI);
if (isNew && !isKill)
// Haven't seen this, it must be a livein.
- RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
+ RegPressure[RCId] += RCCost;
else if (!isNew && isKill)
- RegPressure[RCId] -= TLI->getRepRegClassCostFor(VT);
+ RegPressure[RCId] -= RCCost;
}
}
}
@@ -661,11 +743,8 @@ void MachineLICM::UpdateRegPressure(const MachineInstr *MI) {
if (MO.isDef())
Defs.push_back(Reg);
else if (!isNew && isOperandKill(MO, MRI)) {
- const TargetRegisterClass *RC = MRI->getRegClass(Reg);
- EVT VT = *RC->vt_begin();
- unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
- unsigned RCCost = TLI->getRepRegClassCostFor(VT);
-
+ unsigned RCId, RCCost;
+ getRegisterClassIDAndCost(MI, Reg, i, RCId, RCCost);
if (RCCost > RegPressure[RCId])
RegPressure[RCId] = 0;
else
@@ -673,13 +752,13 @@ void MachineLICM::UpdateRegPressure(const MachineInstr *MI) {
}
}
+ unsigned Idx = 0;
while (!Defs.empty()) {
unsigned Reg = Defs.pop_back_val();
- const TargetRegisterClass *RC = MRI->getRegClass(Reg);
- EVT VT = *RC->vt_begin();
- unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
- unsigned RCCost = TLI->getRepRegClassCostFor(VT);
+ unsigned RCId, RCCost;
+ getRegisterClassIDAndCost(MI, Reg, Idx, RCId, RCCost);
RegPressure[RCId] += RCCost;
+ ++Idx;
}
}
@@ -691,7 +770,14 @@ bool MachineLICM::IsLICMCandidate(MachineInstr &I) {
bool DontMoveAcrossStore = true;
if (!I.isSafeToMove(TII, AA, DontMoveAcrossStore))
return false;
-
+
+ // If it is a load, check that it is guaranteed to execute by making sure
+ // that it dominates all exiting blocks. If it doesn't, then there is a path
+ // out of the loop which does not execute this load, so we can't hoist it.
+ // Stores and side effects are already checked by isSafeToMove.
+ if (I.getDesc().mayLoad() && !IsGuaranteedToExecute(I.getParent()))
+ return false;
+
return true;
}
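
A hypothetical source-level loop showing the hazard this check closes: the
guarded load does not dominate the loop's exits, so hoisting it would execute
it on paths where the original program never touches p.

    // Not from the patch; sum_guarded is invented for illustration.
    int sum_guarded(const int *p, int n, bool (*cond)(int)) {
      int s = 0;
      for (int i = 0; i < n; ++i) {
        if (cond(i))
          s += *p; // conditional load: not guaranteed to execute
      }
      return s;    // if p is invalid whenever cond() is false, a hoisted
    }              // load of *p would fault where the original would not
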
@@ -879,10 +965,8 @@ void MachineLICM::UpdateBackTraceRegPressure(const MachineInstr *MI) {
if (!TargetRegisterInfo::isVirtualRegister(Reg))
continue;
- const TargetRegisterClass *RC = MRI->getRegClass(Reg);
- EVT VT = *RC->vt_begin();
- unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
- unsigned RCCost = TLI->getRepRegClassCostFor(VT);
+ unsigned RCId, RCCost;
+ getRegisterClassIDAndCost(MI, Reg, i, RCId, RCCost);
if (MO.isDef()) {
DenseMap<unsigned, int>::iterator CI = Cost.find(RCId);
if (CI != Cost.end())
@@ -941,16 +1025,15 @@ bool MachineLICM::IsProfitableToHoist(MachineInstr &MI) {
unsigned Reg = MO.getReg();
if (!TargetRegisterInfo::isVirtualRegister(Reg))
continue;
+
+ unsigned RCId, RCCost;
+ getRegisterClassIDAndCost(&MI, Reg, i, RCId, RCCost);
if (MO.isDef()) {
if (HasHighOperandLatency(MI, i, Reg)) {
++NumHighLatency;
return true;
}
- const TargetRegisterClass *RC = MRI->getRegClass(Reg);
- EVT VT = *RC->vt_begin();
- unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
- unsigned RCCost = TLI->getRepRegClassCostFor(VT);
DenseMap<unsigned, int>::iterator CI = Cost.find(RCId);
if (CI != Cost.end())
CI->second += RCCost;
@@ -960,10 +1043,6 @@ bool MachineLICM::IsProfitableToHoist(MachineInstr &MI) {
// If a virtual register use is a kill, hoisting it out of the loop
// may actually reduce register pressure or be register pressure
// neutral.
- const TargetRegisterClass *RC = MRI->getRegClass(Reg);
- EVT VT = *RC->vt_begin();
- unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
- unsigned RCCost = TLI->getRepRegClassCostFor(VT);
DenseMap<unsigned, int>::iterator CI = Cost.find(RCId);
if (CI != Cost.end())
CI->second -= RCCost;
@@ -979,6 +1058,13 @@ bool MachineLICM::IsProfitableToHoist(MachineInstr &MI) {
return true;
}
+ // Do not "speculate" in a high register pressure situation. If an
+ // instruction is not guaranteed to be executed in the loop, it's best to be
+ // conservative.
+ if (AvoidSpeculation &&
+ (!IsGuaranteedToExecute(MI.getParent()) && !MayCSE(&MI)))
+ return false;
+
// In a high register pressure situation, only hoist if the instruction is
// going to be remat'ed.
if (!TII->isTriviallyReMaterializable(&MI, AA) &&
@@ -1116,6 +1202,20 @@ bool MachineLICM::EliminateCSE(MachineInstr *MI,
return false;
}
+/// MayCSE - Return true if the given instruction will be CSE'd if it's
+/// hoisted out of the loop.
+bool MachineLICM::MayCSE(MachineInstr *MI) {
+ unsigned Opcode = MI->getOpcode();
+ DenseMap<unsigned, std::vector<const MachineInstr*> >::iterator
+ CI = CSEMap.find(Opcode);
+ // Do not CSE implicit_def so ProcessImplicitDefs can properly propagate
+ // the undef property onto uses.
+ if (CI == CSEMap.end() || MI->isImplicitDef())
+ return false;
+
+ return LookForDuplicate(MI, CI->second) != 0;
+}
+
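
Sketch (not LLVM code): MayCSE reduces to a bucket-by-opcode scan. Instr and
isDuplicate are simplified stand-ins for MachineInstr and LookForDuplicate.

    #include <unordered_map>
    #include <vector>

    struct Instr {
      unsigned Opcode;
      int Operand;
    };

    static bool isDuplicate(const Instr &A, const Instr &B) {
      return A.Opcode == B.Opcode && A.Operand == B.Operand;
    }

    // Hoisting is worthwhile when an identical instruction already sits
    // in the bucket for this opcode (i.e. it was hoisted earlier).
    static bool mayCSE(
        const Instr &MI,
        const std::unordered_map<unsigned, std::vector<Instr>> &CSEMap) {
      auto CI = CSEMap.find(MI.Opcode);
      if (CI == CSEMap.end())
        return false;
      for (const Instr &Prev : CI->second)
        if (isDuplicate(MI, Prev))
          return true;
      return false;
    }

    int main() {
      std::unordered_map<unsigned, std::vector<Instr>> CSEMap;
      CSEMap[7].push_back(Instr{7, 42});
      return mayCSE(Instr{7, 42}, CSEMap) ? 0 : 1; // exits 0
    }
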
/// Hoist - When an instruction is found to use only loop invariant operands
/// that are safe to hoist, this instruction is called to do the dirty work.
///
diff --git a/lib/CodeGen/MachineModuleInfo.cpp b/lib/CodeGen/MachineModuleInfo.cpp
index fadc594..80c4854 100644
--- a/lib/CodeGen/MachineModuleInfo.cpp
+++ b/lib/CodeGen/MachineModuleInfo.cpp
@@ -17,9 +17,7 @@
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/Passes.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetOptions.h"
+#include "llvm/MC/MCObjectFileInfo.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/ADT/PointerUnion.h"
#include "llvm/Support/Dwarf.h"
@@ -254,11 +252,12 @@ void MMIAddrLabelMapCallbackPtr::allUsesReplacedWith(Value *V2) {
//===----------------------------------------------------------------------===//
MachineModuleInfo::MachineModuleInfo(const MCAsmInfo &MAI,
- const TargetAsmInfo *TAI)
-: ImmutablePass(ID), Context(MAI, TAI),
- ObjFileMMI(0),
- CurCallSite(0), CallsEHReturn(0), CallsUnwindInit(0), DbgInfoAvailable(false),
- CallsExternalVAFunctionWithFloatingPointArguments(false) {
+ const MCRegisterInfo &MRI,
+ const MCObjectFileInfo *MOFI)
+ : ImmutablePass(ID), Context(MAI, MRI, MOFI),
+ ObjFileMMI(0), CompactUnwindEncoding(0), CurCallSite(0), CallsEHReturn(0),
+ CallsUnwindInit(0), DbgInfoAvailable(false),
+ CallsExternalVAFunctionWithFloatingPointArguments(false) {
initializeMachineModuleInfoPass(*PassRegistry::getPassRegistry());
// Always emit some info, by default "no personality" info.
Personalities.push_back(NULL);
@@ -267,7 +266,8 @@ MachineModuleInfo::MachineModuleInfo(const MCAsmInfo &MAI,
}
MachineModuleInfo::MachineModuleInfo()
-: ImmutablePass(ID), Context(*(MCAsmInfo*)0, NULL) {
+ : ImmutablePass(ID),
+ Context(*(MCAsmInfo*)0, *(MCRegisterInfo*)0, (MCObjectFileInfo*)0) {
assert(0 && "This MachineModuleInfo constructor should never be called, MMI "
"should always be explicitly constructed by LLVMTargetMachine");
abort();
@@ -311,6 +311,7 @@ void MachineModuleInfo::EndFunction() {
FilterEnds.clear();
CallsEHReturn = 0;
CallsUnwindInit = 0;
+ CompactUnwindEncoding = 0;
VariableDbgInfo.clear();
}
@@ -426,8 +427,9 @@ void MachineModuleInfo::addPersonality(MachineBasicBlock *LandingPad,
/// addCatchTypeInfo - Provide the catch typeinfo for a landing pad.
///
-void MachineModuleInfo::addCatchTypeInfo(MachineBasicBlock *LandingPad,
- std::vector<const GlobalVariable *> &TyInfo) {
+void MachineModuleInfo::
+addCatchTypeInfo(MachineBasicBlock *LandingPad,
+ ArrayRef<const GlobalVariable *> TyInfo) {
LandingPadInfo &LP = getOrCreateLandingPadInfo(LandingPad);
for (unsigned N = TyInfo.size(); N; --N)
LP.TypeIds.push_back(getTypeIDFor(TyInfo[N - 1]));
@@ -435,8 +437,9 @@ void MachineModuleInfo::addCatchTypeInfo(MachineBasicBlock *LandingPad,
/// addFilterTypeInfo - Provide the filter typeinfo for a landing pad.
///
-void MachineModuleInfo::addFilterTypeInfo(MachineBasicBlock *LandingPad,
- std::vector<const GlobalVariable *> &TyInfo) {
+void MachineModuleInfo::
+addFilterTypeInfo(MachineBasicBlock *LandingPad,
+ ArrayRef<const GlobalVariable *> TyInfo) {
LandingPadInfo &LP = getOrCreateLandingPadInfo(LandingPad);
std::vector<unsigned> IdsInFilter(TyInfo.size());
for (unsigned I = 0, E = TyInfo.size(); I != E; ++I)
@@ -496,6 +499,14 @@ void MachineModuleInfo::TidyLandingPads(DenseMap<MCSymbol*, uintptr_t> *LPMap) {
}
}
+/// setCallSiteLandingPad - Map the landing pad's EH symbol to the call site
+/// indexes.
+void MachineModuleInfo::setCallSiteLandingPad(MCSymbol *Sym,
+ ArrayRef<unsigned> Sites) {
+ for (unsigned I = 0, E = Sites.size(); I != E; ++I)
+ LPadToCallSiteMap[Sym].push_back(Sites[I]);
+}
+
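
Why the std::vector& to ArrayRef migration in this file helps, as a
standalone sketch (ArrayRefSketch is invented, not llvm::ArrayRef): a cheap
view type binds to a vector or a C array alike, so call sites no longer
materialize a std::vector just to satisfy the parameter type.

    #include <cstddef>
    #include <vector>

    template <typename T> class ArrayRefSketch {
      const T *Data;
      std::size_t Length;

    public:
      ArrayRefSketch(const std::vector<T> &V)
          : Data(V.data()), Length(V.size()) {}
      template <std::size_t N>
      ArrayRefSketch(const T (&A)[N]) : Data(A), Length(N) {}
      std::size_t size() const { return Length; }
      const T &operator[](std::size_t I) const { return Data[I]; }
    };

    static unsigned sum(ArrayRefSketch<unsigned> Sites) {
      unsigned S = 0;
      for (std::size_t I = 0, E = Sites.size(); I != E; ++I)
        S += Sites[I];
      return S;
    }

    int main() {
      std::vector<unsigned> V{1, 2, 3};
      unsigned A[] = {4, 5};
      return sum(V) + sum(A) == 15 ? 0 : 1; // both call sites compile
    }
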
/// getTypeIDFor - Return the type id for the specified typeinfo. This is
/// function wide.
unsigned MachineModuleInfo::getTypeIDFor(const GlobalVariable *TI) {
diff --git a/lib/CodeGen/MachineRegisterInfo.cpp b/lib/CodeGen/MachineRegisterInfo.cpp
index 4b3e64c..266ebf6 100644
--- a/lib/CodeGen/MachineRegisterInfo.cpp
+++ b/lib/CodeGen/MachineRegisterInfo.cpp
@@ -14,10 +14,11 @@
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Support/CommandLine.h"
+#include "llvm/Target/TargetMachine.h"
using namespace llvm;
-MachineRegisterInfo::MachineRegisterInfo(const TargetRegisterInfo &TRI) {
+MachineRegisterInfo::MachineRegisterInfo(const TargetRegisterInfo &TRI)
+ : TRI(&TRI), IsSSA(true) {
VRegInfo.reserve(256);
RegAllocHints.reserve(256);
UsedPhysRegs.resize(TRI.getNumRegs());
@@ -48,18 +49,47 @@ MachineRegisterInfo::setRegClass(unsigned Reg, const TargetRegisterClass *RC) {
const TargetRegisterClass *
MachineRegisterInfo::constrainRegClass(unsigned Reg,
- const TargetRegisterClass *RC) {
+ const TargetRegisterClass *RC,
+ unsigned MinNumRegs) {
const TargetRegisterClass *OldRC = getRegClass(Reg);
if (OldRC == RC)
return RC;
- const TargetRegisterClass *NewRC = getCommonSubClass(OldRC, RC);
- if (!NewRC)
+ const TargetRegisterClass *NewRC = TRI->getCommonSubClass(OldRC, RC);
+ if (!NewRC || NewRC == OldRC)
+ return NewRC;
+ if (NewRC->getNumRegs() < MinNumRegs)
return 0;
- if (NewRC != OldRC)
- setRegClass(Reg, NewRC);
+ setRegClass(Reg, NewRC);
return NewRC;
}
+bool
+MachineRegisterInfo::recomputeRegClass(unsigned Reg, const TargetMachine &TM) {
+ const TargetInstrInfo *TII = TM.getInstrInfo();
+ const TargetRegisterClass *OldRC = getRegClass(Reg);
+ const TargetRegisterClass *NewRC = TRI->getLargestLegalSuperClass(OldRC);
+
+ // Stop early if there is no room to grow.
+ if (NewRC == OldRC)
+ return false;
+
+ // Accumulate constraints from all uses.
+ for (reg_nodbg_iterator I = reg_nodbg_begin(Reg), E = reg_nodbg_end(); I != E;
+ ++I) {
+ // TRI doesn't have accurate enough information to model this yet.
+ if (I.getOperand().getSubReg())
+ return false;
+ const TargetRegisterClass *OpRC =
+ I->getRegClassConstraint(I.getOperandNo(), TII, TRI);
+ if (OpRC)
+ NewRC = TRI->getCommonSubClass(NewRC, OpRC);
+ if (!NewRC || NewRC == OldRC)
+ return false;
+ }
+ setRegClass(Reg, NewRC);
+ return true;
+}
+
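
Sketch of the constraint accumulation in recomputeRegClass, with register
classes modeled as bitmasks of allocatable registers so the common subclass
is a plain intersection. Hypothetical encoding; the real code queries TRI.

    #include <cstdint>
    #include <vector>

    typedef std::uint32_t RegClass; // bit i set => register i allocatable

    static bool recomputeClass(RegClass &RC, RegClass Largest,
                               const std::vector<RegClass> &UseConstraints) {
      RegClass NewRC = Largest;    // grow to the largest legal class...
      if (NewRC == RC)
        return false;              // ...stop early if there is no room
      for (RegClass OpRC : UseConstraints) {
        NewRC &= OpRC;             // intersect each use's constraint
        if (!NewRC || NewRC == RC)
          return false;
      }
      RC = NewRC;
      return true;
    }

    int main() {
      RegClass RC = 0x0F;
      std::vector<RegClass> Uses{0xFF, 0x3F};
      return (recomputeClass(RC, 0xFF, Uses) && RC == 0x3F) ? 0 : 1;
    }
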
/// createVirtualRegister - Create and return a new virtual register in the
/// function with the specified register class.
///
diff --git a/lib/CodeGen/MachineSink.cpp b/lib/CodeGen/MachineSink.cpp
index 916dff7..29cfb49 100644
--- a/lib/CodeGen/MachineSink.cpp
+++ b/lib/CodeGen/MachineSink.cpp
@@ -382,6 +382,25 @@ static bool AvoidsSinking(MachineInstr *MI, MachineRegisterInfo *MRI) {
return MI->isInsertSubreg() || MI->isSubregToReg() || MI->isRegSequence();
}
+/// collectDebugValues - Scan instructions following MI and collect any
+/// matching DBG_VALUEs.
+static void collectDebugValues(MachineInstr *MI,
+ SmallVector<MachineInstr *, 2> & DbgValues) {
+ DbgValues.clear();
+ if (!MI->getOperand(0).isReg())
+ return;
+
+ MachineBasicBlock::iterator DI = MI; ++DI;
+ for (MachineBasicBlock::iterator DE = MI->getParent()->end();
+ DI != DE; ++DI) {
+ if (!DI->isDebugValue())
+ return;
+ if (DI->getOperand(0).isReg() &&
+ DI->getOperand(0).getReg() == MI->getOperand(0).getReg())
+ DbgValues.push_back(DI);
+ }
+}
+
/// SinkInstruction - Determine whether it is safe to sink the specified machine
/// instruction out of its current block into a successor.
bool MachineSinking::SinkInstruction(MachineInstr *MI, bool &SawStore) {
@@ -598,10 +617,22 @@ bool MachineSinking::SinkInstruction(MachineInstr *MI, bool &SawStore) {
while (InsertPos != SuccToSinkTo->end() && InsertPos->isPHI())
++InsertPos;
+ // Collect matching debug values.
+ SmallVector<MachineInstr *, 2> DbgValuesToSink;
+ collectDebugValues(MI, DbgValuesToSink);
+
// Move the instruction.
SuccToSinkTo->splice(InsertPos, ParentBlock, MI,
++MachineBasicBlock::iterator(MI));
+ // Move debug values.
+ for (SmallVector<MachineInstr *, 2>::iterator DBI = DbgValuesToSink.begin(),
+ DBE = DbgValuesToSink.end(); DBI != DBE; ++DBI) {
+ MachineInstr *DbgMI = *DBI;
+ SuccToSinkTo->splice(InsertPos, ParentBlock, DbgMI,
+ ++MachineBasicBlock::iterator(DbgMI));
+ }
+
// Conservatively, clear any kill flags, since it's possible that they are no
// longer correct.
MI->clearKillInfo();
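
The sink-and-keep-DBG_VALUEs dance above can be modeled with
std::list::splice, which is conceptually what MachineBasicBlock::splice does
here. Standalone sketch; strings stand in for instructions.

    #include <iostream>
    #include <iterator>
    #include <list>
    #include <string>

    int main() {
      std::list<std::string> From{"def x", "DBG x", "use y"};
      std::list<std::string> To{"label:"};

      auto MI = From.begin();        // the instruction being sunk
      auto DI = std::next(MI);       // its trailing debug values
      To.splice(To.end(), From, MI); // move the instruction
      while (DI != From.end() && DI->compare(0, 3, "DBG") == 0) {
        auto Next = std::next(DI);
        To.splice(To.end(), From, DI); // keep each DBG entry adjacent
        DI = Next;
      }
      for (const auto &S : To)
        std::cout << S << '\n'; // label:, def x, DBG x
      return 0;
    }
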
diff --git a/lib/CodeGen/MachineVerifier.cpp b/lib/CodeGen/MachineVerifier.cpp
index 7a55852..26847d3 100644
--- a/lib/CodeGen/MachineVerifier.cpp
+++ b/lib/CodeGen/MachineVerifier.cpp
@@ -72,6 +72,8 @@ namespace {
typedef DenseSet<unsigned> RegSet;
typedef DenseMap<unsigned, const MachineInstr*> RegMap;
+ const MachineInstr *FirstTerminator;
+
BitVector regsReserved;
RegSet regsLive;
RegVector regsDefined, regsDead, regsKilled;
@@ -389,6 +391,8 @@ static bool matchPair(MachineBasicBlock::const_succ_iterator i,
void
MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB) {
+ FirstTerminator = 0;
+
// Count the number of landing pad successors.
SmallPtrSet<MachineBasicBlock*, 4> LandingPadSuccs;
for (MachineBasicBlock::const_succ_iterator I = MBB->succ_begin(),
@@ -570,6 +574,18 @@ void MachineVerifier::visitMachineInstrBefore(const MachineInstr *MI) {
}
}
+ // Ensure non-terminators don't follow terminators.
+ if (MCID.isTerminator()) {
+ if (!FirstTerminator)
+ FirstTerminator = MI;
+ } else if (FirstTerminator) {
+ report("Non-terminator instruction after the first terminator", MI);
+ *OS << "First terminator was:\t" << *FirstTerminator;
+ }
+
+ StringRef ErrorInfo;
+ if (!TII->verifyInstruction(MI, ErrorInfo))
+ report(ErrorInfo.data(), MI);
}
void
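
Sketch (Instr is a stand-in for MachineInstr): the FirstTerminator check
enforces that terminators form a contiguous suffix of each block.

    #include <vector>

    struct Instr {
      bool IsTerminator;
    };

    // True when no non-terminator follows the first terminator, which is
    // the invariant the verifier reports against above.
    static bool terminatorsLast(const std::vector<Instr> &Block) {
      const Instr *FirstTerminator = nullptr;
      for (const Instr &MI : Block) {
        if (MI.IsTerminator) {
          if (!FirstTerminator)
            FirstTerminator = &MI;
        } else if (FirstTerminator) {
          return false; // non-terminator after the first terminator
        }
      }
      return true;
    }

    int main() {
      std::vector<Instr> Good{{false}, {true}};
      std::vector<Instr> Bad{{true}, {false}};
      return (terminatorsLast(Good) && !terminatorsLast(Bad)) ? 0 : 1;
    }
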
@@ -686,6 +702,11 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) {
else
addRegWithSubRegs(regsDefined, Reg);
+ // Verify SSA form.
+ if (MRI->isSSA() && TargetRegisterInfo::isVirtualRegister(Reg) &&
+ llvm::next(MRI->def_begin(Reg)) != MRI->def_end())
+ report("Multiple virtual register defs in SSA form", MO, MONum);
+
// Check LiveInts for a live range, but only for virtual registers.
if (LiveInts && TargetRegisterInfo::isVirtualRegister(Reg) &&
!LiveInts->isNotInMIMap(MI)) {
@@ -714,20 +735,14 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) {
unsigned SubIdx = MO->getSubReg();
if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
- unsigned sr = Reg;
if (SubIdx) {
- unsigned s = TRI->getSubReg(Reg, SubIdx);
- if (!s) {
- report("Invalid subregister index for physical register",
- MO, MONum);
- return;
- }
- sr = s;
+ report("Illegal subregister index for physical register", MO, MONum);
+ return;
}
if (const TargetRegisterClass *DRC = TII->getRegClass(MCID,MONum,TRI)) {
- if (!DRC->contains(sr)) {
+ if (!DRC->contains(Reg)) {
report("Illegal physical register for instruction", MO, MONum);
- *OS << TRI->getName(sr) << " is not a "
+ *OS << TRI->getName(Reg) << " is not a "
<< DRC->getName() << " register.\n";
}
}
@@ -735,16 +750,35 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) {
// Virtual register.
const TargetRegisterClass *RC = MRI->getRegClass(Reg);
if (SubIdx) {
- const TargetRegisterClass *SRC = RC->getSubRegisterRegClass(SubIdx);
+ const TargetRegisterClass *SRC =
+ TRI->getSubClassWithSubReg(RC, SubIdx);
if (!SRC) {
report("Invalid subregister index for virtual register", MO, MONum);
*OS << "Register class " << RC->getName()
<< " does not support subreg index " << SubIdx << "\n";
return;
}
- RC = SRC;
+ if (RC != SRC) {
+ report("Invalid register class for subregister index", MO, MONum);
+ *OS << "Register class " << RC->getName()
+ << " does not fully support subreg index " << SubIdx << "\n";
+ return;
+ }
}
if (const TargetRegisterClass *DRC = TII->getRegClass(MCID,MONum,TRI)) {
+ if (SubIdx) {
+ const TargetRegisterClass *SuperRC =
+ TRI->getLargestLegalSuperClass(RC);
+ if (!SuperRC) {
+ report("No largest legal super class exists.", MO, MONum);
+ return;
+ }
+ DRC = TRI->getMatchingSuperRegClass(SuperRC, DRC, SubIdx);
+ if (!DRC) {
+ report("No matching super-reg register class.", MO, MONum);
+ return;
+ }
+ }
if (!RC->hasSuperClassEq(DRC)) {
report("Illegal virtual register for instruction", MO, MONum);
*OS << "Expected a " << DRC->getName() << " register, but got a "
@@ -1161,18 +1195,8 @@ void MachineVerifier::verifyLiveIntervals() {
SlotIndex PEnd = LiveInts->getMBBEndIdx(*PI).getPrevSlot();
const VNInfo *PVNI = LI.getVNInfoAt(PEnd);
- if (VNI->isPHIDef() && VNI->def == LiveInts->getMBBStartIdx(MFI)) {
- if (PVNI && !PVNI->hasPHIKill()) {
- report("Value live out of predecessor doesn't have PHIKill", MF);
- *OS << "Valno #" << PVNI->id << " live out of BB#"
- << (*PI)->getNumber() << '@' << PEnd
- << " doesn't have PHIKill, but Valno #" << VNI->id
- << " is PHIDef and defined at the beginning of BB#"
- << MFI->getNumber() << '@' << LiveInts->getMBBStartIdx(MFI)
- << " in " << LI << '\n';
- }
+ if (VNI->isPHIDef() && VNI->def == LiveInts->getMBBStartIdx(MFI))
continue;
- }
if (!PVNI) {
report("Register not marked live out of predecessor", *PI);
diff --git a/lib/CodeGen/PHIElimination.cpp b/lib/CodeGen/PHIElimination.cpp
index af65f13..6994aa5 100644
--- a/lib/CodeGen/PHIElimination.cpp
+++ b/lib/CodeGen/PHIElimination.cpp
@@ -109,6 +109,9 @@ bool PHIElimination::runOnMachineFunction(MachineFunction &MF) {
bool Changed = false;
+ // This pass takes the function out of SSA form.
+ MRI->leaveSSA();
+
// Split critical edges to help the coalescer
if (!DisableEdgeSplitting) {
if (LiveVariables *LV = getAnalysisIfAvailable<LiveVariables>()) {
diff --git a/lib/CodeGen/PeepholeOptimizer.cpp b/lib/CodeGen/PeepholeOptimizer.cpp
index c523e39..bbc7ce2 100644
--- a/lib/CodeGen/PeepholeOptimizer.cpp
+++ b/lib/CodeGen/PeepholeOptimizer.cpp
@@ -295,7 +295,6 @@ bool PeepholeOptimizer::OptimizeBitcastInstr(MachineInstr *MI,
if (!DefMI || !DefMI->getDesc().isBitcast())
return false;
- unsigned SrcDef = 0;
unsigned SrcSrc = 0;
NumDefs = DefMI->getDesc().getNumDefs();
NumSrcs = DefMI->getDesc().getNumOperands() - NumDefs;
@@ -308,13 +307,13 @@ bool PeepholeOptimizer::OptimizeBitcastInstr(MachineInstr *MI,
unsigned Reg = MO.getReg();
if (!Reg)
continue;
- if (MO.isDef())
- SrcDef = Reg;
- else if (SrcSrc)
- // Multiple sources?
- return false;
- else
- SrcSrc = Reg;
+ if (!MO.isDef()) {
+ if (SrcSrc)
+ // Multiple sources?
+ return false;
+ else
+ SrcSrc = Reg;
+ }
}
if (MRI->getRegClass(SrcSrc) != MRI->getRegClass(Def))
@@ -434,6 +433,7 @@ bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) {
if (MCID.isBitcast()) {
if (OptimizeBitcastInstr(MI, MBB)) {
// MI is deleted.
+ LocalMIs.erase(MI);
Changed = true;
MII = First ? I->begin() : llvm::next(PMII);
continue;
@@ -441,6 +441,7 @@ bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) {
} else if (MCID.isCompare()) {
if (OptimizeCmpInstr(MI, MBB)) {
// MI is deleted.
+ LocalMIs.erase(MI);
Changed = true;
MII = First ? I->begin() : llvm::next(PMII);
continue;
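
A minimal reproduction (invented types) of the stale-pointer hazard the two
LocalMIs.erase() calls close: once MI is freed, a leftover entry in the side
table would later be compared or dereferenced.

    #include <set>

    struct Instr { int Opcode; };

    static void eraseInstr(std::set<Instr *> &LocalMIs, Instr *MI) {
      LocalMIs.erase(MI); // drop the cached pointer first...
      delete MI;          // ...then free the instruction
    }

    int main() {
      std::set<Instr *> LocalMIs;
      Instr *MI = new Instr();
      LocalMIs.insert(MI);
      eraseInstr(LocalMIs, MI);
      return LocalMIs.empty() ? 0 : 1; // table stays consistent
    }
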
diff --git a/lib/CodeGen/ProcessImplicitDefs.cpp b/lib/CodeGen/ProcessImplicitDefs.cpp
index c04d656..b1d8c97 100644
--- a/lib/CodeGen/ProcessImplicitDefs.cpp
+++ b/lib/CodeGen/ProcessImplicitDefs.cpp
@@ -125,8 +125,14 @@ bool ProcessImplicitDefs::runOnMachineFunction(MachineFunction &fn) {
LiveVariables::VarInfo& vi = LV->getVarInfo(MO.getReg());
vi.removeKill(MI);
}
+ unsigned Reg = MI->getOperand(0).getReg();
MI->eraseFromParent();
Changed = true;
+
+ // A REG_SEQUENCE may have been expanded into partial definitions.
+ // If this was the last one, mark Reg as implicitly defined.
+ if (TargetRegisterInfo::isVirtualRegister(Reg) && MRI->def_empty(Reg))
+ ImpDefRegs.insert(Reg);
continue;
}
}
diff --git a/lib/CodeGen/PrologEpilogInserter.cpp b/lib/CodeGen/PrologEpilogInserter.cpp
index a901c5f..32c9325 100644
--- a/lib/CodeGen/PrologEpilogInserter.cpp
+++ b/lib/CodeGen/PrologEpilogInserter.cpp
@@ -29,6 +29,7 @@
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RegisterScavenging.h"
#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Target/TargetFrameLowering.h"
#include "llvm/Target/TargetInstrInfo.h"
@@ -54,6 +55,8 @@ INITIALIZE_PASS_END(PEI, "prologepilog",
STATISTIC(NumVirtualFrameRegs, "Number of virtual frame regs encountered");
STATISTIC(NumScavengedRegs, "Number of frame index regs scavenged");
+STATISTIC(NumBytesStackSpace,
+ "Number of bytes used for stack in all functions");
/// createPrologEpilogCodeInserter - This function returns a pass that inserts
/// prolog and epilog code, and eliminates abstract frame references.
@@ -677,7 +680,9 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) {
}
// Update frame info to pretend that this is part of the stack...
- MFI->setStackSize(Offset - LocalAreaOffset);
+ int64_t StackSize = Offset - LocalAreaOffset;
+ MFI->setStackSize(StackSize);
+ NumBytesStackSpace += StackSize;
}
/// insertPrologEpilogCode - Scan the function for modified callee saved
@@ -696,6 +701,13 @@ void PEI::insertPrologEpilogCode(MachineFunction &Fn) {
if (!I->empty() && I->back().getDesc().isReturn())
TFI.emitEpilogue(Fn, *I);
}
+
+ // Emit additional code that is required to support segmented stacks, if
+ // we've been asked for it. This, when linked with a runtime with support
+ // for segmented stacks (libgcc is one), will result in allocating stack
+ // space in small chunks instead of one large contiguous block.
+ if (EnableSegmentedStacks)
+ TFI.adjustForSegmentedStacks(Fn);
}
/// replaceFrameIndices - Replace all MO_FrameIndex operands with physical
diff --git a/lib/CodeGen/RegAllocBasic.cpp b/lib/CodeGen/RegAllocBasic.cpp
index 5ea26ad..5496d69 100644
--- a/lib/CodeGen/RegAllocBasic.cpp
+++ b/lib/CodeGen/RegAllocBasic.cpp
@@ -20,7 +20,6 @@
#include "RenderMachineFunction.h"
#include "Spiller.h"
#include "VirtRegMap.h"
-#include "RegisterCoalescer.h"
#include "llvm/ADT/OwningPtr.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
@@ -160,7 +159,7 @@ void RABasic::getAnalysisUsage(AnalysisUsage &AU) const {
AU.addPreserved<LiveDebugVariables>();
if (StrongPHIElim)
AU.addRequiredID(StrongPHIEliminationID);
- AU.addRequiredTransitive<RegisterCoalescer>();
+ AU.addRequiredTransitiveID(RegisterCoalescerPassID);
AU.addRequired<CalculateSpillWeights>();
AU.addRequired<LiveStacks>();
AU.addPreserved<LiveStacks>();
@@ -439,6 +438,7 @@ void RegAllocBase::addMBBLiveIns(MachineFunction *MF) {
LiveIntervalUnion &LiveUnion = PhysReg2LiveUnion[PhysReg];
if (LiveUnion.empty())
continue;
+ DEBUG(dbgs() << PrintReg(PhysReg, TRI) << " live-in:");
MachineFunction::iterator MBB = llvm::next(MF->begin());
MachineFunction::iterator MFE = MF->end();
SlotIndex Start, Stop;
@@ -449,6 +449,8 @@ void RegAllocBase::addMBBLiveIns(MachineFunction *MF) {
if (SI.start() <= Start) {
if (!MBB->isLiveIn(PhysReg))
MBB->addLiveIn(PhysReg);
+ DEBUG(dbgs() << "\tBB#" << MBB->getNumber() << ':'
+ << PrintReg(SI.value()->reg, TRI));
} else if (SI.start() > Stop)
MBB = Indexes->getMBBFromIndex(SI.start().getPrevIndex());
if (++MBB == MFE)
@@ -456,6 +458,7 @@ void RegAllocBase::addMBBLiveIns(MachineFunction *MF) {
tie(Start, Stop) = Indexes->getMBBRange(MBB);
SI.advanceTo(Start);
}
+ DEBUG(dbgs() << '\n');
}
}
@@ -495,8 +498,9 @@ unsigned RABasic::selectOrSplit(LiveInterval &VirtReg,
// Found an available register.
return PhysReg;
}
+ Queries[interfReg].collectInterferingVRegs(1);
LiveInterval *interferingVirtReg =
- Queries[interfReg].firstInterference().liveUnionPos().value();
+ Queries[interfReg].interferingVRegs().front();
// The current VirtReg must either be spillable, or one of its interferences
// must have less spill weight.
diff --git a/lib/CodeGen/RegAllocGreedy.cpp b/lib/CodeGen/RegAllocGreedy.cpp
index e235e87..f54a2c8 100644
--- a/lib/CodeGen/RegAllocGreedy.cpp
+++ b/lib/CodeGen/RegAllocGreedy.cpp
@@ -22,7 +22,6 @@
#include "SpillPlacement.h"
#include "SplitKit.h"
#include "VirtRegMap.h"
-#include "RegisterCoalescer.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Function.h"
@@ -38,6 +37,7 @@
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/RegAllocRegistry.h"
#include "llvm/Target/TargetOptions.h"
+#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
@@ -51,6 +51,15 @@ STATISTIC(NumGlobalSplits, "Number of split global live ranges");
STATISTIC(NumLocalSplits, "Number of split local live ranges");
STATISTIC(NumEvicted, "Number of interferences evicted");
+static cl::opt<SplitEditor::ComplementSpillMode>
+SplitSpillMode("split-spill-mode", cl::Hidden,
+ cl::desc("Spill mode for splitting live ranges"),
+ cl::values(clEnumValN(SplitEditor::SM_Partition, "default", "Default"),
+ clEnumValN(SplitEditor::SM_Size, "size", "Optimize for size"),
+ clEnumValN(SplitEditor::SM_Speed, "speed", "Optimize for speed"),
+ clEnumValEnd),
+ cl::init(SplitEditor::SM_Partition));
+
static RegisterRegAlloc greedyRegAlloc("greedy", "greedy register allocator",
createGreedyRegisterAllocator);
@@ -90,12 +99,26 @@ class RAGreedy : public MachineFunctionPass,
// range splitting algorithm terminates, something that is otherwise hard to
// ensure.
enum LiveRangeStage {
- RS_New, ///< Never seen before.
- RS_First, ///< First time in the queue.
- RS_Second, ///< Second time in the queue.
- RS_Global, ///< Produced by global splitting.
- RS_Local, ///< Produced by local splitting.
- RS_Spill ///< Produced by spilling.
+ /// Newly created live range that has never been queued.
+ RS_New,
+
+ /// Only attempt assignment and eviction. Then requeue as RS_Split.
+ RS_Assign,
+
+ /// Attempt live range splitting if assignment is impossible.
+ RS_Split,
+
+ /// Attempt more aggressive live range splitting that is guaranteed to make
+ /// progress. This is used for split products that may not be making
+ /// progress.
+ RS_Split2,
+
+ /// Live range will be spilled. No more splitting will be attempted.
+ RS_Spill,
+
+ /// There is nothing more we can do to this live range. Abort compilation
+ /// if it can't be assigned.
+ RS_Done
};
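
Loosely, the allocator pushes each live range down this ladder toward
RS_Done, which is what bounds requeueing. A toy one-way progression;
advance() is invented and the real transitions are richer than this.

    enum Stage { RS_New, RS_Assign, RS_Split, RS_Split2, RS_Spill, RS_Done };

    static Stage advance(Stage S) {
      return S == RS_Done ? RS_Done : static_cast<Stage>(S + 1);
    }

    int main() {
      Stage S = RS_New;
      int Steps = 0;
      while (S != RS_Done) { // terminates after at most five steps
        S = advance(S);
        ++Steps;
      }
      return Steps == 5 ? 0 : 1;
    }
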
static const char *const StageName[];
@@ -157,17 +180,38 @@ class RAGreedy : public MachineFunctionPass,
/// Global live range splitting candidate info.
struct GlobalSplitCandidate {
+ // Register intended for assignment, or 0.
unsigned PhysReg;
+
+ // SplitKit interval index for this candidate.
+ unsigned IntvIdx;
+
+ // Interference for PhysReg.
InterferenceCache::Cursor Intf;
+
+ // Bundles where this candidate should be live.
BitVector LiveBundles;
SmallVector<unsigned, 8> ActiveBlocks;
void reset(InterferenceCache &Cache, unsigned Reg) {
PhysReg = Reg;
+ IntvIdx = 0;
Intf.setPhysReg(Cache, Reg);
LiveBundles.clear();
ActiveBlocks.clear();
}
+
+ // Set B[i] = C for every live bundle where B[i] was NoCand.
+ unsigned getBundles(SmallVectorImpl<unsigned> &B, unsigned C) {
+ unsigned Count = 0;
+ for (int i = LiveBundles.find_first(); i >= 0;
+ i = LiveBundles.find_next(i))
+ if (B[i] == NoCand) {
+ B[i] = C;
+ Count++;
+ }
+ return Count;
+ }
};
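
getBundles is a claim-if-unassigned scan over the live bundles. Sketch with
std::vector<bool> standing in for BitVector and its find_first/find_next.

    #include <vector>

    static const unsigned NoCand = ~0u;

    static unsigned getBundles(const std::vector<bool> &LiveBundles,
                               std::vector<unsigned> &B, unsigned C) {
      unsigned Count = 0;
      for (unsigned i = 0, e = LiveBundles.size(); i != e; ++i)
        if (LiveBundles[i] && B[i] == NoCand) {
          B[i] = C; // claim this bundle for candidate C
          ++Count;
        }
      return Count;
    }

    int main() {
      std::vector<bool> Live(4, false);
      Live[1] = Live[3] = true;
      std::vector<unsigned> B(4, NoCand);
      B[3] = 0; // bundle 3 already belongs to another candidate
      return getBundles(Live, B, 1) == 1 ? 0 : 1; // only bundle 1 claimed
    }
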
/// Candidate info for each PhysReg in AllocationOrder.
@@ -175,6 +219,12 @@ class RAGreedy : public MachineFunctionPass,
/// class.
SmallVector<GlobalSplitCandidate, 32> GlobalCand;
+ enum { NoCand = ~0u };
+
+ /// Candidate map. Each edge bundle is assigned to a GlobalCand entry, or to
+ /// NoCand which indicates the stack interval.
+ SmallVector<unsigned, 32> BundleCand;
+
public:
RAGreedy();
@@ -208,8 +258,8 @@ private:
void addThroughConstraints(InterferenceCache::Cursor, ArrayRef<unsigned>);
void growRegion(GlobalSplitCandidate &Cand);
float calcGlobalSplitCost(GlobalSplitCandidate&);
- void splitAroundRegion(LiveInterval&, GlobalSplitCandidate&,
- SmallVectorImpl<LiveInterval*>&);
+ bool calcCompactRegion(GlobalSplitCandidate&);
+ void splitAroundRegion(LiveRangeEdit&, ArrayRef<unsigned>);
void calcGapWeights(unsigned, SmallVectorImpl<float>&);
bool shouldEvict(LiveInterval &A, bool, LiveInterval &B, bool);
bool canEvictInterference(LiveInterval&, unsigned, bool, EvictionCost&);
@@ -222,6 +272,8 @@ private:
SmallVectorImpl<LiveInterval*>&, unsigned = ~0u);
unsigned tryRegionSplit(LiveInterval&, AllocationOrder&,
SmallVectorImpl<LiveInterval*>&);
+ unsigned tryBlockSplit(LiveInterval&, AllocationOrder&,
+ SmallVectorImpl<LiveInterval*>&);
unsigned tryLocalSplit(LiveInterval&, AllocationOrder&,
SmallVectorImpl<LiveInterval*>&);
unsigned trySplit(LiveInterval&, AllocationOrder&,
@@ -233,12 +285,12 @@ char RAGreedy::ID = 0;
#ifndef NDEBUG
const char *const RAGreedy::StageName[] = {
- "RS_New",
- "RS_First",
- "RS_Second",
- "RS_Global",
- "RS_Local",
- "RS_Spill"
+ "RS_New",
+ "RS_Assign",
+ "RS_Split",
+ "RS_Split2",
+ "RS_Spill",
+ "RS_Done"
};
#endif
@@ -278,7 +330,7 @@ void RAGreedy::getAnalysisUsage(AnalysisUsage &AU) const {
AU.addPreserved<LiveDebugVariables>();
if (StrongPHIElim)
AU.addRequiredID(StrongPHIEliminationID);
- AU.addRequiredTransitive<RegisterCoalescer>();
+ AU.addRequiredTransitiveID(RegisterCoalescerPassID);
AU.addRequired<CalculateSpillWeights>();
AU.addRequired<LiveStacks>();
AU.addPreserved<LiveStacks>();
@@ -325,9 +377,15 @@ void RAGreedy::LRE_WillShrinkVirtReg(unsigned VirtReg) {
}
void RAGreedy::LRE_DidCloneVirtReg(unsigned New, unsigned Old) {
+ // Cloning a register we haven't even heard about yet? Just ignore it.
+ if (!ExtraRegInfo.inBounds(Old))
+ return;
+
// LRE may clone a virtual register because dead code elimination causes it to
- // be split into connected components. Ensure that the new register gets the
- // same stage as the parent.
+ // be split into connected components. The new components are much smaller
+ // than the original, so they should get a new chance at being assigned.
+ ExtraRegInfo[Old].Stage = RS_Assign;
ExtraRegInfo.grow(New);
ExtraRegInfo[New] = ExtraRegInfo[Old];
}
@@ -350,16 +408,15 @@ void RAGreedy::enqueue(LiveInterval *LI) {
ExtraRegInfo.grow(Reg);
if (ExtraRegInfo[Reg].Stage == RS_New)
- ExtraRegInfo[Reg].Stage = RS_First;
+ ExtraRegInfo[Reg].Stage = RS_Assign;
- if (ExtraRegInfo[Reg].Stage == RS_Second)
+ if (ExtraRegInfo[Reg].Stage == RS_Split) {
// Unsplit ranges that couldn't be allocated immediately are deferred until
- // everything else has been allocated. Long ranges are allocated last so
- // they are split against realistic interference.
- Prio = (1u << 31) - Size;
- else {
- // Everything else is allocated in long->short order. Long ranges that don't
- // fit should be spilled ASAP so they don't create interference.
+ // everything else has been allocated.
+ Prio = Size;
+ } else {
+ // Everything is allocated in long->short order. Long ranges that don't fit
+ // should be spilled (or split) ASAP so they don't create interference.
Prio = (1u << 31) + Size;
// Boost ranges that have a physical register hint.
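
The new priority encoding, as a sketch (the real code also boosts hinted
ranges): bit 31 makes every non-deferred range outrank every deferred
RS_Split range, and within each group longer ranges come out of the priority
queue first.

    #include <cstdio>

    static unsigned priority(bool Deferred, unsigned Size) {
      return Deferred ? Size : (1u << 31) + Size;
    }

    int main() {
      unsigned Hot = priority(false, 16);          // allocated early
      unsigned Waiting = priority(true, 1u << 30); // second round
      std::printf("%d\n", Hot > Waiting);          // prints 1
      return 0;
    }
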
@@ -442,7 +499,7 @@ unsigned RAGreedy::tryAssign(LiveInterval &VirtReg,
/// @param BreaksHint True when B is already assigned to its preferred register.
bool RAGreedy::shouldEvict(LiveInterval &A, bool IsHint,
LiveInterval &B, bool BreaksHint) {
- bool CanSplit = getStage(B) <= RS_Second;
+ bool CanSplit = getStage(B) < RS_Spill;
// Be fairly aggressive about following hints as long as the evictee can be
// split.
@@ -487,7 +544,7 @@ bool RAGreedy::canEvictInterference(LiveInterval &VirtReg, unsigned PhysReg,
if (TargetRegisterInfo::isPhysicalRegister(Intf->reg))
return false;
// Never evict spill products. They cannot split or spill.
- if (getStage(*Intf) == RS_Spill)
+ if (getStage(*Intf) == RS_Done)
return false;
// Once a live range becomes small enough, it is urgent that we find a
// register for it. This is indicated by an infinite spill weight. These
@@ -627,6 +684,7 @@ bool RAGreedy::addSplitConstraints(InterferenceCache::Cursor Intf,
Intf.moveToBlock(BC.Number);
BC.Entry = BI.LiveIn ? SpillPlacement::PrefReg : SpillPlacement::DontCare;
BC.Exit = BI.LiveOut ? SpillPlacement::PrefReg : SpillPlacement::DontCare;
+ BC.ChangesValue = BI.FirstDef;
if (!Intf.hasInterference())
continue;
@@ -638,9 +696,9 @@ bool RAGreedy::addSplitConstraints(InterferenceCache::Cursor Intf,
if (BI.LiveIn) {
if (Intf.first() <= Indexes->getMBBStartIdx(BC.Number))
BC.Entry = SpillPlacement::MustSpill, ++Ins;
- else if (Intf.first() < BI.FirstUse)
+ else if (Intf.first() < BI.FirstInstr)
BC.Entry = SpillPlacement::PrefSpill, ++Ins;
- else if (Intf.first() < BI.LastUse)
+ else if (Intf.first() < BI.LastInstr)
++Ins;
}
@@ -648,9 +706,9 @@ bool RAGreedy::addSplitConstraints(InterferenceCache::Cursor Intf,
if (BI.LiveOut) {
if (Intf.last() >= SA->getLastSplitPoint(BC.Number))
BC.Exit = SpillPlacement::MustSpill, ++Ins;
- else if (Intf.last() > BI.LastUse)
+ else if (Intf.last() > BI.LastInstr)
BC.Exit = SpillPlacement::PrefSpill, ++Ins;
- else if (Intf.last() > BI.FirstUse)
+ else if (Intf.last() > BI.FirstInstr)
++Ins;
}
@@ -684,7 +742,7 @@ void RAGreedy::addThroughConstraints(InterferenceCache::Cursor Intf,
assert(T < GroupSize && "Array overflow");
TBS[T] = Number;
if (++T == GroupSize) {
- SpillPlacer->addLinks(ArrayRef<unsigned>(TBS, T));
+ SpillPlacer->addLinks(makeArrayRef(TBS, T));
T = 0;
}
continue;
@@ -714,7 +772,7 @@ void RAGreedy::addThroughConstraints(InterferenceCache::Cursor Intf,
ArrayRef<SpillPlacement::BlockConstraint> Array(BCS, B);
SpillPlacer->addConstraints(Array);
- SpillPlacer->addLinks(ArrayRef<unsigned>(TBS, T));
+ SpillPlacer->addLinks(makeArrayRef(TBS, T));
}
void RAGreedy::growRegion(GlobalSplitCandidate &Cand) {
@@ -749,8 +807,16 @@ void RAGreedy::growRegion(GlobalSplitCandidate &Cand) {
// Any new blocks to add?
if (ActiveBlocks.size() == AddedTo)
break;
- addThroughConstraints(Cand.Intf,
- ArrayRef<unsigned>(ActiveBlocks).slice(AddedTo));
+
+ // Compute through constraints from the interference, or assume that all
+ // through blocks prefer spilling when forming compact regions.
+ ArrayRef<unsigned> NewBlocks = makeArrayRef(ActiveBlocks).slice(AddedTo);
+ if (Cand.PhysReg)
+ addThroughConstraints(Cand.Intf, NewBlocks);
+ else
+ // Provide a strong negative bias on through blocks to prevent unwanted
+ // liveness on loop backedges.
+ SpillPlacer->addPrefSpill(NewBlocks, /* Strong= */ true);
AddedTo = ActiveBlocks.size();
// Perhaps iterating can enable more bundles?
@@ -759,11 +825,55 @@ void RAGreedy::growRegion(GlobalSplitCandidate &Cand) {
DEBUG(dbgs() << ", v=" << Visited);
}
+/// calcCompactRegion - Compute the set of edge bundles that should be live
+/// when splitting the current live range into compact regions. Compact
+/// regions can be computed without looking at interference. They are the
+/// regions formed by removing all the live-through blocks from the live range.
+///
+/// Returns false if the current live range is already compact, or if the
+/// compact regions would form single block regions anyway.
+bool RAGreedy::calcCompactRegion(GlobalSplitCandidate &Cand) {
+ // Without any through blocks, the live range is already compact.
+ if (!SA->getNumThroughBlocks())
+ return false;
+
+ // Compact regions don't correspond to any physreg.
+ Cand.reset(IntfCache, 0);
+
+ DEBUG(dbgs() << "Compact region bundles");
+
+ // Use the spill placer to determine the live bundles. GrowRegion pretends
+ // that all the through blocks have interference when PhysReg is unset.
+ SpillPlacer->prepare(Cand.LiveBundles);
+
+ // The static split cost will be zero since Cand.Intf reports no interference.
+ float Cost;
+ if (!addSplitConstraints(Cand.Intf, Cost)) {
+ DEBUG(dbgs() << ", none.\n");
+ return false;
+ }
+
+ growRegion(Cand);
+ SpillPlacer->finish();
+
+ if (!Cand.LiveBundles.any()) {
+ DEBUG(dbgs() << ", none.\n");
+ return false;
+ }
+
+ DEBUG({
+ for (int i = Cand.LiveBundles.find_first(); i>=0;
+ i = Cand.LiveBundles.find_next(i))
+ dbgs() << " EB#" << i;
+ dbgs() << ".\n";
+ });
+ return true;
+}
+
/// calcSpillCost - Compute how expensive it would be to split the live range in
/// SA around all use blocks instead of forming bundle regions.
float RAGreedy::calcSpillCost() {
float Cost = 0;
- const LiveInterval &LI = SA->getParent();
ArrayRef<SplitAnalysis::BlockInfo> UseBlocks = SA->getUseBlocks();
for (unsigned i = 0; i != UseBlocks.size(); ++i) {
const SplitAnalysis::BlockInfo &BI = UseBlocks[i];
@@ -772,16 +882,8 @@ float RAGreedy::calcSpillCost() {
Cost += SpillPlacer->getBlockFrequency(Number);
// Unless the value is redefined in the block.
- if (BI.LiveIn && BI.LiveOut) {
- SlotIndex Start, Stop;
- tie(Start, Stop) = Indexes->getMBBRange(Number);
- LiveInterval::const_iterator I = LI.find(Start);
- assert(I != LI.end() && "Expected live-in value");
- // Is there a different live-out value? If so, we need an extra spill
- // instruction.
- if (I->end < Stop)
- Cost += SpillPlacer->getBlockFrequency(Number);
- }
+ if (BI.LiveIn && BI.LiveOut && BI.FirstDef)
+ Cost += SpillPlacer->getBlockFrequency(Number);
}
return Cost;
}
@@ -828,81 +930,115 @@ float RAGreedy::calcGlobalSplitCost(GlobalSplitCandidate &Cand) {
return GlobalCost;
}
-/// splitAroundRegion - Split VirtReg around the region determined by
-/// LiveBundles. Make an effort to avoid interference from PhysReg.
+/// splitAroundRegion - Split the current live range around the regions
+/// determined by BundleCand and GlobalCand.
///
-/// The 'register' interval is going to contain as many uses as possible while
-/// avoiding interference. The 'stack' interval is the complement constructed by
-/// SplitEditor. It will contain the rest.
+/// Before calling this function, GlobalCand and BundleCand must be initialized
+/// so each bundle is assigned to a valid candidate, or NoCand for the
+/// stack-bound bundles. The shared SA/SE SplitAnalysis and SplitEditor
+/// objects must be initialized for the current live range, and intervals
+/// created for the used candidates.
///
-void RAGreedy::splitAroundRegion(LiveInterval &VirtReg,
- GlobalSplitCandidate &Cand,
- SmallVectorImpl<LiveInterval*> &NewVRegs) {
- const BitVector &LiveBundles = Cand.LiveBundles;
-
- DEBUG({
- dbgs() << "Splitting around region for " << PrintReg(Cand.PhysReg, TRI)
- << " with bundles";
- for (int i = LiveBundles.find_first(); i>=0; i = LiveBundles.find_next(i))
- dbgs() << " EB#" << i;
- dbgs() << ".\n";
- });
-
- InterferenceCache::Cursor &Intf = Cand.Intf;
- LiveRangeEdit LREdit(VirtReg, NewVRegs, this);
- SE->reset(LREdit);
-
- // Create the main cross-block interval.
- const unsigned MainIntv = SE->openIntv();
+/// @param LREdit The LiveRangeEdit object handling the current split.
+/// @param UsedCands List of used GlobalCand entries. Every BundleCand value
+/// must appear in this list.
+void RAGreedy::splitAroundRegion(LiveRangeEdit &LREdit,
+ ArrayRef<unsigned> UsedCands) {
+ // These are the intervals created for new global ranges. We may create more
+ // intervals for local ranges.
+ const unsigned NumGlobalIntvs = LREdit.size();
+ DEBUG(dbgs() << "splitAroundRegion with " << NumGlobalIntvs << " globals.\n");
+ assert(NumGlobalIntvs && "No global intervals configured");
+
+ // Isolate even single instructions when dealing with a proper sub-class.
+ // That guarantees register class inflation for the stack interval because it
+ // is all copies.
+ unsigned Reg = SA->getParent().reg;
+ bool SingleInstrs = RegClassInfo.isProperSubClass(MRI->getRegClass(Reg));
// First handle all the blocks with uses.
ArrayRef<SplitAnalysis::BlockInfo> UseBlocks = SA->getUseBlocks();
for (unsigned i = 0; i != UseBlocks.size(); ++i) {
const SplitAnalysis::BlockInfo &BI = UseBlocks[i];
- bool RegIn = BI.LiveIn &&
- LiveBundles[Bundles->getBundle(BI.MBB->getNumber(), 0)];
- bool RegOut = BI.LiveOut &&
- LiveBundles[Bundles->getBundle(BI.MBB->getNumber(), 1)];
+ unsigned Number = BI.MBB->getNumber();
+ unsigned IntvIn = 0, IntvOut = 0;
+ SlotIndex IntfIn, IntfOut;
+ if (BI.LiveIn) {
+ unsigned CandIn = BundleCand[Bundles->getBundle(Number, 0)];
+ if (CandIn != NoCand) {
+ GlobalSplitCandidate &Cand = GlobalCand[CandIn];
+ IntvIn = Cand.IntvIdx;
+ Cand.Intf.moveToBlock(Number);
+ IntfIn = Cand.Intf.first();
+ }
+ }
+ if (BI.LiveOut) {
+ unsigned CandOut = BundleCand[Bundles->getBundle(Number, 1)];
+ if (CandOut != NoCand) {
+ GlobalSplitCandidate &Cand = GlobalCand[CandOut];
+ IntvOut = Cand.IntvIdx;
+ Cand.Intf.moveToBlock(Number);
+ IntfOut = Cand.Intf.last();
+ }
+ }
// Create separate intervals for isolated blocks with multiple uses.
- if (!RegIn && !RegOut) {
+ if (!IntvIn && !IntvOut) {
DEBUG(dbgs() << "BB#" << BI.MBB->getNumber() << " isolated.\n");
- if (!BI.isOneInstr()) {
+ if (SA->shouldSplitSingleBlock(BI, SingleInstrs))
SE->splitSingleBlock(BI);
- SE->selectIntv(MainIntv);
- }
continue;
}
- Intf.moveToBlock(BI.MBB->getNumber());
-
- if (RegIn && RegOut)
- SE->splitLiveThroughBlock(BI.MBB->getNumber(),
- MainIntv, Intf.first(),
- MainIntv, Intf.last());
- else if (RegIn)
- SE->splitRegInBlock(BI, MainIntv, Intf.first());
+ if (IntvIn && IntvOut)
+ SE->splitLiveThroughBlock(Number, IntvIn, IntfIn, IntvOut, IntfOut);
+ else if (IntvIn)
+ SE->splitRegInBlock(BI, IntvIn, IntfIn);
else
- SE->splitRegOutBlock(BI, MainIntv, Intf.last());
+ SE->splitRegOutBlock(BI, IntvOut, IntfOut);
}
- // Handle live-through blocks.
- for (unsigned i = 0, e = Cand.ActiveBlocks.size(); i != e; ++i) {
- unsigned Number = Cand.ActiveBlocks[i];
- bool RegIn = LiveBundles[Bundles->getBundle(Number, 0)];
- bool RegOut = LiveBundles[Bundles->getBundle(Number, 1)];
- if (!RegIn && !RegOut)
- continue;
- Intf.moveToBlock(Number);
- SE->splitLiveThroughBlock(Number, RegIn ? MainIntv : 0, Intf.first(),
- RegOut ? MainIntv : 0, Intf.last());
+ // Handle live-through blocks. The relevant live-through blocks are stored in
+ // the ActiveBlocks list with each candidate. We need to filter out
+ // duplicates.
+ BitVector Todo = SA->getThroughBlocks();
+ for (unsigned c = 0; c != UsedCands.size(); ++c) {
+ ArrayRef<unsigned> Blocks = GlobalCand[UsedCands[c]].ActiveBlocks;
+ for (unsigned i = 0, e = Blocks.size(); i != e; ++i) {
+ unsigned Number = Blocks[i];
+ if (!Todo.test(Number))
+ continue;
+ Todo.reset(Number);
+
+ unsigned IntvIn = 0, IntvOut = 0;
+ SlotIndex IntfIn, IntfOut;
+
+ unsigned CandIn = BundleCand[Bundles->getBundle(Number, 0)];
+ if (CandIn != NoCand) {
+ GlobalSplitCandidate &Cand = GlobalCand[CandIn];
+ IntvIn = Cand.IntvIdx;
+ Cand.Intf.moveToBlock(Number);
+ IntfIn = Cand.Intf.first();
+ }
+
+ unsigned CandOut = BundleCand[Bundles->getBundle(Number, 1)];
+ if (CandOut != NoCand) {
+ GlobalSplitCandidate &Cand = GlobalCand[CandOut];
+ IntvOut = Cand.IntvIdx;
+ Cand.Intf.moveToBlock(Number);
+ IntfOut = Cand.Intf.last();
+ }
+ if (!IntvIn && !IntvOut)
+ continue;
+ SE->splitLiveThroughBlock(Number, IntvIn, IntfIn, IntvOut, IntfOut);
+ }
}
++NumGlobalSplits;
SmallVector<unsigned, 8> IntvMap;
SE->finish(&IntvMap);
- DebugVars->splitRegister(VirtReg.reg, LREdit.regs());
+ DebugVars->splitRegister(Reg, LREdit.regs());
ExtraRegInfo.resize(MRI->getNumVirtRegs());
unsigned OrigBlocks = SA->getNumLiveBlocks();
@@ -922,18 +1058,18 @@ void RAGreedy::splitAroundRegion(LiveInterval &VirtReg,
// Remainder interval. Don't try splitting again, spill if it doesn't
// allocate.
if (IntvMap[i] == 0) {
- setStage(Reg, RS_Global);
+ setStage(Reg, RS_Spill);
continue;
}
- // Main interval. Allow repeated splitting as long as the number of live
+ // Global intervals. Allow repeated splitting as long as the number of live
// blocks is strictly decreasing.
- if (IntvMap[i] == MainIntv) {
+ if (IntvMap[i] < NumGlobalIntvs) {
if (SA->countLiveBlocks(&Reg) >= OrigBlocks) {
DEBUG(dbgs() << "Main interval covers the same " << OrigBlocks
<< " blocks as original.\n");
// Don't allow repeated splitting as a safe guard against looping.
- setStage(Reg, RS_Global);
+ setStage(Reg, RS_Split2);
}
continue;
}
@@ -948,11 +1084,23 @@ void RAGreedy::splitAroundRegion(LiveInterval &VirtReg,
unsigned RAGreedy::tryRegionSplit(LiveInterval &VirtReg, AllocationOrder &Order,
SmallVectorImpl<LiveInterval*> &NewVRegs) {
- float BestCost = Hysteresis * calcSpillCost();
- DEBUG(dbgs() << "Cost of isolating all blocks = " << BestCost << '\n');
- const unsigned NoCand = ~0u;
- unsigned BestCand = NoCand;
unsigned NumCands = 0;
+ unsigned BestCand = NoCand;
+ float BestCost;
+ SmallVector<unsigned, 8> UsedCands;
+
+ // Check if we can split this live range around a compact region.
+ bool HasCompact = calcCompactRegion(GlobalCand.front());
+ if (HasCompact) {
+ // Yes, keep GlobalCand[0] as the compact region candidate.
+ NumCands = 1;
+ BestCost = HUGE_VALF;
+ } else {
+ // No benefit from the compact region, our fallback will be per-block
+ // splitting. Make sure we find a solution that is cheaper than spilling.
+ BestCost = Hysteresis * calcSpillCost();
+ DEBUG(dbgs() << "Cost of isolating all blocks = " << BestCost << '\n');
+ }
Order.rewind();
while (unsigned PhysReg = Order.next()) {
@@ -962,7 +1110,7 @@ unsigned RAGreedy::tryRegionSplit(LiveInterval &VirtReg, AllocationOrder &Order,
unsigned WorstCount = ~0u;
unsigned Worst = 0;
for (unsigned i = 0; i != NumCands; ++i) {
- if (i == BestCand)
+ if (i == BestCand || !GlobalCand[i].PhysReg)
continue;
unsigned Count = GlobalCand[i].LiveBundles.count();
if (Count < WorstCount)
@@ -1019,15 +1167,94 @@ unsigned RAGreedy::tryRegionSplit(LiveInterval &VirtReg, AllocationOrder &Order,
++NumCands;
}
- if (BestCand == NoCand)
+ // No solutions found, fall back to single block splitting.
+ if (!HasCompact && BestCand == NoCand)
return 0;
- splitAroundRegion(VirtReg, GlobalCand[BestCand], NewVRegs);
+ // Prepare split editor.
+ LiveRangeEdit LREdit(VirtReg, NewVRegs, this);
+ SE->reset(LREdit, SplitSpillMode);
+
+ // Assign all edge bundles to the preferred candidate, or NoCand.
+ BundleCand.assign(Bundles->getNumBundles(), NoCand);
+
+ // Assign bundles for the best candidate region.
+ if (BestCand != NoCand) {
+ GlobalSplitCandidate &Cand = GlobalCand[BestCand];
+ if (unsigned B = Cand.getBundles(BundleCand, BestCand)) {
+ UsedCands.push_back(BestCand);
+ Cand.IntvIdx = SE->openIntv();
+ DEBUG(dbgs() << "Split for " << PrintReg(Cand.PhysReg, TRI) << " in "
+ << B << " bundles, intv " << Cand.IntvIdx << ".\n");
+ (void)B;
+ }
+ }
+
+ // Assign bundles for the compact region.
+ if (HasCompact) {
+ GlobalSplitCandidate &Cand = GlobalCand.front();
+ assert(!Cand.PhysReg && "Compact region has no physreg");
+ if (unsigned B = Cand.getBundles(BundleCand, 0)) {
+ UsedCands.push_back(0);
+ Cand.IntvIdx = SE->openIntv();
+ DEBUG(dbgs() << "Split for compact region in " << B << " bundles, intv "
+ << Cand.IntvIdx << ".\n");
+ (void)B;
+ }
+ }
+
+ splitAroundRegion(LREdit, UsedCands);
return 0;
}
//===----------------------------------------------------------------------===//
+// Per-Block Splitting
+//===----------------------------------------------------------------------===//
+
+/// tryBlockSplit - Split a global live range around every block with uses. This
+/// creates a lot of local live ranges that will be split by tryLocalSplit if
+/// they don't allocate.
+unsigned RAGreedy::tryBlockSplit(LiveInterval &VirtReg, AllocationOrder &Order,
+ SmallVectorImpl<LiveInterval*> &NewVRegs) {
+ assert(&SA->getParent() == &VirtReg && "Live range wasn't analyzed");
+ unsigned Reg = VirtReg.reg;
+ bool SingleInstrs = RegClassInfo.isProperSubClass(MRI->getRegClass(Reg));
+ LiveRangeEdit LREdit(VirtReg, NewVRegs, this);
+ SE->reset(LREdit, SplitSpillMode);
+ ArrayRef<SplitAnalysis::BlockInfo> UseBlocks = SA->getUseBlocks();
+ for (unsigned i = 0; i != UseBlocks.size(); ++i) {
+ const SplitAnalysis::BlockInfo &BI = UseBlocks[i];
+ if (SA->shouldSplitSingleBlock(BI, SingleInstrs))
+ SE->splitSingleBlock(BI);
+ }
+ // No blocks were split.
+ if (LREdit.empty())
+ return 0;
+
+ // We did split for some blocks.
+ SmallVector<unsigned, 8> IntvMap;
+ SE->finish(&IntvMap);
+
+ // Tell LiveDebugVariables about the new ranges.
+ DebugVars->splitRegister(Reg, LREdit.regs());
+
+ ExtraRegInfo.resize(MRI->getNumVirtRegs());
+
+ // Sort out the new intervals created by splitting. The remainder interval
+ // goes straight to spilling, the new local ranges get to stay RS_New.
+ for (unsigned i = 0, e = LREdit.size(); i != e; ++i) {
+ LiveInterval &LI = *LREdit.get(i);
+ if (getStage(LI) == RS_New && IntvMap[i] == 0)
+ setStage(LI, RS_Spill);
+ }
+
+ if (VerifyEnabled)
+ MF->verify(this, "After splitting live range around basic blocks");
+ return 0;
+}
+
+//===----------------------------------------------------------------------===//
// Local Splitting
//===----------------------------------------------------------------------===//
@@ -1045,8 +1272,10 @@ void RAGreedy::calcGapWeights(unsigned PhysReg,
const unsigned NumGaps = Uses.size()-1;
// Start and end points for the interference check.
- SlotIndex StartIdx = BI.LiveIn ? BI.FirstUse.getBaseIndex() : BI.FirstUse;
- SlotIndex StopIdx = BI.LiveOut ? BI.LastUse.getBoundaryIndex() : BI.LastUse;
+ SlotIndex StartIdx =
+ BI.LiveIn ? BI.FirstInstr.getBaseIndex() : BI.FirstInstr;
+ SlotIndex StopIdx =
+ BI.LiveOut ? BI.LastInstr.getBoundaryIndex() : BI.LastInstr;
GapWeight.assign(NumGaps, 0.0f);
@@ -1056,8 +1285,8 @@ void RAGreedy::calcGapWeights(unsigned PhysReg,
.checkInterference())
continue;
- // We know that VirtReg is a continuous interval from FirstUse to LastUse,
- // so we don't need InterferenceQuery.
+ // We know that VirtReg is a continuous interval from FirstInstr to
+ // LastInstr, so we don't need InterferenceQuery.
//
// Interference that overlaps an instruction is counted in both gaps
// surrounding the instruction. The exception is interference before
@@ -1097,8 +1326,8 @@ unsigned RAGreedy::tryLocalSplit(LiveInterval &VirtReg, AllocationOrder &Order,
// while only covering a single block - A phi-def can use undef values from
// predecessors, and the block could be a single-block loop.
// We don't bother doing anything clever about such a case, we simply assume
- // that the interval is continuous from FirstUse to LastUse. We should make
- // sure that we don't do anything illegal to such an interval, though.
+ // that the interval is continuous from FirstInstr to LastInstr. We should
+ // make sure that we don't do anything illegal to such an interval, though.
const SmallVectorImpl<SlotIndex> &Uses = SA->UseSlots;
if (Uses.size() <= 2)
@@ -1120,17 +1349,17 @@ unsigned RAGreedy::tryLocalSplit(LiveInterval &VirtReg, AllocationOrder &Order,
//
// Instead we use these rules:
//
- // 1. Allow any split for ranges with getStage() < RS_Local. (Except for the
+ // 1. Allow any split for ranges with getStage() < RS_Split2. (Except for the
// noop split, of course).
- // 2. Require progress be made for ranges with getStage() >= RS_Local. All
+ // 2. Require progress be made for ranges with getStage() == RS_Split2. All
// the new ranges must have fewer instructions than before the split.
- // 3. New ranges with the same number of instructions are marked RS_Local,
+ // 3. New ranges with the same number of instructions are marked RS_Split2,
// smaller ranges are marked RS_New.
//
// These rules allow a 3 -> 2+3 split once, which we need. They also prevent
// excessive splitting and infinite loops.
//
- bool ProgressRequired = getStage(VirtReg) >= RS_Local;
+ bool ProgressRequired = getStage(VirtReg) >= RS_Split2;
// Best split candidate.
unsigned BestBefore = NumGaps;
@@ -1249,7 +1478,7 @@ unsigned RAGreedy::tryLocalSplit(LiveInterval &VirtReg, AllocationOrder &Order,
DebugVars->splitRegister(VirtReg.reg, LREdit.regs());
// If the new range has the same number of instructions as before, mark it as
- // RS_Local so the next split will be forced to make progress. Otherwise,
+ // RS_Split2 so the next split will be forced to make progress. Otherwise,
// leave the new intervals as RS_New so they can compete.
bool LiveBefore = BestBefore != 0 || BI.LiveIn;
bool LiveAfter = BestAfter != NumGaps || BI.LiveOut;
@@ -1259,7 +1488,7 @@ unsigned RAGreedy::tryLocalSplit(LiveInterval &VirtReg, AllocationOrder &Order,
assert(!ProgressRequired && "Didn't make progress when it was required.");
for (unsigned i = 0, e = IntvMap.size(); i != e; ++i)
if (IntvMap[i] == 1) {
- setStage(*LREdit.get(i), RS_Local);
+ setStage(*LREdit.get(i), RS_Split2);
DEBUG(dbgs() << PrintReg(LREdit.get(i)->reg));
}
DEBUG(dbgs() << '\n');
@@ -1278,6 +1507,10 @@ unsigned RAGreedy::tryLocalSplit(LiveInterval &VirtReg, AllocationOrder &Order,
/// @return Physreg when VirtReg may be assigned and/or new NewVRegs.
unsigned RAGreedy::trySplit(LiveInterval &VirtReg, AllocationOrder &Order,
SmallVectorImpl<LiveInterval*>&NewVRegs) {
+ // Ranges must be Split2 or less.
+ if (getStage(VirtReg) >= RS_Spill)
+ return 0;
+
// Local intervals are handled separately.
if (LIS->intervalIsInOneMBB(VirtReg)) {
NamedRegionTimer T("Local Splitting", TimerGroupName, TimePassesIsEnabled);
@@ -1287,11 +1520,6 @@ unsigned RAGreedy::trySplit(LiveInterval &VirtReg, AllocationOrder &Order,
NamedRegionTimer T("Global Splitting", TimerGroupName, TimePassesIsEnabled);
- // Don't iterate global splitting.
- // Move straight to spilling if this range was produced by a global split.
- if (getStage(VirtReg) >= RS_Global)
- return 0;
-
SA->analyze(&VirtReg);
// FIXME: SplitAnalysis may repair broken live ranges coming from the
@@ -1305,24 +1533,17 @@ unsigned RAGreedy::trySplit(LiveInterval &VirtReg, AllocationOrder &Order,
return PhysReg;
}
- // First try to split around a region spanning multiple blocks.
- unsigned PhysReg = tryRegionSplit(VirtReg, Order, NewVRegs);
- if (PhysReg || !NewVRegs.empty())
- return PhysReg;
-
- // Then isolate blocks with multiple uses.
- SplitAnalysis::BlockPtrSet Blocks;
- if (SA->getMultiUseBlocks(Blocks)) {
- LiveRangeEdit LREdit(VirtReg, NewVRegs, this);
- SE->reset(LREdit);
- SE->splitSingleBlocks(Blocks);
- setStage(NewVRegs.begin(), NewVRegs.end(), RS_Global);
- if (VerifyEnabled)
- MF->verify(this, "After splitting live range around basic blocks");
+ // First try to split around a region spanning multiple blocks. RS_Split2
+ // ranges already made dubious progress with region splitting, so they go
+ // straight to single block splitting.
+ if (getStage(VirtReg) < RS_Split2) {
+ unsigned PhysReg = tryRegionSplit(VirtReg, Order, NewVRegs);
+ if (PhysReg || !NewVRegs.empty())
+ return PhysReg;
}
- // Don't assign any physregs.
- return 0;
+ // Then isolate blocks.
+ return tryBlockSplit(VirtReg, Order, NewVRegs);
}
@@ -1342,9 +1563,9 @@ unsigned RAGreedy::selectOrSplit(LiveInterval &VirtReg,
<< " Cascade " << ExtraRegInfo[VirtReg.reg].Cascade << '\n');
// Try to evict a less worthy live range, but only for ranges from the primary
- // queue. The RS_Second ranges already failed to do this, and they should not
+ // queue. The RS_Split ranges already failed to do this, and they should not
// get a second chance until they have been split.
- if (Stage != RS_Second)
+ if (Stage != RS_Split)
if (unsigned PhysReg = tryEvict(VirtReg, Order, NewVRegs))
return PhysReg;
@@ -1353,8 +1574,8 @@ unsigned RAGreedy::selectOrSplit(LiveInterval &VirtReg,
// The first time we see a live range, don't try to split or spill.
// Wait until the second time, when all smaller ranges have been allocated.
// This gives a better picture of the interference to split around.
- if (Stage == RS_First) {
- setStage(VirtReg, RS_Second);
+ if (Stage < RS_Split) {
+ setStage(VirtReg, RS_Split);
DEBUG(dbgs() << "wait for second round\n");
NewVRegs.push_back(&VirtReg);
return 0;
@@ -1362,7 +1583,7 @@ unsigned RAGreedy::selectOrSplit(LiveInterval &VirtReg,
// If we couldn't allocate a register from spilling, there is probably some
// invalid inline assembly. The base class wil report it.
- if (Stage >= RS_Spill || !VirtReg.isSpillable())
+ if (Stage >= RS_Done || !VirtReg.isSpillable())
return ~0u;
// Try splitting VirtReg or interferences.
@@ -1374,7 +1595,7 @@ unsigned RAGreedy::selectOrSplit(LiveInterval &VirtReg,
NamedRegionTimer T("Spiller", TimerGroupName, TimePassesIsEnabled);
LiveRangeEdit LRE(VirtReg, NewVRegs, this);
spiller().spill(LRE);
- setStage(NewVRegs.begin(), NewVRegs.end(), RS_Spill);
+ setStage(NewVRegs.begin(), NewVRegs.end(), RS_Done);
if (VerifyEnabled)
MF->verify(this, "After spilling");
@@ -1408,6 +1629,7 @@ bool RAGreedy::runOnMachineFunction(MachineFunction &mf) {
ExtraRegInfo.resize(MRI->getNumVirtRegs());
NextCascade = 1;
IntfCache.init(MF, &PhysReg2LiveUnion[0], Indexes, TRI);
+ GlobalCand.resize(32); // This will grow as needed.
allocatePhysRegs();
addMBBLiveIns(MF);
@@ -1420,7 +1642,10 @@ bool RAGreedy::runOnMachineFunction(MachineFunction &mf) {
}
// Write out new DBG_VALUE instructions.
- DebugVars->emitDebugValues(VRM);
+ {
+ NamedRegionTimer T("Emit Debug Info", TimerGroupName, TimePassesIsEnabled);
+ DebugVars->emitDebugValues(VRM);
+ }
// The pass output is in VirtRegMap. Release all the transient data.
releaseMemory();
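
The braces added around emitDebugValues above are the usual RAII timing idiom: the timer object starts in its constructor and stops when the scope closes. A generic stand-in (not LLVM's NamedRegionTimer) for readers unfamiliar with the pattern:

    #include <chrono>
    #include <cstdio>

    struct ScopedTimer {
      const char *Name;
      std::chrono::steady_clock::time_point Start;
      explicit ScopedTimer(const char *N)
          : Name(N), Start(std::chrono::steady_clock::now()) {}
      ~ScopedTimer() {
        long long Us = std::chrono::duration_cast<std::chrono::microseconds>(
                           std::chrono::steady_clock::now() - Start).count();
        std::printf("%s: %lld us\n", Name, Us);
      }
    };

    int main() {
      {
        ScopedTimer T("Emit Debug Info"); // only this block is measured
        // ... work to be timed ...
      } // ~ScopedTimer prints here
    }
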
diff --git a/lib/CodeGen/RegAllocLinearScan.cpp b/lib/CodeGen/RegAllocLinearScan.cpp
index 0dd3c598..ce3fb90 100644
--- a/lib/CodeGen/RegAllocLinearScan.cpp
+++ b/lib/CodeGen/RegAllocLinearScan.cpp
@@ -18,7 +18,6 @@
#include "VirtRegRewriter.h"
#include "RegisterClassInfo.h"
#include "Spiller.h"
-#include "RegisterCoalescer.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Function.h"
#include "llvm/CodeGen/CalcSpillWeights.h"
@@ -209,7 +208,7 @@ namespace {
AU.addRequiredID(StrongPHIEliminationID);
// Make sure PassManager knows which analyses to make available
// to coalescing and which analyses coalescing invalidates.
- AU.addRequiredTransitive<RegisterCoalescer>();
+ AU.addRequiredTransitiveID(RegisterCoalescerPassID);
AU.addRequired<CalculateSpillWeights>();
AU.addRequiredID(LiveStacksID);
AU.addPreservedID(LiveStacksID);
diff --git a/lib/CodeGen/RegAllocPBQP.cpp b/lib/CodeGen/RegAllocPBQP.cpp
index 72230d4..0d2cf2d 100644
--- a/lib/CodeGen/RegAllocPBQP.cpp
+++ b/lib/CodeGen/RegAllocPBQP.cpp
@@ -450,7 +450,7 @@ void RegAllocPBQP::getAnalysisUsage(AnalysisUsage &au) const {
au.addPreserved<SlotIndexes>();
au.addRequired<LiveIntervals>();
//au.addRequiredID(SplitCriticalEdgesID);
- au.addRequired<RegisterCoalescer>();
+ au.addRequiredID(RegisterCoalescerPassID);
if (customPassID)
au.addRequiredID(*customPassID);
au.addRequired<CalculateSpillWeights>();
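
Both hunks above switch clients from naming the RegisterCoalescer type to depending on an exported pass ID, which is what lets the class itself move into an anonymous namespace (see the RegisterCoalescer.cpp changes below). A minimal sketch of that pattern, with illustrative names:

    #include <cstdio>

    namespace {
    struct HiddenPass {
      static char ID; // the *address* of ID identifies the pass
    };
    char HiddenPass::ID = 0;
    } // end anonymous namespace

    // The only symbol clients ever see:
    char &HiddenPassID = HiddenPass::ID;

    // A client can record the dependency without HiddenPass's definition.
    void addRequiredTransitiveID(char &ID) {
      std::printf("depends on pass @%p\n", (void *)&ID);
    }

    int main() { addRequiredTransitiveID(HiddenPassID); }
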
diff --git a/lib/CodeGen/RegisterClassInfo.cpp b/lib/CodeGen/RegisterClassInfo.cpp
index 5a77e47..786d279 100644
--- a/lib/CodeGen/RegisterClassInfo.cpp
+++ b/lib/CodeGen/RegisterClassInfo.cpp
@@ -99,11 +99,16 @@ void RegisterClassInfo::compute(const TargetRegisterClass *RC) const {
// CSR aliases go after the volatile registers, preserve the target's order.
std::copy(CSRAlias.begin(), CSRAlias.end(), &RCI.Order[N]);
+ // Check if RC is a proper sub-class.
+ if (const TargetRegisterClass *Super = TRI->getLargestLegalSuperClass(RC))
+ if (Super != RC && getNumAllocatableRegs(Super) > RCI.NumRegs)
+ RCI.ProperSubClass = true;
+
DEBUG({
dbgs() << "AllocationOrder(" << RC->getName() << ") = [";
for (unsigned I = 0; I != RCI.NumRegs; ++I)
dbgs() << ' ' << PrintReg(RCI.Order[I], TRI);
- dbgs() << " ]\n";
+ dbgs() << (RCI.ProperSubClass ? " ] (sub-class)\n" : " ]\n");
});
// RCI is now up-to-date.
diff --git a/lib/CodeGen/RegisterClassInfo.h b/lib/CodeGen/RegisterClassInfo.h
index d21fd67..2c14070 100644
--- a/lib/CodeGen/RegisterClassInfo.h
+++ b/lib/CodeGen/RegisterClassInfo.h
@@ -28,11 +28,12 @@ class RegisterClassInfo {
struct RCInfo {
unsigned Tag;
unsigned NumRegs;
+ bool ProperSubClass;
OwningArrayPtr<unsigned> Order;
- RCInfo() : Tag(0), NumRegs(0) {}
+ RCInfo() : Tag(0), NumRegs(0), ProperSubClass(false) {}
operator ArrayRef<unsigned>() const {
- return ArrayRef<unsigned>(Order.get(), NumRegs);
+ return makeArrayRef(Order.get(), NumRegs);
}
};
@@ -87,6 +88,16 @@ public:
return get(RC);
}
+ /// isProperSubClass - Returns true if RC has a legal super-class with more
+ /// allocatable registers.
+ ///
+ /// Register classes like GR32_NOSP are not proper sub-classes because %esp
+ /// is not allocatable. Similarly, tGPR is not a proper sub-class in Thumb
+ /// mode because the GPR super-class is not legal.
+ bool isProperSubClass(const TargetRegisterClass *RC) const {
+ return get(RC).ProperSubClass;
+ }
+
/// getLastCalleeSavedAlias - Returns the last callee saved register that
/// overlaps PhysReg, or 0 if Reg doesn't overlap a CSR.
unsigned getLastCalleeSavedAlias(unsigned PhysReg) const {
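
To make the GR32_NOSP example above concrete, here is a toy version of the proper-sub-class test added in compute(): a class is a proper sub-class only when some legal super-class actually allocates more registers. All names and counts here are illustrative, not target data.

    #include <cstdio>

    struct ToyRegClass {
      const char *Name;
      unsigned NumAllocatable;               // reserved regs already excluded
      const ToyRegClass *LargestLegalSuper;  // null if none
    };

    bool isProperSubClass(const ToyRegClass &RC) {
      const ToyRegClass *Super = RC.LargestLegalSuper;
      return Super && Super != &RC && Super->NumAllocatable > RC.NumAllocatable;
    }

    int main() {
      // %esp is reserved, so GR32 allocates the same 7 registers as
      // GR32_NOSP; the sub-class is therefore *not* proper.
      ToyRegClass GR32 = {"GR32", 7, 0};
      GR32.LargestLegalSuper = &GR32;
      ToyRegClass GR32_NOSP = {"GR32_NOSP", 7, &GR32};
      std::printf("%d\n", isProperSubClass(GR32_NOSP)); // 0
    }
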
diff --git a/lib/CodeGen/RegisterCoalescer.cpp b/lib/CodeGen/RegisterCoalescer.cpp
index b91f92c..9b414d6 100644
--- a/lib/CodeGen/RegisterCoalescer.cpp
+++ b/lib/CodeGen/RegisterCoalescer.cpp
@@ -15,8 +15,9 @@
#define DEBUG_TYPE "regcoalescing"
#include "RegisterCoalescer.h"
-#include "VirtRegMap.h"
#include "LiveDebugVariables.h"
+#include "RegisterClassInfo.h"
+#include "VirtRegMap.h"
#include "llvm/Pass.h"
#include "llvm/Value.h"
@@ -54,6 +55,7 @@ STATISTIC(numExtends , "Number of copies extended");
STATISTIC(NumReMats , "Number of instructions re-materialized");
STATISTIC(numPeep , "Number of identity moves eliminated after coalescing");
STATISTIC(numAborts , "Number of times interval joining aborted");
+STATISTIC(NumInflated , "Number of register classes inflated");
static cl::opt<bool>
EnableJoining("join-liveintervals",
@@ -75,6 +77,128 @@ VerifyCoalescing("verify-coalescing",
cl::desc("Verify machine instrs before and after register coalescing"),
cl::Hidden);
+namespace {
+ class RegisterCoalescer : public MachineFunctionPass {
+ MachineFunction* MF;
+ MachineRegisterInfo* MRI;
+ const TargetMachine* TM;
+ const TargetRegisterInfo* TRI;
+ const TargetInstrInfo* TII;
+ LiveIntervals *LIS;
+ LiveDebugVariables *LDV;
+ const MachineLoopInfo* Loops;
+ AliasAnalysis *AA;
+ RegisterClassInfo RegClassInfo;
+
+ /// JoinedCopies - Keep track of copies eliminated due to coalescing.
+ ///
+ SmallPtrSet<MachineInstr*, 32> JoinedCopies;
+
+ /// ReMatCopies - Keep track of copies eliminated due to remat.
+ ///
+ SmallPtrSet<MachineInstr*, 32> ReMatCopies;
+
+ /// ReMatDefs - Keep track of definition instructions which have
+ /// been remat'ed.
+ SmallPtrSet<MachineInstr*, 8> ReMatDefs;
+
+ /// joinIntervals - join compatible live intervals
+ void joinIntervals();
+
+ /// CopyCoalesceInMBB - Coalesce copies in the specified MBB, putting
+ /// copies that cannot yet be coalesced into the "TryAgain" list.
+ void CopyCoalesceInMBB(MachineBasicBlock *MBB,
+ std::vector<MachineInstr*> &TryAgain);
+
+ /// JoinCopy - Attempt to join intervals corresponding to SrcReg/DstReg,
+ /// which are the src/dst of the copy instruction CopyMI. This returns
+ /// true if the copy was successfully coalesced away. If it is not
+ /// currently possible to coalesce this interval, but it may be possible if
+ /// other things get coalesced, then it returns true by reference in
+ /// 'Again'.
+ bool JoinCopy(MachineInstr *TheCopy, bool &Again);
+
+ /// JoinIntervals - Attempt to join these two intervals. On failure, this
+ /// returns false. The output "SrcInt" will not have been modified, so we
+ /// can use this information below to update aliases.
+ bool JoinIntervals(CoalescerPair &CP);
+
+ /// AdjustCopiesBackFrom - We found a non-trivially-coalescable copy. If
+ /// the source value number is defined by a copy from the destination reg,
+ /// see if we can merge these two destination reg valno# into a single
+ /// value number, eliminating a copy.
+ bool AdjustCopiesBackFrom(const CoalescerPair &CP, MachineInstr *CopyMI);
+
+ /// HasOtherReachingDefs - Return true if there are definitions of IntB
+ /// other than BValNo val# that can reach uses of AValNo val# of IntA.
+ bool HasOtherReachingDefs(LiveInterval &IntA, LiveInterval &IntB,
+ VNInfo *AValNo, VNInfo *BValNo);
+
+ /// RemoveCopyByCommutingDef - We found a non-trivially-coalescable copy.
+ /// If the source value number is defined by a commutable instruction and
+ /// its other operand is coalesced to the copy dest register, see if we
+ /// can transform the copy into a noop by commuting the definition.
+ bool RemoveCopyByCommutingDef(const CoalescerPair &CP,MachineInstr *CopyMI);
+
+ /// ReMaterializeTrivialDef - If the source of a copy is defined by a
+ /// trivial computation, replace the copy by rematerializing the definition.
+ /// If PreserveSrcInt is true, make sure SrcInt is valid after the call.
+ bool ReMaterializeTrivialDef(LiveInterval &SrcInt, bool PreserveSrcInt,
+ unsigned DstReg, MachineInstr *CopyMI);
+
+ /// shouldJoinPhys - Return true if a physreg copy should be joined.
+ bool shouldJoinPhys(CoalescerPair &CP);
+
+ /// isWinToJoinCrossClass - Return true if it's profitable to coalesce
+ /// two virtual registers from different register classes.
+ bool isWinToJoinCrossClass(unsigned SrcReg,
+ unsigned DstReg,
+ const TargetRegisterClass *SrcRC,
+ const TargetRegisterClass *DstRC,
+ const TargetRegisterClass *NewRC);
+
+ /// UpdateRegDefsUses - Replace all defs and uses of SrcReg to DstReg and
+ /// update the subregister number if it is not zero. If DstReg is a
+ /// physical register and the existing subregister number of the def / use
+ /// being updated is not zero, make sure to set it to the correct physical
+ /// subregister.
+ void UpdateRegDefsUses(const CoalescerPair &CP);
+
+ /// RemoveDeadDef - If a def of a live interval is now determined dead,
+ /// remove the val# it defines. If the live interval becomes empty, remove
+ /// it as well.
+ bool RemoveDeadDef(LiveInterval &li, MachineInstr *DefMI);
+
+ /// RemoveCopyFlag - If DstReg is no longer defined by CopyMI, clear the
+ /// VNInfo copy flag for DstReg and all aliases.
+ void RemoveCopyFlag(unsigned DstReg, const MachineInstr *CopyMI);
+
+ /// markAsJoined - Remember that CopyMI has already been joined.
+ void markAsJoined(MachineInstr *CopyMI);
+
+ /// eliminateUndefCopy - Handle copies of undef values.
+ bool eliminateUndefCopy(MachineInstr *CopyMI, const CoalescerPair &CP);
+
+ public:
+ static char ID; // Class identification, replacement for typeinfo
+ RegisterCoalescer() : MachineFunctionPass(ID) {
+ initializeRegisterCoalescerPass(*PassRegistry::getPassRegistry());
+ }
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const;
+
+ virtual void releaseMemory();
+
+ /// runOnMachineFunction - pass entry point
+ virtual bool runOnMachineFunction(MachineFunction&);
+
+ /// print - Implement the dump method.
+ virtual void print(raw_ostream &O, const Module* = 0) const;
+ };
+} // end anonymous namespace
+
+char &llvm::RegisterCoalescerPassID = RegisterCoalescer::ID;
+
INITIALIZE_PASS_BEGIN(RegisterCoalescer, "simple-register-coalescing",
"Simple Register Coalescing", false, false)
INITIALIZE_PASS_DEPENDENCY(LiveIntervals)
@@ -116,14 +240,14 @@ static bool isMoveInstr(const TargetRegisterInfo &tri, const MachineInstr *MI,
}
bool CoalescerPair::setRegisters(const MachineInstr *MI) {
- srcReg_ = dstReg_ = subIdx_ = 0;
- newRC_ = 0;
- flipped_ = crossClass_ = false;
+ SrcReg = DstReg = SubIdx = 0;
+ NewRC = 0;
+ Flipped = CrossClass = false;
unsigned Src, Dst, SrcSub, DstSub;
- if (!isMoveInstr(tri_, MI, Src, Dst, SrcSub, DstSub))
+ if (!isMoveInstr(TRI, MI, Src, Dst, SrcSub, DstSub))
return false;
- partial_ = SrcSub || DstSub;
+ Partial = SrcSub || DstSub;
// If one register is a physreg, it must be Dst.
if (TargetRegisterInfo::isPhysicalRegister(Src)) {
@@ -131,7 +255,7 @@ bool CoalescerPair::setRegisters(const MachineInstr *MI) {
return false;
std::swap(Src, Dst);
std::swap(SrcSub, DstSub);
- flipped_ = true;
+ Flipped = true;
}
const MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo();
@@ -139,14 +263,14 @@ bool CoalescerPair::setRegisters(const MachineInstr *MI) {
if (TargetRegisterInfo::isPhysicalRegister(Dst)) {
// Eliminate DstSub on a physreg.
if (DstSub) {
- Dst = tri_.getSubReg(Dst, DstSub);
+ Dst = TRI.getSubReg(Dst, DstSub);
if (!Dst) return false;
DstSub = 0;
}
// Eliminate SrcSub by picking a corresponding Dst superregister.
if (SrcSub) {
- Dst = tri_.getMatchingSuperReg(Dst, SrcSub, MRI.getRegClass(Src));
+ Dst = TRI.getMatchingSuperReg(Dst, SrcSub, MRI.getRegClass(Src));
if (!Dst) return false;
SrcSub = 0;
} else if (!MRI.getRegClass(Src)->contains(Dst)) {
@@ -164,7 +288,7 @@ bool CoalescerPair::setRegisters(const MachineInstr *MI) {
return false;
const TargetRegisterClass *SrcRC = MRI.getRegClass(Src);
const TargetRegisterClass *DstRC = MRI.getRegClass(Dst);
- if (!getCommonSubClass(DstRC, SrcRC))
+ if (!TRI.getCommonSubClass(DstRC, SrcRC))
return false;
SrcSub = DstSub = 0;
}
@@ -174,36 +298,36 @@ bool CoalescerPair::setRegisters(const MachineInstr *MI) {
std::swap(Src, Dst);
DstSub = SrcSub;
SrcSub = 0;
- assert(!flipped_ && "Unexpected flip");
- flipped_ = true;
+ assert(!Flipped && "Unexpected flip");
+ Flipped = true;
}
// Find the new register class.
const TargetRegisterClass *SrcRC = MRI.getRegClass(Src);
const TargetRegisterClass *DstRC = MRI.getRegClass(Dst);
if (DstSub)
- newRC_ = tri_.getMatchingSuperRegClass(DstRC, SrcRC, DstSub);
+ NewRC = TRI.getMatchingSuperRegClass(DstRC, SrcRC, DstSub);
else
- newRC_ = getCommonSubClass(DstRC, SrcRC);
- if (!newRC_)
+ NewRC = TRI.getCommonSubClass(DstRC, SrcRC);
+ if (!NewRC)
return false;
- crossClass_ = newRC_ != DstRC || newRC_ != SrcRC;
+ CrossClass = NewRC != DstRC || NewRC != SrcRC;
}
// Check our invariants
assert(TargetRegisterInfo::isVirtualRegister(Src) && "Src must be virtual");
assert(!(TargetRegisterInfo::isPhysicalRegister(Dst) && DstSub) &&
"Cannot have a physical SubIdx");
- srcReg_ = Src;
- dstReg_ = Dst;
- subIdx_ = DstSub;
+ SrcReg = Src;
+ DstReg = Dst;
+ SubIdx = DstSub;
return true;
}
bool CoalescerPair::flip() {
- if (subIdx_ || TargetRegisterInfo::isPhysicalRegister(dstReg_))
+ if (SubIdx || TargetRegisterInfo::isPhysicalRegister(DstReg))
return false;
- std::swap(srcReg_, dstReg_);
- flipped_ = !flipped_;
+ std::swap(SrcReg, DstReg);
+ Flipped = !Flipped;
return true;
}
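
A toy model of the normalization that setRegisters performs before flip() can be used: whenever one side of the copy is physical it is moved to Dst and the pair is marked Flipped. This is simplified; the real code also handles sub-register indices and register classes.

    #include <cstdio>

    struct ToyPair { unsigned Src, Dst; bool Flipped; };

    bool isPhysical(unsigned Reg) { return Reg < 256; } // toy numbering

    // Returns false when both sides are physical: such copies are not joined.
    bool setRegistersSketch(ToyPair &P) {
      if (isPhysical(P.Src)) {
        if (isPhysical(P.Dst))
          return false;
        unsigned Tmp = P.Src; P.Src = P.Dst; P.Dst = Tmp;
        P.Flipped = true; // remember the swap for later queries
      }
      return true;
    }

    int main() {
      ToyPair P = {42, 1000, false}; // physical source, virtual destination
      setRegistersSketch(P);
      std::printf("Src=%u Dst=%u Flipped=%d\n", P.Src, P.Dst, P.Flipped);
    }
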
@@ -211,36 +335,36 @@ bool CoalescerPair::isCoalescable(const MachineInstr *MI) const {
if (!MI)
return false;
unsigned Src, Dst, SrcSub, DstSub;
- if (!isMoveInstr(tri_, MI, Src, Dst, SrcSub, DstSub))
+ if (!isMoveInstr(TRI, MI, Src, Dst, SrcSub, DstSub))
return false;
- // Find the virtual register that is srcReg_.
- if (Dst == srcReg_) {
+ // Find the virtual register that is SrcReg.
+ if (Dst == SrcReg) {
std::swap(Src, Dst);
std::swap(SrcSub, DstSub);
- } else if (Src != srcReg_) {
+ } else if (Src != SrcReg) {
return false;
}
- // Now check that Dst matches dstReg_.
- if (TargetRegisterInfo::isPhysicalRegister(dstReg_)) {
+ // Now check that Dst matches DstReg.
+ if (TargetRegisterInfo::isPhysicalRegister(DstReg)) {
if (!TargetRegisterInfo::isPhysicalRegister(Dst))
return false;
- assert(!subIdx_ && "Inconsistent CoalescerPair state.");
+ assert(!SubIdx && "Inconsistent CoalescerPair state.");
// DstSub could be set for a physreg from INSERT_SUBREG.
if (DstSub)
- Dst = tri_.getSubReg(Dst, DstSub);
+ Dst = TRI.getSubReg(Dst, DstSub);
// Full copy of Src.
if (!SrcSub)
- return dstReg_ == Dst;
+ return DstReg == Dst;
// This is a partial register copy. Check that the parts match.
- return tri_.getSubReg(dstReg_, SrcSub) == Dst;
+ return TRI.getSubReg(DstReg, SrcSub) == Dst;
} else {
- // dstReg_ is virtual.
- if (dstReg_ != Dst)
+ // DstReg is virtual.
+ if (DstReg != Dst)
return false;
// Registers match, do the subregisters line up?
- return compose(tri_, subIdx_, SrcSub) == DstSub;
+ return compose(TRI, SubIdx, SrcSub) == DstSub;
}
}
@@ -292,14 +416,14 @@ bool RegisterCoalescer::AdjustCopiesBackFrom(const CoalescerPair &CP,
MachineInstr *CopyMI) {
// Bail if there is no dst interval - can happen when merging physical subreg
// operations.
- if (!li_->hasInterval(CP.getDstReg()))
+ if (!LIS->hasInterval(CP.getDstReg()))
return false;
LiveInterval &IntA =
- li_->getInterval(CP.isFlipped() ? CP.getDstReg() : CP.getSrcReg());
+ LIS->getInterval(CP.isFlipped() ? CP.getDstReg() : CP.getSrcReg());
LiveInterval &IntB =
- li_->getInterval(CP.isFlipped() ? CP.getSrcReg() : CP.getDstReg());
- SlotIndex CopyIdx = li_->getInstructionIndex(CopyMI).getDefIndex();
+ LIS->getInterval(CP.isFlipped() ? CP.getSrcReg() : CP.getDstReg());
+ SlotIndex CopyIdx = LIS->getInstructionIndex(CopyMI).getDefIndex();
// BValNo is a value number in B that is defined by a copy from A. 'B3' in
// the example above.
@@ -355,7 +479,7 @@ bool RegisterCoalescer::AdjustCopiesBackFrom(const CoalescerPair &CP,
// Make sure that the end of the live range is inside the same block as
// CopyMI.
MachineInstr *ValLREndInst =
- li_->getInstructionFromIndex(ValLR->end.getPrevSlot());
+ LIS->getInstructionFromIndex(ValLR->end.getPrevSlot());
if (!ValLREndInst || ValLREndInst->getParent() != CopyMI->getParent())
return false;
@@ -368,11 +492,11 @@ bool RegisterCoalescer::AdjustCopiesBackFrom(const CoalescerPair &CP,
// of its aliases is overlapping the live interval of the virtual register.
// If so, do not coalesce.
if (TargetRegisterInfo::isPhysicalRegister(IntB.reg)) {
- for (const unsigned *AS = tri_->getAliasSet(IntB.reg); *AS; ++AS)
- if (li_->hasInterval(*AS) && IntA.overlaps(li_->getInterval(*AS))) {
+ for (const unsigned *AS = TRI->getAliasSet(IntB.reg); *AS; ++AS)
+ if (LIS->hasInterval(*AS) && IntA.overlaps(LIS->getInterval(*AS))) {
DEBUG({
dbgs() << "\t\tInterfere with alias ";
- li_->getInterval(*AS).print(dbgs(), tri_);
+ LIS->getInterval(*AS).print(dbgs(), TRI);
});
return false;
}
@@ -380,7 +504,7 @@ bool RegisterCoalescer::AdjustCopiesBackFrom(const CoalescerPair &CP,
DEBUG({
dbgs() << "Extending: ";
- IntB.print(dbgs(), tri_);
+ IntB.print(dbgs(), TRI);
});
SlotIndex FillerStart = ValLR->end, FillerEnd = BLR->start;
@@ -398,13 +522,13 @@ bool RegisterCoalescer::AdjustCopiesBackFrom(const CoalescerPair &CP,
// If the IntB live range is assigned to a physical register, and if that
// physreg has sub-registers, update their live intervals as well.
if (TargetRegisterInfo::isPhysicalRegister(IntB.reg)) {
- for (const unsigned *SR = tri_->getSubRegisters(IntB.reg); *SR; ++SR) {
- if (!li_->hasInterval(*SR))
+ for (const unsigned *SR = TRI->getSubRegisters(IntB.reg); *SR; ++SR) {
+ if (!LIS->hasInterval(*SR))
continue;
- LiveInterval &SRLI = li_->getInterval(*SR);
+ LiveInterval &SRLI = LIS->getInterval(*SR);
SRLI.addRange(LiveRange(FillerStart, FillerEnd,
SRLI.getNextValue(FillerStart, 0,
- li_->getVNInfoAllocator())));
+ LIS->getVNInfoAllocator())));
}
}
@@ -419,7 +543,7 @@ bool RegisterCoalescer::AdjustCopiesBackFrom(const CoalescerPair &CP,
}
DEBUG({
dbgs() << " result = ";
- IntB.print(dbgs(), tri_);
+ IntB.print(dbgs(), TRI);
dbgs() << "\n";
});
@@ -434,7 +558,7 @@ bool RegisterCoalescer::AdjustCopiesBackFrom(const CoalescerPair &CP,
// merge, find the last use and trim the live range. That will also add the
// isKill marker.
if (ALR->end == CopyIdx)
- li_->shrinkToUses(&IntA);
+ LIS->shrinkToUses(&IntA);
++numExtends;
return true;
@@ -498,15 +622,15 @@ bool RegisterCoalescer::RemoveCopyByCommutingDef(const CoalescerPair &CP,
return false;
// Bail if there is no dst interval.
- if (!li_->hasInterval(CP.getDstReg()))
+ if (!LIS->hasInterval(CP.getDstReg()))
return false;
- SlotIndex CopyIdx = li_->getInstructionIndex(CopyMI).getDefIndex();
+ SlotIndex CopyIdx = LIS->getInstructionIndex(CopyMI).getDefIndex();
LiveInterval &IntA =
- li_->getInterval(CP.isFlipped() ? CP.getDstReg() : CP.getSrcReg());
+ LIS->getInterval(CP.isFlipped() ? CP.getDstReg() : CP.getSrcReg());
LiveInterval &IntB =
- li_->getInterval(CP.isFlipped() ? CP.getSrcReg() : CP.getDstReg());
+ LIS->getInterval(CP.isFlipped() ? CP.getSrcReg() : CP.getDstReg());
// BValNo is a value number in B that is defined by a copy from A. 'B3' in
// the example above.
@@ -524,7 +648,7 @@ bool RegisterCoalescer::RemoveCopyByCommutingDef(const CoalescerPair &CP,
// the optimization.
if (AValNo->isPHIDef() || AValNo->isUnused() || AValNo->hasPHIKill())
return false;
- MachineInstr *DefMI = li_->getInstructionFromIndex(AValNo->def);
+ MachineInstr *DefMI = LIS->getInstructionFromIndex(AValNo->def);
if (!DefMI)
return false;
const MCInstrDesc &MCID = DefMI->getDesc();
@@ -538,7 +662,7 @@ bool RegisterCoalescer::RemoveCopyByCommutingDef(const CoalescerPair &CP,
if (!DefMI->isRegTiedToUseOperand(DefIdx, &UseOpIdx))
return false;
unsigned Op1, Op2, NewDstIdx;
- if (!tii_->findCommutedOpIndices(DefMI, Op1, Op2))
+ if (!TII->findCommutedOpIndices(DefMI, Op1, Op2))
return false;
if (Op1 == UseOpIdx)
NewDstIdx = Op2;
@@ -560,18 +684,18 @@ bool RegisterCoalescer::RemoveCopyByCommutingDef(const CoalescerPair &CP,
// Abort if the aliases of IntB.reg have values that are not simply the
// clobbers from the superreg.
if (TargetRegisterInfo::isPhysicalRegister(IntB.reg))
- for (const unsigned *AS = tri_->getAliasSet(IntB.reg); *AS; ++AS)
- if (li_->hasInterval(*AS) &&
- HasOtherReachingDefs(IntA, li_->getInterval(*AS), AValNo, 0))
+ for (const unsigned *AS = TRI->getAliasSet(IntB.reg); *AS; ++AS)
+ if (LIS->hasInterval(*AS) &&
+ HasOtherReachingDefs(IntA, LIS->getInterval(*AS), AValNo, 0))
return false;
// If some of the uses of IntA.reg are already coalesced away, return false.
// It's not possible to determine whether it's safe to perform the coalescing.
- for (MachineRegisterInfo::use_nodbg_iterator UI =
- mri_->use_nodbg_begin(IntA.reg),
- UE = mri_->use_nodbg_end(); UI != UE; ++UI) {
+ for (MachineRegisterInfo::use_nodbg_iterator UI =
+ MRI->use_nodbg_begin(IntA.reg),
+ UE = MRI->use_nodbg_end(); UI != UE; ++UI) {
MachineInstr *UseMI = &*UI;
- SlotIndex UseIdx = li_->getInstructionIndex(UseMI);
+ SlotIndex UseIdx = LIS->getInstructionIndex(UseMI);
LiveInterval::iterator ULR = IntA.FindLiveRangeContaining(UseIdx);
if (ULR == IntA.end())
continue;
@@ -585,15 +709,15 @@ bool RegisterCoalescer::RemoveCopyByCommutingDef(const CoalescerPair &CP,
// At this point we have decided that it is legal to do this
// transformation. Start by commuting the instruction.
MachineBasicBlock *MBB = DefMI->getParent();
- MachineInstr *NewMI = tii_->commuteInstruction(DefMI);
+ MachineInstr *NewMI = TII->commuteInstruction(DefMI);
if (!NewMI)
return false;
if (TargetRegisterInfo::isVirtualRegister(IntA.reg) &&
TargetRegisterInfo::isVirtualRegister(IntB.reg) &&
- !mri_->constrainRegClass(IntB.reg, mri_->getRegClass(IntA.reg)))
+ !MRI->constrainRegClass(IntB.reg, MRI->getRegClass(IntA.reg)))
return false;
if (NewMI != DefMI) {
- li_->ReplaceMachineInstrInMaps(DefMI, NewMI);
+ LIS->ReplaceMachineInstrInMaps(DefMI, NewMI);
MBB->insert(DefMI, NewMI);
MBB->erase(DefMI);
}
@@ -610,8 +734,8 @@ bool RegisterCoalescer::RemoveCopyByCommutingDef(const CoalescerPair &CP,
// = B
// Update uses of IntA of the specific Val# with IntB.
- for (MachineRegisterInfo::use_iterator UI = mri_->use_begin(IntA.reg),
- UE = mri_->use_end(); UI != UE;) {
+ for (MachineRegisterInfo::use_iterator UI = MRI->use_begin(IntA.reg),
+ UE = MRI->use_end(); UI != UE;) {
MachineOperand &UseMO = UI.getOperand();
MachineInstr *UseMI = &*UI;
++UI;
@@ -623,12 +747,12 @@ bool RegisterCoalescer::RemoveCopyByCommutingDef(const CoalescerPair &CP,
UseMO.setReg(NewReg);
continue;
}
- SlotIndex UseIdx = li_->getInstructionIndex(UseMI).getUseIndex();
+ SlotIndex UseIdx = LIS->getInstructionIndex(UseMI).getUseIndex();
LiveInterval::iterator ULR = IntA.FindLiveRangeContaining(UseIdx);
if (ULR == IntA.end() || ULR->valno != AValNo)
continue;
if (TargetRegisterInfo::isPhysicalRegister(NewReg))
- UseMO.substPhysReg(NewReg, *tri_);
+ UseMO.substPhysReg(NewReg, *TRI);
else
UseMO.setReg(NewReg);
if (UseMI == CopyMI)
@@ -674,27 +798,24 @@ bool RegisterCoalescer::RemoveCopyByCommutingDef(const CoalescerPair &CP,
bool RegisterCoalescer::ReMaterializeTrivialDef(LiveInterval &SrcInt,
bool preserveSrcInt,
unsigned DstReg,
- unsigned DstSubIdx,
MachineInstr *CopyMI) {
- SlotIndex CopyIdx = li_->getInstructionIndex(CopyMI).getUseIndex();
+ SlotIndex CopyIdx = LIS->getInstructionIndex(CopyMI).getUseIndex();
LiveInterval::iterator SrcLR = SrcInt.FindLiveRangeContaining(CopyIdx);
assert(SrcLR != SrcInt.end() && "Live range not found!");
VNInfo *ValNo = SrcLR->valno;
- // If other defs can reach uses of this def, then it's not safe to perform
- // the optimization.
- if (ValNo->isPHIDef() || ValNo->isUnused() || ValNo->hasPHIKill())
+ if (ValNo->isPHIDef() || ValNo->isUnused())
return false;
- MachineInstr *DefMI = li_->getInstructionFromIndex(ValNo->def);
+ MachineInstr *DefMI = LIS->getInstructionFromIndex(ValNo->def);
if (!DefMI)
return false;
assert(DefMI && "Defining instruction disappeared");
const MCInstrDesc &MCID = DefMI->getDesc();
if (!MCID.isAsCheapAsAMove())
return false;
- if (!tii_->isTriviallyReMaterializable(DefMI, AA))
+ if (!TII->isTriviallyReMaterializable(DefMI, AA))
return false;
bool SawStore = false;
- if (!DefMI->isSafeToMove(tii_, AA, SawStore))
+ if (!DefMI->isSafeToMove(TII, AA, SawStore))
return false;
if (MCID.getNumDefs() != 1)
return false;
@@ -702,36 +823,20 @@ bool RegisterCoalescer::ReMaterializeTrivialDef(LiveInterval &SrcInt,
// Make sure the copy destination register class fits the instruction
// definition register class. The mismatch can happen as a result of earlier
// extract_subreg, insert_subreg, subreg_to_reg coalescing.
- const TargetRegisterClass *RC = tii_->getRegClass(MCID, 0, tri_);
+ const TargetRegisterClass *RC = TII->getRegClass(MCID, 0, TRI);
if (TargetRegisterInfo::isVirtualRegister(DstReg)) {
- if (mri_->getRegClass(DstReg) != RC)
+ if (MRI->getRegClass(DstReg) != RC)
return false;
} else if (!RC->contains(DstReg))
return false;
}
- // If destination register has a sub-register index on it, make sure it
- // matches the instruction register class.
- if (DstSubIdx) {
- const MCInstrDesc &MCID = DefMI->getDesc();
- if (MCID.getNumDefs() != 1)
- return false;
- const TargetRegisterClass *DstRC = mri_->getRegClass(DstReg);
- const TargetRegisterClass *DstSubRC =
- DstRC->getSubRegisterRegClass(DstSubIdx);
- const TargetRegisterClass *DefRC = tii_->getRegClass(MCID, 0, tri_);
- if (DefRC == DstRC)
- DstSubIdx = 0;
- else if (DefRC != DstSubRC)
- return false;
- }
-
RemoveCopyFlag(DstReg, CopyMI);
MachineBasicBlock *MBB = CopyMI->getParent();
MachineBasicBlock::iterator MII =
llvm::next(MachineBasicBlock::iterator(CopyMI));
- tii_->reMaterialize(*MBB, MII, DstReg, DstSubIdx, DefMI, *tri_);
+ TII->reMaterialize(*MBB, MII, DstReg, 0, DefMI, *TRI);
MachineInstr *NewMI = prior(MII);
// CopyMI may have implicit operands, transfer them over to the newly
@@ -746,7 +851,7 @@ bool RegisterCoalescer::ReMaterializeTrivialDef(LiveInterval &SrcInt,
}
NewMI->copyImplicitOps(CopyMI);
- li_->ReplaceMachineInstrInMaps(CopyMI, NewMI);
+ LIS->ReplaceMachineInstrInMaps(CopyMI, NewMI);
CopyMI->eraseFromParent();
ReMatCopies.insert(CopyMI);
ReMatDefs.insert(DefMI);
@@ -755,8 +860,51 @@ bool RegisterCoalescer::ReMaterializeTrivialDef(LiveInterval &SrcInt,
// The source interval can become smaller because we removed a use.
if (preserveSrcInt)
- li_->shrinkToUses(&SrcInt);
+ LIS->shrinkToUses(&SrcInt);
+
+ return true;
+}
+
+/// eliminateUndefCopy - ProcessImplicitDefs may leave some copies of <undef>
+/// values; it only removes local variables. When we have a copy like:
+///
+/// %vreg1 = COPY %vreg2<undef>
+///
+/// We delete the copy and remove the corresponding value number from %vreg1.
+/// Any uses of that value number are marked as <undef>.
+bool RegisterCoalescer::eliminateUndefCopy(MachineInstr *CopyMI,
+ const CoalescerPair &CP) {
+ SlotIndex Idx = LIS->getInstructionIndex(CopyMI);
+ LiveInterval *SrcInt = &LIS->getInterval(CP.getSrcReg());
+ if (SrcInt->liveAt(Idx))
+ return false;
+ LiveInterval *DstInt = &LIS->getInterval(CP.getDstReg());
+ if (DstInt->liveAt(Idx))
+ return false;
+ // No intervals are live-in to CopyMI - it is undef.
+ if (CP.isFlipped())
+ DstInt = SrcInt;
+ SrcInt = 0;
+
+ VNInfo *DeadVNI = DstInt->getVNInfoAt(Idx.getDefIndex());
+ assert(DeadVNI && "No value defined in DstInt");
+ DstInt->removeValNo(DeadVNI);
+
+ // Find new undef uses.
+ for (MachineRegisterInfo::reg_nodbg_iterator
+ I = MRI->reg_nodbg_begin(DstInt->reg), E = MRI->reg_nodbg_end();
+ I != E; ++I) {
+ MachineOperand &MO = I.getOperand();
+ if (MO.isDef() || MO.isUndef())
+ continue;
+ MachineInstr *MI = MO.getParent();
+ SlotIndex Idx = LIS->getInstructionIndex(MI);
+ if (DstInt->liveAt(Idx))
+ continue;
+ MO.setIsUndef(true);
+ DEBUG(dbgs() << "\tnew undef: " << Idx << '\t' << *MI);
+ }
return true;
}
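
The guard at the top of eliminateUndefCopy reduces to a simple liveness test. A toy model of the "nothing is live-in, so the copy reads undef" decision, with intervals simplified to single half-open ranges (which a real LiveInterval is not):

    #include <cstdio>

    struct ToyInterval { unsigned Begin, End; };  // half-open [Begin, End)

    bool liveAt(const ToyInterval &I, unsigned Slot) {
      return Slot >= I.Begin && Slot < I.End;
    }

    // The copy reads an <undef> value exactly when neither interval is live
    // at the copy's slot.
    bool copyReadsUndef(const ToyInterval &Src, const ToyInterval &Dst,
                        unsigned CopySlot) {
      return !liveAt(Src, CopySlot) && !liveAt(Dst, CopySlot);
    }

    int main() {
      ToyInterval Src = {10, 20}, Dst = {30, 40};
      std::printf("%d\n", copyReadsUndef(Src, Dst, 25)); // 1: undef copy
    }
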
@@ -773,22 +921,20 @@ RegisterCoalescer::UpdateRegDefsUses(const CoalescerPair &CP) {
unsigned SubIdx = CP.getSubIdx();
// Update LiveDebugVariables.
- ldv_->renameRegister(SrcReg, DstReg, SubIdx);
+ LDV->renameRegister(SrcReg, DstReg, SubIdx);
- for (MachineRegisterInfo::reg_iterator I = mri_->reg_begin(SrcReg);
+ for (MachineRegisterInfo::reg_iterator I = MRI->reg_begin(SrcReg);
MachineInstr *UseMI = I.skipInstruction();) {
// A PhysReg copy that won't be coalesced can perhaps be rematerialized
// instead.
if (DstIsPhys) {
- if (UseMI->isCopy() &&
- !UseMI->getOperand(1).getSubReg() &&
- !UseMI->getOperand(0).getSubReg() &&
+ if (UseMI->isFullCopy() &&
UseMI->getOperand(1).getReg() == SrcReg &&
UseMI->getOperand(0).getReg() != SrcReg &&
UseMI->getOperand(0).getReg() != DstReg &&
!JoinedCopies.count(UseMI) &&
- ReMaterializeTrivialDef(li_->getInterval(SrcReg), false,
- UseMI->getOperand(0).getReg(), 0, UseMI))
+ ReMaterializeTrivialDef(LIS->getInterval(SrcReg), false,
+ UseMI->getOperand(0).getReg(), UseMI))
continue;
}
@@ -803,10 +949,18 @@ RegisterCoalescer::UpdateRegDefsUses(const CoalescerPair &CP) {
Kills |= MO.isKill();
Deads |= MO.isDead();
+ // Make sure we don't create read-modify-write defs accidentally. We
+ // assume here that a SrcReg def cannot be joined into a live DstReg. If
+ // RegisterCoalescer starts tracking partially live registers, we will
+ // need to check the actual LiveInterval to determine if DstReg is live
+ // here.
+ if (SubIdx && !Reads)
+ MO.setIsUndef();
+
if (DstIsPhys)
- MO.substPhysReg(DstReg, *tri_);
+ MO.substPhysReg(DstReg, *TRI);
else
- MO.substVirtReg(DstReg, SubIdx, *tri_);
+ MO.substVirtReg(DstReg, SubIdx, *TRI);
}
// This instruction is a copy that will be removed.
@@ -817,19 +971,19 @@ RegisterCoalescer::UpdateRegDefsUses(const CoalescerPair &CP) {
// If UseMI was a simple SrcReg def, make sure we didn't turn it into a
// read-modify-write of DstReg.
if (Deads)
- UseMI->addRegisterDead(DstReg, tri_);
+ UseMI->addRegisterDead(DstReg, TRI);
else if (!Reads && Writes)
- UseMI->addRegisterDefined(DstReg, tri_);
+ UseMI->addRegisterDefined(DstReg, TRI);
// Kill flags apply to the whole physical register.
if (DstIsPhys && Kills)
- UseMI->addRegisterKilled(DstReg, tri_);
+ UseMI->addRegisterKilled(DstReg, TRI);
}
DEBUG({
dbgs() << "\t\tupdated: ";
if (!UseMI->isDebugValue())
- dbgs() << li_->getInstructionIndex(UseMI) << "\t";
+ dbgs() << LIS->getInstructionIndex(UseMI) << "\t";
dbgs() << *UseMI;
});
}
@@ -838,18 +992,18 @@ RegisterCoalescer::UpdateRegDefsUses(const CoalescerPair &CP) {
/// removeIntervalIfEmpty - Check if the live interval of a physical register
/// is empty; if so, remove it and also remove the empty intervals of its
/// sub-registers. Return true if the live interval is removed.
-static bool removeIntervalIfEmpty(LiveInterval &li, LiveIntervals *li_,
- const TargetRegisterInfo *tri_) {
+static bool removeIntervalIfEmpty(LiveInterval &li, LiveIntervals *LIS,
+ const TargetRegisterInfo *TRI) {
if (li.empty()) {
if (TargetRegisterInfo::isPhysicalRegister(li.reg))
- for (const unsigned* SR = tri_->getSubRegisters(li.reg); *SR; ++SR) {
- if (!li_->hasInterval(*SR))
+ for (const unsigned* SR = TRI->getSubRegisters(li.reg); *SR; ++SR) {
+ if (!LIS->hasInterval(*SR))
continue;
- LiveInterval &sli = li_->getInterval(*SR);
+ LiveInterval &sli = LIS->getInterval(*SR);
if (sli.empty())
- li_->removeInterval(*SR);
+ LIS->removeInterval(*SR);
}
- li_->removeInterval(li.reg);
+ LIS->removeInterval(li.reg);
return true;
}
return false;
@@ -859,29 +1013,29 @@ static bool removeIntervalIfEmpty(LiveInterval &li, LiveIntervals *li_,
/// the val# it defines. If the live interval becomes empty, remove it as well.
bool RegisterCoalescer::RemoveDeadDef(LiveInterval &li,
MachineInstr *DefMI) {
- SlotIndex DefIdx = li_->getInstructionIndex(DefMI).getDefIndex();
+ SlotIndex DefIdx = LIS->getInstructionIndex(DefMI).getDefIndex();
LiveInterval::iterator MLR = li.FindLiveRangeContaining(DefIdx);
if (DefIdx != MLR->valno->def)
return false;
li.removeValNo(MLR->valno);
- return removeIntervalIfEmpty(li, li_, tri_);
+ return removeIntervalIfEmpty(li, LIS, TRI);
}
void RegisterCoalescer::RemoveCopyFlag(unsigned DstReg,
const MachineInstr *CopyMI) {
- SlotIndex DefIdx = li_->getInstructionIndex(CopyMI).getDefIndex();
- if (li_->hasInterval(DstReg)) {
- LiveInterval &LI = li_->getInterval(DstReg);
+ SlotIndex DefIdx = LIS->getInstructionIndex(CopyMI).getDefIndex();
+ if (LIS->hasInterval(DstReg)) {
+ LiveInterval &LI = LIS->getInterval(DstReg);
if (const LiveRange *LR = LI.getLiveRangeContaining(DefIdx))
if (LR->valno->def == DefIdx)
LR->valno->setCopy(0);
}
if (!TargetRegisterInfo::isPhysicalRegister(DstReg))
return;
- for (const unsigned* AS = tri_->getAliasSet(DstReg); *AS; ++AS) {
- if (!li_->hasInterval(*AS))
+ for (const unsigned* AS = TRI->getAliasSet(DstReg); *AS; ++AS) {
+ if (!LIS->hasInterval(*AS))
continue;
- LiveInterval &LI = li_->getInterval(*AS);
+ LiveInterval &LI = LIS->getInterval(*AS);
if (const LiveRange *LR = LI.getLiveRangeContaining(DefIdx))
if (LR->valno->def == DefIdx)
LR->valno->setCopy(0);
@@ -894,8 +1048,8 @@ void RegisterCoalescer::RemoveCopyFlag(unsigned DstReg,
/// are not spillable! If the destination interval uses are far away, think
/// twice about coalescing them!
bool RegisterCoalescer::shouldJoinPhys(CoalescerPair &CP) {
- bool Allocatable = li_->isAllocatable(CP.getDstReg());
- LiveInterval &JoinVInt = li_->getInterval(CP.getSrcReg());
+ bool Allocatable = LIS->isAllocatable(CP.getDstReg());
+ LiveInterval &JoinVInt = LIS->getInterval(CP.getSrcReg());
/// Always join simple intervals that are defined by a single copy from a
/// reserved register. This doesn't increase register pressure, so it is
@@ -918,8 +1072,8 @@ bool RegisterCoalescer::shouldJoinPhys(CoalescerPair &CP) {
// Don't join with physregs that have a ridiculous number of live
// ranges. The data structure performance is really bad when that
// happens.
- if (li_->hasInterval(CP.getDstReg()) &&
- li_->getInterval(CP.getDstReg()).ranges.size() > 1000) {
+ if (LIS->hasInterval(CP.getDstReg()) &&
+ LIS->getInterval(CP.getDstReg()).ranges.size() > 1000) {
++numAborts;
DEBUG(dbgs()
<< "\tPhysical register live interval too complicated, abort!\n");
@@ -929,9 +1083,9 @@ bool RegisterCoalescer::shouldJoinPhys(CoalescerPair &CP) {
// FIXME: Why are we skipping this test for partial copies?
// CodeGen/X86/phys_subreg_coalesce-3.ll needs it.
if (!CP.isPartial()) {
- const TargetRegisterClass *RC = mri_->getRegClass(CP.getSrcReg());
+ const TargetRegisterClass *RC = MRI->getRegClass(CP.getSrcReg());
unsigned Threshold = RegClassInfo.getNumAllocatableRegs(RC) * 2;
- unsigned Length = li_->getApproximateInstructionCount(JoinVInt);
+ unsigned Length = LIS->getApproximateInstructionCount(JoinVInt);
if (Length > Threshold) {
++numAborts;
DEBUG(dbgs() << "\tMay tie down a physical register, abort!\n");
@@ -957,12 +1111,12 @@ RegisterCoalescer::isWinToJoinCrossClass(unsigned SrcReg,
// Early exit if the function is fairly small, coalesce aggressively if
// that's the case. For really special register classes with 3 or
// fewer registers, be a bit more careful.
- (li_->getFuncInstructionCount() / NewRCCount) < 8)
+ (LIS->getFuncInstructionCount() / NewRCCount) < 8)
return true;
- LiveInterval &SrcInt = li_->getInterval(SrcReg);
- LiveInterval &DstInt = li_->getInterval(DstReg);
- unsigned SrcSize = li_->getApproximateInstructionCount(SrcInt);
- unsigned DstSize = li_->getApproximateInstructionCount(DstInt);
+ LiveInterval &SrcInt = LIS->getInterval(SrcReg);
+ LiveInterval &DstInt = LIS->getInterval(DstReg);
+ unsigned SrcSize = LIS->getApproximateInstructionCount(SrcInt);
+ unsigned DstSize = LIS->getApproximateInstructionCount(DstInt);
// Coalesce aggressively if the intervals are small compared to the number of
// registers in the new class. The number 4 is fairly arbitrary, chosen to be
@@ -972,10 +1126,10 @@ RegisterCoalescer::isWinToJoinCrossClass(unsigned SrcReg,
return true;
// Estimate *register use density*. If it doubles or more, abort.
- unsigned SrcUses = std::distance(mri_->use_nodbg_begin(SrcReg),
- mri_->use_nodbg_end());
- unsigned DstUses = std::distance(mri_->use_nodbg_begin(DstReg),
- mri_->use_nodbg_end());
+ unsigned SrcUses = std::distance(MRI->use_nodbg_begin(SrcReg),
+ MRI->use_nodbg_end());
+ unsigned DstUses = std::distance(MRI->use_nodbg_begin(DstReg),
+ MRI->use_nodbg_end());
unsigned NewUses = SrcUses + DstUses;
unsigned NewSize = SrcSize + DstSize;
if (SrcRC != NewRC && SrcSize > ThresSize) {
@@ -1003,9 +1157,9 @@ bool RegisterCoalescer::JoinCopy(MachineInstr *CopyMI, bool &Again) {
if (JoinedCopies.count(CopyMI) || ReMatCopies.count(CopyMI))
return false; // Already done.
- DEBUG(dbgs() << li_->getInstructionIndex(CopyMI) << '\t' << *CopyMI);
+ DEBUG(dbgs() << LIS->getInstructionIndex(CopyMI) << '\t' << *CopyMI);
- CoalescerPair CP(*tii_, *tri_);
+ CoalescerPair CP(*TII, *TRI);
if (!CP.setRegisters(CopyMI)) {
DEBUG(dbgs() << "\tNot coalescable.\n");
return false;
@@ -1018,8 +1172,15 @@ bool RegisterCoalescer::JoinCopy(MachineInstr *CopyMI, bool &Again) {
return false; // Not coalescable.
}
- DEBUG(dbgs() << "\tConsidering merging " << PrintReg(CP.getSrcReg(), tri_)
- << " with " << PrintReg(CP.getDstReg(), tri_, CP.getSubIdx())
+ // Eliminate undefs.
+ if (!CP.isPhys() && eliminateUndefCopy(CopyMI, CP)) {
+ markAsJoined(CopyMI);
+ DEBUG(dbgs() << "\tEliminated copy of <undef> value.\n");
+ return false; // Not coalescable.
+ }
+
+ DEBUG(dbgs() << "\tConsidering merging " << PrintReg(CP.getSrcReg(), TRI)
+ << " with " << PrintReg(CP.getDstReg(), TRI, CP.getSubIdx())
<< "\n");
// Enforce policies.
@@ -1028,8 +1189,8 @@ bool RegisterCoalescer::JoinCopy(MachineInstr *CopyMI, bool &Again) {
// Before giving up coalescing, if the definition of the source is a
// trivial computation, try rematerializing it.
if (!CP.isFlipped() &&
- ReMaterializeTrivialDef(li_->getInterval(CP.getSrcReg()), true,
- CP.getDstReg(), 0, CopyMI))
+ ReMaterializeTrivialDef(LIS->getInterval(CP.getSrcReg()), true,
+ CP.getDstReg(), CopyMI))
return true;
return false;
}
@@ -1042,8 +1203,8 @@ bool RegisterCoalescer::JoinCopy(MachineInstr *CopyMI, bool &Again) {
return false;
}
if (!isWinToJoinCrossClass(CP.getSrcReg(), CP.getDstReg(),
- mri_->getRegClass(CP.getSrcReg()),
- mri_->getRegClass(CP.getDstReg()),
+ MRI->getRegClass(CP.getSrcReg()),
+ MRI->getRegClass(CP.getDstReg()),
CP.getNewRC())) {
DEBUG(dbgs() << "\tAvoid coalescing to constrained register class.\n");
Again = true; // May be possible to coalesce later.
@@ -1052,8 +1213,8 @@ bool RegisterCoalescer::JoinCopy(MachineInstr *CopyMI, bool &Again) {
}
// When possible, let DstReg be the larger interval.
- if (!CP.getSubIdx() && li_->getInterval(CP.getSrcReg()).ranges.size() >
- li_->getInterval(CP.getDstReg()).ranges.size())
+ if (!CP.getSubIdx() && LIS->getInterval(CP.getSrcReg()).ranges.size() >
+ LIS->getInterval(CP.getDstReg()).ranges.size())
CP.flip();
}
@@ -1067,8 +1228,8 @@ bool RegisterCoalescer::JoinCopy(MachineInstr *CopyMI, bool &Again) {
// If the definition of the source is a trivial computation, try
// rematerializing it.
if (!CP.isFlipped() &&
- ReMaterializeTrivialDef(li_->getInterval(CP.getSrcReg()), true,
- CP.getDstReg(), 0, CopyMI))
+ ReMaterializeTrivialDef(LIS->getInterval(CP.getSrcReg()), true,
+ CP.getDstReg(), CopyMI))
return true;
// If we can eliminate the copy without merging the live ranges, do so now.
@@ -1091,7 +1252,7 @@ bool RegisterCoalescer::JoinCopy(MachineInstr *CopyMI, bool &Again) {
// other. Make sure the resulting register is set to the right register class.
if (CP.isCrossClass()) {
++numCrossRCs;
- mri_->setRegClass(CP.getDstReg(), CP.getNewRC());
+ MRI->setRegClass(CP.getDstReg(), CP.getNewRC());
}
// Remember to delete the copy instruction.
@@ -1105,10 +1266,10 @@ bool RegisterCoalescer::JoinCopy(MachineInstr *CopyMI, bool &Again) {
SmallVector<MachineBasicBlock*, 16> BlockSeq;
// JoinIntervals invalidates the VNInfos in SrcInt, but we only need the
// ranges for this, and they are preserved.
- LiveInterval &SrcInt = li_->getInterval(CP.getSrcReg());
+ LiveInterval &SrcInt = LIS->getInterval(CP.getSrcReg());
for (LiveInterval::const_iterator I = SrcInt.begin(), E = SrcInt.end();
I != E; ++I ) {
- li_->findLiveInMBBs(I->start, I->end, BlockSeq);
+ LIS->findLiveInMBBs(I->start, I->end, BlockSeq);
for (unsigned idx = 0, size = BlockSeq.size(); idx != size; ++idx) {
MachineBasicBlock &block = *BlockSeq[idx];
if (!block.isLiveIn(CP.getDstReg()))
@@ -1120,15 +1281,15 @@ bool RegisterCoalescer::JoinCopy(MachineInstr *CopyMI, bool &Again) {
// SrcReg is guaranteed to be the register whose live interval is
// being merged.
- li_->removeInterval(CP.getSrcReg());
+ LIS->removeInterval(CP.getSrcReg());
// Update regalloc hint.
- tri_->UpdateRegAllocHint(CP.getSrcReg(), CP.getDstReg(), *mf_);
+ TRI->UpdateRegAllocHint(CP.getSrcReg(), CP.getDstReg(), *MF);
DEBUG({
- LiveInterval &DstInt = li_->getInterval(CP.getDstReg());
+ LiveInterval &DstInt = LIS->getInterval(CP.getDstReg());
dbgs() << "\tJoined. Result = ";
- DstInt.print(dbgs(), tri_);
+ DstInt.print(dbgs(), TRI);
dbgs() << "\n";
});
@@ -1197,6 +1358,7 @@ static unsigned ComputeUltimateVN(VNInfo *VNI,
// which allows us to coalesce A and B.
// VNI is the definition of B. LR is the life range of A that includes
// the slot just before B. If we return true, we add "B = X" to DupCopies.
+// This implies that A dominates B.
static bool RegistersDefinedFromSameValue(LiveIntervals &li,
const TargetRegisterInfo &tri,
CoalescerPair &CP,
@@ -1248,7 +1410,9 @@ static bool RegistersDefinedFromSameValue(LiveIntervals &li,
// If the copies use two different value numbers of X, we cannot merge
// A and B.
LiveInterval &SrcInt = li.getInterval(Src);
- if (SrcInt.getVNInfoAt(Other->def) != SrcInt.getVNInfoAt(VNI->def))
+ // getVNInfoBefore returns NULL for undef copies. In this case, the
+ // optimization is still safe.
+ if (SrcInt.getVNInfoBefore(Other->def) != SrcInt.getVNInfoBefore(VNI->def))
return false;
DupCopies.push_back(MI);
@@ -1259,18 +1423,18 @@ static bool RegistersDefinedFromSameValue(LiveIntervals &li,
/// JoinIntervals - Attempt to join these two intervals. On failure, this
/// returns false.
bool RegisterCoalescer::JoinIntervals(CoalescerPair &CP) {
- LiveInterval &RHS = li_->getInterval(CP.getSrcReg());
- DEBUG({ dbgs() << "\t\tRHS = "; RHS.print(dbgs(), tri_); dbgs() << "\n"; });
+ LiveInterval &RHS = LIS->getInterval(CP.getSrcReg());
+ DEBUG({ dbgs() << "\t\tRHS = "; RHS.print(dbgs(), TRI); dbgs() << "\n"; });
// If a live interval is a physical register, check for interference with any
// aliases. The interference check implemented here is a bit more conservative
// than the full interference check below. We allow overlapping live ranges
// only when one is a copy of the other.
if (CP.isPhys()) {
- for (const unsigned *AS = tri_->getAliasSet(CP.getDstReg()); *AS; ++AS){
- if (!li_->hasInterval(*AS))
+ for (const unsigned *AS = TRI->getAliasSet(CP.getDstReg()); *AS; ++AS){
+ if (!LIS->hasInterval(*AS))
continue;
- const LiveInterval &LHS = li_->getInterval(*AS);
+ const LiveInterval &LHS = LIS->getInterval(*AS);
LiveInterval::const_iterator LI = LHS.begin();
for (LiveInterval::const_iterator RI = RHS.begin(), RE = RHS.end();
RI != RE; ++RI) {
@@ -1278,10 +1442,10 @@ bool RegisterCoalescer::JoinIntervals(CoalescerPair &CP) {
// Does LHS have an overlapping live range starting before RI?
if ((LI != LHS.begin() && LI[-1].end > RI->start) &&
(RI->start != RI->valno->def ||
- !CP.isCoalescable(li_->getInstructionFromIndex(RI->start)))) {
+ !CP.isCoalescable(LIS->getInstructionFromIndex(RI->start)))) {
DEBUG({
dbgs() << "\t\tInterference from alias: ";
- LHS.print(dbgs(), tri_);
+ LHS.print(dbgs(), TRI);
dbgs() << "\n\t\tOverlap at " << RI->start << " and no copy.\n";
});
return false;
@@ -1290,10 +1454,10 @@ bool RegisterCoalescer::JoinIntervals(CoalescerPair &CP) {
// Check that LHS ranges beginning in this range are copies.
for (; LI != LHS.end() && LI->start < RI->end; ++LI) {
if (LI->start != LI->valno->def ||
- !CP.isCoalescable(li_->getInstructionFromIndex(LI->start))) {
+ !CP.isCoalescable(LIS->getInstructionFromIndex(LI->start))) {
DEBUG({
dbgs() << "\t\tInterference from alias: ";
- LHS.print(dbgs(), tri_);
+ LHS.print(dbgs(), TRI);
dbgs() << "\n\t\tDef at " << LI->start << " is not a copy.\n";
});
return false;
@@ -1313,8 +1477,8 @@ bool RegisterCoalescer::JoinIntervals(CoalescerPair &CP) {
SmallVector<MachineInstr*, 8> DupCopies;
- LiveInterval &LHS = li_->getOrCreateInterval(CP.getDstReg());
- DEBUG({ dbgs() << "\t\tLHS = "; LHS.print(dbgs(), tri_); dbgs() << "\n"; });
+ LiveInterval &LHS = LIS->getOrCreateInterval(CP.getDstReg());
+ DEBUG({ dbgs() << "\t\tLHS = "; LHS.print(dbgs(), TRI); dbgs() << "\n"; });
// Loop over the value numbers of the LHS, seeing if any are defined from
// the RHS.
@@ -1337,7 +1501,7 @@ bool RegisterCoalescer::JoinIntervals(CoalescerPair &CP) {
// from the RHS interval, we can use its value #.
MachineInstr *MI = VNI->getCopy();
if (!CP.isCoalescable(MI) &&
- !RegistersDefinedFromSameValue(*li_, *tri_, CP, VNI, lr, DupCopies))
+ !RegistersDefinedFromSameValue(*LIS, *TRI, CP, VNI, lr, DupCopies))
continue;
LHSValsDefinedFromRHS[VNI] = lr->valno;
@@ -1364,7 +1528,7 @@ bool RegisterCoalescer::JoinIntervals(CoalescerPair &CP) {
// from the LHS interval, we can use its value #.
MachineInstr *MI = VNI->getCopy();
if (!CP.isCoalescable(MI) &&
- !RegistersDefinedFromSameValue(*li_, *tri_, CP, VNI, lr, DupCopies))
+ !RegistersDefinedFromSameValue(*LIS, *TRI, CP, VNI, lr, DupCopies))
continue;
RHSValsDefinedFromLHS[VNI] = lr->valno;
@@ -1486,7 +1650,7 @@ bool RegisterCoalescer::JoinIntervals(CoalescerPair &CP) {
// and mark the X as coalesced to keep the illusion.
unsigned Src = MI->getOperand(1).getReg();
SourceRegisters.push_back(Src);
- MI->getOperand(0).substVirtReg(Src, 0, *tri_);
+ MI->getOperand(0).substVirtReg(Src, 0, *TRI);
markAsJoined(MI);
}
@@ -1495,13 +1659,13 @@ bool RegisterCoalescer::JoinIntervals(CoalescerPair &CP) {
// that B = X is gone.
for (SmallVector<unsigned, 8>::iterator I = SourceRegisters.begin(),
E = SourceRegisters.end(); I != E; ++I) {
- li_->shrinkToUses(&li_->getInterval(*I));
+ LIS->shrinkToUses(&LIS->getInterval(*I));
}
// If we get here, we know that we can coalesce the live ranges. Ask the
// intervals to coalesce themselves now.
LHS.join(RHS, &LHSValNoAssignments[0], &RHSValNoAssignments[0], NewVNInfo,
- mri_);
+ MRI);
return true;
}
@@ -1552,7 +1716,7 @@ void RegisterCoalescer::CopyCoalesceInMBB(MachineBasicBlock *MBB,
bool SrcIsPhys = TargetRegisterInfo::isPhysicalRegister(SrcReg);
bool DstIsPhys = TargetRegisterInfo::isPhysicalRegister(DstReg);
- if (li_->hasInterval(SrcReg) && li_->getInterval(SrcReg).empty())
+ if (LIS->hasInterval(SrcReg) && LIS->getInterval(SrcReg).empty())
ImpDefCopies.push_back(Inst);
else if (SrcIsPhys || DstIsPhys)
PhysCopies.push_back(Inst);
@@ -1590,9 +1754,9 @@ void RegisterCoalescer::joinIntervals() {
DEBUG(dbgs() << "********** JOINING INTERVALS ***********\n");
std::vector<MachineInstr*> TryAgainList;
- if (loopInfo->empty()) {
+ if (Loops->empty()) {
// If there are no loops in the function, join intervals in function order.
- for (MachineFunction::iterator I = mf_->begin(), E = mf_->end();
+ for (MachineFunction::iterator I = MF->begin(), E = MF->end();
I != E; ++I)
CopyCoalesceInMBB(I, TryAgainList);
} else {
@@ -1603,9 +1767,9 @@ void RegisterCoalescer::joinIntervals() {
// Join intervals in the function prolog first. We want to join physical
// registers with virtual registers before the intervals get too long.
std::vector<std::pair<unsigned, MachineBasicBlock*> > MBBs;
- for (MachineFunction::iterator I = mf_->begin(), E = mf_->end();I != E;++I){
+ for (MachineFunction::iterator I = MF->begin(), E = MF->end();I != E;++I){
MachineBasicBlock *MBB = I;
- MBBs.push_back(std::make_pair(loopInfo->getLoopDepth(MBB), I));
+ MBBs.push_back(std::make_pair(Loops->getLoopDepth(MBB), I));
}
// Sort by loop depth.
@@ -1644,22 +1808,22 @@ void RegisterCoalescer::releaseMemory() {
}
bool RegisterCoalescer::runOnMachineFunction(MachineFunction &fn) {
- mf_ = &fn;
- mri_ = &fn.getRegInfo();
- tm_ = &fn.getTarget();
- tri_ = tm_->getRegisterInfo();
- tii_ = tm_->getInstrInfo();
- li_ = &getAnalysis<LiveIntervals>();
- ldv_ = &getAnalysis<LiveDebugVariables>();
+ MF = &fn;
+ MRI = &fn.getRegInfo();
+ TM = &fn.getTarget();
+ TRI = TM->getRegisterInfo();
+ TII = TM->getInstrInfo();
+ LIS = &getAnalysis<LiveIntervals>();
+ LDV = &getAnalysis<LiveDebugVariables>();
AA = &getAnalysis<AliasAnalysis>();
- loopInfo = &getAnalysis<MachineLoopInfo>();
+ Loops = &getAnalysis<MachineLoopInfo>();
DEBUG(dbgs() << "********** SIMPLE REGISTER COALESCING **********\n"
<< "********** Function: "
- << ((Value*)mf_->getFunction())->getName() << '\n');
+ << ((Value*)MF->getFunction())->getName() << '\n');
if (VerifyCoalescing)
- mf_->verify(this, "Before register coalescing");
+ MF->verify(this, "Before register coalescing");
RegClassInfo.runOnMachineFunction(fn);
@@ -1668,9 +1832,9 @@ bool RegisterCoalescer::runOnMachineFunction(MachineFunction &fn) {
joinIntervals();
DEBUG({
dbgs() << "********** INTERVALS POST JOINING **********\n";
- for (LiveIntervals::iterator I = li_->begin(), E = li_->end();
+ for (LiveIntervals::iterator I = LIS->begin(), E = LIS->end();
I != E; ++I){
- I->second->print(dbgs(), tri_);
+ I->second->print(dbgs(), TRI);
dbgs() << "\n";
}
});
@@ -1678,8 +1842,8 @@ bool RegisterCoalescer::runOnMachineFunction(MachineFunction &fn) {
// Perform a final pass over the instructions and compute spill weights
// and remove identity moves.
- SmallVector<unsigned, 4> DeadDefs;
- for (MachineFunction::iterator mbbi = mf_->begin(), mbbe = mf_->end();
+ SmallVector<unsigned, 4> DeadDefs, InflateRegs;
+ for (MachineFunction::iterator mbbi = MF->begin(), mbbe = MF->end();
mbbi != mbbe; ++mbbi) {
MachineBasicBlock* mbb = mbbi;
for (MachineBasicBlock::iterator mii = mbb->begin(), mie = mbb->end();
@@ -1690,6 +1854,16 @@ bool RegisterCoalescer::runOnMachineFunction(MachineFunction &fn) {
bool DoDelete = true;
assert(MI->isCopyLike() && "Unrecognized copy instruction");
unsigned SrcReg = MI->getOperand(MI->isSubregToReg() ? 2 : 1).getReg();
+ unsigned DstReg = MI->getOperand(0).getReg();
+
+ // Collect candidates for register class inflation.
+ if (TargetRegisterInfo::isVirtualRegister(SrcReg) &&
+ RegClassInfo.isProperSubClass(MRI->getRegClass(SrcReg)))
+ InflateRegs.push_back(SrcReg);
+ if (TargetRegisterInfo::isVirtualRegister(DstReg) &&
+ RegClassInfo.isProperSubClass(MRI->getRegClass(DstReg)))
+ InflateRegs.push_back(DstReg);
+
if (TargetRegisterInfo::isPhysicalRegister(SrcReg) &&
MI->getNumOperands() > 2)
// Do not delete extract_subreg, insert_subreg of physical
@@ -1701,8 +1875,8 @@ bool RegisterCoalescer::runOnMachineFunction(MachineFunction &fn) {
if (MI->allDefsAreDead()) {
if (TargetRegisterInfo::isVirtualRegister(SrcReg) &&
- li_->hasInterval(SrcReg))
- li_->shrinkToUses(&li_->getInterval(SrcReg));
+ LIS->hasInterval(SrcReg))
+ LIS->shrinkToUses(&LIS->getInterval(SrcReg));
DoDelete = true;
}
if (!DoDelete) {
@@ -1711,10 +1885,10 @@ bool RegisterCoalescer::runOnMachineFunction(MachineFunction &fn) {
MI->RemoveOperand(3);
MI->RemoveOperand(1);
}
- MI->setDesc(tii_->get(TargetOpcode::KILL));
+ MI->setDesc(TII->get(TargetOpcode::KILL));
mii = llvm::next(mii);
} else {
- li_->RemoveMachineInstrFromMaps(MI);
+ LIS->RemoveMachineInstrFromMaps(MI);
mii = mbbi->erase(mii);
++numPeep;
}
@@ -1731,12 +1905,16 @@ bool RegisterCoalescer::runOnMachineFunction(MachineFunction &fn) {
unsigned Reg = MO.getReg();
if (!Reg)
continue;
- if (TargetRegisterInfo::isVirtualRegister(Reg))
+ if (TargetRegisterInfo::isVirtualRegister(Reg)) {
DeadDefs.push_back(Reg);
+ // Remat may also enable register class inflation.
+ if (RegClassInfo.isProperSubClass(MRI->getRegClass(Reg)))
+ InflateRegs.push_back(Reg);
+ }
if (MO.isDead())
continue;
if (TargetRegisterInfo::isPhysicalRegister(Reg) ||
- !mri_->use_nodbg_empty(Reg)) {
+ !MRI->use_nodbg_empty(Reg)) {
isDead = false;
break;
}
@@ -1745,9 +1923,9 @@ bool RegisterCoalescer::runOnMachineFunction(MachineFunction &fn) {
while (!DeadDefs.empty()) {
unsigned DeadDef = DeadDefs.back();
DeadDefs.pop_back();
- RemoveDeadDef(li_->getInterval(DeadDef), MI);
+ RemoveDeadDef(LIS->getInterval(DeadDef), MI);
}
- li_->RemoveMachineInstrFromMaps(mii);
+ LIS->RemoveMachineInstrFromMaps(mii);
mii = mbbi->erase(mii);
continue;
} else
@@ -1757,14 +1935,14 @@ bool RegisterCoalescer::runOnMachineFunction(MachineFunction &fn) {
++mii;
// Check for now unnecessary kill flags.
- if (li_->isNotInMIMap(MI)) continue;
- SlotIndex DefIdx = li_->getInstructionIndex(MI).getDefIndex();
+ if (LIS->isNotInMIMap(MI)) continue;
+ SlotIndex DefIdx = LIS->getInstructionIndex(MI).getDefIndex();
for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
MachineOperand &MO = MI->getOperand(i);
if (!MO.isReg() || !MO.isKill()) continue;
unsigned reg = MO.getReg();
- if (!reg || !li_->hasInterval(reg)) continue;
- if (!li_->getInterval(reg).killedAt(DefIdx)) {
+ if (!reg || !LIS->hasInterval(reg)) continue;
+ if (!LIS->getInterval(reg).killedAt(DefIdx)) {
MO.setIsKill(false);
continue;
}
@@ -1772,26 +1950,40 @@ bool RegisterCoalescer::runOnMachineFunction(MachineFunction &fn) {
// remain alive.
if (!TargetRegisterInfo::isPhysicalRegister(reg))
continue;
- for (const unsigned *SR = tri_->getSubRegisters(reg);
+ for (const unsigned *SR = TRI->getSubRegisters(reg);
unsigned S = *SR; ++SR)
- if (li_->hasInterval(S) && li_->getInterval(S).liveAt(DefIdx))
- MI->addRegisterDefined(S, tri_);
+ if (LIS->hasInterval(S) && LIS->getInterval(S).liveAt(DefIdx))
+ MI->addRegisterDefined(S, TRI);
}
}
}
+ // After deleting a lot of copies, register classes may be less constrained.
+ // Removing sub-register operands may allow GR32_ABCD -> GR32 and DPR_VFP2 ->
+ // DPR inflation.
+ array_pod_sort(InflateRegs.begin(), InflateRegs.end());
+ InflateRegs.erase(std::unique(InflateRegs.begin(), InflateRegs.end()),
+ InflateRegs.end());
+ DEBUG(dbgs() << "Trying to inflate " << InflateRegs.size() << " regs.\n");
+ for (unsigned i = 0, e = InflateRegs.size(); i != e; ++i) {
+ unsigned Reg = InflateRegs[i];
+ if (MRI->reg_nodbg_empty(Reg))
+ continue;
+ if (MRI->recomputeRegClass(Reg, *TM)) {
+ DEBUG(dbgs() << PrintReg(Reg) << " inflated to "
+ << MRI->getRegClass(Reg)->getName() << '\n');
+ ++NumInflated;
+ }
+ }
+
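
The sort-then-unique sequence applied to InflateRegs above is the standard in-place dedup idiom; spelled out with the standard library (array_pod_sort is essentially a qsort-style wrapper over the same operation):

    #include <algorithm>
    #include <cstdio>
    #include <vector>

    int main() {
      std::vector<unsigned> InflateRegs;
      unsigned Vals[] = {7, 3, 7, 5, 3};
      InflateRegs.assign(Vals, Vals + 5);
      std::sort(InflateRegs.begin(), InflateRegs.end());
      // std::unique compacts duplicates to the tail; erase trims them off.
      InflateRegs.erase(std::unique(InflateRegs.begin(), InflateRegs.end()),
                        InflateRegs.end());
      for (unsigned i = 0, e = InflateRegs.size(); i != e; ++i)
        std::printf("%u ", InflateRegs[i]); // 3 5 7
    }
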
DEBUG(dump());
- DEBUG(ldv_->dump());
+ DEBUG(LDV->dump());
if (VerifyCoalescing)
- mf_->verify(this, "After register coalescing");
+ MF->verify(this, "After register coalescing");
return true;
}
/// print - Implement the dump method.
void RegisterCoalescer::print(raw_ostream &O, const Module* m) const {
- li_->print(O, m);
-}
-
-RegisterCoalescer *llvm::createRegisterCoalescer() {
- return new RegisterCoalescer();
+ LIS->print(O, m);
}
diff --git a/lib/CodeGen/RegisterCoalescer.h b/lib/CodeGen/RegisterCoalescer.h
index 4131d91..472c483 100644
--- a/lib/CodeGen/RegisterCoalescer.h
+++ b/lib/CodeGen/RegisterCoalescer.h
@@ -12,198 +12,60 @@
//
//===----------------------------------------------------------------------===//
-#include "RegisterClassInfo.h"
-#include "llvm/Support/IncludeFile.h"
-#include "llvm/CodeGen/LiveInterval.h"
-#include "llvm/ADT/SmallPtrSet.h"
-
#ifndef LLVM_CODEGEN_REGISTER_COALESCER_H
#define LLVM_CODEGEN_REGISTER_COALESCER_H
namespace llvm {
- class MachineFunction;
- class RegallocQuery;
- class AnalysisUsage;
class MachineInstr;
class TargetRegisterInfo;
class TargetRegisterClass;
class TargetInstrInfo;
- class LiveDebugVariables;
- class VirtRegMap;
- class MachineLoopInfo;
-
- class CoalescerPair;
-
- /// An abstract interface for register coalescers. Coalescers must
- /// implement this interface to be part of the coalescer analysis
- /// group.
- class RegisterCoalescer : public MachineFunctionPass {
- MachineFunction* mf_;
- MachineRegisterInfo* mri_;
- const TargetMachine* tm_;
- const TargetRegisterInfo* tri_;
- const TargetInstrInfo* tii_;
- LiveIntervals *li_;
- LiveDebugVariables *ldv_;
- const MachineLoopInfo* loopInfo;
- AliasAnalysis *AA;
- RegisterClassInfo RegClassInfo;
-
- /// JoinedCopies - Keep track of copies eliminated due to coalescing.
- ///
- SmallPtrSet<MachineInstr*, 32> JoinedCopies;
-
- /// ReMatCopies - Keep track of copies eliminated due to remat.
- ///
- SmallPtrSet<MachineInstr*, 32> ReMatCopies;
-
- /// ReMatDefs - Keep track of definition instructions which have
- /// been remat'ed.
- SmallPtrSet<MachineInstr*, 8> ReMatDefs;
-
- /// joinIntervals - join compatible live intervals
- void joinIntervals();
-
- /// CopyCoalesceInMBB - Coalesce copies in the specified MBB, putting
- /// copies that cannot yet be coalesced into the "TryAgain" list.
- void CopyCoalesceInMBB(MachineBasicBlock *MBB,
- std::vector<MachineInstr*> &TryAgain);
-
- /// JoinCopy - Attempt to join intervals corresponding to SrcReg/DstReg,
- /// which are the src/dst of the copy instruction CopyMI. This returns true
- /// if the copy was successfully coalesced away. If it is not currently
- /// possible to coalesce this interval, but it may be possible if other
- /// things get coalesced, then it returns true by reference in 'Again'.
- bool JoinCopy(MachineInstr *TheCopy, bool &Again);
-
- /// JoinIntervals - Attempt to join these two intervals. On failure, this
- /// returns false. The output "SrcInt" will not have been modified, so we can
- /// use this information below to update aliases.
- bool JoinIntervals(CoalescerPair &CP);
-
- /// AdjustCopiesBackFrom - We found a non-trivially-coalescable copy. If
- /// the source value number is defined by a copy from the destination reg
- /// see if we can merge these two destination reg valno# into a single
- /// value number, eliminating a copy.
- bool AdjustCopiesBackFrom(const CoalescerPair &CP, MachineInstr *CopyMI);
-
- /// HasOtherReachingDefs - Return true if there are definitions of IntB
- /// other than BValNo val# that can reach uses of AValno val# of IntA.
- bool HasOtherReachingDefs(LiveInterval &IntA, LiveInterval &IntB,
- VNInfo *AValNo, VNInfo *BValNo);
-
- /// RemoveCopyByCommutingDef - We found a non-trivially-coalescable copy.
- /// If the source value number is defined by a commutable instruction and
- /// its other operand is coalesced to the copy dest register, see if we
- /// can transform the copy into a noop by commuting the definition.
- bool RemoveCopyByCommutingDef(const CoalescerPair &CP,MachineInstr *CopyMI);
-
- /// ReMaterializeTrivialDef - If the source of a copy is defined by a trivial
- /// computation, replace the copy by rematerialize the definition.
- /// If PreserveSrcInt is true, make sure SrcInt is valid after the call.
- bool ReMaterializeTrivialDef(LiveInterval &SrcInt, bool PreserveSrcInt,
- unsigned DstReg, unsigned DstSubIdx,
- MachineInstr *CopyMI);
-
- /// shouldJoinPhys - Return true if a physreg copy should be joined.
- bool shouldJoinPhys(CoalescerPair &CP);
-
- /// isWinToJoinCrossClass - Return true if it's profitable to coalesce
- /// two virtual registers from different register classes.
- bool isWinToJoinCrossClass(unsigned SrcReg,
- unsigned DstReg,
- const TargetRegisterClass *SrcRC,
- const TargetRegisterClass *DstRC,
- const TargetRegisterClass *NewRC);
-
- /// UpdateRegDefsUses - Replace all defs and uses of SrcReg to DstReg and
- /// update the subregister number if it is not zero. If DstReg is a
- /// physical register and the existing subregister number of the def / use
- /// being updated is not zero, make sure to set it to the correct physical
- /// subregister.
- void UpdateRegDefsUses(const CoalescerPair &CP);
-
- /// RemoveDeadDef - If a def of a live interval is now determined dead,
- /// remove the val# it defines. If the live interval becomes empty, remove
- /// it as well.
- bool RemoveDeadDef(LiveInterval &li, MachineInstr *DefMI);
-
- /// RemoveCopyFlag - If DstReg is no longer defined by CopyMI, clear the
- /// VNInfo copy flag for DstReg and all aliases.
- void RemoveCopyFlag(unsigned DstReg, const MachineInstr *CopyMI);
-
- /// markAsJoined - Remember that CopyMI has already been joined.
- void markAsJoined(MachineInstr *CopyMI);
-
- public:
- static char ID; // Class identification, replacement for typeinfo
- RegisterCoalescer() : MachineFunctionPass(ID) {
- initializeRegisterCoalescerPass(*PassRegistry::getPassRegistry());
- }
-
- /// Register allocators must call this from their own
- /// getAnalysisUsage to cover the case where the coalescer is not
- /// a Pass in the proper sense and isn't managed by PassManager.
- /// PassManager needs to know which analyses to make available and
- /// which to invalidate when running the register allocator or any
- /// pass that might call coalescing. The long-term solution is to
- /// allow hierarchies of PassManagers.
- virtual void getAnalysisUsage(AnalysisUsage &AU) const;
-
- virtual void releaseMemory();
-
- /// runOnMachineFunction - pass entry point
- virtual bool runOnMachineFunction(MachineFunction&);
-
- /// print - Implement the dump method.
- virtual void print(raw_ostream &O, const Module* = 0) const;
- };
/// CoalescerPair - A helper class for register coalescers. When deciding if
/// two registers can be coalesced, CoalescerPair can determine if a copy
/// instruction would become an identity copy after coalescing.
class CoalescerPair {
- const TargetInstrInfo &tii_;
- const TargetRegisterInfo &tri_;
+ const TargetInstrInfo &TII;
+ const TargetRegisterInfo &TRI;
- /// dstReg_ - The register that will be left after coalescing. It can be a
+ /// DstReg - The register that will be left after coalescing. It can be a
/// virtual or physical register.
- unsigned dstReg_;
+ unsigned DstReg;
- /// srcReg_ - the virtual register that will be coalesced into dstReg.
- unsigned srcReg_;
+ /// SrcReg - The virtual register that will be coalesced into DstReg.
+ unsigned SrcReg;
- /// subReg_ - The subregister index of srcReg in dstReg_. It is possible the
- /// coalesce srcReg_ into a subreg of the larger dstReg_ when dstReg_ is a
+ /// SubIdx - The subregister index of SrcReg in DstReg. It is possible to
+ /// coalesce SrcReg into a subreg of the larger DstReg when DstReg is a
/// virtual register.
- unsigned subIdx_;
+ unsigned SubIdx;
- /// partial_ - True when the original copy was a partial subregister copy.
- bool partial_;
+ /// Partial - True when the original copy was a partial subregister copy.
+ bool Partial;
- /// crossClass_ - True when both regs are virtual, and newRC is constrained.
- bool crossClass_;
+ /// CrossClass - True when both regs are virtual, and NewRC is constrained.
+ bool CrossClass;
- /// flipped_ - True when DstReg and SrcReg are reversed from the oriignal copy
- /// instruction.
- bool flipped_;
+ /// Flipped - True when DstReg and SrcReg are reversed from the original
+ /// copy instruction.
+ bool Flipped;
- /// newRC_ - The register class of the coalesced register, or NULL if dstReg_
+ /// NewRC - The register class of the coalesced register, or NULL if DstReg
/// is a physreg.
- const TargetRegisterClass *newRC_;
+ const TargetRegisterClass *NewRC;
public:
CoalescerPair(const TargetInstrInfo &tii, const TargetRegisterInfo &tri)
- : tii_(tii), tri_(tri), dstReg_(0), srcReg_(0), subIdx_(0),
- partial_(false), crossClass_(false), flipped_(false), newRC_(0) {}
+ : TII(tii), TRI(tri), DstReg(0), SrcReg(0), SubIdx(0),
+ Partial(false), CrossClass(false), Flipped(false), NewRC(0) {}
/// setRegisters - set registers to match the copy instruction MI. Return
/// false if MI is not a coalescable copy instruction.
bool setRegisters(const MachineInstr*);
- /// flip - Swap srcReg_ and dstReg_. Return false if swapping is impossible
- /// because dstReg_ is a physical register, or subIdx_ is set.
+ /// flip - Swap SrcReg and DstReg. Return false if swapping is impossible
+ /// because DstReg is a physical register, or SubIdx is set.
bool flip();
/// isCoalescable - Return true if MI is a copy instruction that will become
@@ -211,32 +73,33 @@ namespace llvm {
bool isCoalescable(const MachineInstr*) const;
/// isPhys - Return true if DstReg is a physical register.
- bool isPhys() const { return !newRC_; }
+ bool isPhys() const { return !NewRC; }
- /// isPartial - Return true if the original copy instruction did not copy the
- /// full register, but was a subreg operation.
- bool isPartial() const { return partial_; }
+ /// isPartial - Return true if the original copy instruction did not copy
+ /// the full register, but was a subreg operation.
+ bool isPartial() const { return Partial; }
- /// isCrossClass - Return true if DstReg is virtual and NewRC is a smaller register class than DstReg's.
- bool isCrossClass() const { return crossClass_; }
+ /// isCrossClass - Return true if DstReg is virtual and NewRC is a smaller
+ /// register class than DstReg's.
+ bool isCrossClass() const { return CrossClass; }
/// isFlipped - Return true when getSrcReg is the register being defined by
/// the original copy instruction.
- bool isFlipped() const { return flipped_; }
+ bool isFlipped() const { return Flipped; }
/// getDstReg - Return the register (virtual or physical) that will remain
/// after coalescing.
- unsigned getDstReg() const { return dstReg_; }
+ unsigned getDstReg() const { return DstReg; }
/// getSrcReg - Return the virtual register that will be coalesced away.
- unsigned getSrcReg() const { return srcReg_; }
+ unsigned getSrcReg() const { return SrcReg; }
/// getSubIdx - Return the subregister index in DstReg that SrcReg will be
/// coalesced into, or 0.
- unsigned getSubIdx() const { return subIdx_; }
+ unsigned getSubIdx() const { return SubIdx; }
/// getNewRC - Return the register class of the coalesced register.
- const TargetRegisterClass *getNewRC() const { return newRC_; }
+ const TargetRegisterClass *getNewRC() const { return NewRC; }
};
} // End llvm namespace
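A sketch of how a coalescer is expected to drive this helper, based only on the interface above. The surrounding pass plumbing (CopyMI, TII, TRI) is assumed to be in scope, and the control flow is illustrative rather than the pass's actual logic:

  // Minimal sketch: decide whether CopyMI can be joined and which
  // register survives. Assumes LLVM headers and pass context.
  CoalescerPair CP(*TII, *TRI);
  if (!CP.setRegisters(CopyMI))
    return false;                     // not a coalescable copy
  if (CP.isPhys()) {
    // DstReg is physical; getNewRC() is null and joining is more
    // restricted.
  } else if (CP.isCrossClass()) {
    // Joining constrains the result to the smaller class getNewRC().
  }
  // On success, every use of CP.getSrcReg() is rewritten to
  // CP.getDstReg(), at sub-register index CP.getSubIdx() when nonzero,
  // and CP.isCoalescable(MI) can recognize copies that become identity
  // copies afterwards.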
diff --git a/lib/CodeGen/RegisterScavenging.cpp b/lib/CodeGen/RegisterScavenging.cpp
index 9e9a145..ca02aa1 100644
--- a/lib/CodeGen/RegisterScavenging.cpp
+++ b/lib/CodeGen/RegisterScavenging.cpp
@@ -206,6 +206,7 @@ void RegScavenger::forward() {
break;
}
assert(SubUsed && "Using an undefined register!");
+ (void)SubUsed;
}
assert((!EarlyClobberRegs.test(Reg) || MI->isRegTiedToDefOperand(i)) &&
"Using an early clobbered register!");
diff --git a/lib/CodeGen/ScheduleDAG.cpp b/lib/CodeGen/ScheduleDAG.cpp
index 21375b2..1e9b5c8 100644
--- a/lib/CodeGen/ScheduleDAG.cpp
+++ b/lib/CodeGen/ScheduleDAG.cpp
@@ -26,7 +26,7 @@
using namespace llvm;
#ifndef NDEBUG
-cl::opt<bool> StressSchedOpt(
+static cl::opt<bool> StressSchedOpt(
"stress-sched", cl::Hidden, cl::init(false),
cl::desc("Stress test instruction scheduling"));
#endif
@@ -140,6 +140,7 @@ void SUnit::removePred(const SDep &D) {
break;
}
assert(FoundSucc && "Mismatching preds / succs lists!");
+ (void)FoundSucc;
Preds.erase(I);
// Update the bookkeeping.
if (P.getKind() == SDep::Data) {
diff --git a/lib/CodeGen/ScheduleDAGInstrs.cpp b/lib/CodeGen/ScheduleDAGInstrs.cpp
index 446adfc..34b8ab0 100644
--- a/lib/CodeGen/ScheduleDAGInstrs.cpp
+++ b/lib/CodeGen/ScheduleDAGInstrs.cpp
@@ -36,7 +36,7 @@ ScheduleDAGInstrs::ScheduleDAGInstrs(MachineFunction &mf,
const MachineDominatorTree &mdt)
: ScheduleDAG(mf), MLI(mli), MDT(mdt), MFI(mf.getFrameInfo()),
InstrItins(mf.getTarget().getInstrItineraryData()),
- Defs(TRI->getNumRegs()), Uses(TRI->getNumRegs()),
+ Defs(TRI->getNumRegs()), Uses(TRI->getNumRegs()),
LoopRegs(MLI, MDT), FirstDbgValue(0) {
DbgValues.clear();
}
@@ -134,6 +134,7 @@ static const Value *getUnderlyingObjectForInstr(const MachineInstr *MI,
}
void ScheduleDAGInstrs::StartBlock(MachineBasicBlock *BB) {
+ LoopRegs.Deps.clear();
if (MachineLoop *ML = MLI.getLoopFor(BB))
if (BB == ML->getLoopLatch()) {
MachineBasicBlock *Header = ML->getHeader();
diff --git a/lib/CodeGen/ScheduleDAGInstrs.h b/lib/CodeGen/ScheduleDAGInstrs.h
index 8a4ea85..666bdf5 100644
--- a/lib/CodeGen/ScheduleDAGInstrs.h
+++ b/lib/CodeGen/ScheduleDAGInstrs.h
@@ -48,7 +48,8 @@ namespace llvm {
/// VisitLoop - Clear out any previous state and analyze the given loop.
///
void VisitLoop(const MachineLoop *Loop) {
- Deps.clear();
+ assert(Deps.empty() && "stale loop dependencies");
+
MachineBasicBlock *Header = Loop->getHeader();
SmallSet<unsigned, 8> LoopLiveIns;
for (MachineBasicBlock::livein_iterator LI = Header->livein_begin(),
@@ -109,7 +110,7 @@ namespace llvm {
/// initialized and destructed for each block.
std::vector<std::vector<SUnit *> > Defs;
std::vector<std::vector<SUnit *> > Uses;
-
+
/// PendingLoads - Remember where unknown loads are after the most recent
/// unknown store, as we iterate. As with Defs and Uses, this is here
/// to minimize construction/destruction.
@@ -127,7 +128,7 @@ namespace llvm {
protected:
/// DbgValues - Remember instruction that precedes DBG_VALUE.
- typedef std::vector<std::pair<MachineInstr *, MachineInstr *> >
+ typedef std::vector<std::pair<MachineInstr *, MachineInstr *> >
DbgValueVector;
DbgValueVector DbgValues;
MachineInstr *FirstDbgValue;
diff --git a/lib/CodeGen/ScoreboardHazardRecognizer.cpp b/lib/CodeGen/ScoreboardHazardRecognizer.cpp
index 0e005d3..b80c01e 100644
--- a/lib/CodeGen/ScoreboardHazardRecognizer.cpp
+++ b/lib/CodeGen/ScoreboardHazardRecognizer.cpp
@@ -213,7 +213,6 @@ void ScoreboardHazardRecognizer::EmitInstruction(SUnit *SU) {
freeUnits = freeUnit & (freeUnit - 1);
} while (freeUnits);
- assert(freeUnit && "No function unit available!");
if (IS->getReservationKind() == InstrStage::Required)
RequiredScoreboard[cycle + i] |= freeUnit;
else
diff --git a/lib/CodeGen/SelectionDAG/CMakeLists.txt b/lib/CodeGen/SelectionDAG/CMakeLists.txt
index 15932c0..2282f0e 100644
--- a/lib/CodeGen/SelectionDAG/CMakeLists.txt
+++ b/lib/CodeGen/SelectionDAG/CMakeLists.txt
@@ -21,3 +21,13 @@ add_llvm_library(LLVMSelectionDAG
TargetLowering.cpp
TargetSelectionDAGInfo.cpp
)
+
+add_llvm_library_dependencies(LLVMSelectionDAG
+ LLVMAnalysis
+ LLVMCodeGen
+ LLVMCore
+ LLVMMC
+ LLVMSupport
+ LLVMTarget
+ LLVMTransformUtils
+ )
diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 4f0d2ca..7b87868 100644
--- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -216,6 +216,7 @@ namespace {
SDValue visitEXTRACT_VECTOR_ELT(SDNode *N);
SDValue visitBUILD_VECTOR(SDNode *N);
SDValue visitCONCAT_VECTORS(SDNode *N);
+ SDValue visitEXTRACT_SUBVECTOR(SDNode *N);
SDValue visitVECTOR_SHUFFLE(SDNode *N);
SDValue visitMEMBARRIER(SDNode *N);
@@ -1105,6 +1106,7 @@ SDValue DAGCombiner::visit(SDNode *N) {
case ISD::EXTRACT_VECTOR_ELT: return visitEXTRACT_VECTOR_ELT(N);
case ISD::BUILD_VECTOR: return visitBUILD_VECTOR(N);
case ISD::CONCAT_VECTORS: return visitCONCAT_VECTORS(N);
+ case ISD::EXTRACT_SUBVECTOR: return visitEXTRACT_SUBVECTOR(N);
case ISD::VECTOR_SHUFFLE: return visitVECTOR_SHUFFLE(N);
case ISD::MEMBARRIER: return visitMEMBARRIER(N);
}
@@ -1526,12 +1528,6 @@ SDValue DAGCombiner::visitADDE(SDNode *N) {
ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
- // If both operands are null we know that carry out will always be false.
- if (N0C && N0C->isNullValue() && N0 == N1)
- DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), DAG.getNode(ISD::CARRY_FALSE,
- N->getDebugLoc(),
- MVT::Glue));
-
// canonicalize constant to RHS
if (N0C && !N1C)
return DAG.getNode(ISD::ADDE, N->getDebugLoc(), N->getVTList(),
@@ -3763,7 +3759,7 @@ SDValue DAGCombiner::visitSELECT(SDNode *N) {
if (VT.isInteger() &&
(VT0 == MVT::i1 ||
(VT0.isInteger() &&
- TLI.getBooleanContents() == TargetLowering::ZeroOrOneBooleanContent)) &&
+ TLI.getBooleanContents(false) == TargetLowering::ZeroOrOneBooleanContent)) &&
N1C && N2C && N1C->isNullValue() && N2C->getAPIntValue() == 1) {
SDValue XORNode;
if (VT == VT0)
@@ -4118,7 +4114,7 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
// we know that the element size of the sext'd result matches the
// element size of the compare operands.
if (VT.getSizeInBits() == N0VT.getSizeInBits())
- return DAG.getVSetCC(N->getDebugLoc(), VT, N0.getOperand(0),
+ return DAG.getSetCC(N->getDebugLoc(), VT, N0.getOperand(0),
N0.getOperand(1),
cast<CondCodeSDNode>(N0.getOperand(2))->get());
// If the desired elements are smaller or larger than the source
@@ -4132,7 +4128,7 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
EVT::getVectorVT(*DAG.getContext(), MatchingElementType,
N0VT.getVectorNumElements());
SDValue VsetCC =
- DAG.getVSetCC(N->getDebugLoc(), MatchingVectorType, N0.getOperand(0),
+ DAG.getSetCC(N->getDebugLoc(), MatchingVectorType, N0.getOperand(0),
N0.getOperand(1),
cast<CondCodeSDNode>(N0.getOperand(2))->get());
return DAG.getSExtOrTrunc(VsetCC, N->getDebugLoc(), VT);
@@ -4348,7 +4344,7 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
// we know that the element size of the sext'd result matches the
// element size of the compare operands.
return DAG.getNode(ISD::AND, N->getDebugLoc(), VT,
- DAG.getVSetCC(N->getDebugLoc(), VT, N0.getOperand(0),
+ DAG.getSetCC(N->getDebugLoc(), VT, N0.getOperand(0),
N0.getOperand(1),
cast<CondCodeSDNode>(N0.getOperand(2))->get()),
DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(), VT,
@@ -4364,7 +4360,7 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
EVT::getVectorVT(*DAG.getContext(), MatchingElementType,
N0VT.getVectorNumElements());
SDValue VsetCC =
- DAG.getVSetCC(N->getDebugLoc(), MatchingVectorType, N0.getOperand(0),
+ DAG.getSetCC(N->getDebugLoc(), MatchingVectorType, N0.getOperand(0),
N0.getOperand(1),
cast<CondCodeSDNode>(N0.getOperand(2))->get());
return DAG.getNode(ISD::AND, N->getDebugLoc(), VT,
@@ -4532,7 +4528,7 @@ SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) {
// we know that the element size of the sext'd result matches the
// element size of the compare operands.
if (VT.getSizeInBits() == N0VT.getSizeInBits())
- return DAG.getVSetCC(N->getDebugLoc(), VT, N0.getOperand(0),
+ return DAG.getSetCC(N->getDebugLoc(), VT, N0.getOperand(0),
N0.getOperand(1),
cast<CondCodeSDNode>(N0.getOperand(2))->get());
// If the desired elements are smaller or larger than the source
@@ -4546,7 +4542,7 @@ SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) {
EVT::getVectorVT(*DAG.getContext(), MatchingElementType,
N0VT.getVectorNumElements());
SDValue VsetCC =
- DAG.getVSetCC(N->getDebugLoc(), MatchingVectorType, N0.getOperand(0),
+ DAG.getSetCC(N->getDebugLoc(), MatchingVectorType, N0.getOperand(0),
N0.getOperand(1),
cast<CondCodeSDNode>(N0.getOperand(2))->get());
return DAG.getSExtOrTrunc(VsetCC, N->getDebugLoc(), VT);
@@ -6479,7 +6475,7 @@ SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) {
PtrOff = (BitWidth + 7 - NewBW) / 8 - PtrOff;
unsigned NewAlign = MinAlign(LD->getAlignment(), PtrOff);
- const Type *NewVTTy = NewVT.getTypeForEVT(*DAG.getContext());
+ Type *NewVTTy = NewVT.getTypeForEVT(*DAG.getContext());
if (NewAlign < TLI.getTargetData()->getABITypeAlignment(NewVTTy))
return SDValue();
@@ -6542,7 +6538,7 @@ SDValue DAGCombiner::TransformFPLoadStorePair(SDNode *N) {
unsigned LDAlign = LD->getAlignment();
unsigned STAlign = ST->getAlignment();
- const Type *IntVTTy = IntVT.getTypeForEVT(*DAG.getContext());
+ Type *IntVTTy = IntVT.getTypeForEVT(*DAG.getContext());
unsigned ABIAlign = TLI.getTargetData()->getABITypeAlignment(IntVTTy);
if (LDAlign < ABIAlign || STAlign < ABIAlign)
return SDValue();
@@ -6776,6 +6772,7 @@ SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) {
SDValue InVec = N->getOperand(0);
SDValue InVal = N->getOperand(1);
SDValue EltNo = N->getOperand(2);
+ DebugLoc dl = N->getDebugLoc();
// If the inserted element is an UNDEF, just use the input vector.
if (InVal.getOpcode() == ISD::UNDEF)
@@ -6787,32 +6784,40 @@ SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) {
if (LegalOperations && !TLI.isOperationLegal(ISD::BUILD_VECTOR, VT))
return SDValue();
- // If the invec is a BUILD_VECTOR and if EltNo is a constant, build a new
- // vector with the inserted element.
- if (InVec.getOpcode() == ISD::BUILD_VECTOR && isa<ConstantSDNode>(EltNo)) {
- unsigned Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();
- SmallVector<SDValue, 8> Ops(InVec.getNode()->op_begin(),
- InVec.getNode()->op_end());
- if (Elt < Ops.size())
- Ops[Elt] = InVal;
- return DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(),
- VT, &Ops[0], Ops.size());
- }
- // If the invec is an UNDEF and if EltNo is a constant, create a new
- // BUILD_VECTOR with undef elements and the inserted element.
- if (InVec.getOpcode() == ISD::UNDEF &&
- isa<ConstantSDNode>(EltNo)) {
- EVT EltVT = VT.getVectorElementType();
+ // Check that we know which element is being inserted
+ if (!isa<ConstantSDNode>(EltNo))
+ return SDValue();
+ unsigned Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();
+
+ // Check that the operand is a BUILD_VECTOR (or UNDEF, which can essentially
+ // be converted to a BUILD_VECTOR). Fill in the Ops vector with the
+ // vector elements.
+ SmallVector<SDValue, 8> Ops;
+ if (InVec.getOpcode() == ISD::BUILD_VECTOR) {
+ Ops.append(InVec.getNode()->op_begin(),
+ InVec.getNode()->op_end());
+ } else if (InVec.getOpcode() == ISD::UNDEF) {
unsigned NElts = VT.getVectorNumElements();
- SmallVector<SDValue, 8> Ops(NElts, DAG.getUNDEF(EltVT));
+ Ops.append(NElts, DAG.getUNDEF(InVal.getValueType()));
+ } else {
+ return SDValue();
+ }
- unsigned Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();
- if (Elt < Ops.size())
- Ops[Elt] = InVal;
- return DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(),
- VT, &Ops[0], Ops.size());
+ // Insert the element
+ if (Elt < Ops.size()) {
+ // All the operands of BUILD_VECTOR must have the same type;
+ // we enforce that here.
+ EVT OpVT = Ops[0].getValueType();
+ if (InVal.getValueType() != OpVT)
+ InVal = OpVT.bitsGT(InVal.getValueType()) ?
+ DAG.getNode(ISD::ANY_EXTEND, dl, OpVT, InVal) :
+ DAG.getNode(ISD::TRUNCATE, dl, OpVT, InVal);
+ Ops[Elt] = InVal;
}
- return SDValue();
+
+ // Return the new vector
+ return DAG.getNode(ISD::BUILD_VECTOR, dl,
+ VT, &Ops[0], Ops.size());
}
SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
@@ -6896,7 +6901,7 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
// If Idx was -1 above, Elt is going to be -1, so just return undef.
if (Elt == -1)
- return DAG.getUNDEF(LN0->getBasePtr().getValueType());
+ return DAG.getUNDEF(LVT);
unsigned Align = LN0->getAlignment();
if (NewLoad) {
@@ -7028,6 +7033,36 @@ SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) {
return SDValue();
}
+SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode* N) {
+ EVT NVT = N->getValueType(0);
+ SDValue V = N->getOperand(0);
+
+ if (V->getOpcode() == ISD::INSERT_SUBVECTOR) {
+ // Handle only the simple case where the vector being inserted and the
+ // vector being extracted have the same type, each half the size of the
+ // larger vector.
+ EVT BigVT = V->getOperand(0).getValueType();
+ EVT SmallVT = V->getOperand(1).getValueType();
+ if (NVT != SmallVT || NVT.getSizeInBits()*2 != BigVT.getSizeInBits())
+ return SDValue();
+
+ // Combine:
+ //    (extract_subvec (insert_subvec V1, V2, InsIdx), ExtIdx)
+ // Into:
+ //    indices are equal => V2
+ //    otherwise => (extract_subvec V1, ExtIdx)
+ //
+ SDValue ExtIdx = N->getOperand(1);
+ SDValue InsIdx = V->getOperand(2);
+
+ if (InsIdx == ExtIdx)
+ return V->getOperand(1);
+ return DAG.getNode(ISD::EXTRACT_SUBVECTOR, N->getDebugLoc(), NVT,
+ V->getOperand(0), ExtIdx);
+ }
+
+ return SDValue();
+}
+
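The identity this combine relies on can be checked on plain arrays. Below is a small self-contained model (names are illustrative, not the DAG API); the half-size restriction above is what guarantees the two subvectors are either identical or disjoint:

  #include <cassert>
  #include <vector>

  typedef std::vector<int> Vec;

  // Model of insert_subvec: overwrite Big[Idx..Idx+Small.size()) with Small.
  static Vec insertSub(Vec Big, const Vec &Small, unsigned Idx) {
    for (unsigned i = 0; i != Small.size(); ++i)
      Big[Idx + i] = Small[i];
    return Big;
  }

  // Model of extract_subvec: read Len elements starting at Idx.
  static Vec extractSub(const Vec &V, unsigned Idx, unsigned Len) {
    return Vec(V.begin() + Idx, V.begin() + Idx + Len);
  }

  int main() {
    Vec V1(8, 0), V2(4, 9);
    // Same index: the extract returns the inserted subvector V2 ...
    assert(extractSub(insertSub(V1, V2, 4), 4, 4) == V2);
    // ... different index: the halves are disjoint, so the insert is
    // irrelevant and the extract reads straight from V1.
    assert(extractSub(insertSub(V1, V2, 4), 0, 4) == extractSub(V1, 0, 4));
    return 0;
  }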
SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
EVT VT = N->getValueType(0);
unsigned NumElts = VT.getVectorNumElements();
@@ -7447,7 +7482,7 @@ SDValue DAGCombiner::SimplifySelectCC(DebugLoc DL, SDValue N0, SDValue N1,
const_cast<ConstantFP*>(FV->getConstantFPValue()),
const_cast<ConstantFP*>(TV->getConstantFPValue())
};
- const Type *FPTy = Elts[0]->getType();
+ Type *FPTy = Elts[0]->getType();
const TargetData &TD = *TLI.getTargetData();
// Create a ConstantArray of the two constants.
@@ -7465,10 +7500,13 @@ SDValue DAGCombiner::SimplifySelectCC(DebugLoc DL, SDValue N0, SDValue N1,
SDValue Cond = DAG.getSetCC(DL,
TLI.getSetCCResultType(N0.getValueType()),
N0, N1, CC);
+ AddToWorkList(Cond.getNode());
SDValue CstOffset = DAG.getNode(ISD::SELECT, DL, Zero.getValueType(),
Cond, One, Zero);
+ AddToWorkList(CstOffset.getNode());
CPIdx = DAG.getNode(ISD::ADD, DL, TLI.getPointerTy(), CPIdx,
CstOffset);
+ AddToWorkList(CPIdx.getNode());
return DAG.getLoad(TV->getValueType(0), DL, DAG.getEntryNode(), CPIdx,
MachinePointerInfo::getConstantPool(), false,
false, Alignment);
@@ -7553,7 +7591,8 @@ SDValue DAGCombiner::SimplifySelectCC(DebugLoc DL, SDValue N0, SDValue N1,
// fold select C, 16, 0 -> shl C, 4
if (N2C && N3C && N3C->isNullValue() && N2C->getAPIntValue().isPowerOf2() &&
- TLI.getBooleanContents() == TargetLowering::ZeroOrOneBooleanContent) {
+ TLI.getBooleanContents(N0.getValueType().isVector()) ==
+ TargetLowering::ZeroOrOneBooleanContent) {
// If the caller doesn't want us to simplify this into a zext of a compare,
// don't do it.
diff --git a/lib/CodeGen/SelectionDAG/FastISel.cpp b/lib/CodeGen/SelectionDAG/FastISel.cpp
index 54a7d43..e8f8c73 100644
--- a/lib/CodeGen/SelectionDAG/FastISel.cpp
+++ b/lib/CodeGen/SelectionDAG/FastISel.cpp
@@ -66,17 +66,22 @@ using namespace llvm;
void FastISel::startNewBlock() {
LocalValueMap.clear();
- // Start out as null, meaining no local-value instructions have
- // been emitted.
- LastLocalValue = 0;
+ EmitStartPt = 0;
- // Advance the last local value past any EH_LABEL instructions.
+ // Advance the emit start point past any EH_LABEL instructions.
MachineBasicBlock::iterator
I = FuncInfo.MBB->begin(), E = FuncInfo.MBB->end();
while (I != E && I->getOpcode() == TargetOpcode::EH_LABEL) {
- LastLocalValue = I;
+ EmitStartPt = I;
++I;
}
+ LastLocalValue = EmitStartPt;
+}
+
+void FastISel::flushLocalValueMap() {
+ LocalValueMap.clear();
+ LastLocalValue = EmitStartPt;
+ recomputeInsertPt();
}
bool FastISel::hasTrivialKill(const Value *V) const {
@@ -183,7 +188,7 @@ unsigned FastISel::materializeRegForValue(const Value *V, MVT VT) {
(void) Flt.convertToInteger(x, IntBitWidth, /*isSigned=*/true,
APFloat::rmTowardZero, &isExact);
if (isExact) {
- APInt IntVal(IntBitWidth, 2, x);
+ APInt IntVal(IntBitWidth, x);
unsigned IntegerReg =
getRegForValue(ConstantInt::get(V->getContext(), IntVal));
@@ -422,12 +427,12 @@ bool FastISel::SelectGetElementPtr(const User *I) {
bool NIsKill = hasTrivialKill(I->getOperand(0));
- const Type *Ty = I->getOperand(0)->getType();
+ Type *Ty = I->getOperand(0)->getType();
MVT VT = TLI.getPointerTy();
for (GetElementPtrInst::const_op_iterator OI = I->op_begin()+1,
E = I->op_end(); OI != E; ++OI) {
const Value *Idx = *OI;
- if (const StructType *StTy = dyn_cast<StructType>(Ty)) {
+ if (StructType *StTy = dyn_cast<StructType>(Ty)) {
unsigned Field = cast<ConstantInt>(Idx)->getZExtValue();
if (Field) {
// N = N + Offset
@@ -489,7 +494,7 @@ bool FastISel::SelectCall(const User *I) {
const CallInst *Call = cast<CallInst>(I);
// Handle simple inline asms.
- if (const InlineAsm *IA = dyn_cast<InlineAsm>(Call->getArgOperand(0))) {
+ if (const InlineAsm *IA = dyn_cast<InlineAsm>(Call->getCalledValue())) {
// Don't attempt to handle constraints.
if (!IA->getConstraintString().empty())
return false;
@@ -526,13 +531,10 @@ bool FastISel::SelectCall(const User *I) {
unsigned Reg = 0;
unsigned Offset = 0;
if (const Argument *Arg = dyn_cast<Argument>(Address)) {
- if (Arg->hasByValAttr()) {
- // Byval arguments' frame index is recorded during argument lowering.
- // Use this info directly.
- Offset = FuncInfo.getByValArgumentFrameIndex(Arg);
- if (Offset)
- Reg = TRI.getFrameRegister(*FuncInfo.MF);
- }
+ // Some arguments' frame index is recorded during argument lowering.
+ Offset = FuncInfo.getArgumentFrameIndex(Arg);
+ if (Offset)
+ Reg = TRI.getFrameRegister(*FuncInfo.MF);
}
if (!Reg)
Reg = getRegForValue(Address);
@@ -645,6 +647,16 @@ bool FastISel::SelectCall(const User *I) {
}
}
+ // Usually it does not make sense to initialize a value, make an unrelated
+ // function call, and then use the value, because the value tends to be
+ // spilled across the call. So flush the local value map before lowering
+ // the call: values materialized from here on are emitted after the call
+ // instead of being hoisted to the start of the block. Intrinsics are
+ // exempt since they tend to be inlined.
+ if (!isa<IntrinsicInst>(F))
+ flushLocalValueMap();
+
// An arbitrary call. Bail.
return false;
}
@@ -839,7 +851,7 @@ FastISel::SelectExtractValue(const User *U) {
return false;
const Value *Op0 = EVI->getOperand(0);
- const Type *AggTy = Op0->getType();
+ Type *AggTy = Op0->getType();
// Get the base result register.
unsigned ResultReg;
@@ -1074,7 +1086,7 @@ unsigned FastISel::FastEmit_ri_(MVT VT, unsigned Opcode,
if (MaterialReg == 0) {
// This is a bit ugly/slow, but failing here means falling out of
// fast-isel, which would be very slow.
- const IntegerType *ITy = IntegerType::get(FuncInfo.Fn->getContext(),
+ IntegerType *ITy = IntegerType::get(FuncInfo.Fn->getContext(),
VT.getSizeInBits());
MaterialReg = getRegForValue(ConstantInt::get(ITy, Imm));
}
diff --git a/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp b/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp
index d518b5d..b052740 100644
--- a/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp
+++ b/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp
@@ -78,7 +78,7 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf) {
for (BasicBlock::const_iterator I = BB->begin(), E = BB->end(); I != E; ++I)
if (const AllocaInst *AI = dyn_cast<AllocaInst>(I))
if (const ConstantInt *CUI = dyn_cast<ConstantInt>(AI->getArraySize())) {
- const Type *Ty = AI->getAllocatedType();
+ Type *Ty = AI->getAllocatedType();
uint64_t TySize = TLI.getTargetData()->getTypeAllocSize(Ty);
unsigned Align =
std::max((unsigned)TLI.getTargetData()->getPrefTypeAlignment(Ty),
@@ -216,7 +216,7 @@ unsigned FunctionLoweringInfo::CreateReg(EVT VT) {
/// In the case that the given value has struct or array type, this function
/// will assign registers for each member or element.
///
-unsigned FunctionLoweringInfo::CreateRegs(const Type *Ty) {
+unsigned FunctionLoweringInfo::CreateRegs(Type *Ty) {
SmallVector<EVT, 4> ValueVTs;
ComputeValueVTs(TLI, Ty, ValueVTs);
@@ -260,7 +260,7 @@ FunctionLoweringInfo::GetLiveOutRegInfo(unsigned Reg, unsigned BitWidth) {
/// ComputePHILiveOutRegInfo - Compute LiveOutInfo for a PHI's destination
/// register based on the LiveOutInfo of its operands.
void FunctionLoweringInfo::ComputePHILiveOutRegInfo(const PHINode *PN) {
- const Type *Ty = PN->getType();
+ Type *Ty = PN->getType();
if (!Ty->isIntegerTy() || Ty->isVectorTy())
return;
@@ -351,20 +351,18 @@ void FunctionLoweringInfo::ComputePHILiveOutRegInfo(const PHINode *PN) {
}
}
-/// setByValArgumentFrameIndex - Record frame index for the byval
+/// setArgumentFrameIndex - Record frame index for the
/// argument. This overrides previous frame index entry for this argument,
/// if any.
-void FunctionLoweringInfo::setByValArgumentFrameIndex(const Argument *A,
+void FunctionLoweringInfo::setArgumentFrameIndex(const Argument *A,
int FI) {
- assert (A->hasByValAttr() && "Argument does not have byval attribute!");
ByValArgFrameIndexMap[A] = FI;
}
-/// getByValArgumentFrameIndex - Get frame index for the byval argument.
+/// getArgumentFrameIndex - Get frame index for an argument.
/// If the argument does not have any assigned frame index then 0 is
/// returned.
-int FunctionLoweringInfo::getByValArgumentFrameIndex(const Argument *A) {
- assert (A->hasByValAttr() && "Argument does not have byval attribute!");
+int FunctionLoweringInfo::getArgumentFrameIndex(const Argument *A) {
DenseMap<const Argument *, int>::iterator I =
ByValArgFrameIndexMap.find(A);
if (I != ByValArgFrameIndexMap.end())
@@ -454,3 +452,34 @@ void llvm::CopyCatchInfo(const BasicBlock *SuccBB, const BasicBlock *LPad,
break;
}
}
+
+/// AddLandingPadInfo - Extract the exception handling information from the
+/// landingpad instruction and add them to the specified machine module info.
+void llvm::AddLandingPadInfo(const LandingPadInst &I, MachineModuleInfo &MMI,
+ MachineBasicBlock *MBB) {
+ MMI.addPersonality(MBB,
+ cast<Function>(I.getPersonalityFn()->stripPointerCasts()));
+
+ if (I.isCleanup())
+ MMI.addCleanup(MBB);
+
+ // FIXME: New EH - Add the clauses in reverse order. This isn't 100% correct,
+ // but we need to do it this way because of how the DWARF EH emitter
+ // processes the clauses.
+ for (unsigned i = I.getNumClauses(); i != 0; --i) {
+ Value *Val = I.getClause(i - 1);
+ if (I.isCatch(i - 1)) {
+ MMI.addCatchTypeInfo(MBB,
+ dyn_cast<GlobalVariable>(Val->stripPointerCasts()));
+ } else {
+ // Add filters in a list.
+ Constant *CVal = cast<Constant>(Val);
+ SmallVector<const GlobalVariable*, 4> FilterList;
+ for (User::op_iterator
+ II = CVal->op_begin(), IE = CVal->op_end(); II != IE; ++II)
+ FilterList.push_back(cast<GlobalVariable>((*II)->stripPointerCasts()));
+
+ MMI.addFilterTypeInfo(MBB, FilterList);
+ }
+ }
+}
diff --git a/lib/CodeGen/SelectionDAG/InstrEmitter.cpp b/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
index f0f4743..2ff66f8 100644
--- a/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
+++ b/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
@@ -30,6 +30,12 @@
#include "llvm/Support/MathExtras.h"
using namespace llvm;
+/// MinRCSize - Smallest register class we allow when constraining virtual
+/// registers. If satisfying all register class constraints would require
+/// using a smaller register class, emit a COPY to a new virtual register
+/// instead.
+const unsigned MinRCSize = 4;
+
/// CountResults - The results of target nodes have register or immediate
/// operands first, then an optional chain, and optional glue operands (which do
/// not go into the resulting MachineInstr).
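The rule MinRCSize encodes: constraining a virtual register to a smaller class is acceptable only while the class keeps at least MinRCSize allocatable registers; otherwise a COPY to a fresh virtual register is cheaper than starving the register allocator. A standalone model of that decision (class names and register counts are illustrative, not the LLVM API):

  #include <cstdio>

  struct RegClass {
    const char *Name;
    unsigned NumRegs; // number of allocatable registers in the class
  };

  // Model of MRI->constrainRegClass(VReg, Req, MinSize): succeed only if
  // the constrained class keeps at least MinSize registers. (The real
  // code computes the common subclass of the current and required class;
  // here Req is assumed to already be that subclass.)
  static const RegClass *constrain(const RegClass *Req, unsigned MinSize) {
    return Req->NumRegs >= MinSize ? Req : 0;
  }

  int main() {
    const unsigned MinRCSize = 4;
    RegClass GR32_ABCD = { "GR32_ABCD", 4 };
    RegClass GR32_TINY = { "GR32_TINY", 2 }; // hypothetical tiny class

    if (const RegClass *RC = constrain(&GR32_ABCD, MinRCSize))
      std::printf("constrained to %s\n", RC->Name);    // taken: 4 >= 4
    if (!constrain(&GR32_TINY, MinRCSize))
      std::printf("too small: emit a COPY instead\n"); // taken: 2 < 4
    return 0;
  }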
@@ -87,7 +93,7 @@ EmitCopyFromReg(SDNode *Node, unsigned ResNo, bool IsClone, bool IsCloned,
UI != E; ++UI) {
SDNode *User = *UI;
bool Match = true;
- if (User->getOpcode() == ISD::CopyToReg &&
+ if (User->getOpcode() == ISD::CopyToReg &&
User->getOperand(2).getNode() == Node &&
User->getOperand(2).getResNo() == ResNo) {
unsigned DestReg = cast<RegisterSDNode>(User->getOperand(1))->getReg();
@@ -113,7 +119,8 @@ EmitCopyFromReg(SDNode *Node, unsigned ResNo, bool IsClone, bool IsCloned,
if (!UseRC)
UseRC = RC;
else if (RC) {
- const TargetRegisterClass *ComRC = getCommonSubClass(UseRC, RC);
+ const TargetRegisterClass *ComRC =
+ TRI->getCommonSubClass(UseRC, RC);
// If multiple uses expect disjoint register classes, we emit
// copies in AddRegisterOperand.
if (ComRC)
@@ -139,7 +146,7 @@ EmitCopyFromReg(SDNode *Node, unsigned ResNo, bool IsClone, bool IsCloned,
} else {
DstRC = TLI->getRegClassFor(VT);
}
-
+
// If all uses are reading from the src physical register and copying the
// register is either impossible or very expensive, then don't create a copy.
if (MatchReg && SrcRC->getCopyCost() < 0) {
@@ -167,7 +174,7 @@ unsigned InstrEmitter::getDstOfOnlyCopyToRegUse(SDNode *Node,
return 0;
SDNode *User = *Node->use_begin();
- if (User->getOpcode() == ISD::CopyToReg &&
+ if (User->getOpcode() == ISD::CopyToReg &&
User->getOperand(2).getNode() == Node &&
User->getOperand(2).getResNo() == ResNo) {
unsigned Reg = cast<RegisterSDNode>(User->getOperand(1))->getReg();
@@ -202,7 +209,7 @@ void InstrEmitter::CreateVirtualRegisters(SDNode *Node, MachineInstr *MI,
for (SDNode::use_iterator UI = Node->use_begin(), E = Node->use_end();
UI != E; ++UI) {
SDNode *User = *UI;
- if (User->getOpcode() == ISD::CopyToReg &&
+ if (User->getOpcode() == ISD::CopyToReg &&
User->getOperand(2).getNode() == Node &&
User->getOperand(2).getResNo() == i) {
unsigned Reg = cast<RegisterSDNode>(User->getOperand(1))->getReg();
@@ -280,15 +287,16 @@ InstrEmitter::AddRegisterOperand(MachineInstr *MI, SDValue Op,
MCID.OpInfo[IIOpNum].isOptionalDef();
// If the instruction requires a register in a different class, create
- // a new virtual register and copy the value into it.
+ // a new virtual register and copy the value into it, but first attempt to
+ // shrink VReg's register class within reason. For example, if VReg == GR32
+ // and II requires a GR32_NOSP, just constrain VReg to GR32_NOSP.
if (II) {
- const TargetRegisterClass *SrcRC = MRI->getRegClass(VReg);
const TargetRegisterClass *DstRC = 0;
if (IIOpNum < II->getNumOperands())
DstRC = TII->getRegClass(*II, IIOpNum, TRI);
assert((DstRC || (MCID.isVariadic() && IIOpNum >= MCID.getNumOperands())) &&
"Don't have operand info for this instruction!");
- if (DstRC && !SrcRC->hasSuperClassEq(DstRC)) {
+ if (DstRC && !MRI->constrainRegClass(VReg, DstRC, MinRCSize)) {
unsigned NewVReg = MRI->createVirtualRegister(DstRC);
BuildMI(*MBB, InsertPos, Op.getNode()->getDebugLoc(),
TII->get(TargetOpcode::COPY), NewVReg).addReg(VReg);
@@ -326,7 +334,7 @@ InstrEmitter::AddRegisterOperand(MachineInstr *MI, SDValue Op,
/// AddOperand - Add the specified operand to the specified machine instr. II
/// specifies the instruction information for the node, and IIOpNum is the
-/// operand number (in the II) that we are adding. IIOpNum and II are used for
+/// operand number (in the II) that we are adding. IIOpNum and II are used for
/// assertions only.
void InstrEmitter::AddOperand(MachineInstr *MI, SDValue Op,
unsigned IIOpNum,
@@ -356,7 +364,7 @@ void InstrEmitter::AddOperand(MachineInstr *MI, SDValue Op,
} else if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(Op)) {
int Offset = CP->getOffset();
unsigned Align = CP->getAlignment();
- const Type *Type = CP->getType();
+ Type *Type = CP->getType();
// MachineConstantPool wants an explicit alignment.
if (Align == 0) {
Align = TM->getTargetData()->getPrefTypeAlignment(Type);
@@ -365,7 +373,7 @@ void InstrEmitter::AddOperand(MachineInstr *MI, SDValue Op,
Align = TM->getTargetData()->getTypeAllocSize(Type);
}
}
-
+
unsigned Idx;
MachineConstantPool *MCP = MF->getConstantPool();
if (CP->isMachineConstantPoolEntry())
@@ -389,35 +397,44 @@ void InstrEmitter::AddOperand(MachineInstr *MI, SDValue Op,
}
}
-/// getSuperRegisterRegClass - Returns the register class of a superreg A whose
-/// "SubIdx"'th sub-register class is the specified register class and whose
-/// type matches the specified type.
-static const TargetRegisterClass*
-getSuperRegisterRegClass(const TargetRegisterClass *TRC,
- unsigned SubIdx, EVT VT) {
- // Pick the register class of the superegister for this type
- for (TargetRegisterInfo::regclass_iterator I = TRC->superregclasses_begin(),
- E = TRC->superregclasses_end(); I != E; ++I)
- if ((*I)->hasType(VT) && (*I)->getSubRegisterRegClass(SubIdx) == TRC)
- return *I;
- assert(false && "Couldn't find the register class");
- return 0;
+unsigned InstrEmitter::ConstrainForSubReg(unsigned VReg, unsigned SubIdx,
+ EVT VT, DebugLoc DL) {
+ const TargetRegisterClass *VRC = MRI->getRegClass(VReg);
+ const TargetRegisterClass *RC = TRI->getSubClassWithSubReg(VRC, SubIdx);
+
+ // RC is a sub-class of VRC that supports SubIdx. Try to constrain VReg
+ // within reason.
+ if (RC && RC != VRC)
+ RC = MRI->constrainRegClass(VReg, RC, MinRCSize);
+
+ // VReg has been adjusted. It can be used with SubIdx operands now.
+ if (RC)
+ return VReg;
+
+ // VReg couldn't be reasonably constrained. Emit a COPY to a new virtual
+ // register instead.
+ RC = TRI->getSubClassWithSubReg(TLI->getRegClassFor(VT), SubIdx);
+ assert(RC && "No legal register class for VT supports that SubIdx");
+ unsigned NewReg = MRI->createVirtualRegister(RC);
+ BuildMI(*MBB, InsertPos, DL, TII->get(TargetOpcode::COPY), NewReg)
+ .addReg(VReg);
+ return NewReg;
}
/// EmitSubregNode - Generate machine code for subreg nodes.
///
-void InstrEmitter::EmitSubregNode(SDNode *Node,
+void InstrEmitter::EmitSubregNode(SDNode *Node,
DenseMap<SDValue, unsigned> &VRBaseMap,
bool IsClone, bool IsCloned) {
unsigned VRBase = 0;
unsigned Opc = Node->getMachineOpcode();
-
+
// If the node is only used by a CopyToReg and the dest reg is a vreg, use
// the CopyToReg'd destination register instead of creating a new vreg.
for (SDNode::use_iterator UI = Node->use_begin(), E = Node->use_end();
UI != E; ++UI) {
SDNode *User = *UI;
- if (User->getOpcode() == ISD::CopyToReg &&
+ if (User->getOpcode() == ISD::CopyToReg &&
User->getOperand(2).getNode() == Node) {
unsigned DestReg = cast<RegisterSDNode>(User->getOperand(1))->getReg();
if (TargetRegisterInfo::isVirtualRegister(DestReg)) {
@@ -426,12 +443,14 @@ void InstrEmitter::EmitSubregNode(SDNode *Node,
}
}
}
-
+
if (Opc == TargetOpcode::EXTRACT_SUBREG) {
- // EXTRACT_SUBREG is lowered as %dst = COPY %src:sub
+ // EXTRACT_SUBREG is lowered as %dst = COPY %src:sub. There are no
+ // constraints on the %dst register, COPY can target all legal register
+ // classes.
unsigned SubIdx = cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue();
+ const TargetRegisterClass *TRC = TLI->getRegClassFor(Node->getValueType(0));
- // Figure out the register class to create for the destreg.
unsigned VReg = getVR(Node->getOperand(0), VRBaseMap);
MachineInstr *DefMI = MRI->getVRegDef(VReg);
unsigned SrcReg, DstReg, DefSubIdx;
@@ -443,62 +462,57 @@ void InstrEmitter::EmitSubregNode(SDNode *Node,
// r1026 = extract_subreg r1025, 4
// to a copy
// r1026 = copy r1024
- const TargetRegisterClass *TRC = MRI->getRegClass(SrcReg);
VRBase = MRI->createVirtualRegister(TRC);
BuildMI(*MBB, InsertPos, Node->getDebugLoc(),
TII->get(TargetOpcode::COPY), VRBase).addReg(SrcReg);
} else {
- const TargetRegisterClass *TRC = MRI->getRegClass(VReg);
- const TargetRegisterClass *SRC = TRC->getSubRegisterRegClass(SubIdx);
- assert(SRC && "Invalid subregister index in EXTRACT_SUBREG");
-
- // Figure out the register class to create for the destreg.
- // Note that if we're going to directly use an existing register,
- // it must be precisely the required class, and not a subclass
- // thereof.
- if (VRBase == 0 || SRC != MRI->getRegClass(VRBase)) {
- // Create the reg
- assert(SRC && "Couldn't find source register class");
- VRBase = MRI->createVirtualRegister(SRC);
- }
+ // VReg may not support a SubIdx sub-register, and we may need to
+ // constrain its register class or issue a COPY to a compatible register
+ // class.
+ VReg = ConstrainForSubReg(VReg, SubIdx,
+ Node->getOperand(0).getValueType(),
+ Node->getDebugLoc());
- // Create the extract_subreg machine instruction.
- MachineInstr *MI = BuildMI(*MF, Node->getDebugLoc(),
- TII->get(TargetOpcode::COPY), VRBase);
+ // Create the destreg if it is missing.
+ if (VRBase == 0)
+ VRBase = MRI->createVirtualRegister(TRC);
- // Add source, and subreg index
- AddOperand(MI, Node->getOperand(0), 0, 0, VRBaseMap, /*IsDebug=*/false,
- IsClone, IsCloned);
- assert(TargetRegisterInfo::isVirtualRegister(MI->getOperand(1).getReg())&&
- "Cannot yet extract from physregs");
- MI->getOperand(1).setSubReg(SubIdx);
- MBB->insert(InsertPos, MI);
+ // Create the extract_subreg machine instruction.
+ BuildMI(*MBB, InsertPos, Node->getDebugLoc(),
+ TII->get(TargetOpcode::COPY), VRBase).addReg(VReg, 0, SubIdx);
}
} else if (Opc == TargetOpcode::INSERT_SUBREG ||
Opc == TargetOpcode::SUBREG_TO_REG) {
SDValue N0 = Node->getOperand(0);
SDValue N1 = Node->getOperand(1);
SDValue N2 = Node->getOperand(2);
- unsigned SubReg = getVR(N1, VRBaseMap);
unsigned SubIdx = cast<ConstantSDNode>(N2)->getZExtValue();
- const TargetRegisterClass *TRC = MRI->getRegClass(SubReg);
- const TargetRegisterClass *SRC =
- getSuperRegisterRegClass(TRC, SubIdx, Node->getValueType(0));
-
- // Figure out the register class to create for the destreg.
- // Note that if we're going to directly use an existing register,
- // it must be precisely the required class, and not a subclass
- // thereof.
- if (VRBase == 0 || SRC != MRI->getRegClass(VRBase)) {
- // Create the reg
- assert(SRC && "Couldn't find source register class");
+
+ // Figure out the register class to create for the destreg. It should be
+ // the largest legal register class supporting SubIdx sub-registers.
+ // RegisterCoalescer will constrain it further if it decides to eliminate
+ // the INSERT_SUBREG instruction.
+ //
+ // %dst = INSERT_SUBREG %src, %sub, SubIdx
+ //
+ // is lowered by TwoAddressInstructionPass to:
+ //
+ // %dst = COPY %src
+ // %dst:SubIdx = COPY %sub
+ //
+ // There is no constraint on the %src register class.
+ //
+ const TargetRegisterClass *SRC = TLI->getRegClassFor(Node->getValueType(0));
+ SRC = TRI->getSubClassWithSubReg(SRC, SubIdx);
+ assert(SRC && "No register class supports VT and SubIdx for INSERT_SUBREG");
+
+ if (VRBase == 0 || !SRC->hasSubClassEq(MRI->getRegClass(VRBase)))
VRBase = MRI->createVirtualRegister(SRC);
- }
// Create the insert_subreg or subreg_to_reg machine instruction.
MachineInstr *MI = BuildMI(*MF, Node->getDebugLoc(), TII->get(Opc));
MI->addOperand(MachineOperand::CreateReg(VRBase, true));
-
+
// If creating a subreg_to_reg, then the first input operand
// is an implicit value immediate, otherwise it's a register
if (Opc == TargetOpcode::SUBREG_TO_REG) {
@@ -514,7 +528,7 @@ void InstrEmitter::EmitSubregNode(SDNode *Node,
MBB->insert(InsertPos, MI);
} else
llvm_unreachable("Node is not insert_subreg, extract_subreg, or subreg_to_reg");
-
+
SDValue Op(Node, 0);
bool isNew = VRBaseMap.insert(std::make_pair(Op, VRBase)).second;
(void)isNew; // Silence compiler warning.
@@ -643,9 +657,9 @@ void InstrEmitter::
EmitMachineNode(SDNode *Node, bool IsClone, bool IsCloned,
DenseMap<SDValue, unsigned> &VRBaseMap) {
unsigned Opc = Node->getMachineOpcode();
-
+
// Handle subreg insert/extract specially
- if (Opc == TargetOpcode::EXTRACT_SUBREG ||
+ if (Opc == TargetOpcode::EXTRACT_SUBREG ||
Opc == TargetOpcode::INSERT_SUBREG ||
Opc == TargetOpcode::SUBREG_TO_REG) {
EmitSubregNode(Node, VRBaseMap, IsClone, IsCloned);
@@ -667,7 +681,7 @@ EmitMachineNode(SDNode *Node, bool IsClone, bool IsCloned,
if (Opc == TargetOpcode::IMPLICIT_DEF)
// We want a unique VR for each IMPLICIT_DEF use.
return;
-
+
const MCInstrDesc &II = TII->get(Opc);
unsigned NumResults = CountResults(Node);
unsigned NodeOperands = CountOperands(Node);
@@ -712,12 +726,12 @@ EmitMachineNode(SDNode *Node, bool IsClone, bool IsCloned,
// Then mark unused registers as dead.
MI->setPhysRegsDeadExcept(UsedRegs, *TRI);
}
-
+
// Add result register values for things that are defined by this
// instruction.
if (NumResults)
CreateVirtualRegisters(Node, MI, II, IsClone, IsCloned, VRBaseMap);
-
+
// Emit all of the actual operands of this instruction, adding them to the
// instruction as appropriate.
bool HasOptPRefs = II.getNumDefs() > NumResults;
@@ -751,7 +765,7 @@ EmitMachineNode(SDNode *Node, bool IsClone, bool IsCloned,
MI->addRegisterDead(Reg, TRI);
}
}
-
+
// If the instruction has implicit defs and the node doesn't, mark the
// implicit def as dead. If the node has any glue outputs, we don't do this
// because we don't know what implicit defs are being used by glued nodes.
@@ -761,6 +775,12 @@ EmitMachineNode(SDNode *Node, bool IsClone, bool IsCloned,
i != e; ++i)
MI->addRegisterDead(IDList[i-II.getNumDefs()], TRI);
}
+
+ // Run post-isel target hook to adjust this instruction if needed.
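+ // In asserts builds the hook is invoked unconditionally; presumably the
+ // base-class implementation asserts when hasPostISelHook is set but the
+ // target never overrode AdjustInstrPostInstrSelection.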
+#ifdef NDEBUG
+ if (II.hasPostISelHook())
+#endif
+ TLI->AdjustInstrPostInstrSelection(MI, Node);
}
/// EmitSpecialNode - Generate machine code for a target-independent node and
@@ -788,7 +808,7 @@ EmitSpecialNode(SDNode *Node, bool IsClone, bool IsCloned,
SrcReg = R->getReg();
else
SrcReg = getVR(SrcVal, VRBaseMap);
-
+
unsigned DestReg = cast<RegisterSDNode>(Node->getOperand(1))->getReg();
if (SrcReg == DestReg) // Coalesced away the copy? Ignore.
break;
@@ -808,12 +828,12 @@ EmitSpecialNode(SDNode *Node, bool IsClone, bool IsCloned,
TII->get(TargetOpcode::EH_LABEL)).addSym(S);
break;
}
-
+
case ISD::INLINEASM: {
unsigned NumOps = Node->getNumOperands();
if (Node->getOperand(NumOps-1).getValueType() == MVT::Glue)
--NumOps; // Ignore the glue operand.
-
+
// Create the inline asm machine instruction.
MachineInstr *MI = BuildMI(*MF, Node->getDebugLoc(),
TII->get(TargetOpcode::INLINEASM));
@@ -822,7 +842,7 @@ EmitSpecialNode(SDNode *Node, bool IsClone, bool IsCloned,
SDValue AsmStrV = Node->getOperand(InlineAsm::Op_AsmString);
const char *AsmStr = cast<ExternalSymbolSDNode>(AsmStrV)->getSymbol();
MI->addOperand(MachineOperand::CreateES(AsmStr));
-
+
// Add the HasSideEffect and isAlignStack bits.
int64_t ExtraInfo =
cast<ConstantSDNode>(Node->getOperand(InlineAsm::Op_ExtraInfo))->
@@ -834,10 +854,10 @@ EmitSpecialNode(SDNode *Node, bool IsClone, bool IsCloned,
unsigned Flags =
cast<ConstantSDNode>(Node->getOperand(i))->getZExtValue();
unsigned NumVals = InlineAsm::getNumOperandRegisters(Flags);
-
+
MI->addOperand(MachineOperand::CreateImm(Flags));
++i; // Skip the ID value.
-
+
switch (InlineAsm::getKind(Flags)) {
default: llvm_unreachable("Bad flags!");
case InlineAsm::Kind_RegDef:
@@ -873,13 +893,13 @@ EmitSpecialNode(SDNode *Node, bool IsClone, bool IsCloned,
break;
}
}
-
+
// Get the mdnode from the asm if it exists and add it to the instruction.
SDValue MDV = Node->getOperand(InlineAsm::Op_MDNode);
const MDNode *MD = cast<MDNodeSDNode>(MDV)->getMD();
if (MD)
MI->addOperand(MachineOperand::CreateMetadata(MD));
-
+
MBB->insert(InsertPos, MI);
break;
}
diff --git a/lib/CodeGen/SelectionDAG/InstrEmitter.h b/lib/CodeGen/SelectionDAG/InstrEmitter.h
index 19fc044..c081f38 100644
--- a/lib/CodeGen/SelectionDAG/InstrEmitter.h
+++ b/lib/CodeGen/SelectionDAG/InstrEmitter.h
@@ -77,6 +77,12 @@ class InstrEmitter {
DenseMap<SDValue, unsigned> &VRBaseMap,
bool IsDebug, bool IsClone, bool IsCloned);
+ /// ConstrainForSubReg - Try to constrain VReg to a register class that
+ /// supports SubIdx sub-registers. Emit a copy if that isn't possible.
+ /// Return the virtual register to use.
+ unsigned ConstrainForSubReg(unsigned VReg, unsigned SubIdx,
+ EVT VT, DebugLoc DL);
+
/// EmitSubregNode - Generate machine code for subreg nodes.
///
void EmitSubregNode(SDNode *Node, DenseMap<SDValue, unsigned> &VRBaseMap,
diff --git a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
index d06e2bd..63255ae 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -53,10 +53,15 @@ class SelectionDAGLegalize {
// Libcall insertion helpers.
- /// LastCALLSEQ - This keeps track of the CALLSEQ_END node that has been
+ /// LastCALLSEQ_END - This keeps track of the CALLSEQ_END node that has been
/// legalized. We use this to ensure that calls are properly serialized
/// against each other, including inserted libcalls.
- SmallVector<SDValue, 8> LastCALLSEQ;
+ SDValue LastCALLSEQ_END;
+
+ /// IsLegalizingCall - This member is used *only* for purposes of providing
+ /// helpful assertions that a libcall isn't created while another call is
+ /// being legalized (which could lead to non-serialized call sequences).
+ bool IsLegalizingCall;
/// LegalizedNodes - For nodes that are of legal width, and that have more
/// than one use, this map indicates what regularized operand to use. This
@@ -149,15 +154,6 @@ private:
void ExpandNode(SDNode *Node, SmallVectorImpl<SDValue> &Results);
void PromoteNode(SDNode *Node, SmallVectorImpl<SDValue> &Results);
-
- SDValue getLastCALLSEQ() { return LastCALLSEQ.back(); }
- void setLastCALLSEQ(const SDValue s) { LastCALLSEQ.back() = s; }
- void pushLastCALLSEQ(SDValue s) {
- LastCALLSEQ.push_back(s);
- }
- void popLastCALLSEQ() {
- LastCALLSEQ.pop_back();
- }
};
}
@@ -199,7 +195,8 @@ SelectionDAGLegalize::SelectionDAGLegalize(SelectionDAG &dag)
}
void SelectionDAGLegalize::LegalizeDAG() {
- pushLastCALLSEQ(DAG.getEntryNode());
+ LastCALLSEQ_END = DAG.getEntryNode();
+ IsLegalizingCall = false;
// The legalize process is inherently a bottom-up recursive process (users
// legalize their uses before themselves). Given infinite stack space, we
@@ -227,15 +224,14 @@ void SelectionDAGLegalize::LegalizeDAG() {
/// FindCallEndFromCallStart - Given a chained node that is part of a call
/// sequence, find the CALLSEQ_END node that terminates the call sequence.
static SDNode *FindCallEndFromCallStart(SDNode *Node, int depth = 0) {
- int next_depth = depth;
+ // Nested CALLSEQ_START/END constructs aren't yet legal,
+ // but we can DTRT and handle them correctly here.
if (Node->getOpcode() == ISD::CALLSEQ_START)
- next_depth = depth + 1;
- if (Node->getOpcode() == ISD::CALLSEQ_END) {
- assert(depth > 0 && "negative depth!");
- if (depth == 1)
+ depth++;
+ else if (Node->getOpcode() == ISD::CALLSEQ_END) {
+ depth--;
+ if (depth == 0)
return Node;
- else
- next_depth = depth - 1;
}
if (Node->use_empty())
return 0; // No CallSeqEnd
@@ -266,7 +262,7 @@ static SDNode *FindCallEndFromCallStart(SDNode *Node, int depth = 0) {
SDNode *User = *UI;
for (unsigned i = 0, e = User->getNumOperands(); i != e; ++i)
if (User->getOperand(i) == TheChain)
- if (SDNode *Result = FindCallEndFromCallStart(User, next_depth))
+ if (SDNode *Result = FindCallEndFromCallStart(User, depth))
return Result;
}
return 0;
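The depth bookkeeping above is ordinary bracket matching. A self-contained sketch over a linear token stream (the real code walks chain uses through a DAG, so it recurses instead of looping):

  #include <cassert>
  #include <vector>

  enum Tok { CALLSEQ_START, CALLSEQ_END, OTHER };

  // Return the index of the END matching the START at 'From', or -1.
  static int findMatchingEnd(const std::vector<Tok> &Seq, unsigned From) {
    assert(Seq[From] == CALLSEQ_START && "must start at a CALLSEQ_START");
    int Depth = 0;
    for (unsigned i = From; i != Seq.size(); ++i) {
      if (Seq[i] == CALLSEQ_START)
        ++Depth;
      else if (Seq[i] == CALLSEQ_END && --Depth == 0)
        return (int)i;
    }
    return -1; // no matching CALLSEQ_END
  }

  int main() {
    Tok S[] = { CALLSEQ_START, OTHER, CALLSEQ_START, CALLSEQ_END,
                OTHER, CALLSEQ_END };
    std::vector<Tok> Seq(S, S + 6);
    assert(findMatchingEnd(Seq, 0) == 5); // skips the nested pair
    assert(findMatchingEnd(Seq, 2) == 3);
    return 0;
  }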
@@ -287,7 +283,6 @@ static SDNode *FindCallStartFromCallEnd(SDNode *Node) {
case ISD::CALLSEQ_START:
if (!nested)
return Node;
- Node = Node->getOperand(0).getNode();
nested--;
break;
case ISD::CALLSEQ_END:
@@ -295,7 +290,7 @@ static SDNode *FindCallStartFromCallEnd(SDNode *Node) {
break;
}
}
- return (Node->getOpcode() == ISD::CALLSEQ_START) ? Node : 0;
+ return 0;
}
/// LegalizeAllNodesNotLeadingTo - Recursively walk the uses of N, looking to
@@ -365,7 +360,7 @@ static SDValue ExpandConstantFP(ConstantFPSDNode *CFP, bool UseCP,
// smaller type.
TLI.isLoadExtLegal(ISD::EXTLOAD, SVT) &&
TLI.ShouldShrinkFPConstant(OrigVT)) {
- const Type *SType = SVT.getTypeForEVT(*DAG.getContext());
+ Type *SType = SVT.getTypeForEVT(*DAG.getContext());
LLVMC = cast<ConstantFP>(ConstantExpr::getFPTrunc(LLVMC, SType));
VT = SVT;
Extend = true;
@@ -819,6 +814,11 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) {
Action = TLI.getOperationAction(Node->getOpcode(), InnerType);
break;
}
+ case ISD::ATOMIC_STORE: {
+ Action = TLI.getOperationAction(Node->getOpcode(),
+ Node->getOperand(2).getValueType());
+ break;
+ }
case ISD::SELECT_CC:
case ISD::SETCC:
case ISD::BR_CC: {
@@ -872,7 +872,8 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) {
if (Action == TargetLowering::Legal)
Action = TargetLowering::Expand;
break;
- case ISD::TRAMPOLINE:
+ case ISD::INIT_TRAMPOLINE:
+ case ISD::ADJUST_TRAMPOLINE:
case ISD::FRAMEADDR:
case ISD::RETURNADDR:
// These operations lie about being legal: when they claim to be legal,
@@ -912,12 +913,11 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) {
case ISD::BR_JT:
case ISD::BR_CC:
case ISD::BRCOND:
- assert(LastCALLSEQ.size() == 1 && "branch inside CALLSEQ_BEGIN/END?");
- // Branches tweak the chain to include LastCALLSEQ
+ // Branches tweak the chain to include LastCALLSEQ_END
Ops[0] = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Ops[0],
- getLastCALLSEQ());
+ LastCALLSEQ_END);
Ops[0] = LegalizeOp(Ops[0]);
- setLastCALLSEQ(DAG.getEntryNode());
+ LastCALLSEQ_END = DAG.getEntryNode();
break;
case ISD::SHL:
case ISD::SRL:
@@ -989,6 +989,31 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) {
#endif
assert(0 && "Do not know how to legalize this operator!");
+ case ISD::SRA:
+ case ISD::SRL:
+ case ISD::SHL: {
+ // Scalarize vector SRA/SRL/SHL.
+ EVT VT = Node->getValueType(0);
+ assert(VT.isVector() && "Unable to legalize non-vector shift");
+ assert(TLI.isTypeLegal(VT.getScalarType()) && "Element type must be legal");
+ unsigned NumElem = VT.getVectorNumElements();
+
+ SmallVector<SDValue, 8> Scalars;
+ for (unsigned Idx = 0; Idx < NumElem; Idx++) {
+ SDValue Ex = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl,
+ VT.getScalarType(),
+ Node->getOperand(0), DAG.getIntPtrConstant(Idx));
+ SDValue Sh = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl,
+ VT.getScalarType(),
+ Node->getOperand(1), DAG.getIntPtrConstant(Idx));
+ Scalars.push_back(DAG.getNode(Node->getOpcode(), dl,
+ VT.getScalarType(), Ex, Sh));
+ }
+ Result = DAG.getNode(ISD::BUILD_VECTOR, dl, Node->getValueType(0),
+ &Scalars[0], Scalars.size());
+ break;
+ }
+
case ISD::BUILD_VECTOR:
switch (TLI.getOperationAction(ISD::BUILD_VECTOR, Node->getValueType(0))) {
default: assert(0 && "This action is not supported yet!");
@@ -1006,7 +1031,6 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) {
break;
case ISD::CALLSEQ_START: {
SDNode *CallEnd = FindCallEndFromCallStart(Node);
- assert(CallEnd && "didn't find CALLSEQ_END!");
// Recursively Legalize all of the inputs of the call end that do not lead
// to this call start. This ensures that any libcalls that need be inserted
@@ -1023,9 +1047,9 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) {
// Merge in the last call to ensure that this call starts after the last
// call ended.
- if (getLastCALLSEQ().getOpcode() != ISD::EntryToken) {
+ if (LastCALLSEQ_END.getOpcode() != ISD::EntryToken) {
Tmp1 = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
- Tmp1, getLastCALLSEQ());
+ Tmp1, LastCALLSEQ_END);
Tmp1 = LegalizeOp(Tmp1);
}
@@ -1046,29 +1070,25 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) {
// sequence have been legalized, legalize the call itself. During this
// process, no libcalls can/will be inserted, guaranteeing that no calls
// can overlap.
+ assert(!IsLegalizingCall && "Inconsistent sequentialization of calls!");
// Note that we are selecting this call!
- setLastCALLSEQ(SDValue(CallEnd, 0));
+ LastCALLSEQ_END = SDValue(CallEnd, 0);
+ IsLegalizingCall = true;
// Legalize the call, starting from the CALLSEQ_END.
- LegalizeOp(getLastCALLSEQ());
+ LegalizeOp(LastCALLSEQ_END);
+ assert(!IsLegalizingCall && "CALLSEQ_END should have cleared this!");
return Result;
}
case ISD::CALLSEQ_END:
- {
- SDNode *myCALLSEQ_BEGIN = FindCallStartFromCallEnd(Node);
-
- // If the CALLSEQ_START node hasn't been legalized first, legalize it.
- // This will cause this node to be legalized as well as handling libcalls
- // right.
- if (getLastCALLSEQ().getNode() != Node) {
- LegalizeOp(SDValue(myCALLSEQ_BEGIN, 0));
- DenseMap<SDValue, SDValue>::iterator I = LegalizedNodes.find(Op);
- assert(I != LegalizedNodes.end() &&
- "Legalizing the call start should have legalized this node!");
- return I->second;
- }
-
- pushLastCALLSEQ(SDValue(myCALLSEQ_BEGIN, 0));
+ // If the CALLSEQ_START node hasn't been legalized first, legalize it. This
+ // will cause this node to be legalized as well as handling libcalls right.
+ if (LastCALLSEQ_END.getNode() != Node) {
+ LegalizeOp(SDValue(FindCallStartFromCallEnd(Node), 0));
+ DenseMap<SDValue, SDValue>::iterator I = LegalizedNodes.find(Op);
+ assert(I != LegalizedNodes.end() &&
+ "Legalizing the call start should have legalized this node!");
+ return I->second;
}
// Otherwise, the call start has been legalized and everything is going
@@ -1096,8 +1116,9 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) {
Result.getResNo());
}
}
+ assert(IsLegalizingCall && "Call sequence imbalance between start/end?");
// This finishes up call legalization.
- popLastCALLSEQ();
+ IsLegalizingCall = false;
// If the CALLSEQ_END node has a flag, remember that we legalized it.
AddLegalizedOperand(SDValue(Node, 0), Result.getValue(0));
@@ -1124,7 +1145,7 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) {
// If this is an unaligned load and the target doesn't support it,
// expand it.
if (!TLI.allowsUnalignedMemoryAccesses(LD->getMemoryVT())) {
- const Type *Ty = LD->getMemoryVT().getTypeForEVT(*DAG.getContext());
+ Type *Ty = LD->getMemoryVT().getTypeForEVT(*DAG.getContext());
unsigned ABIAlignment = TLI.getTargetData()->getABITypeAlignment(Ty);
if (LD->getAlignment() < ABIAlignment){
Result = ExpandUnalignedLoad(cast<LoadSDNode>(Result.getNode()),
@@ -1311,7 +1332,7 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) {
// If this is an unaligned load and the target doesn't support it,
// expand it.
if (!TLI.allowsUnalignedMemoryAccesses(LD->getMemoryVT())) {
- const Type *Ty =
+ Type *Ty =
LD->getMemoryVT().getTypeForEVT(*DAG.getContext());
unsigned ABIAlignment =
TLI.getTargetData()->getABITypeAlignment(Ty);
@@ -1491,7 +1512,7 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) {
// If this is an unaligned store and the target doesn't support it,
// expand it.
if (!TLI.allowsUnalignedMemoryAccesses(ST->getMemoryVT())) {
- const Type *Ty = ST->getMemoryVT().getTypeForEVT(*DAG.getContext());
+ Type *Ty = ST->getMemoryVT().getTypeForEVT(*DAG.getContext());
unsigned ABIAlignment= TLI.getTargetData()->getABITypeAlignment(Ty);
if (ST->getAlignment() < ABIAlignment)
Result = ExpandUnalignedStore(cast<StoreSDNode>(Result.getNode()),
@@ -1596,7 +1617,7 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) {
// If this is an unaligned store and the target doesn't support it,
// expand it.
if (!TLI.allowsUnalignedMemoryAccesses(ST->getMemoryVT())) {
- const Type *Ty = ST->getMemoryVT().getTypeForEVT(*DAG.getContext());
+ Type *Ty = ST->getMemoryVT().getTypeForEVT(*DAG.getContext());
unsigned ABIAlignment= TLI.getTargetData()->getABITypeAlignment(Ty);
if (ST->getAlignment() < ABIAlignment)
Result = ExpandUnalignedStore(cast<StoreSDNode>(Result.getNode()),
@@ -1611,82 +1632,101 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) {
EVT WideScalarVT = Tmp3.getValueType().getScalarType();
EVT NarrowScalarVT = StVT.getScalarType();
- // The Store type is illegal, must scalarize the vector store.
- SmallVector<SDValue, 8> Stores;
- bool ScalarLegal = TLI.isTypeLegal(WideScalarVT);
- if (!TLI.isTypeLegal(StVT) && StVT.isVector() && ScalarLegal) {
+ if (StVT.isVector()) {
unsigned NumElem = StVT.getVectorNumElements();
+ // The type of the data we want to save
+ EVT RegVT = Tmp3.getValueType();
+ EVT RegSclVT = RegVT.getScalarType();
+ // The type of data as saved in memory.
+ EVT MemSclVT = StVT.getScalarType();
+
+ bool RegScalarLegal = TLI.isTypeLegal(RegSclVT);
+ bool MemScalarLegal = TLI.isTypeLegal(MemSclVT);
+
+ // We need to expand this store. If the register element type
+ // is legal then we can scalarize the vector and use
+ // truncating stores.
+ if (RegScalarLegal) {
+ // Cast floats into integers
+ unsigned ScalarSize = MemSclVT.getSizeInBits();
+ EVT EltVT = EVT::getIntegerVT(*DAG.getContext(), ScalarSize);
+
+ // Round odd types to the next pow of two.
+ if (!isPowerOf2_32(ScalarSize))
+ ScalarSize = NextPowerOf2(ScalarSize);
+
+        // Stride between stores, in bytes.
+ unsigned Stride = ScalarSize/8;
+ // Extract each of the elements from the original vector
+ // and save them into memory individually.
+ SmallVector<SDValue, 8> Stores;
+ for (unsigned Idx = 0; Idx < NumElem; Idx++) {
+ SDValue Ex = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl,
+ RegSclVT, Tmp3, DAG.getIntPtrConstant(Idx));
+
+ Tmp2 = DAG.getNode(ISD::ADD, dl, Tmp2.getValueType(), Tmp2,
+ DAG.getIntPtrConstant(Stride));
+
+      // This scalar TruncStore may be illegal, but we legalize it
+ // later.
+ SDValue Store = DAG.getTruncStore(Tmp1, dl, Ex, Tmp2,
+ ST->getPointerInfo().getWithOffset(Idx*Stride), MemSclVT,
+ isVolatile, isNonTemporal, Alignment);
- unsigned ScalarSize = StVT.getScalarType().getSizeInBits();
- // Round odd types to the next pow of two.
- if (!isPowerOf2_32(ScalarSize))
- ScalarSize = NextPowerOf2(ScalarSize);
- // Types smaller than 8 bits are promoted to 8 bits.
- ScalarSize = std::max<unsigned>(ScalarSize, 8);
- // Store stride
- unsigned Stride = ScalarSize/8;
- assert(isPowerOf2_32(Stride) && "Stride must be a power of two");
-
- for (unsigned Idx=0; Idx<NumElem; Idx++) {
- SDValue Ex = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl,
- WideScalarVT, Tmp3, DAG.getIntPtrConstant(Idx));
-
-
- EVT NVT = EVT::getIntegerVT(*DAG.getContext(), ScalarSize);
+ Stores.push_back(Store);
+ }
- Ex = DAG.getNode(ISD::TRUNCATE, dl, NVT, Ex);
- Tmp2 = DAG.getNode(ISD::ADD, dl, Tmp2.getValueType(), Tmp2,
- DAG.getIntPtrConstant(Stride));
- SDValue Store = DAG.getStore(Tmp1, dl, Ex, Tmp2,
- ST->getPointerInfo().getWithOffset(Idx*Stride),
- isVolatile, isNonTemporal, Alignment);
- Stores.push_back(Store);
+ Result = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+ &Stores[0], Stores.size());
+ break;
}
- Result = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
- &Stores[0], Stores.size());
- break;
- }
- // The Store type is illegal, must scalarize the vector store.
- // However, the scalar type is illegal. Must bitcast the result
- // and store it in smaller parts.
- if (!TLI.isTypeLegal(StVT) && StVT.isVector()) {
- unsigned WideNumElem = StVT.getVectorNumElements();
- unsigned Stride = NarrowScalarVT.getSizeInBits()/8;
-
- unsigned SizeRatio =
- (WideScalarVT.getSizeInBits() / NarrowScalarVT.getSizeInBits());
-
- EVT CastValueVT = EVT::getVectorVT(*DAG.getContext(), NarrowScalarVT,
- SizeRatio*WideNumElem);
-
- // Cast the wide elem vector to wider vec with smaller elem type.
- // Example <2 x i64> -> <4 x i32>
- Tmp3 = DAG.getNode(ISD::BITCAST, dl, CastValueVT, Tmp3);
-
- for (unsigned Idx=0; Idx<WideNumElem*SizeRatio; Idx++) {
- // Extract elment i
- SDValue Ex = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl,
- NarrowScalarVT, Tmp3, DAG.getIntPtrConstant(Idx));
- // bump pointer.
- Tmp2 = DAG.getNode(ISD::ADD, dl, Tmp2.getValueType(), Tmp2,
- DAG.getIntPtrConstant(Stride));
-
- // Store if, this element is:
- // - First element on big endian, or
- // - Last element on little endian
- if (( TLI.isBigEndian() && (Idx%SizeRatio == 0)) ||
- ((!TLI.isBigEndian() && (Idx%SizeRatio == SizeRatio-1)))) {
- SDValue Store = DAG.getStore(Tmp1, dl, Ex, Tmp2,
- ST->getPointerInfo().getWithOffset(Idx*Stride),
- isVolatile, isNonTemporal, Alignment);
- Stores.push_back(Store);
+      // The scalar register type is illegal.
+      // For example, saving <2 x i64> -> <2 x i32> on x86.
+      // Here we bitcast the value into a vector of smaller parts and
+      // save it using smaller scalars.
+ if (!RegScalarLegal && MemScalarLegal) {
+      // Stride between stores, in bytes.
+ unsigned Stride = MemSclVT.getSizeInBits()/8;
+
+ unsigned SizeRatio =
+ (RegSclVT.getSizeInBits() / MemSclVT.getSizeInBits());
+
+ EVT CastValueVT = EVT::getVectorVT(*DAG.getContext(),
+ MemSclVT,
+ SizeRatio * NumElem);
+
+ // Cast the wide elem vector to wider vec with smaller elem type.
+ // Example <2 x i64> -> <4 x i32>
+ Tmp3 = DAG.getNode(ISD::BITCAST, dl, CastValueVT, Tmp3);
+
+ SmallVector<SDValue, 8> Stores;
+ for (unsigned Idx=0; Idx < NumElem * SizeRatio; Idx++) {
+ // Extract the Ith element.
+ SDValue Ex = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl,
+ NarrowScalarVT, Tmp3, DAG.getIntPtrConstant(Idx));
+ // Bump pointer.
+ Tmp2 = DAG.getNode(ISD::ADD, dl, Tmp2.getValueType(), Tmp2,
+ DAG.getIntPtrConstant(Stride));
+
+          // Store this element only if it is:
+ // - First element on big endian, or
+ // - Last element on little endian
+ if (( TLI.isBigEndian() && (Idx % SizeRatio == 0)) ||
+ ((!TLI.isBigEndian() && (Idx % SizeRatio == SizeRatio-1)))) {
+ SDValue Store = DAG.getStore(Tmp1, dl, Ex, Tmp2,
+ ST->getPointerInfo().getWithOffset(Idx*Stride),
+ isVolatile, isNonTemporal, Alignment);
+ Stores.push_back(Store);
+ }
}
+ Result = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+ &Stores[0], Stores.size());
+ break;
}
- Result = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
- &Stores[0], Stores.size());
- break;
- }
+
+ assert(false && "Unable to legalize the vector trunc store!");
+  } // is vector
// TRUNCSTORE:i16 i32 -> STORE i16
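
Illustrative sketch, not part of the patch: in the scalarized path above, each lane is stored at a fixed byte stride derived from the memory element size, rounded up to a power of two; a 24-bit element, for instance, ends up with a 4-byte stride. A hypothetical helper showing just that computation:

    // Assumed standalone equivalent of the ScalarSize/Stride logic above.
    unsigned storeStrideBytes(unsigned ScalarSizeBits) {
      // Round odd sizes up to the next power of two, as NextPowerOf2 does.
      if ((ScalarSizeBits & (ScalarSizeBits - 1)) != 0) {
        unsigned P = 1;
        while (P < ScalarSizeBits)
          P <<= 1;
        ScalarSizeBits = P;
      }
      return ScalarSizeBits / 8; // e.g. 24 bits -> 32 bits -> 4-byte stride
    }
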
@@ -1999,7 +2039,7 @@ SDValue SelectionDAGLegalize::EmitStackConvert(SDValue SrcOp,
unsigned SrcSize = SrcOp.getValueType().getSizeInBits();
unsigned SlotSize = SlotVT.getSizeInBits();
unsigned DestSize = DestVT.getSizeInBits();
- const Type *DestType = DestVT.getTypeForEVT(*DAG.getContext());
+ Type *DestType = DestVT.getTypeForEVT(*DAG.getContext());
unsigned DestAlign = TLI.getTargetData()->getPrefTypeAlignment(DestType);
// Emit a store to the stack slot. Use a truncstore if the input value is
@@ -2106,7 +2146,7 @@ SDValue SelectionDAGLegalize::ExpandBUILD_VECTOR(SDNode *Node) {
}
} else {
assert(Node->getOperand(i).getOpcode() == ISD::UNDEF);
- const Type *OpNTy = EltVT.getTypeForEVT(*DAG.getContext());
+ Type *OpNTy = EltVT.getTypeForEVT(*DAG.getContext());
CV.push_back(UndefValue::get(OpNTy));
}
}
@@ -2150,6 +2190,7 @@ SDValue SelectionDAGLegalize::ExpandBUILD_VECTOR(SDNode *Node) {
// and leave the Hi part unset.
SDValue SelectionDAGLegalize::ExpandLibCall(RTLIB::Libcall LC, SDNode *Node,
bool isSigned) {
+ assert(!IsLegalizingCall && "Cannot overlap legalization of calls!");
// The input chain to this libcall is the entry node of the function.
// Legalizing the call will automatically add the previous call to the
// dependence.
@@ -2159,7 +2200,7 @@ SDValue SelectionDAGLegalize::ExpandLibCall(RTLIB::Libcall LC, SDNode *Node,
TargetLowering::ArgListEntry Entry;
for (unsigned i = 0, e = Node->getNumOperands(); i != e; ++i) {
EVT ArgVT = Node->getOperand(i).getValueType();
- const Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());
+ Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());
Entry.Node = Node->getOperand(i); Entry.Ty = ArgTy;
Entry.isSExt = isSigned;
Entry.isZExt = !isSigned;
@@ -2169,7 +2210,7 @@ SDValue SelectionDAGLegalize::ExpandLibCall(RTLIB::Libcall LC, SDNode *Node,
TLI.getPointerTy());
// Splice the libcall in wherever FindInputOutputChains tells us to.
- const Type *RetTy = Node->getValueType(0).getTypeForEVT(*DAG.getContext());
+ Type *RetTy = Node->getValueType(0).getTypeForEVT(*DAG.getContext());
// isTailCall may be true since the callee does not reference caller stack
// frame. Check if it's in the right position.
@@ -2185,7 +2226,7 @@ SDValue SelectionDAGLegalize::ExpandLibCall(RTLIB::Libcall LC, SDNode *Node,
return DAG.getRoot();
// Legalize the call sequence, starting with the chain. This will advance
- // the LastCALLSEQ to the legalized version of the CALLSEQ_END node that
+ // the LastCALLSEQ_END to the legalized version of the CALLSEQ_END node that
// was added by LowerCallTo (guaranteeing proper serialization of calls).
LegalizeOp(CallInfo.second);
return CallInfo.first;
@@ -2210,7 +2251,7 @@ SDValue SelectionDAGLegalize::ExpandLibCall(RTLIB::Libcall LC, EVT RetVT,
SDValue Callee = DAG.getExternalSymbol(TLI.getLibcallName(LC),
TLI.getPointerTy());
- const Type *RetTy = RetVT.getTypeForEVT(*DAG.getContext());
+ Type *RetTy = RetVT.getTypeForEVT(*DAG.getContext());
std::pair<SDValue,SDValue> CallInfo =
TLI.LowerCallTo(DAG.getEntryNode(), RetTy, isSigned, !isSigned, false,
false, 0, TLI.getLibcallCallingConv(LC), false,
@@ -2231,13 +2272,14 @@ std::pair<SDValue, SDValue>
SelectionDAGLegalize::ExpandChainLibCall(RTLIB::Libcall LC,
SDNode *Node,
bool isSigned) {
+ assert(!IsLegalizingCall && "Cannot overlap legalization of calls!");
SDValue InChain = Node->getOperand(0);
TargetLowering::ArgListTy Args;
TargetLowering::ArgListEntry Entry;
for (unsigned i = 1, e = Node->getNumOperands(); i != e; ++i) {
EVT ArgVT = Node->getOperand(i).getValueType();
- const Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());
+ Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());
Entry.Node = Node->getOperand(i);
Entry.Ty = ArgTy;
Entry.isSExt = isSigned;
@@ -2248,7 +2290,7 @@ SelectionDAGLegalize::ExpandChainLibCall(RTLIB::Libcall LC,
TLI.getPointerTy());
// Splice the libcall in wherever FindInputOutputChains tells us to.
- const Type *RetTy = Node->getValueType(0).getTypeForEVT(*DAG.getContext());
+ Type *RetTy = Node->getValueType(0).getTypeForEVT(*DAG.getContext());
std::pair<SDValue, SDValue> CallInfo =
TLI.LowerCallTo(InChain, RetTy, isSigned, !isSigned, false, false,
0, TLI.getLibcallCallingConv(LC), /*isTailCall=*/false,
@@ -2256,7 +2298,7 @@ SelectionDAGLegalize::ExpandChainLibCall(RTLIB::Libcall LC,
Callee, Args, DAG, Node->getDebugLoc());
// Legalize the call sequence, starting with the chain. This will advance
- // the LastCALLSEQ to the legalized version of the CALLSEQ_END node that
+ // the LastCALLSEQ_END to the legalized version of the CALLSEQ_END node that
// was added by LowerCallTo (guaranteeing proper serialization of calls).
LegalizeOp(CallInfo.second);
return CallInfo;
@@ -2360,13 +2402,13 @@ SelectionDAGLegalize::ExpandDivRemLibCall(SDNode *Node,
SDValue InChain = DAG.getEntryNode();
EVT RetVT = Node->getValueType(0);
- const Type *RetTy = RetVT.getTypeForEVT(*DAG.getContext());
+ Type *RetTy = RetVT.getTypeForEVT(*DAG.getContext());
TargetLowering::ArgListTy Args;
TargetLowering::ArgListEntry Entry;
for (unsigned i = 0, e = Node->getNumOperands(); i != e; ++i) {
EVT ArgVT = Node->getOperand(i).getValueType();
- const Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());
+ Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());
Entry.Node = Node->getOperand(i); Entry.Ty = ArgTy;
Entry.isSExt = isSigned;
Entry.isZExt = !isSigned;
@@ -2397,7 +2439,7 @@ SelectionDAGLegalize::ExpandDivRemLibCall(SDNode *Node,
LegalizeOp(CallInfo.second);
// Remainder is loaded back from the stack frame.
- SDValue Rem = DAG.getLoad(RetVT, dl, getLastCALLSEQ(), FIPtr,
+ SDValue Rem = DAG.getLoad(RetVT, dl, LastCALLSEQ_END, FIPtr,
MachinePointerInfo(), false, false, 0);
Results.push_back(CallInfo.first);
Results.push_back(Rem);
@@ -2955,8 +2997,10 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node,
Results.push_back(DAG.getConstant(0, MVT::i32));
Results.push_back(Node->getOperand(0));
break;
+ case ISD::ATOMIC_FENCE:
case ISD::MEMBARRIER: {
// If the target didn't lower this, lower it to '__sync_synchronize()' call
+ // FIXME: handle "fence singlethread" more efficiently.
TargetLowering::ArgListTy Args;
std::pair<SDValue, SDValue> CallResult =
TLI.LowerCallTo(Node->getOperand(0), Type::getVoidTy(*DAG.getContext()),
@@ -2969,6 +3013,32 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node,
Results.push_back(CallResult.second);
break;
}
+ case ISD::ATOMIC_LOAD: {
+ // There is no libcall for atomic load; fake it with ATOMIC_CMP_SWAP.
+ SDValue Zero = DAG.getConstant(0, Node->getValueType(0));
+ SDValue Swap = DAG.getAtomic(ISD::ATOMIC_CMP_SWAP, dl,
+ cast<AtomicSDNode>(Node)->getMemoryVT(),
+ Node->getOperand(0),
+ Node->getOperand(1), Zero, Zero,
+ cast<AtomicSDNode>(Node)->getMemOperand(),
+ cast<AtomicSDNode>(Node)->getOrdering(),
+ cast<AtomicSDNode>(Node)->getSynchScope());
+ Results.push_back(Swap.getValue(0));
+ Results.push_back(Swap.getValue(1));
+ break;
+ }
+ case ISD::ATOMIC_STORE: {
+ // There is no libcall for atomic store; fake it with ATOMIC_SWAP.
+ SDValue Swap = DAG.getAtomic(ISD::ATOMIC_SWAP, dl,
+ cast<AtomicSDNode>(Node)->getMemoryVT(),
+ Node->getOperand(0),
+ Node->getOperand(1), Node->getOperand(2),
+ cast<AtomicSDNode>(Node)->getMemOperand(),
+ cast<AtomicSDNode>(Node)->getOrdering(),
+ cast<AtomicSDNode>(Node)->getSynchScope());
+ Results.push_back(Swap.getValue(1));
+ break;
+ }
// By default, atomic intrinsics are marked Legal and lowered. Targets
// which don't support them directly, however, may want libcalls, in which
// case they mark them Expand, and we get here.
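
Illustrative sketch, not part of the patch: both expansions above rest on standard atomic identities — a load is a compare-and-swap of zero with zero (the store half is a no-op either way, and the old value comes back), and a store is a swap whose result is discarded. The same identities in std::atomic terms, assuming matching memory orderings:

    #include <atomic>
    long atomicLoadViaCAS(std::atomic<long> &A) {
      long Expected = 0;
      A.compare_exchange_strong(Expected, 0); // writes 0 only if already 0
      return Expected; // on failure Expected is updated to the observed value
    }
    void atomicStoreViaSwap(std::atomic<long> &A, long V) {
      (void)A.exchange(V); // the old value is simply dropped
    }
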
@@ -3727,8 +3797,7 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node,
LegalizeSetCCCondCode(TLI.getSetCCResultType(Tmp2.getValueType()),
Tmp2, Tmp3, Tmp4, dl);
- assert(LastCALLSEQ.size() == 1 && "branch inside CALLSEQ_BEGIN/END?");
- setLastCALLSEQ(DAG.getEntryNode());
+ LastCALLSEQ_END = DAG.getEntryNode();
assert(!Tmp3.getNode() && "Can't legalize BR_CC with legal condition!");
Tmp3 = DAG.getConstant(0, Tmp2.getValueType());
diff --git a/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
index e6835d8..7c1cc69 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
@@ -55,6 +55,7 @@ void DAGTypeLegalizer::SoftenFloatResult(SDNode *N, unsigned ResNo) {
#endif
llvm_unreachable("Do not know how to soften the result of this operator!");
+  case ISD::MERGE_VALUES: R = SoftenFloatRes_MERGE_VALUES(N, ResNo); break;
case ISD::BITCAST: R = SoftenFloatRes_BITCAST(N); break;
case ISD::BUILD_PAIR: R = SoftenFloatRes_BUILD_PAIR(N); break;
case ISD::ConstantFP:
@@ -107,6 +108,12 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_BITCAST(SDNode *N) {
return BitConvertToInteger(N->getOperand(0));
}
+SDValue DAGTypeLegalizer::SoftenFloatRes_MERGE_VALUES(SDNode *N,
+ unsigned ResNo) {
+ SDValue Op = DisintegrateMERGE_VALUES(N, ResNo);
+ return BitConvertToInteger(Op);
+}
+
SDValue DAGTypeLegalizer::SoftenFloatRes_BUILD_PAIR(SDNode *N) {
// Convert the inputs to integers, and build a new pair out of them.
return DAG.getNode(ISD::BUILD_PAIR, N->getDebugLoc(),
@@ -827,11 +834,11 @@ void DAGTypeLegalizer::ExpandFloatResult(SDNode *N, unsigned ResNo) {
#endif
llvm_unreachable("Do not know how to expand the result of this operator!");
- case ISD::MERGE_VALUES: SplitRes_MERGE_VALUES(N, Lo, Hi); break;
case ISD::UNDEF: SplitRes_UNDEF(N, Lo, Hi); break;
case ISD::SELECT: SplitRes_SELECT(N, Lo, Hi); break;
case ISD::SELECT_CC: SplitRes_SELECT_CC(N, Lo, Hi); break;
+ case ISD::MERGE_VALUES: ExpandRes_MERGE_VALUES(N, ResNo, Lo, Hi); break;
case ISD::BITCAST: ExpandRes_BITCAST(N, Lo, Hi); break;
case ISD::BUILD_PAIR: ExpandRes_BUILD_PAIR(N, Lo, Hi); break;
case ISD::EXTRACT_ELEMENT: ExpandRes_EXTRACT_ELEMENT(N, Lo, Hi); break;
@@ -879,10 +886,10 @@ void DAGTypeLegalizer::ExpandFloatRes_ConstantFP(SDNode *N, SDValue &Lo,
assert(NVT.getSizeInBits() == integerPartWidth &&
"Do not know how to expand this float constant!");
APInt C = cast<ConstantFPSDNode>(N)->getValueAPF().bitcastToAPInt();
- Lo = DAG.getConstantFP(APFloat(APInt(integerPartWidth, 1,
- &C.getRawData()[1])), NVT);
- Hi = DAG.getConstantFP(APFloat(APInt(integerPartWidth, 1,
- &C.getRawData()[0])), NVT);
+ Lo = DAG.getConstantFP(APFloat(APInt(integerPartWidth, C.getRawData()[1])),
+ NVT);
+ Hi = DAG.getConstantFP(APFloat(APInt(integerPartWidth, C.getRawData()[0])),
+ NVT);
}
void DAGTypeLegalizer::ExpandFloatRes_FABS(SDNode *N, SDValue &Lo,
@@ -1201,7 +1208,7 @@ void DAGTypeLegalizer::ExpandFloatRes_XINT_TO_FP(SDNode *N, SDValue &Lo,
static const uint64_t TwoE32[] = { 0x41f0000000000000LL, 0 };
static const uint64_t TwoE64[] = { 0x43f0000000000000LL, 0 };
static const uint64_t TwoE128[] = { 0x47f0000000000000LL, 0 };
- const uint64_t *Parts = 0;
+ ArrayRef<uint64_t> Parts;
switch (SrcVT.getSimpleVT().SimpleTy) {
default:
@@ -1218,7 +1225,7 @@ void DAGTypeLegalizer::ExpandFloatRes_XINT_TO_FP(SDNode *N, SDValue &Lo,
}
Lo = DAG.getNode(ISD::FADD, dl, VT, Hi,
- DAG.getConstantFP(APFloat(APInt(128, 2, Parts)),
+ DAG.getConstantFP(APFloat(APInt(128, Parts)),
MVT::ppcf128));
Lo = DAG.getNode(ISD::SELECT_CC, dl, VT, Src, DAG.getConstant(0, SrcVT),
Lo, Hi, DAG.getCondCode(ISD::SETLT));
@@ -1291,8 +1298,7 @@ void DAGTypeLegalizer::FloatExpandSetCCOperands(SDValue &NewLHS,
GetExpandedFloat(NewLHS, LHSLo, LHSHi);
GetExpandedFloat(NewRHS, RHSLo, RHSHi);
- EVT VT = NewLHS.getValueType();
- assert(VT == MVT::ppcf128 && "Unsupported setcc type!");
+ assert(NewLHS.getValueType() == MVT::ppcf128 && "Unsupported setcc type!");
// FIXME: This generated code sucks. We want to generate
// FCMPU crN, hi1, hi2
@@ -1373,7 +1379,7 @@ SDValue DAGTypeLegalizer::ExpandFloatOp_FP_TO_UINT(SDNode *N) {
assert(N->getOperand(0).getValueType() == MVT::ppcf128 &&
"Logic only correct for ppcf128!");
const uint64_t TwoE31[] = {0x41e0000000000000LL, 0};
- APFloat APF = APFloat(APInt(128, 2, TwoE31));
+ APFloat APF = APFloat(APInt(128, TwoE31));
SDValue Tmp = DAG.getConstantFP(APF, MVT::ppcf128);
// X>=2^31 ? (int)(X-2^31)+0x80000000 : (int)X
// FIXME: generated code sucks.
@@ -1445,6 +1451,7 @@ SDValue DAGTypeLegalizer::ExpandFloatOp_STORE(SDNode *N, unsigned OpNo) {
ST->getValue().getValueType());
assert(NVT.isByteSized() && "Expanded type not byte sized!");
assert(ST->getMemoryVT().bitsLE(NVT) && "Float type not round?");
+ (void)NVT;
SDValue Lo, Hi;
GetExpandedOp(ST->getValue(), Lo, Hi);
diff --git a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
index e7c77dd..a5c4c2d 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
@@ -48,6 +48,7 @@ void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) {
N->dump(&DAG); dbgs() << "\n";
#endif
llvm_unreachable("Do not know how to promote this operator!");
+  case ISD::MERGE_VALUES: Res = PromoteIntRes_MERGE_VALUES(N, ResNo); break;
case ISD::AssertSext: Res = PromoteIntRes_AssertSext(N); break;
case ISD::AssertZext: Res = PromoteIntRes_AssertZext(N); break;
case ISD::BITCAST: Res = PromoteIntRes_BITCAST(N); break;
@@ -63,6 +64,7 @@ void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) {
Res = PromoteIntRes_EXTRACT_VECTOR_ELT(N); break;
case ISD::LOAD: Res = PromoteIntRes_LOAD(cast<LoadSDNode>(N));break;
case ISD::SELECT: Res = PromoteIntRes_SELECT(N); break;
+ case ISD::VSELECT: Res = PromoteIntRes_VSELECT(N); break;
case ISD::SELECT_CC: Res = PromoteIntRes_SELECT_CC(N); break;
case ISD::SETCC: Res = PromoteIntRes_SETCC(N); break;
case ISD::SHL: Res = PromoteIntRes_SHL(N); break;
@@ -84,6 +86,8 @@ void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) {
Res = PromoteIntRes_BUILD_VECTOR(N); break;
case ISD::SCALAR_TO_VECTOR:
Res = PromoteIntRes_SCALAR_TO_VECTOR(N); break;
+ case ISD::CONCAT_VECTORS:
+ Res = PromoteIntRes_CONCAT_VECTORS(N); break;
case ISD::SIGN_EXTEND:
case ISD::ZERO_EXTEND:
@@ -114,6 +118,9 @@ void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) {
case ISD::SMULO:
case ISD::UMULO: Res = PromoteIntRes_XMULO(N, ResNo); break;
+ case ISD::ATOMIC_LOAD:
+ Res = PromoteIntRes_Atomic0(cast<AtomicSDNode>(N)); break;
+
case ISD::ATOMIC_LOAD_ADD:
case ISD::ATOMIC_LOAD_SUB:
case ISD::ATOMIC_LOAD_AND:
@@ -136,6 +143,12 @@ void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) {
SetPromotedInteger(SDValue(N, ResNo), Res);
}
+SDValue DAGTypeLegalizer::PromoteIntRes_MERGE_VALUES(SDNode *N,
+ unsigned ResNo) {
+ SDValue Op = DisintegrateMERGE_VALUES(N, ResNo);
+ return GetPromotedInteger(Op);
+}
+
SDValue DAGTypeLegalizer::PromoteIntRes_AssertSext(SDNode *N) {
// Sign-extend the new bits, and continue the assertion.
SDValue Op = SExtPromotedInteger(N->getOperand(0));
@@ -150,12 +163,26 @@ SDValue DAGTypeLegalizer::PromoteIntRes_AssertZext(SDNode *N) {
Op.getValueType(), Op, N->getOperand(1));
}
+SDValue DAGTypeLegalizer::PromoteIntRes_Atomic0(AtomicSDNode *N) {
+ EVT ResVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ SDValue Res = DAG.getAtomic(N->getOpcode(), N->getDebugLoc(),
+ N->getMemoryVT(), ResVT,
+ N->getChain(), N->getBasePtr(),
+ N->getMemOperand(), N->getOrdering(),
+ N->getSynchScope());
+ // Legalized the chain result - switch anything that used the old chain to
+ // use the new one.
+ ReplaceValueWith(SDValue(N, 1), Res.getValue(1));
+ return Res;
+}
+
SDValue DAGTypeLegalizer::PromoteIntRes_Atomic1(AtomicSDNode *N) {
SDValue Op2 = GetPromotedInteger(N->getOperand(2));
SDValue Res = DAG.getAtomic(N->getOpcode(), N->getDebugLoc(),
N->getMemoryVT(),
N->getChain(), N->getBasePtr(),
- Op2, N->getMemOperand());
+ Op2, N->getMemOperand(), N->getOrdering(),
+ N->getSynchScope());
// Legalized the chain result - switch anything that used the old chain to
// use the new one.
ReplaceValueWith(SDValue(N, 1), Res.getValue(1));
@@ -167,7 +194,8 @@ SDValue DAGTypeLegalizer::PromoteIntRes_Atomic2(AtomicSDNode *N) {
SDValue Op3 = GetPromotedInteger(N->getOperand(3));
SDValue Res = DAG.getAtomic(N->getOpcode(), N->getDebugLoc(),
N->getMemoryVT(), N->getChain(), N->getBasePtr(),
- Op2, Op3, N->getMemOperand());
+ Op2, Op3, N->getMemOperand(), N->getOrdering(),
+ N->getSynchScope());
// Legalized the chain result - switch anything that used the old chain to
// use the new one.
ReplaceValueWith(SDValue(N, 1), Res.getValue(1));
@@ -457,6 +485,14 @@ SDValue DAGTypeLegalizer::PromoteIntRes_SELECT(SDNode *N) {
LHS.getValueType(), N->getOperand(0),LHS,RHS);
}
+SDValue DAGTypeLegalizer::PromoteIntRes_VSELECT(SDNode *N) {
+ SDValue Mask = GetPromotedInteger(N->getOperand(0));
+ SDValue LHS = GetPromotedInteger(N->getOperand(1));
+ SDValue RHS = GetPromotedInteger(N->getOperand(2));
+ return DAG.getNode(ISD::VSELECT, N->getDebugLoc(),
+ LHS.getValueType(), Mask, LHS, RHS);
+}
+
SDValue DAGTypeLegalizer::PromoteIntRes_SELECT_CC(SDNode *N) {
SDValue LHS = GetPromotedInteger(N->getOperand(2));
SDValue RHS = GetPromotedInteger(N->getOperand(3));
@@ -467,16 +503,24 @@ SDValue DAGTypeLegalizer::PromoteIntRes_SELECT_CC(SDNode *N) {
SDValue DAGTypeLegalizer::PromoteIntRes_SETCC(SDNode *N) {
EVT SVT = TLI.getSetCCResultType(N->getOperand(0).getValueType());
- assert(isTypeLegal(SVT) && "Illegal SetCC type!");
+
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+
+ // Only use the result of getSetCCResultType if it is legal,
+ // otherwise just use the promoted result type (NVT).
+ if (!TLI.isTypeLegal(SVT))
+ SVT = NVT;
+
DebugLoc dl = N->getDebugLoc();
+ assert(SVT.isVector() == N->getOperand(0).getValueType().isVector() &&
+ "Vector compare must return a vector result!");
// Get the SETCC result using the canonical SETCC type.
- SDValue SetCC = DAG.getNode(ISD::SETCC, dl, SVT, N->getOperand(0),
+ SDValue SetCC = DAG.getNode(N->getOpcode(), dl, SVT, N->getOperand(0),
N->getOperand(1), N->getOperand(2));
- // Convert to the expected type.
- EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
assert(NVT.bitsLE(SVT) && "Integer type overpromoted?");
+ // Convert to the expected type.
return DAG.getNode(ISD::TRUNCATE, dl, NVT, SetCC);
}
@@ -707,6 +751,9 @@ bool DAGTypeLegalizer::PromoteIntegerOperand(SDNode *N, unsigned OpNo) {
llvm_unreachable("Do not know how to promote this operator's operand!");
case ISD::ANY_EXTEND: Res = PromoteIntOp_ANY_EXTEND(N); break;
+ case ISD::ATOMIC_STORE:
+ Res = PromoteIntOp_ATOMIC_STORE(cast<AtomicSDNode>(N));
+ break;
case ISD::BITCAST: Res = PromoteIntOp_BITCAST(N); break;
case ISD::BR_CC: Res = PromoteIntOp_BR_CC(N, OpNo); break;
case ISD::BRCOND: Res = PromoteIntOp_BRCOND(N, OpNo); break;
@@ -721,6 +768,7 @@ bool DAGTypeLegalizer::PromoteIntegerOperand(SDNode *N, unsigned OpNo) {
case ISD::MEMBARRIER: Res = PromoteIntOp_MEMBARRIER(N); break;
case ISD::SCALAR_TO_VECTOR:
Res = PromoteIntOp_SCALAR_TO_VECTOR(N); break;
+ case ISD::VSELECT:
case ISD::SELECT: Res = PromoteIntOp_SELECT(N, OpNo); break;
case ISD::SELECT_CC: Res = PromoteIntOp_SELECT_CC(N, OpNo); break;
case ISD::SETCC: Res = PromoteIntOp_SETCC(N, OpNo); break;
@@ -791,6 +839,13 @@ SDValue DAGTypeLegalizer::PromoteIntOp_ANY_EXTEND(SDNode *N) {
return DAG.getNode(ISD::ANY_EXTEND, N->getDebugLoc(), N->getValueType(0), Op);
}
+SDValue DAGTypeLegalizer::PromoteIntOp_ATOMIC_STORE(AtomicSDNode *N) {
+ SDValue Op2 = GetPromotedInteger(N->getOperand(2));
+ return DAG.getAtomic(N->getOpcode(), N->getDebugLoc(), N->getMemoryVT(),
+ N->getChain(), N->getBasePtr(), Op2, N->getMemOperand(),
+ N->getOrdering(), N->getSynchScope());
+}
+
SDValue DAGTypeLegalizer::PromoteIntOp_BITCAST(SDNode *N) {
// This should only occur in unusual situations like bitcasting to an
// x86_fp80, so just turn it into a store+load
@@ -913,14 +968,17 @@ SDValue DAGTypeLegalizer::PromoteIntOp_SCALAR_TO_VECTOR(SDNode *N) {
}
SDValue DAGTypeLegalizer::PromoteIntOp_SELECT(SDNode *N, unsigned OpNo) {
- assert(OpNo == 0 && "Only know how to promote condition");
+ assert(OpNo == 0 && "Only know how to promote the condition!");
+ SDValue Cond = N->getOperand(0);
+ EVT OpTy = N->getOperand(1).getValueType();
// Promote all the way up to the canonical SetCC type.
- EVT SVT = TLI.getSetCCResultType(N->getOperand(1).getValueType());
- SDValue Cond = PromoteTargetBoolean(N->getOperand(0), SVT);
+ EVT SVT = TLI.getSetCCResultType(N->getOpcode() == ISD::SELECT ?
+ OpTy.getScalarType() : OpTy);
+ Cond = PromoteTargetBoolean(Cond, SVT);
- return SDValue(DAG.UpdateNodeOperands(N, Cond,
- N->getOperand(1), N->getOperand(2)), 0);
+ return SDValue(DAG.UpdateNodeOperands(N, Cond, N->getOperand(1),
+ N->getOperand(2)), 0);
}
SDValue DAGTypeLegalizer::PromoteIntOp_SELECT_CC(SDNode *N, unsigned OpNo) {
@@ -1024,7 +1082,7 @@ void DAGTypeLegalizer::ExpandIntegerResult(SDNode *N, unsigned ResNo) {
#endif
llvm_unreachable("Do not know how to expand the result of this operator!");
- case ISD::MERGE_VALUES: SplitRes_MERGE_VALUES(N, Lo, Hi); break;
+ case ISD::MERGE_VALUES: SplitRes_MERGE_VALUES(N, ResNo, Lo, Hi); break;
case ISD::SELECT: SplitRes_SELECT(N, Lo, Hi); break;
case ISD::SELECT_CC: SplitRes_SELECT_CC(N, Lo, Hi); break;
case ISD::UNDEF: SplitRes_UNDEF(N, Lo, Hi); break;
@@ -1055,6 +1113,7 @@ void DAGTypeLegalizer::ExpandIntegerResult(SDNode *N, unsigned ResNo) {
case ISD::UDIV: ExpandIntRes_UDIV(N, Lo, Hi); break;
case ISD::UREM: ExpandIntRes_UREM(N, Lo, Hi); break;
case ISD::ZERO_EXTEND: ExpandIntRes_ZERO_EXTEND(N, Lo, Hi); break;
+ case ISD::ATOMIC_LOAD: ExpandIntRes_ATOMIC_LOAD(N, Lo, Hi); break;
case ISD::ATOMIC_LOAD_ADD:
case ISD::ATOMIC_LOAD_SUB:
@@ -1546,6 +1605,12 @@ void DAGTypeLegalizer::ExpandIntRes_ADDSUBE(SDNode *N,
ReplaceValueWith(SDValue(N, 1), Hi.getValue(1));
}
+void DAGTypeLegalizer::ExpandIntRes_MERGE_VALUES(SDNode *N, unsigned ResNo,
+ SDValue &Lo, SDValue &Hi) {
+ SDValue Res = DisintegrateMERGE_VALUES(N, ResNo);
+ SplitInteger(Res, Lo, Hi);
+}
+
void DAGTypeLegalizer::ExpandIntRes_ANY_EXTEND(SDNode *N,
SDValue &Lo, SDValue &Hi) {
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
@@ -2176,9 +2241,9 @@ void DAGTypeLegalizer::ExpandIntRes_UADDSUBO(SDNode *N,
void DAGTypeLegalizer::ExpandIntRes_XMULO(SDNode *N,
SDValue &Lo, SDValue &Hi) {
EVT VT = N->getValueType(0);
- const Type *RetTy = VT.getTypeForEVT(*DAG.getContext());
+ Type *RetTy = VT.getTypeForEVT(*DAG.getContext());
EVT PtrVT = TLI.getPointerTy();
- const Type *PtrTy = PtrVT.getTypeForEVT(*DAG.getContext());
+ Type *PtrTy = PtrVT.getTypeForEVT(*DAG.getContext());
DebugLoc dl = N->getDebugLoc();
// A divide for UMULO should be faster than a function call.
@@ -2222,7 +2287,7 @@ void DAGTypeLegalizer::ExpandIntRes_XMULO(SDNode *N,
TargetLowering::ArgListEntry Entry;
for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
EVT ArgVT = N->getOperand(i).getValueType();
- const Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());
+ Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());
Entry.Node = N->getOperand(i);
Entry.Ty = ArgTy;
Entry.isSExt = true;
@@ -2321,6 +2386,20 @@ void DAGTypeLegalizer::ExpandIntRes_ZERO_EXTEND(SDNode *N,
}
}
+void DAGTypeLegalizer::ExpandIntRes_ATOMIC_LOAD(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ DebugLoc dl = N->getDebugLoc();
+ EVT VT = cast<AtomicSDNode>(N)->getMemoryVT();
+ SDValue Zero = DAG.getConstant(0, VT);
+ SDValue Swap = DAG.getAtomic(ISD::ATOMIC_CMP_SWAP, dl, VT,
+ N->getOperand(0),
+ N->getOperand(1), Zero, Zero,
+ cast<AtomicSDNode>(N)->getMemOperand(),
+ cast<AtomicSDNode>(N)->getOrdering(),
+ cast<AtomicSDNode>(N)->getSynchScope());
+ ReplaceValueWith(SDValue(N, 0), Swap.getValue(0));
+ ReplaceValueWith(SDValue(N, 1), Swap.getValue(1));
+}
//===----------------------------------------------------------------------===//
// Integer Operand Expansion
@@ -2365,6 +2444,8 @@ bool DAGTypeLegalizer::ExpandIntegerOperand(SDNode *N, unsigned OpNo) {
case ISD::ROTR: Res = ExpandIntOp_Shift(N); break;
case ISD::RETURNADDR:
case ISD::FRAMEADDR: Res = ExpandIntOp_RETURNADDR(N); break;
+
+ case ISD::ATOMIC_STORE: Res = ExpandIntOp_ATOMIC_STORE(N); break;
}
// If the result is null, the sub-method took care of registering results etc.
@@ -2742,6 +2823,19 @@ SDValue DAGTypeLegalizer::ExpandIntOp_UINT_TO_FP(SDNode *N) {
return MakeLibCall(LC, DstVT, &Op, 1, true, dl);
}
+SDValue DAGTypeLegalizer::ExpandIntOp_ATOMIC_STORE(SDNode *N) {
+ DebugLoc dl = N->getDebugLoc();
+ SDValue Swap = DAG.getAtomic(ISD::ATOMIC_SWAP, dl,
+ cast<AtomicSDNode>(N)->getMemoryVT(),
+ N->getOperand(0),
+ N->getOperand(1), N->getOperand(2),
+ cast<AtomicSDNode>(N)->getMemOperand(),
+ cast<AtomicSDNode>(N)->getOrdering(),
+ cast<AtomicSDNode>(N)->getSynchScope());
+ return Swap.getValue(1);
+}
+
+
SDValue DAGTypeLegalizer::PromoteIntRes_EXTRACT_SUBVECTOR(SDNode *N) {
SDValue InOp0 = N->getOperand(0);
EVT InVT = InOp0.getValueType();
@@ -2775,7 +2869,6 @@ SDValue DAGTypeLegalizer::PromoteIntRes_EXTRACT_SUBVECTOR(SDNode *N) {
SDValue DAGTypeLegalizer::PromoteIntRes_VECTOR_SHUFFLE(SDNode *N) {
-
ShuffleVectorSDNode *SV = cast<ShuffleVectorSDNode>(N);
EVT VT = N->getValueType(0);
DebugLoc dl = N->getDebugLoc();
@@ -2830,6 +2923,46 @@ SDValue DAGTypeLegalizer::PromoteIntRes_SCALAR_TO_VECTOR(SDNode *N) {
return DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, NOutVT, Op);
}
+SDValue DAGTypeLegalizer::PromoteIntRes_CONCAT_VECTORS(SDNode *N) {
+ DebugLoc dl = N->getDebugLoc();
+
+  SDValue Op0 = N->getOperand(0);
+ SDValue Op1 = N->getOperand(1);
+ assert(Op0.getValueType() == Op1.getValueType() &&
+ "Invalid input vector types");
+
+ EVT OutVT = N->getValueType(0);
+ EVT NOutVT = TLI.getTypeToTransformTo(*DAG.getContext(), OutVT);
+ assert(NOutVT.isVector() && "This type must be promoted to a vector type");
+
+ EVT OutElemTy = NOutVT.getVectorElementType();
+
+ unsigned NumElem0 = Op0.getValueType().getVectorNumElements();
+ unsigned NumElem1 = Op1.getValueType().getVectorNumElements();
+ unsigned NumOutElem = NOutVT.getVectorNumElements();
+ assert(NumElem0 + NumElem1 == NumOutElem &&
+ "Invalid number of incoming elements");
+
+ // Take the elements from the first vector.
+ SmallVector<SDValue, 8> Ops(NumOutElem);
+ for (unsigned i = 0; i < NumElem0; ++i) {
+ SDValue Ext = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl,
+ Op0.getValueType().getScalarType(), Op0,
+ DAG.getIntPtrConstant(i));
+ Ops[i] = DAG.getNode(ISD::ANY_EXTEND, dl, OutElemTy, Ext);
+ }
+
+ // Take the elements from the second vector
+ for (unsigned i = 0; i < NumElem1; ++i) {
+ SDValue Ext = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl,
+ Op1.getValueType().getScalarType(), Op1,
+ DAG.getIntPtrConstant(i));
+ Ops[i + NumElem0] = DAG.getNode(ISD::ANY_EXTEND, dl, OutElemTy, Ext);
+ }
+
+ return DAG.getNode(ISD::BUILD_VECTOR, dl, NOutVT, &Ops[0], Ops.size());
+}
+
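
A worked example with hypothetical types, not part of the patch: promoting concat_vectors of two <2 x i8> operands to <4 x i16> extracts all four i8 lanes, ANY_EXTENDs each to i16, and rebuilds the result with BUILD_VECTOR, exactly the two loops above. On plain arrays (zero-extension standing in for ANY_EXTEND, whose high bits are unspecified):

    #include <array>
    #include <cstdint>
    std::array<uint16_t, 4> concatWiden(const std::array<uint8_t, 2> &A,
                                        const std::array<uint8_t, 2> &B) {
      std::array<uint16_t, 4> Out{};
      for (unsigned i = 0; i < 2; ++i)
        Out[i] = A[i];     // lanes taken from the first vector, widened
      for (unsigned i = 0; i < 2; ++i)
        Out[i + 2] = B[i]; // lanes taken from the second vector, widened
      return Out;
    }
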
SDValue DAGTypeLegalizer::PromoteIntRes_INSERT_VECTOR_ELT(SDNode *N) {
EVT OutVT = N->getValueType(0);
EVT NOutVT = TLI.getTypeToTransformTo(*DAG.getContext(), OutVT);
@@ -2838,14 +2971,12 @@ SDValue DAGTypeLegalizer::PromoteIntRes_INSERT_VECTOR_ELT(SDNode *N) {
EVT NOutVTElem = NOutVT.getVectorElementType();
DebugLoc dl = N->getDebugLoc();
-
- SDValue ConvertedVector = DAG.getNode(ISD::ANY_EXTEND, dl, NOutVT,
- N->getOperand(0));
+ SDValue V0 = GetPromotedInteger(N->getOperand(0));
SDValue ConvElem = DAG.getNode(ISD::ANY_EXTEND, dl,
NOutVTElem, N->getOperand(1));
- return DAG.getNode(ISD::INSERT_VECTOR_ELT, dl,NOutVT,
- ConvertedVector, ConvElem, N->getOperand(2));
+ return DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, NOutVT,
+ V0, ConvElem, N->getOperand(2));
}
SDValue DAGTypeLegalizer::PromoteIntOp_EXTRACT_VECTOR_ELT(SDNode *N) {
@@ -2855,20 +2986,23 @@ SDValue DAGTypeLegalizer::PromoteIntOp_EXTRACT_VECTOR_ELT(SDNode *N) {
SDValue Ext = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl,
V0->getValueType(0).getScalarType(), V0, V1);
- return DAG.getNode(ISD::TRUNCATE, dl, N->getValueType(0), Ext);
-
+ // EXTRACT_VECTOR_ELT can return types which are wider than the incoming
+  // element types. If this is the case then we need to extend the outgoing
+ // value and not truncate it.
+ return DAG.getAnyExtOrTrunc(Ext, dl, N->getValueType(0));
}
SDValue DAGTypeLegalizer::PromoteIntOp_CONCAT_VECTORS(SDNode *N) {
-
DebugLoc dl = N->getDebugLoc();
+ unsigned NumElems = N->getNumOperands();
EVT RetSclrTy = N->getValueType(0).getVectorElementType();
SmallVector<SDValue, 8> NewOps;
+ NewOps.reserve(NumElems);
// For each incoming vector
- for (unsigned VecIdx = 0, E = N->getNumOperands(); VecIdx!= E; ++VecIdx) {
+ for (unsigned VecIdx = 0; VecIdx != NumElems; ++VecIdx) {
SDValue Incoming = GetPromotedInteger(N->getOperand(VecIdx));
EVT SclrTy = Incoming->getValueType(0).getVectorElementType();
unsigned NumElem = Incoming->getValueType(0).getVectorNumElements();
diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
index ba658b0..a4bb577 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
@@ -946,6 +946,13 @@ bool DAGTypeLegalizer::CustomWidenLowerNode(SDNode *N, EVT VT) {
return true;
}
+SDValue DAGTypeLegalizer::DisintegrateMERGE_VALUES(SDNode *N, unsigned ResNo) {
+ for (unsigned i = 0, e = N->getNumValues(); i != e; ++i)
+ if (i != ResNo)
+ ReplaceValueWith(SDValue(N, i), SDValue(N->getOperand(i)));
+ return SDValue(N, ResNo);
+}
+
/// GetSplitDestVTs - Compute the VTs needed for the low/hi parts of a type
/// which is split into two not necessarily identical pieces.
void DAGTypeLegalizer::GetSplitDestVTs(EVT InVT, EVT &LoVT, EVT &HiVT) {
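
A short note, not part of the patch: result i of a MERGE_VALUES node is by definition its operand i, so the helper forwards every result except ResNo to the matching operand and returns the one value the caller still has to legalize. For instance:

    // (t0, t1) = MERGE_VALUES a, b   // t0 == a, t1 == b
    // DisintegrateMERGE_VALUES(N, /*ResNo=*/1) replaces all uses of t0
    // with a, then returns t1 for further legalization.
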
@@ -1046,7 +1053,7 @@ SDValue DAGTypeLegalizer::MakeLibCall(RTLIB::Libcall LC, EVT RetVT,
SDValue Callee = DAG.getExternalSymbol(TLI.getLibcallName(LC),
TLI.getPointerTy());
- const Type *RetTy = RetVT.getTypeForEVT(*DAG.getContext());
+ Type *RetTy = RetVT.getTypeForEVT(*DAG.getContext());
std::pair<SDValue,SDValue> CallInfo =
TLI.LowerCallTo(DAG.getEntryNode(), RetTy, isSigned, !isSigned, false,
false, 0, TLI.getLibcallCallingConv(LC), false,
@@ -1067,7 +1074,7 @@ DAGTypeLegalizer::ExpandChainLibCall(RTLIB::Libcall LC,
TargetLowering::ArgListEntry Entry;
for (unsigned i = 1, e = Node->getNumOperands(); i != e; ++i) {
EVT ArgVT = Node->getOperand(i).getValueType();
- const Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());
+ Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());
Entry.Node = Node->getOperand(i);
Entry.Ty = ArgTy;
Entry.isSExt = isSigned;
@@ -1078,7 +1085,7 @@ DAGTypeLegalizer::ExpandChainLibCall(RTLIB::Libcall LC,
TLI.getPointerTy());
// Splice the libcall in wherever FindInputOutputChains tells us to.
- const Type *RetTy = Node->getValueType(0).getTypeForEVT(*DAG.getContext());
+ Type *RetTy = Node->getValueType(0).getTypeForEVT(*DAG.getContext());
std::pair<SDValue, SDValue> CallInfo =
TLI.LowerCallTo(InChain, RetTy, isSigned, !isSigned, false, false,
0, TLI.getLibcallCallingConv(LC), /*isTailCall=*/false,
@@ -1093,24 +1100,8 @@ DAGTypeLegalizer::ExpandChainLibCall(RTLIB::Libcall LC,
/// type i1, the bits of which conform to getBooleanContents.
SDValue DAGTypeLegalizer::PromoteTargetBoolean(SDValue Bool, EVT VT) {
DebugLoc dl = Bool.getDebugLoc();
- ISD::NodeType ExtendCode;
- switch (TLI.getBooleanContents()) {
- default:
- assert(false && "Unknown BooleanContent!");
- case TargetLowering::UndefinedBooleanContent:
- // Extend to VT by adding rubbish bits.
- ExtendCode = ISD::ANY_EXTEND;
- break;
- case TargetLowering::ZeroOrOneBooleanContent:
- // Extend to VT by adding zero bits.
- ExtendCode = ISD::ZERO_EXTEND;
- break;
- case TargetLowering::ZeroOrNegativeOneBooleanContent: {
- // Extend to VT by copying the sign bit.
- ExtendCode = ISD::SIGN_EXTEND;
- break;
- }
- }
+ ISD::NodeType ExtendCode =
+ TargetLowering::getExtendForContent(TLI.getBooleanContents(VT.isVector()));
return DAG.getNode(ExtendCode, dl, VT, Bool);
}
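
Illustrative sketch, assumed from the switch removed above: the mapping TargetLowering::getExtendForContent is expected to compute, with the boolean contents now queried per vector/scalar via getBooleanContents(VT.isVector()):

    // Hypothetical standalone equivalent; the real helper lives in
    // TargetLowering.
    static ISD::NodeType extendForContent(TargetLowering::BooleanContent BC) {
      switch (BC) {
      case TargetLowering::UndefinedBooleanContent:
        return ISD::ANY_EXTEND;  // extend with rubbish bits
      case TargetLowering::ZeroOrOneBooleanContent:
        return ISD::ZERO_EXTEND; // extend with zero bits
      default:                   // ZeroOrNegativeOneBooleanContent
        return ISD::SIGN_EXTEND; // copy the sign bit
      }
    }
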
diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/lib/CodeGen/SelectionDAG/LegalizeTypes.h
index 952797d..abacdac 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeTypes.h
+++ b/lib/CodeGen/SelectionDAG/LegalizeTypes.h
@@ -148,15 +148,22 @@ private:
SDValue CreateStackStoreLoad(SDValue Op, EVT DestVT);
bool CustomLowerNode(SDNode *N, EVT VT, bool LegalizeResult);
bool CustomWidenLowerNode(SDNode *N, EVT VT);
+
+ /// DisintegrateMERGE_VALUES - Replace each result of the given MERGE_VALUES
+ /// node with the corresponding input operand, except for the result 'ResNo',
+ /// which is returned.
+ SDValue DisintegrateMERGE_VALUES(SDNode *N, unsigned ResNo);
+
SDValue GetVectorElementPointer(SDValue VecPtr, EVT EltVT, SDValue Index);
SDValue JoinIntegers(SDValue Lo, SDValue Hi);
SDValue LibCallify(RTLIB::Libcall LC, SDNode *N, bool isSigned);
SDValue MakeLibCall(RTLIB::Libcall LC, EVT RetVT,
const SDValue *Ops, unsigned NumOps, bool isSigned,
DebugLoc dl);
- std::pair<SDValue, SDValue> ExpandChainLibCall(RTLIB::Libcall LC,
- SDNode *Node, bool isSigned);
- std::pair<SDValue, SDValue> ExpandAtomic(SDNode *Node);
+
+ std::pair<SDValue, SDValue> ExpandChainLibCall(RTLIB::Libcall LC,
+ SDNode *Node, bool isSigned);
+ std::pair<SDValue, SDValue> ExpandAtomic(SDNode *Node);
SDValue PromoteTargetBoolean(SDValue Bool, EVT VT);
void ReplaceValueWith(SDValue From, SDValue To);
@@ -206,8 +213,10 @@ private:
// Integer Result Promotion.
void PromoteIntegerResult(SDNode *N, unsigned ResNo);
+ SDValue PromoteIntRes_MERGE_VALUES(SDNode *N, unsigned ResNo);
SDValue PromoteIntRes_AssertSext(SDNode *N);
SDValue PromoteIntRes_AssertZext(SDNode *N);
+ SDValue PromoteIntRes_Atomic0(AtomicSDNode *N);
SDValue PromoteIntRes_Atomic1(AtomicSDNode *N);
SDValue PromoteIntRes_Atomic2(AtomicSDNode *N);
SDValue PromoteIntRes_EXTRACT_SUBVECTOR(SDNode *N);
@@ -215,6 +224,7 @@ private:
SDValue PromoteIntRes_BUILD_VECTOR(SDNode *N);
SDValue PromoteIntRes_SCALAR_TO_VECTOR(SDNode *N);
SDValue PromoteIntRes_INSERT_VECTOR_ELT(SDNode *N);
+ SDValue PromoteIntRes_CONCAT_VECTORS(SDNode *N);
SDValue PromoteIntRes_BITCAST(SDNode *N);
SDValue PromoteIntRes_BSWAP(SDNode *N);
SDValue PromoteIntRes_BUILD_PAIR(SDNode *N);
@@ -232,6 +242,7 @@ private:
SDValue PromoteIntRes_SADDSUBO(SDNode *N, unsigned ResNo);
SDValue PromoteIntRes_SDIV(SDNode *N);
SDValue PromoteIntRes_SELECT(SDNode *N);
+ SDValue PromoteIntRes_VSELECT(SDNode *N);
SDValue PromoteIntRes_SELECT_CC(SDNode *N);
SDValue PromoteIntRes_SETCC(SDNode *N);
SDValue PromoteIntRes_SHL(SDNode *N);
@@ -249,6 +260,7 @@ private:
// Integer Operand Promotion.
bool PromoteIntegerOperand(SDNode *N, unsigned OperandNo);
SDValue PromoteIntOp_ANY_EXTEND(SDNode *N);
+ SDValue PromoteIntOp_ATOMIC_STORE(AtomicSDNode *N);
SDValue PromoteIntOp_BITCAST(SDNode *N);
SDValue PromoteIntOp_BUILD_PAIR(SDNode *N);
SDValue PromoteIntOp_BR_CC(SDNode *N, unsigned OpNo);
@@ -264,6 +276,7 @@ private:
SDValue PromoteIntOp_SELECT(SDNode *N, unsigned OpNo);
SDValue PromoteIntOp_SELECT_CC(SDNode *N, unsigned OpNo);
SDValue PromoteIntOp_SETCC(SDNode *N, unsigned OpNo);
+ SDValue PromoteIntOp_VSETCC(SDNode *N, unsigned OpNo);
SDValue PromoteIntOp_Shift(SDNode *N);
SDValue PromoteIntOp_SIGN_EXTEND(SDNode *N);
SDValue PromoteIntOp_SINT_TO_FP(SDNode *N);
@@ -289,6 +302,8 @@ private:
// Integer Result Expansion.
void ExpandIntegerResult(SDNode *N, unsigned ResNo);
+ void ExpandIntRes_MERGE_VALUES (SDNode *N, unsigned ResNo,
+ SDValue &Lo, SDValue &Hi);
void ExpandIntRes_ANY_EXTEND (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandIntRes_AssertSext (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandIntRes_AssertZext (SDNode *N, SDValue &Lo, SDValue &Hi);
@@ -320,6 +335,8 @@ private:
void ExpandIntRes_UADDSUBO (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandIntRes_XMULO (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandIntRes_ATOMIC_LOAD (SDNode *N, SDValue &Lo, SDValue &Hi);
+
void ExpandShiftByConstant(SDNode *N, unsigned Amt,
SDValue &Lo, SDValue &Hi);
bool ExpandShiftWithKnownAmountBit(SDNode *N, SDValue &Lo, SDValue &Hi);
@@ -339,6 +356,7 @@ private:
SDValue ExpandIntOp_TRUNCATE(SDNode *N);
SDValue ExpandIntOp_UINT_TO_FP(SDNode *N);
SDValue ExpandIntOp_RETURNADDR(SDNode *N);
+ SDValue ExpandIntOp_ATOMIC_STORE(SDNode *N);
void IntegerExpandSetCCOperands(SDValue &NewLHS, SDValue &NewRHS,
ISD::CondCode &CCCode, DebugLoc dl);
@@ -362,6 +380,7 @@ private:
// Result Float to Integer Conversion.
void SoftenFloatResult(SDNode *N, unsigned OpNo);
+ SDValue SoftenFloatRes_MERGE_VALUES(SDNode *N, unsigned ResNo);
SDValue SoftenFloatRes_BITCAST(SDNode *N);
SDValue SoftenFloatRes_BUILD_PAIR(SDNode *N);
SDValue SoftenFloatRes_ConstantFP(ConstantFPSDNode *N);
@@ -488,6 +507,7 @@ private:
// Vector Result Scalarization: <1 x ty> -> ty.
void ScalarizeVectorResult(SDNode *N, unsigned OpNo);
+ SDValue ScalarizeVecRes_MERGE_VALUES(SDNode *N, unsigned ResNo);
SDValue ScalarizeVecRes_BinOp(SDNode *N);
SDValue ScalarizeVecRes_UnaryOp(SDNode *N);
SDValue ScalarizeVecRes_InregOp(SDNode *N);
@@ -559,6 +579,7 @@ private:
SDValue SplitVecOp_EXTRACT_VECTOR_ELT(SDNode *N);
SDValue SplitVecOp_STORE(StoreSDNode *N, unsigned OpNo);
SDValue SplitVecOp_CONCAT_VECTORS(SDNode *N);
+ SDValue SplitVecOp_VSETCC(SDNode *N);
SDValue SplitVecOp_FP_ROUND(SDNode *N);
//===--------------------------------------------------------------------===//
@@ -581,6 +602,7 @@ private:
// Widen Vector Result Promotion.
void WidenVectorResult(SDNode *N, unsigned ResNo);
+ SDValue WidenVecRes_MERGE_VALUES(SDNode* N, unsigned ResNo);
SDValue WidenVecRes_BITCAST(SDNode* N);
SDValue WidenVecRes_BUILD_VECTOR(SDNode* N);
SDValue WidenVecRes_CONCAT_VECTORS(SDNode* N);
@@ -677,7 +699,8 @@ private:
void GetPairElements(SDValue Pair, SDValue &Lo, SDValue &Hi);
// Generic Result Splitting.
- void SplitRes_MERGE_VALUES(SDNode *N, SDValue &Lo, SDValue &Hi);
+ void SplitRes_MERGE_VALUES(SDNode *N, unsigned ResNo,
+ SDValue &Lo, SDValue &Hi);
void SplitRes_SELECT (SDNode *N, SDValue &Lo, SDValue &Hi);
void SplitRes_SELECT_CC (SDNode *N, SDValue &Lo, SDValue &Hi);
void SplitRes_UNDEF (SDNode *N, SDValue &Lo, SDValue &Hi);
@@ -699,6 +722,8 @@ private:
}
// Generic Result Expansion.
+ void ExpandRes_MERGE_VALUES (SDNode *N, unsigned ResNo,
+ SDValue &Lo, SDValue &Hi);
void ExpandRes_BITCAST (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandRes_BUILD_PAIR (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandRes_EXTRACT_ELEMENT (SDNode *N, SDValue &Lo, SDValue &Hi);
diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp b/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp
index 85ea6b6..8e7e498 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp
@@ -31,6 +31,11 @@ using namespace llvm;
// These routines assume that the Lo/Hi part is stored first in memory on
// little/big-endian machines, followed by the Hi/Lo part. This means that
// they cannot be used as is on vectors, for which Lo is always stored first.
+void DAGTypeLegalizer::ExpandRes_MERGE_VALUES(SDNode *N, unsigned ResNo,
+ SDValue &Lo, SDValue &Hi) {
+ SDValue Op = DisintegrateMERGE_VALUES(N, ResNo);
+ GetExpandedOp(Op, Lo, Hi);
+}
void DAGTypeLegalizer::ExpandRes_BITCAST(SDNode *N, SDValue &Lo, SDValue &Hi) {
EVT OutVT = N->getValueType(0);
@@ -426,37 +431,34 @@ SDValue DAGTypeLegalizer::ExpandOp_NormalStore(SDNode *N, unsigned OpNo) {
// bytes; for integers and floats it is Lo first if and only if the machine is
// little-endian).
-void DAGTypeLegalizer::SplitRes_MERGE_VALUES(SDNode *N,
+void DAGTypeLegalizer::SplitRes_MERGE_VALUES(SDNode *N, unsigned ResNo,
SDValue &Lo, SDValue &Hi) {
- // A MERGE_VALUES node can produce any number of values. We know that the
- // first illegal one needs to be expanded into Lo/Hi.
- unsigned i;
-
- // The string of legal results gets turned into input operands, which have
- // the same type.
- for (i = 0; isTypeLegal(N->getValueType(i)); ++i)
- ReplaceValueWith(SDValue(N, i), SDValue(N->getOperand(i)));
-
- // The first illegal result must be the one that needs to be expanded.
- GetSplitOp(N->getOperand(i), Lo, Hi);
-
- // Legalize the rest of the results into the input operands whether they are
- // legal or not.
- unsigned e = N->getNumValues();
- for (++i; i != e; ++i)
- ReplaceValueWith(SDValue(N, i), SDValue(N->getOperand(i)));
+ SDValue Op = DisintegrateMERGE_VALUES(N, ResNo);
+ GetSplitOp(Op, Lo, Hi);
}
void DAGTypeLegalizer::SplitRes_SELECT(SDNode *N, SDValue &Lo,
SDValue &Hi) {
- SDValue LL, LH, RL, RH;
+ SDValue LL, LH, RL, RH, CL, CH;
DebugLoc dl = N->getDebugLoc();
GetSplitOp(N->getOperand(1), LL, LH);
GetSplitOp(N->getOperand(2), RL, RH);
SDValue Cond = N->getOperand(0);
- Lo = DAG.getNode(ISD::SELECT, dl, LL.getValueType(), Cond, LL, RL);
- Hi = DAG.getNode(ISD::SELECT, dl, LH.getValueType(), Cond, LH, RH);
+ CL = CH = Cond;
+ if (Cond.getValueType().isVector()) {
+ assert(Cond.getValueType().getVectorElementType() == MVT::i1 &&
+ "Condition legalized before result?");
+ unsigned NumElements = Cond.getValueType().getVectorNumElements();
+ EVT VCondTy = EVT::getVectorVT(*DAG.getContext(), MVT::i1, NumElements / 2);
+ CL = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VCondTy, Cond,
+ DAG.getIntPtrConstant(0));
+ CH = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VCondTy, Cond,
+ DAG.getIntPtrConstant(NumElements / 2));
+ }
+
+ Lo = DAG.getNode(N->getOpcode(), dl, LL.getValueType(), CL, LL, RL);
+ Hi = DAG.getNode(N->getOpcode(), dl, LH.getValueType(), CH, LH, RH);
}
void DAGTypeLegalizer::SplitRes_SELECT_CC(SDNode *N, SDValue &Lo,
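
A brief note, not part of the patch: when the condition of a select is itself a vector of i1, it must be split in step with the data. For an <8 x i1> mask, the EXTRACT_SUBVECTOR calls above hand CL lanes 0..3 and CH lanes 4..7, so each half of the result is a select under its own half of the condition:

    // Lo = select CL, LL, RL   // lanes 0 .. NumElements/2 - 1
    // Hi = select CH, LH, RH   // lanes NumElements/2 .. NumElements - 1
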
diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
index ffff10c..f815b00 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
@@ -61,6 +61,9 @@ class VectorLegalizer {
// Implements expansion for UINT_TO_FLOAT; falls back to UnrollVectorOp if
// SINT_TO_FLOAT and SHR on vectors isn't legal.
SDValue ExpandUINT_TO_FLOAT(SDValue Op);
+ // Implement vselect in terms of XOR, AND, OR when blend is not supported
+ // by the target.
+ SDValue ExpandVSELECT(SDValue Op);
SDValue ExpandFNEG(SDValue Op);
// Implements vector promotion; this is essentially just bitcasting the
// operands to a different type and bitcasting the result back to the
@@ -157,8 +160,9 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) {
case ISD::CTLZ:
case ISD::CTPOP:
case ISD::SELECT:
+ case ISD::VSELECT:
case ISD::SELECT_CC:
- case ISD::VSETCC:
+ case ISD::SETCC:
case ISD::ZERO_EXTEND:
case ISD::ANY_EXTEND:
case ISD::TRUNCATE:
@@ -210,11 +214,13 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) {
// FALL THROUGH
}
case TargetLowering::Expand:
- if (Node->getOpcode() == ISD::UINT_TO_FP)
+ if (Node->getOpcode() == ISD::VSELECT)
+ Result = ExpandVSELECT(Op);
+ else if (Node->getOpcode() == ISD::UINT_TO_FP)
Result = ExpandUINT_TO_FLOAT(Op);
else if (Node->getOpcode() == ISD::FNEG)
Result = ExpandFNEG(Op);
- else if (Node->getOpcode() == ISD::VSETCC)
+ else if (Node->getOpcode() == ISD::SETCC)
Result = UnrollVSETCC(Op);
else
Result = DAG.UnrollVectorOp(Op.getNode());
@@ -256,9 +262,41 @@ SDValue VectorLegalizer::PromoteVectorOp(SDValue Op) {
return DAG.getNode(ISD::BITCAST, dl, VT, Op);
}
-SDValue VectorLegalizer::ExpandUINT_TO_FLOAT(SDValue Op) {
+SDValue VectorLegalizer::ExpandVSELECT(SDValue Op) {
+ // Implement VSELECT in terms of XOR, AND, OR
+ // on platforms which do not support blend natively.
+ EVT VT = Op.getOperand(0).getValueType();
+ DebugLoc DL = Op.getDebugLoc();
+ SDValue Mask = Op.getOperand(0);
+ SDValue Op1 = Op.getOperand(1);
+ SDValue Op2 = Op.getOperand(2);
+
+ // If we can't even use the basic vector operations of
+ // AND,OR,XOR, we will have to scalarize the op.
+ if (!TLI.isOperationLegalOrCustom(ISD::AND, VT) ||
+ !TLI.isOperationLegalOrCustom(ISD::XOR, VT) ||
+ !TLI.isOperationLegalOrCustom(ISD::OR, VT))
+ return DAG.UnrollVectorOp(Op.getNode());
+
+  assert(VT.getSizeInBits() == Op1.getValueType().getSizeInBits() &&
+         "Invalid mask size");
+ // Bitcast the operands to be the same type as the mask.
+ // This is needed when we select between FP types because
+ // the mask is a vector of integers.
+ Op1 = DAG.getNode(ISD::BITCAST, DL, VT, Op1);
+ Op2 = DAG.getNode(ISD::BITCAST, DL, VT, Op2);
+
+ SDValue AllOnes = DAG.getConstant(
+ APInt::getAllOnesValue(VT.getScalarType().getSizeInBits()), VT);
+ SDValue NotMask = DAG.getNode(ISD::XOR, DL, VT, Mask, AllOnes);
+
+ Op1 = DAG.getNode(ISD::AND, DL, VT, Op1, Mask);
+ Op2 = DAG.getNode(ISD::AND, DL, VT, Op2, NotMask);
+ return DAG.getNode(ISD::OR, DL, VT, Op1, Op2);
+}
+SDValue VectorLegalizer::ExpandUINT_TO_FLOAT(SDValue Op) {
EVT VT = Op.getOperand(0).getValueType();
DebugLoc DL = Op.getDebugLoc();
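
Illustrative sketch, not part of the patch: per lane, the expansion above computes (Op1 & Mask) | (Op2 & ~Mask), which behaves as a select only when every mask lane is all-ones or all-zeros — the reason ExpandVSELECT is fed a setcc-style mask. On a single lane:

    #include <cstdint>
    uint32_t laneSelect(uint32_t Mask, uint32_t A, uint32_t B) {
      return (A & Mask) | (B & ~Mask); // all-ones mask -> A, all-zeros -> B
    }
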
diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
index b5698f9..107a42b 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -44,8 +44,10 @@ void DAGTypeLegalizer::ScalarizeVectorResult(SDNode *N, unsigned ResNo) {
N->dump(&DAG);
dbgs() << "\n";
#endif
- llvm_unreachable("Do not know how to scalarize the result of this operator!");
+ report_fatal_error("Do not know how to scalarize the result of this "
+ "operator!\n");
+  case ISD::MERGE_VALUES: R = ScalarizeVecRes_MERGE_VALUES(N, ResNo); break;
case ISD::BITCAST: R = ScalarizeVecRes_BITCAST(N); break;
case ISD::BUILD_VECTOR: R = N->getOperand(0); break;
case ISD::CONVERT_RNDSAT: R = ScalarizeVecRes_CONVERT_RNDSAT(N); break;
@@ -62,8 +64,6 @@ void DAGTypeLegalizer::ScalarizeVectorResult(SDNode *N, unsigned ResNo) {
case ISD::SETCC: R = ScalarizeVecRes_SETCC(N); break;
case ISD::UNDEF: R = ScalarizeVecRes_UNDEF(N); break;
case ISD::VECTOR_SHUFFLE: R = ScalarizeVecRes_VECTOR_SHUFFLE(N); break;
- case ISD::VSETCC: R = ScalarizeVecRes_VSETCC(N); break;
-
case ISD::ANY_EXTEND:
case ISD::CTLZ:
case ISD::CTPOP:
@@ -129,6 +129,12 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_BinOp(SDNode *N) {
LHS.getValueType(), LHS, RHS);
}
+SDValue DAGTypeLegalizer::ScalarizeVecRes_MERGE_VALUES(SDNode *N,
+ unsigned ResNo) {
+ SDValue Op = DisintegrateMERGE_VALUES(N, ResNo);
+ return GetScalarizedVector(Op);
+}
+
SDValue DAGTypeLegalizer::ScalarizeVecRes_BITCAST(SDNode *N) {
EVT NewVT = N->getValueType(0).getVectorElementType();
return DAG.getNode(ISD::BITCAST, N->getDebugLoc(),
@@ -237,6 +243,12 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_SELECT_CC(SDNode *N) {
}
SDValue DAGTypeLegalizer::ScalarizeVecRes_SETCC(SDNode *N) {
+ assert(N->getValueType(0).isVector() ==
+ N->getOperand(0).getValueType().isVector() &&
+ "Scalar/Vector type mismatch");
+
+ if (N->getValueType(0).isVector()) return ScalarizeVecRes_VSETCC(N);
+
SDValue LHS = GetScalarizedVector(N->getOperand(0));
SDValue RHS = GetScalarizedVector(N->getOperand(1));
DebugLoc DL = N->getDebugLoc();
@@ -259,35 +271,23 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_VECTOR_SHUFFLE(SDNode *N) {
}
SDValue DAGTypeLegalizer::ScalarizeVecRes_VSETCC(SDNode *N) {
+ assert(N->getValueType(0).isVector() &&
+ N->getOperand(0).getValueType().isVector() &&
+ "Operand types must be vectors");
+
SDValue LHS = GetScalarizedVector(N->getOperand(0));
SDValue RHS = GetScalarizedVector(N->getOperand(1));
EVT NVT = N->getValueType(0).getVectorElementType();
- EVT SVT = TLI.getSetCCResultType(LHS.getValueType());
DebugLoc DL = N->getDebugLoc();
// Turn it into a scalar SETCC.
- SDValue Res = DAG.getNode(ISD::SETCC, DL, SVT, LHS, RHS, N->getOperand(2));
-
- // VSETCC always returns a sign-extended value, while SETCC may not. The
- // SETCC result type may not match the vector element type. Correct these.
- if (NVT.bitsLE(SVT)) {
- // The SETCC result type is bigger than the vector element type.
- // Ensure the SETCC result is sign-extended.
- if (TLI.getBooleanContents() !=
- TargetLowering::ZeroOrNegativeOneBooleanContent)
- Res = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, SVT, Res,
- DAG.getValueType(MVT::i1));
- // Truncate to the final type.
- return DAG.getNode(ISD::TRUNCATE, DL, NVT, Res);
- }
-
- // The SETCC result type is smaller than the vector element type.
- // If the SetCC result is not sign-extended, chop it down to MVT::i1.
- if (TLI.getBooleanContents() !=
- TargetLowering::ZeroOrNegativeOneBooleanContent)
- Res = DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, Res);
- // Sign extend to the final type.
- return DAG.getNode(ISD::SIGN_EXTEND, DL, NVT, Res);
+ SDValue Res = DAG.getNode(ISD::SETCC, DL, MVT::i1, LHS, RHS,
+ N->getOperand(2));
+ // Vectors may have different boolean contents than scalars. Promote the
+ // value appropriately.
+ ISD::NodeType ExtendCode =
+ TargetLowering::getExtendForContent(TLI.getBooleanContents(true));
+ return DAG.getNode(ExtendCode, DL, NVT, Res);
}
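
The scalarized compare produces an MVT::i1, which is then widened with whatever extension matches the target's vector boolean representation. A rough sketch of the mapping performed by getExtendForContent, written out as a standalone assumption rather than the actual LLVM declaration:

    // Assumed mapping: how an i1 boolean is restored to the element type.
    enum BooleanContent { UndefinedBooleanContent,
                          ZeroOrOneBooleanContent,
                          ZeroOrNegativeOneBooleanContent };
    enum ExtendKind { AnyExtend, ZeroExtend, SignExtend };

    static ExtendKind extendForContent(BooleanContent BC) {
      switch (BC) {
      case ZeroOrOneBooleanContent:         return ZeroExtend; // 0 or 1
      case ZeroOrNegativeOneBooleanContent: return SignExtend; // 0 or -1
      default:                              return AnyExtend;  // undefined
      }
    }
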
@@ -415,7 +415,8 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) {
#endif
llvm_unreachable("Do not know how to split the result of this operator!");
- case ISD::MERGE_VALUES: SplitRes_MERGE_VALUES(N, Lo, Hi); break;
+ case ISD::MERGE_VALUES: SplitRes_MERGE_VALUES(N, ResNo, Lo, Hi); break;
+ case ISD::VSELECT:
case ISD::SELECT: SplitRes_SELECT(N, Lo, Hi); break;
case ISD::SELECT_CC: SplitRes_SELECT_CC(N, Lo, Hi); break;
case ISD::UNDEF: SplitRes_UNDEF(N, Lo, Hi); break;
@@ -432,7 +433,6 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) {
SplitVecRes_LOAD(cast<LoadSDNode>(N), Lo, Hi);
break;
case ISD::SETCC:
- case ISD::VSETCC:
SplitVecRes_SETCC(N, Lo, Hi);
break;
case ISD::VECTOR_SHUFFLE:
@@ -524,12 +524,11 @@ void DAGTypeLegalizer::SplitVecRes_BITCAST(SDNode *N, SDValue &Lo,
// Handle some special cases efficiently.
switch (getTypeAction(InVT)) {
- default:
- assert(false && "Unknown type action!");
case TargetLowering::TypeLegal:
case TargetLowering::TypePromoteInteger:
case TargetLowering::TypeSoftenFloat:
case TargetLowering::TypeScalarizeVector:
+ case TargetLowering::TypeWidenVector:
break;
case TargetLowering::TypeExpandInteger:
case TargetLowering::TypeExpandFloat:
@@ -670,7 +669,7 @@ void DAGTypeLegalizer::SplitVecRes_INSERT_VECTOR_ELT(SDNode *N, SDValue &Lo,
// Store the new element. This may be larger than the vector element type,
// so use a truncating store.
SDValue EltPtr = GetVectorElementPointer(StackPtr, EltVT, Idx);
- const Type *VecType = VecVT.getTypeForEVT(*DAG.getContext());
+ Type *VecType = VecVT.getTypeForEVT(*DAG.getContext());
unsigned Alignment =
TLI.getTargetData()->getPrefTypeAlignment(VecType);
Store = DAG.getTruncStore(Store, dl, Elt, EltPtr, MachinePointerInfo(), EltVT,
@@ -740,6 +739,10 @@ void DAGTypeLegalizer::SplitVecRes_LOAD(LoadSDNode *LD, SDValue &Lo,
}
void DAGTypeLegalizer::SplitVecRes_SETCC(SDNode *N, SDValue &Lo, SDValue &Hi) {
+ assert(N->getValueType(0).isVector() &&
+ N->getOperand(0).getValueType().isVector() &&
+ "Operand types must be vectors");
+
EVT LoVT, HiVT;
DebugLoc DL = N->getDebugLoc();
GetSplitDestVTs(N->getValueType(0), LoVT, HiVT);
@@ -965,7 +968,7 @@ bool DAGTypeLegalizer::SplitVectorOperand(SDNode *N, unsigned OpNo) {
dbgs() << "\n";
#endif
llvm_unreachable("Do not know how to split this operator's operand!");
-
+ case ISD::SETCC: Res = SplitVecOp_VSETCC(N); break;
case ISD::BITCAST: Res = SplitVecOp_BITCAST(N); break;
case ISD::EXTRACT_SUBVECTOR: Res = SplitVecOp_EXTRACT_SUBVECTOR(N); break;
case ISD::EXTRACT_VECTOR_ELT:Res = SplitVecOp_EXTRACT_VECTOR_ELT(N); break;
@@ -1163,6 +1166,26 @@ SDValue DAGTypeLegalizer::SplitVecOp_CONCAT_VECTORS(SDNode *N) {
&Elts[0], Elts.size());
}
+SDValue DAGTypeLegalizer::SplitVecOp_VSETCC(SDNode *N) {
+ assert(N->getValueType(0).isVector() &&
+ N->getOperand(0).getValueType().isVector() &&
+ "Operand types must be vectors");
+ // The result has a legal vector type, but the input needs splitting.
+ SDValue Lo0, Hi0, Lo1, Hi1, LoRes, HiRes;
+ DebugLoc DL = N->getDebugLoc();
+ GetSplitVector(N->getOperand(0), Lo0, Hi0);
+ GetSplitVector(N->getOperand(1), Lo1, Hi1);
+ unsigned PartElements = Lo0.getValueType().getVectorNumElements();
+ EVT PartResVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1, PartElements);
+ EVT WideResVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1, 2*PartElements);
+
+ LoRes = DAG.getNode(ISD::SETCC, DL, PartResVT, Lo0, Lo1, N->getOperand(2));
+ HiRes = DAG.getNode(ISD::SETCC, DL, PartResVT, Hi0, Hi1, N->getOperand(2));
+ SDValue Con = DAG.getNode(ISD::CONCAT_VECTORS, DL, WideResVT, LoRes, HiRes);
+ return PromoteTargetBoolean(Con, N->getValueType(0));
+}
+
+
SDValue DAGTypeLegalizer::SplitVecOp_FP_ROUND(SDNode *N) {
// The result has a legal vector type, but the input needs splitting.
EVT ResVT = N->getValueType(0);
@@ -1205,6 +1228,7 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) {
#endif
llvm_unreachable("Do not know how to widen the result of this operator!");
+ case ISD::MERGE_VALUES: Res = WidenVecRes_MERGE_VALUES(N, ResNo); break;
case ISD::BITCAST: Res = WidenVecRes_BITCAST(N); break;
case ISD::BUILD_VECTOR: Res = WidenVecRes_BUILD_VECTOR(N); break;
case ISD::CONCAT_VECTORS: Res = WidenVecRes_CONCAT_VECTORS(N); break;
@@ -1222,10 +1246,6 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) {
case ISD::VECTOR_SHUFFLE:
Res = WidenVecRes_VECTOR_SHUFFLE(cast<ShuffleVectorSDNode>(N));
break;
- case ISD::VSETCC:
- Res = WidenVecRes_VSETCC(N);
- break;
-
case ISD::ADD:
case ISD::AND:
case ISD::BSWAP:
@@ -1557,6 +1577,11 @@ SDValue DAGTypeLegalizer::WidenVecRes_InregOp(SDNode *N) {
WidenVT, WidenLHS, DAG.getValueType(ExtVT));
}
+SDValue DAGTypeLegalizer::WidenVecRes_MERGE_VALUES(SDNode *N, unsigned ResNo) {
+ SDValue WidenVec = DisintegrateMERGE_VALUES(N, ResNo);
+ return GetWidenedVector(WidenVec);
+}
+
SDValue DAGTypeLegalizer::WidenVecRes_BITCAST(SDNode *N) {
SDValue InOp = N->getOperand(0);
EVT InVT = InOp.getValueType();
@@ -1661,6 +1686,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_CONCAT_VECTORS(SDNode *N) {
EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
DebugLoc dl = N->getDebugLoc();
unsigned WidenNumElts = WidenVT.getVectorNumElements();
+ unsigned NumInElts = InVT.getVectorNumElements();
unsigned NumOperands = N->getNumOperands();
bool InputWidened = false; // Indicates we need to widen the input.
@@ -1686,17 +1712,17 @@ SDValue DAGTypeLegalizer::WidenVecRes_CONCAT_VECTORS(SDNode *N) {
if (N->getOperand(i).getOpcode() != ISD::UNDEF)
break;
- if (i > NumOperands)
+ if (i == NumOperands)
// Everything but the first operand is an UNDEF so just return the
// widened first operand.
return GetWidenedVector(N->getOperand(0));
if (NumOperands == 2) {
// Replace concat of two operands with a shuffle.
- SmallVector<int, 16> MaskOps(WidenNumElts);
- for (unsigned i=0; i < WidenNumElts/2; ++i) {
+ SmallVector<int, 16> MaskOps(WidenNumElts, -1);
+ for (unsigned i = 0; i < NumInElts; ++i) {
MaskOps[i] = i;
- MaskOps[i+WidenNumElts/2] = i+WidenNumElts;
+ MaskOps[i + NumInElts] = i + WidenNumElts;
}
return DAG.getVectorShuffle(WidenVT, dl,
GetWidenedVector(N->getOperand(0)),
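
The two MaskOps changes above fix the shuffle mask for widened inputs: only the first NumInElts lanes of each widened operand hold real data, so the mask must index those lanes while any remaining result lanes stay undef (-1). A standalone illustration with hypothetical sizes:

    #include <cassert>
    #include <vector>

    int main() {
      // Two <2 x T> inputs widened to <4 x T>; the concat result is <4 x T>.
      unsigned NumInElts = 2, WidenNumElts = 4;
      std::vector<int> MaskOps(WidenNumElts, -1);   // -1 marks an undef lane
      for (unsigned i = 0; i < NumInElts; ++i) {
        MaskOps[i] = i;                             // low lanes of operand 0
        MaskOps[i + NumInElts] = i + WidenNumElts;  // low lanes of operand 1
      }
      // Indices >= WidenNumElts address the second shuffle operand.
      assert(MaskOps[0] == 0 && MaskOps[1] == 1);
      assert(MaskOps[2] == 4 && MaskOps[3] == 5);
      return 0;
    }
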
@@ -1708,7 +1734,6 @@ SDValue DAGTypeLegalizer::WidenVecRes_CONCAT_VECTORS(SDNode *N) {
// Fall back to use extracts and build vector.
EVT EltVT = WidenVT.getVectorElementType();
- unsigned NumInElts = InVT.getVectorNumElements();
SmallVector<SDValue, 16> Ops(WidenNumElts);
unsigned Idx = 0;
for (unsigned i=0; i < NumOperands; ++i) {
@@ -1916,6 +1941,11 @@ SDValue DAGTypeLegalizer::WidenVecRes_SELECT_CC(SDNode *N) {
}
SDValue DAGTypeLegalizer::WidenVecRes_SETCC(SDNode *N) {
+ assert(N->getValueType(0).isVector() ==
+ N->getOperand(0).getValueType().isVector() &&
+ "Scalar/Vector type mismatch");
+ if (N->getValueType(0).isVector()) return WidenVecRes_VSETCC(N);
+
EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
SDValue InOp1 = GetWidenedVector(N->getOperand(0));
SDValue InOp2 = GetWidenedVector(N->getOperand(1));
@@ -1954,6 +1984,9 @@ SDValue DAGTypeLegalizer::WidenVecRes_VECTOR_SHUFFLE(ShuffleVectorSDNode *N) {
}
SDValue DAGTypeLegalizer::WidenVecRes_VSETCC(SDNode *N) {
+ assert(N->getValueType(0).isVector() &&
+ N->getOperand(0).getValueType().isVector() &&
+ "Operands must be vectors");
EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
unsigned WidenNumElts = WidenVT.getVectorNumElements();
@@ -1970,7 +2003,8 @@ SDValue DAGTypeLegalizer::WidenVecRes_VSETCC(SDNode *N) {
assert(InOp1.getValueType() == WidenInVT &&
InOp2.getValueType() == WidenInVT &&
"Input not widened to expected type!");
- return DAG.getNode(ISD::VSETCC, N->getDebugLoc(),
+ (void)WidenInVT;
+ return DAG.getNode(ISD::SETCC, N->getDebugLoc(),
WidenVT, InOp1, InOp2, N->getOperand(2));
}
diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
index 12b1838..e757def 100644
--- a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
+++ b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
@@ -2621,6 +2621,39 @@ bool RegReductionPQBase::canClobber(const SUnit *SU, const SUnit *Op) {
return false;
}
+/// canClobberReachingPhysRegUse - True if SU would clobber one of its
+/// successor's explicit physregs whose definition can reach DepSU.
+/// i.e. DepSU should not be scheduled above SU.
+static bool canClobberReachingPhysRegUse(const SUnit *DepSU, const SUnit *SU,
+ ScheduleDAGRRList *scheduleDAG,
+ const TargetInstrInfo *TII,
+ const TargetRegisterInfo *TRI) {
+ const unsigned *ImpDefs
+ = TII->get(SU->getNode()->getMachineOpcode()).getImplicitDefs();
+ if (!ImpDefs)
+ return false;
+
+ for (SUnit::const_succ_iterator SI = SU->Succs.begin(), SE = SU->Succs.end();
+ SI != SE; ++SI) {
+ SUnit *SuccSU = SI->getSUnit();
+ for (SUnit::const_pred_iterator PI = SuccSU->Preds.begin(),
+ PE = SuccSU->Preds.end(); PI != PE; ++PI) {
+ if (!PI->isAssignedRegDep())
+ continue;
+
+ for (const unsigned *ImpDef = ImpDefs; *ImpDef; ++ImpDef) {
+ // Return true if SU clobbers this physical register use and the
+ // definition of the register reaches from DepSU. IsReachable queries a
+ // topological forward sort of the DAG (following the successors).
+ if (TRI->regsOverlap(*ImpDef, PI->getReg()) &&
+ scheduleDAG->IsReachable(DepSU, PI->getSUnit()))
+ return true;
+ }
+ }
+ }
+ return false;
+}
+
/// canClobberPhysRegDefs - True if SU would clobber one of SuccSU's
/// physical register defs.
static bool canClobberPhysRegDefs(const SUnit *SuccSU, const SUnit *SU,
@@ -2837,7 +2870,8 @@ void RegReductionPQBase::AddPseudoTwoAddrDeps() {
SuccOpc == TargetOpcode::INSERT_SUBREG ||
SuccOpc == TargetOpcode::SUBREG_TO_REG)
continue;
- if ((!canClobber(SuccSU, DUSU) ||
+ if (!canClobberReachingPhysRegUse(SuccSU, SU, scheduleDAG, TII, TRI) &&
+ (!canClobber(SuccSU, DUSU) ||
(isLiveOut && !hasOnlyLiveOutUses(SuccSU)) ||
(!SU->isCommutable && SuccSU->isCommutable)) &&
!scheduleDAG->IsReachable(SuccSU, SU)) {
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 35ea0bb..20bea8e 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -403,7 +403,7 @@ static void AddNodeIDCustom(FoldingSetNodeID &ID, const SDNode *N) {
ID.AddInteger(CP->getAlignment());
ID.AddInteger(CP->getOffset());
if (CP->isMachineConstantPoolEntry())
- CP->getMachineCPVal()->AddSelectionDAGCSEId(ID);
+ CP->getMachineCPVal()->addSelectionDAGCSEId(ID);
else
ID.AddPointer(CP->getConstVal());
ID.AddInteger(CP->getTargetFlags());
@@ -432,7 +432,9 @@ static void AddNodeIDCustom(FoldingSetNodeID &ID, const SDNode *N) {
case ISD::ATOMIC_LOAD_MIN:
case ISD::ATOMIC_LOAD_MAX:
case ISD::ATOMIC_LOAD_UMIN:
- case ISD::ATOMIC_LOAD_UMAX: {
+ case ISD::ATOMIC_LOAD_UMAX:
+ case ISD::ATOMIC_LOAD:
+ case ISD::ATOMIC_STORE: {
const AtomicSDNode *AT = cast<AtomicSDNode>(N);
ID.AddInteger(AT->getMemoryVT().getRawBits());
ID.AddInteger(AT->getRawSubclassData());
@@ -769,11 +771,14 @@ static void VerifyNodeCommon(SDNode *N) {
assert(N->getNumOperands() == N->getValueType(0).getVectorNumElements() &&
"Wrong number of operands!");
EVT EltVT = N->getValueType(0).getVectorElementType();
- for (SDNode::op_iterator I = N->op_begin(), E = N->op_end(); I != E; ++I)
+ for (SDNode::op_iterator I = N->op_begin(), E = N->op_end(); I != E; ++I) {
assert((I->getValueType() == EltVT ||
(EltVT.isInteger() && I->getValueType().isInteger() &&
EltVT.bitsLE(I->getValueType()))) &&
"Wrong operand type!");
+ assert(I->getValueType() == N->getOperand(0).getValueType() &&
+ "Operands must all have the same type");
+ }
break;
}
}
@@ -821,7 +826,7 @@ static void VerifyMachineNode(SDNode *N) {
/// given type.
///
unsigned SelectionDAG::getEVTAlignment(EVT VT) const {
- const Type *Ty = VT == MVT::iPTR ?
+ Type *Ty = VT == MVT::iPTR ?
PointerType::get(Type::getInt8Ty(*getContext()), 0) :
VT.getTypeForEVT(*getContext());
@@ -876,6 +881,12 @@ void SelectionDAG::clear() {
DbgInfo->clear();
}
+SDValue SelectionDAG::getAnyExtOrTrunc(SDValue Op, DebugLoc DL, EVT VT) {
+ return VT.bitsGT(Op.getValueType()) ?
+ getNode(ISD::ANY_EXTEND, DL, VT, Op) :
+ getNode(ISD::TRUNCATE, DL, VT, Op);
+}
+
SDValue SelectionDAG::getSExtOrTrunc(SDValue Op, DebugLoc DL, EVT VT) {
return VT.bitsGT(Op.getValueType()) ?
getNode(ISD::SIGN_EXTEND, DL, VT, Op) :
@@ -925,13 +936,25 @@ SDValue SelectionDAG::getConstant(const ConstantInt &Val, EVT VT, bool isT) {
assert(VT.isInteger() && "Cannot create FP integer constant!");
EVT EltVT = VT.getScalarType();
- assert(Val.getBitWidth() == EltVT.getSizeInBits() &&
- "APInt size does not match type size!");
+ const ConstantInt *Elt = &Val;
+ // In some cases the vector type is legal but the element type is illegal and
+ // needs to be promoted, for example v8i8 on ARM. In this case, promote the
+ // inserted value (the type does not need to match the vector element type).
+ // Any extra bits introduced will be truncated away.
+ if (VT.isVector() && TLI.getTypeAction(*getContext(), EltVT) ==
+ TargetLowering::TypePromoteInteger) {
+ EltVT = TLI.getTypeToTransformTo(*getContext(), EltVT);
+ APInt NewVal = Elt->getValue().zext(EltVT.getSizeInBits());
+ Elt = ConstantInt::get(*getContext(), NewVal);
+ }
+
+ assert(Elt->getBitWidth() == EltVT.getSizeInBits() &&
+ "APInt size does not match type size!");
unsigned Opc = isT ? ISD::TargetConstant : ISD::Constant;
FoldingSetNodeID ID;
AddNodeIDNode(ID, Opc, getVTList(EltVT), 0, 0);
- ID.AddPointer(&Val);
+ ID.AddPointer(Elt);
void *IP = 0;
SDNode *N = NULL;
if ((N = CSEMap.FindNodeOrInsertPos(ID, IP)))
@@ -939,7 +962,7 @@ SDValue SelectionDAG::getConstant(const ConstantInt &Val, EVT VT, bool isT) {
return SDValue(N, 0);
if (!N) {
- N = new (NodeAllocator) ConstantSDNode(isT, &Val, EltVT);
+ N = new (NodeAllocator) ConstantSDNode(isT, Elt, EltVT);
CSEMap.InsertNode(N, IP);
AllNodes.push_back(N);
}
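
The promotion above allows a legal vector type to carry elements whose scalar type is illegal: the constant is zero-extended to the promoted element width, and consumers truncate the extra bits away again. A scalar round trip with assumed widths (i8 promoted to i16):

    #include <cassert>
    #include <cstdint>

    int main() {
      uint8_t Elt = 0xAB;                     // an i8 element, e.g. of v8i8
      uint16_t Promoted = Elt;                // zext to the promoted i16
      uint8_t Truncated = uint8_t(Promoted);  // later truncation
      assert(Truncated == Elt);               // no information is lost
      return 0;
    }
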
@@ -1131,7 +1154,7 @@ SDValue SelectionDAG::getConstantPool(MachineConstantPoolValue *C, EVT VT,
AddNodeIDNode(ID, Opc, getVTList(VT), 0, 0);
ID.AddInteger(Alignment);
ID.AddInteger(Offset);
- C->AddSelectionDAGCSEId(ID);
+ C->addSelectionDAGCSEId(ID);
ID.AddInteger(TargetFlags);
void *IP = 0;
if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
@@ -1432,7 +1455,7 @@ SDValue SelectionDAG::getShiftAmountOperand(EVT LHSTy, SDValue Op) {
SDValue SelectionDAG::CreateStackTemporary(EVT VT, unsigned minAlign) {
MachineFrameInfo *FrameInfo = getMachineFunction().getFrameInfo();
unsigned ByteSize = VT.getStoreSize();
- const Type *Ty = VT.getTypeForEVT(*getContext());
+ Type *Ty = VT.getTypeForEVT(*getContext());
unsigned StackAlign =
std::max((unsigned)TLI.getTargetData()->getPrefTypeAlignment(Ty), minAlign);
@@ -1445,8 +1468,8 @@ SDValue SelectionDAG::CreateStackTemporary(EVT VT, unsigned minAlign) {
SDValue SelectionDAG::CreateStackTemporary(EVT VT1, EVT VT2) {
unsigned Bytes = std::max(VT1.getStoreSizeInBits(),
VT2.getStoreSizeInBits())/8;
- const Type *Ty1 = VT1.getTypeForEVT(*getContext());
- const Type *Ty2 = VT2.getTypeForEVT(*getContext());
+ Type *Ty1 = VT1.getTypeForEVT(*getContext());
+ Type *Ty2 = VT2.getTypeForEVT(*getContext());
const TargetData *TD = TLI.getTargetData();
unsigned Align = std::max(TD->getPrefTypeAlignment(Ty1),
TD->getPrefTypeAlignment(Ty2));
@@ -1718,8 +1741,8 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask,
// The boolean result conforms to getBooleanContents. Fall through.
case ISD::SETCC:
// If we know the result of a setcc has the top bits zero, use this info.
- if (TLI.getBooleanContents() == TargetLowering::ZeroOrOneBooleanContent &&
- BitWidth > 1)
+ if (TLI.getBooleanContents(Op.getValueType().isVector()) ==
+ TargetLowering::ZeroOrOneBooleanContent && BitWidth > 1)
KnownZero |= APInt::getHighBitsSet(BitWidth, BitWidth - 1);
return;
case ISD::SHL:
@@ -2153,7 +2176,7 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, unsigned Depth) const{
// The boolean result conforms to getBooleanContents. Fall through.
case ISD::SETCC:
// If setcc returns 0/-1, all bits are sign bits.
- if (TLI.getBooleanContents() ==
+ if (TLI.getBooleanContents(Op.getValueType().isVector()) ==
TargetLowering::ZeroOrNegativeOneBooleanContent)
return VTBits;
break;
@@ -2437,7 +2460,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL,
APFloat::rmTowardZero, &ignored);
if (s==APFloat::opInvalidOp) // inexact is OK, in fact usual
break;
- APInt api(VT.getSizeInBits(), 2, x);
+ APInt api(VT.getSizeInBits(), x);
return getConstant(api, VT);
}
case ISD::BITCAST:
@@ -2777,6 +2800,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT,
EVT.getVectorNumElements() == VT.getVectorNumElements()) &&
"Vector element counts must match in FP_ROUND_INREG");
assert(EVT.bitsLE(VT) && "Not rounding down!");
+ (void)EVT;
if (cast<VTSDNode>(N2)->getVT() == VT) return N1; // Not actually rounding.
break;
}
@@ -2884,6 +2908,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT,
assert(N2C && (unsigned)N2C->getZExtValue() < 2 && "Bad EXTRACT_ELEMENT!");
assert(!N1.getValueType().isVector() && !VT.isVector() &&
(N1.getValueType().isInteger() == VT.isInteger()) &&
+ N1.getValueType() != VT &&
"Wrong types for EXTRACT_ELEMENT!");
// EXTRACT_ELEMENT of BUILD_PAIR is often formed while legalize is expanding
@@ -3425,7 +3450,7 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, DebugLoc dl,
return SDValue();
if (DstAlignCanChange) {
- const Type *Ty = MemOps[0].getTypeForEVT(*DAG.getContext());
+ Type *Ty = MemOps[0].getTypeForEVT(*DAG.getContext());
unsigned NewAlign = (unsigned) TLI.getTargetData()->getABITypeAlignment(Ty);
if (NewAlign > Align) {
// Give the stack frame object a larger alignment if needed.
@@ -3514,7 +3539,7 @@ static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, DebugLoc dl,
return SDValue();
if (DstAlignCanChange) {
- const Type *Ty = MemOps[0].getTypeForEVT(*DAG.getContext());
+ Type *Ty = MemOps[0].getTypeForEVT(*DAG.getContext());
unsigned NewAlign = (unsigned) TLI.getTargetData()->getABITypeAlignment(Ty);
if (NewAlign > Align) {
// Give the stack frame object a larger alignment if needed.
@@ -3589,7 +3614,7 @@ static SDValue getMemsetStores(SelectionDAG &DAG, DebugLoc dl,
return SDValue();
if (DstAlignCanChange) {
- const Type *Ty = MemOps[0].getTypeForEVT(*DAG.getContext());
+ Type *Ty = MemOps[0].getTypeForEVT(*DAG.getContext());
unsigned NewAlign = (unsigned) TLI.getTargetData()->getABITypeAlignment(Ty);
if (NewAlign > Align) {
// Give the stack frame object a larger alignment if needed.
@@ -3782,7 +3807,7 @@ SDValue SelectionDAG::getMemset(SDValue Chain, DebugLoc dl, SDValue Dst,
return Result;
// Emit a library call.
- const Type *IntPtrTy = TLI.getTargetData()->getIntPtrType(*getContext());
+ Type *IntPtrTy = TLI.getTargetData()->getIntPtrType(*getContext());
TargetLowering::ArgListTy Args;
TargetLowering::ArgListEntry Entry;
Entry.Node = Dst; Entry.Ty = IntPtrTy;
@@ -3815,7 +3840,9 @@ SDValue SelectionDAG::getMemset(SDValue Chain, DebugLoc dl, SDValue Dst,
SDValue SelectionDAG::getAtomic(unsigned Opcode, DebugLoc dl, EVT MemVT,
SDValue Chain, SDValue Ptr, SDValue Cmp,
SDValue Swp, MachinePointerInfo PtrInfo,
- unsigned Alignment) {
+ unsigned Alignment,
+ AtomicOrdering Ordering,
+ SynchronizationScope SynchScope) {
if (Alignment == 0) // Ensure that codegen never sees alignment 0
Alignment = getEVTAlignment(MemVT);
@@ -3823,18 +3850,23 @@ SDValue SelectionDAG::getAtomic(unsigned Opcode, DebugLoc dl, EVT MemVT,
unsigned Flags = MachineMemOperand::MOLoad | MachineMemOperand::MOStore;
// For now, atomics are considered to be volatile always.
+ // FIXME: Volatile isn't really correct; we should keep track of atomic
+ // orderings in the memoperand.
Flags |= MachineMemOperand::MOVolatile;
MachineMemOperand *MMO =
MF.getMachineMemOperand(PtrInfo, Flags, MemVT.getStoreSize(), Alignment);
- return getAtomic(Opcode, dl, MemVT, Chain, Ptr, Cmp, Swp, MMO);
+ return getAtomic(Opcode, dl, MemVT, Chain, Ptr, Cmp, Swp, MMO,
+ Ordering, SynchScope);
}
SDValue SelectionDAG::getAtomic(unsigned Opcode, DebugLoc dl, EVT MemVT,
SDValue Chain,
SDValue Ptr, SDValue Cmp,
- SDValue Swp, MachineMemOperand *MMO) {
+ SDValue Swp, MachineMemOperand *MMO,
+ AtomicOrdering Ordering,
+ SynchronizationScope SynchScope) {
assert(Opcode == ISD::ATOMIC_CMP_SWAP && "Invalid Atomic Op");
assert(Cmp.getValueType() == Swp.getValueType() && "Invalid Atomic Op Types");
@@ -3851,7 +3883,8 @@ SDValue SelectionDAG::getAtomic(unsigned Opcode, DebugLoc dl, EVT MemVT,
return SDValue(E, 0);
}
SDNode *N = new (NodeAllocator) AtomicSDNode(Opcode, dl, VTs, MemVT, Chain,
- Ptr, Cmp, Swp, MMO);
+ Ptr, Cmp, Swp, MMO, Ordering,
+ SynchScope);
CSEMap.InsertNode(N, IP);
AllNodes.push_back(N);
return SDValue(N, 0);
@@ -3861,27 +3894,39 @@ SDValue SelectionDAG::getAtomic(unsigned Opcode, DebugLoc dl, EVT MemVT,
SDValue Chain,
SDValue Ptr, SDValue Val,
const Value* PtrVal,
- unsigned Alignment) {
+ unsigned Alignment,
+ AtomicOrdering Ordering,
+ SynchronizationScope SynchScope) {
if (Alignment == 0) // Ensure that codegen never sees alignment 0
Alignment = getEVTAlignment(MemVT);
MachineFunction &MF = getMachineFunction();
- unsigned Flags = MachineMemOperand::MOLoad | MachineMemOperand::MOStore;
+ // A monotonic store does not load; a release store "loads" in the sense
+ // that other stores cannot be sunk past it.
+ // (An atomicrmw obviously both loads and stores.)
+ unsigned Flags = MachineMemOperand::MOStore;
+ if (Opcode != ISD::ATOMIC_STORE || Ordering > Monotonic)
+ Flags |= MachineMemOperand::MOLoad;
// For now, atomics are considered to be volatile always.
+ // FIXME: Volatile isn't really correct; we should keep track of atomic
+ // orderings in the memoperand.
Flags |= MachineMemOperand::MOVolatile;
MachineMemOperand *MMO =
MF.getMachineMemOperand(MachinePointerInfo(PtrVal), Flags,
MemVT.getStoreSize(), Alignment);
- return getAtomic(Opcode, dl, MemVT, Chain, Ptr, Val, MMO);
+ return getAtomic(Opcode, dl, MemVT, Chain, Ptr, Val, MMO,
+ Ordering, SynchScope);
}
SDValue SelectionDAG::getAtomic(unsigned Opcode, DebugLoc dl, EVT MemVT,
SDValue Chain,
SDValue Ptr, SDValue Val,
- MachineMemOperand *MMO) {
+ MachineMemOperand *MMO,
+ AtomicOrdering Ordering,
+ SynchronizationScope SynchScope) {
assert((Opcode == ISD::ATOMIC_LOAD_ADD ||
Opcode == ISD::ATOMIC_LOAD_SUB ||
Opcode == ISD::ATOMIC_LOAD_AND ||
@@ -3892,12 +3937,14 @@ SDValue SelectionDAG::getAtomic(unsigned Opcode, DebugLoc dl, EVT MemVT,
Opcode == ISD::ATOMIC_LOAD_MAX ||
Opcode == ISD::ATOMIC_LOAD_UMIN ||
Opcode == ISD::ATOMIC_LOAD_UMAX ||
- Opcode == ISD::ATOMIC_SWAP) &&
+ Opcode == ISD::ATOMIC_SWAP ||
+ Opcode == ISD::ATOMIC_STORE) &&
"Invalid Atomic Op");
EVT VT = Val.getValueType();
- SDVTList VTs = getVTList(VT, MVT::Other);
+ SDVTList VTs = Opcode == ISD::ATOMIC_STORE ? getVTList(MVT::Other) :
+ getVTList(VT, MVT::Other);
FoldingSetNodeID ID;
ID.AddInteger(MemVT.getRawBits());
SDValue Ops[] = {Chain, Ptr, Val};
@@ -3908,7 +3955,63 @@ SDValue SelectionDAG::getAtomic(unsigned Opcode, DebugLoc dl, EVT MemVT,
return SDValue(E, 0);
}
SDNode *N = new (NodeAllocator) AtomicSDNode(Opcode, dl, VTs, MemVT, Chain,
- Ptr, Val, MMO);
+ Ptr, Val, MMO,
+ Ordering, SynchScope);
+ CSEMap.InsertNode(N, IP);
+ AllNodes.push_back(N);
+ return SDValue(N, 0);
+}
+
+SDValue SelectionDAG::getAtomic(unsigned Opcode, DebugLoc dl, EVT MemVT,
+ EVT VT, SDValue Chain,
+ SDValue Ptr,
+ const Value* PtrVal,
+ unsigned Alignment,
+ AtomicOrdering Ordering,
+ SynchronizationScope SynchScope) {
+ if (Alignment == 0) // Ensure that codegen never sees alignment 0
+ Alignment = getEVTAlignment(MemVT);
+
+ MachineFunction &MF = getMachineFunction();
+ // A monotonic load does not store; an acquire load "stores" in the sense
+ // that other loads cannot be hoisted past it.
+ unsigned Flags = MachineMemOperand::MOLoad;
+ if (Ordering > Monotonic)
+ Flags |= MachineMemOperand::MOStore;
+
+ // For now, atomics are considered to be volatile always.
+ // FIXME: Volatile isn't really correct; we should keep track of atomic
+ // orderings in the memoperand.
+ Flags |= MachineMemOperand::MOVolatile;
+
+ MachineMemOperand *MMO =
+ MF.getMachineMemOperand(MachinePointerInfo(PtrVal), Flags,
+ MemVT.getStoreSize(), Alignment);
+
+ return getAtomic(Opcode, dl, MemVT, VT, Chain, Ptr, MMO,
+ Ordering, SynchScope);
+}
+
+SDValue SelectionDAG::getAtomic(unsigned Opcode, DebugLoc dl, EVT MemVT,
+ EVT VT, SDValue Chain,
+ SDValue Ptr,
+ MachineMemOperand *MMO,
+ AtomicOrdering Ordering,
+ SynchronizationScope SynchScope) {
+ assert(Opcode == ISD::ATOMIC_LOAD && "Invalid Atomic Op");
+
+ SDVTList VTs = getVTList(VT, MVT::Other);
+ FoldingSetNodeID ID;
+ ID.AddInteger(MemVT.getRawBits());
+ SDValue Ops[] = {Chain, Ptr};
+ AddNodeIDNode(ID, Opcode, VTs, Ops, 2);
+ void* IP = 0;
+ if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) {
+ cast<AtomicSDNode>(E)->refineAlignment(MMO);
+ return SDValue(E, 0);
+ }
+ SDNode *N = new (NodeAllocator) AtomicSDNode(Opcode, dl, VTs, MemVT, Chain,
+ Ptr, MMO, Ordering, SynchScope);
CSEMap.InsertNode(N, IP);
AllNodes.push_back(N);
return SDValue(N, 0);
@@ -5769,6 +5872,7 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
#endif
case ISD::PREFETCH: return "Prefetch";
case ISD::MEMBARRIER: return "MemBarrier";
+ case ISD::ATOMIC_FENCE: return "AtomicFence";
case ISD::ATOMIC_CMP_SWAP: return "AtomicCmpSwap";
case ISD::ATOMIC_SWAP: return "AtomicSwap";
case ISD::ATOMIC_LOAD_ADD: return "AtomicLoadAdd";
@@ -5781,6 +5885,8 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
case ISD::ATOMIC_LOAD_MAX: return "AtomicLoadMax";
case ISD::ATOMIC_LOAD_UMIN: return "AtomicLoadUMin";
case ISD::ATOMIC_LOAD_UMAX: return "AtomicLoadUMax";
+ case ISD::ATOMIC_LOAD: return "AtomicLoad";
+ case ISD::ATOMIC_STORE: return "AtomicStore";
case ISD::PCMARKER: return "PCMarker";
case ISD::READCYCLECOUNTER: return "ReadCycleCounter";
case ISD::SRCVALUE: return "SrcValue";
@@ -5896,8 +6002,8 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
case ISD::FPOWI: return "fpowi";
case ISD::SETCC: return "setcc";
- case ISD::VSETCC: return "vsetcc";
case ISD::SELECT: return "select";
+ case ISD::VSELECT: return "vselect";
case ISD::SELECT_CC: return "select_cc";
case ISD::INSERT_VECTOR_ELT: return "insert_vector_elt";
case ISD::EXTRACT_VECTOR_ELT: return "extract_vector_elt";
@@ -5985,7 +6091,8 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
case ISD::CTLZ: return "ctlz";
// Trampolines
- case ISD::TRAMPOLINE: return "trampoline";
+ case ISD::INIT_TRAMPOLINE: return "init_trampoline";
+ case ISD::ADJUST_TRAMPOLINE: return "adjust_trampoline";
case ISD::CONDCODE:
switch (cast<CondCodeSDNode>(this)->get()) {
@@ -6245,8 +6352,7 @@ void SDNode::print(raw_ostream &OS, const SelectionDAG *G) const {
static void printrWithDepthHelper(raw_ostream &OS, const SDNode *N,
const SelectionDAG *G, unsigned depth,
- unsigned indent)
-{
+ unsigned indent) {
if (depth == 0)
return;
@@ -6340,6 +6446,10 @@ SDValue SelectionDAG::UnrollVectorOp(SDNode *N, unsigned ResNE) {
Scalars.push_back(getNode(N->getOpcode(), dl, EltVT,
&Operands[0], Operands.size()));
break;
+ case ISD::VSELECT:
+ Scalars.push_back(getNode(ISD::SELECT, dl, EltVT,
+ &Operands[0], Operands.size()));
+ break;
case ISD::SHL:
case ISD::SRA:
case ISD::SRL:
@@ -6427,6 +6537,8 @@ unsigned SelectionDAG::InferPtrAlignment(SDValue Ptr) const {
Align = TD->getPreferredAlignment(GVar);
}
}
+ if (!Align)
+ Align = TLI.getTargetData()->getABITypeAlignment(GV->getType());
}
return MinAlign(Align, GVOffset);
}
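
With the ABI-alignment fallback added above, a global that lacks an explicit alignment still produces a bound, and the final MinAlign(Align, GVOffset) caps it by the largest power of two dividing the offset. A sketch of that computation, assuming MinAlign keeps the lowest set bit of either value:

    #include <cassert>

    static unsigned minAlignSketch(unsigned A, unsigned B) {
      unsigned C = A | B;
      return C & (~C + 1); // lowest set bit of either value
    }

    int main() {
      assert(minAlignSketch(16, 0) == 16); // no offset keeps base alignment
      assert(minAlignSketch(16, 4) == 4);  // +4 bytes caps it at 4
      assert(minAlignSketch(8, 6) == 2);   // +6 bytes leaves only 2
      return 0;
    }
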
@@ -6528,7 +6640,7 @@ unsigned GlobalAddressSDNode::getAddressSpace() const {
}
-const Type *ConstantPoolSDNode::getType() const {
+Type *ConstantPoolSDNode::getType() const {
if (isMachineConstantPoolEntry())
return Val.MachineCPVal->getType();
return Val.ConstVal->getType();
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index 81b03ee..7ed46a6 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -578,7 +578,7 @@ namespace {
: ValueVTs(1, valuevt), RegVTs(1, regvt), Regs(regs) {}
RegsForValue(LLVMContext &Context, const TargetLowering &tli,
- unsigned Reg, const Type *Ty) {
+ unsigned Reg, Type *Ty) {
ComputeValueVTs(tli, Ty, ValueVTs);
for (unsigned Value = 0, e = ValueVTs.size(); Value != e; ++Value) {
@@ -788,6 +788,18 @@ void RegsForValue::AddInlineAsmOperands(unsigned Code, bool HasMatching,
unsigned Flag = InlineAsm::getFlagWord(Code, Regs.size());
if (HasMatching)
Flag = InlineAsm::getFlagWordForMatchingOp(Flag, MatchingIdx);
+ else if (!Regs.empty() &&
+ TargetRegisterInfo::isVirtualRegister(Regs.front())) {
+ // Put the register class of the virtual registers in the flag word. That
+ // way, later passes can recompute register class constraints for inline
+ // assembly as well as normal instructions.
+ // Don't do this for tied operands that can use the regclass information
+ // from the def.
+ const MachineRegisterInfo &MRI = DAG.getMachineFunction().getRegInfo();
+ const TargetRegisterClass *RC = MRI.getRegClass(Regs.front());
+ Flag = InlineAsm::getFlagWordForRegClass(Flag, RC->getID());
+ }
+
SDValue Res = DAG.getTargetConstant(Flag, MVT::i32);
Ops.push_back(Res);
@@ -805,6 +817,7 @@ void SelectionDAGBuilder::init(GCFunctionInfo *gfi, AliasAnalysis &aa) {
AA = &aa;
GFI = gfi;
TD = DAG.getTarget().getTargetData();
+ LPadToCallSiteMap.clear();
}
/// clear - Clear out the current SelectionDAG and the associated
@@ -956,7 +969,7 @@ void SelectionDAGBuilder::resolveDanglingDebugInfo(const Value *V,
}
}
-// getValue - Return an SDValue for the given Value.
+/// getValue - Return an SDValue for the given Value.
SDValue SelectionDAGBuilder::getValue(const Value *V) {
// If we already have an SDValue for this value, use it. It's important
// to do this first, so that we don't create a CopyFromReg if we already
@@ -971,7 +984,7 @@ SDValue SelectionDAGBuilder::getValue(const Value *V) {
unsigned InReg = It->second;
RegsForValue RFV(*DAG.getContext(), TLI, InReg, V->getType());
SDValue Chain = DAG.getEntryNode();
- N = RFV.getCopyFromRegs(DAG, FuncInfo, getCurDebugLoc(), Chain,NULL);
+ N = RFV.getCopyFromRegs(DAG, FuncInfo, getCurDebugLoc(), Chain, NULL);
resolveDanglingDebugInfo(V, N);
return N;
}
@@ -1069,7 +1082,7 @@ SDValue SelectionDAGBuilder::getValueImpl(const Value *V) {
if (const BlockAddress *BA = dyn_cast<BlockAddress>(C))
return DAG.getBlockAddress(BA, VT);
- const VectorType *VecTy = cast<VectorType>(V->getType());
+ VectorType *VecTy = cast<VectorType>(V->getType());
unsigned NumElements = VecTy->getNumElements();
// Now that we know the number and type of the elements, get that number of
@@ -1277,15 +1290,17 @@ uint32_t SelectionDAGBuilder::getEdgeWeight(MachineBasicBlock *Src,
BranchProbabilityInfo *BPI = FuncInfo.BPI;
if (!BPI)
return 0;
- BasicBlock *SrcBB = const_cast<BasicBlock*>(Src->getBasicBlock());
- BasicBlock *DstBB = const_cast<BasicBlock*>(Dst->getBasicBlock());
+ const BasicBlock *SrcBB = Src->getBasicBlock();
+ const BasicBlock *DstBB = Dst->getBasicBlock();
return BPI->getEdgeWeight(SrcBB, DstBB);
}
-void SelectionDAGBuilder::addSuccessorWithWeight(MachineBasicBlock *Src,
- MachineBasicBlock *Dst) {
- uint32_t weight = getEdgeWeight(Src, Dst);
- Src->addSuccessor(Dst, weight);
+void SelectionDAGBuilder::
+addSuccessorWithWeight(MachineBasicBlock *Src, MachineBasicBlock *Dst,
+ uint32_t Weight /* = 0 */) {
+ if (!Weight)
+ Weight = getEdgeWeight(Src, Dst);
+ Src->addSuccessor(Dst, Weight);
}
@@ -1558,8 +1573,8 @@ void SelectionDAGBuilder::visitSwitchCase(CaseBlock &CB,
}
// Update successor info
- addSuccessorWithWeight(SwitchBB, CB.TrueBB);
- addSuccessorWithWeight(SwitchBB, CB.FalseBB);
+ addSuccessorWithWeight(SwitchBB, CB.TrueBB, CB.TrueWeight);
+ addSuccessorWithWeight(SwitchBB, CB.FalseBB, CB.FalseWeight);
// Set NextBlock to be the MBB immediately after the current one, if any.
// This is used to avoid emitting unnecessary branches to the next block.
@@ -1677,7 +1692,7 @@ void SelectionDAGBuilder::visitBitTestHeader(BitTestBlock &B,
UsePtrType = true;
else {
for (unsigned i = 0, e = B.Cases.size(); i != e; ++i)
- if ((uint64_t)((int64_t)B.Cases[i].Mask >> VT.getSizeInBits()) + 1 >= 2) {
+ if (!isUIntN(VT.getSizeInBits(), B.Cases[i].Mask)) {
// Switch table case ranges are encoded into a series of masks.
// Just use pointer type, it's guaranteed to fit.
UsePtrType = true;
@@ -1808,6 +1823,49 @@ void SelectionDAGBuilder::visitInvoke(const InvokeInst &I) {
void SelectionDAGBuilder::visitUnwind(const UnwindInst &I) {
}
+void SelectionDAGBuilder::visitResume(const ResumeInst &RI) {
+ llvm_unreachable("SelectionDAGBuilder shouldn't visit resume instructions!");
+}
+
+void SelectionDAGBuilder::visitLandingPad(const LandingPadInst &LP) {
+ assert(FuncInfo.MBB->isLandingPad() &&
+ "Call to landingpad not in landing pad!");
+
+ MachineBasicBlock *MBB = FuncInfo.MBB;
+ MachineModuleInfo &MMI = DAG.getMachineFunction().getMMI();
+ AddLandingPadInfo(LP, MMI, MBB);
+
+ SmallVector<EVT, 2> ValueVTs;
+ ComputeValueVTs(TLI, LP.getType(), ValueVTs);
+
+ // Insert the EXCEPTIONADDR instruction.
+ assert(FuncInfo.MBB->isLandingPad() &&
+ "Call to eh.exception not in landing pad!");
+ SDVTList VTs = DAG.getVTList(TLI.getPointerTy(), MVT::Other);
+ SDValue Ops[2];
+ Ops[0] = DAG.getRoot();
+ SDValue Op1 = DAG.getNode(ISD::EXCEPTIONADDR, getCurDebugLoc(), VTs, Ops, 1);
+ SDValue Chain = Op1.getValue(1);
+
+ // Insert the EHSELECTION instruction.
+ VTs = DAG.getVTList(TLI.getPointerTy(), MVT::Other);
+ Ops[0] = Op1;
+ Ops[1] = Chain;
+ SDValue Op2 = DAG.getNode(ISD::EHSELECTION, getCurDebugLoc(), VTs, Ops, 2);
+ Chain = Op2.getValue(1);
+ Op2 = DAG.getSExtOrTrunc(Op2, getCurDebugLoc(), MVT::i32);
+
+ Ops[0] = Op1;
+ Ops[1] = Op2;
+ SDValue Res = DAG.getNode(ISD::MERGE_VALUES, getCurDebugLoc(),
+ DAG.getVTList(&ValueVTs[0], ValueVTs.size()),
+ &Ops[0], 2);
+
+ std::pair<SDValue, SDValue> RetPair = std::make_pair(Res, Chain);
+ setValue(&LP, RetPair.first);
+ DAG.setRoot(RetPair.second);
+}
+
/// handleSmallSwitchRange - Emit a series of specific tests (suitable for
/// small case ranges).
bool SelectionDAGBuilder::handleSmallSwitchRange(CaseRec& CR,
@@ -1866,8 +1924,8 @@ bool SelectionDAGBuilder::handleSmallSwitchRange(CaseRec& CR,
ISD::SETEQ);
// Update successor info.
- SwitchBB->addSuccessor(Small.BB);
- SwitchBB->addSuccessor(Default);
+ addSuccessorWithWeight(SwitchBB, Small.BB);
+ addSuccessorWithWeight(SwitchBB, Default);
// Insert the true branch.
SDValue BrCond = DAG.getNode(ISD::BRCOND, DL, MVT::Other,
@@ -1923,7 +1981,11 @@ bool SelectionDAGBuilder::handleSmallSwitchRange(CaseRec& CR,
CC = ISD::SETLE;
LHS = I->Low; MHS = SV; RHS = I->High;
}
- CaseBlock CB(CC, LHS, RHS, MHS, I->BB, FallThrough, CurBlock);
+
+ uint32_t ExtraWeight = I->ExtraWeight;
+ CaseBlock CB(CC, LHS, RHS, MHS, /* truebb */ I->BB, /* falsebb */ FallThrough,
+ /* me */ CurBlock,
+ /* trueweight */ ExtraWeight / 2, /* falseweight */ ExtraWeight / 2);
// If emitting the first comparison, just call visitSwitchCase to emit the
// code into the current block. Otherwise, push the CaseBlock onto the
@@ -1953,10 +2015,10 @@ static APInt ComputeRange(const APInt &First, const APInt &Last) {
}
/// handleJTSwitchCase - Emit jumptable for current switch case range
-bool SelectionDAGBuilder::handleJTSwitchCase(CaseRec& CR,
- CaseRecVector& WorkList,
- const Value* SV,
- MachineBasicBlock* Default,
+bool SelectionDAGBuilder::handleJTSwitchCase(CaseRec &CR,
+ CaseRecVector &WorkList,
+ const Value *SV,
+ MachineBasicBlock *Default,
MachineBasicBlock *SwitchBB) {
Case& FrontCase = *CR.Range.first;
Case& BackCase = *(CR.Range.second-1);
@@ -1965,8 +2027,7 @@ bool SelectionDAGBuilder::handleJTSwitchCase(CaseRec& CR,
const APInt &Last = cast<ConstantInt>(BackCase.High)->getValue();
APInt TSize(First.getBitWidth(), 0);
- for (CaseItr I = CR.Range.first, E = CR.Range.second;
- I!=E; ++I)
+ for (CaseItr I = CR.Range.first, E = CR.Range.second; I != E; ++I)
TSize += I->size();
if (!areJTsAllowed(TLI) || TSize.ult(4))
@@ -2044,7 +2105,6 @@ bool SelectionDAGBuilder::handleJTSwitchCase(CaseRec& CR,
visitJumpTableHeader(JT, JTH, SwitchBB);
JTCases.push_back(JumpTableBlock(JTH, JT));
-
return true;
}
@@ -2318,12 +2378,17 @@ size_t SelectionDAGBuilder::Clusterify(CaseVector& Cases,
const SwitchInst& SI) {
size_t numCmps = 0;
+ BranchProbabilityInfo *BPI = FuncInfo.BPI;
// Start with "simple" cases
for (size_t i = 1; i < SI.getNumSuccessors(); ++i) {
- MachineBasicBlock *SMBB = FuncInfo.MBBMap[SI.getSuccessor(i)];
+ BasicBlock *SuccBB = SI.getSuccessor(i);
+ MachineBasicBlock *SMBB = FuncInfo.MBBMap[SuccBB];
+
+ uint32_t ExtraWeight = BPI ? BPI->getEdgeWeight(SI.getParent(), SuccBB) : 0;
+
Cases.push_back(Case(SI.getSuccessorValue(i),
SI.getSuccessorValue(i),
- SMBB));
+ SMBB, ExtraWeight));
}
std::sort(Cases.begin(), Cases.end(), CaseCmp());
@@ -2343,6 +2408,16 @@ size_t SelectionDAGBuilder::Clusterify(CaseVector& Cases,
if ((nextValue - currentValue == 1) && (currentBB == nextBB)) {
I->High = J->High;
J = Cases.erase(J);
+
+ if (BranchProbabilityInfo *BPI = FuncInfo.BPI) {
+ uint32_t CurWeight = currentBB->getBasicBlock() ?
+ BPI->getEdgeWeight(SI.getParent(), currentBB->getBasicBlock()) : 16;
+ uint32_t NextWeight = nextBB->getBasicBlock() ?
+ BPI->getEdgeWeight(SI.getParent(), nextBB->getBasicBlock()) : 16;
+
+ BPI->setEdgeWeight(SI.getParent(), currentBB->getBasicBlock(),
+ CurWeight + NextWeight);
+ }
} else {
I = J++;
}
@@ -2379,7 +2454,7 @@ void SelectionDAGBuilder::visitSwitch(const SwitchInst &SI) {
// If there is only the default destination, branch to it if it is not the
// next basic block. Otherwise, just fall through.
- if (SI.getNumOperands() == 2) {
+ if (SI.getNumCases() == 1) {
// Update machine-CFG edges.
// If this is not a fall-through branch, emit the branch.
@@ -2399,12 +2474,12 @@ void SelectionDAGBuilder::visitSwitch(const SwitchInst &SI) {
size_t numCmps = Clusterify(Cases, SI);
DEBUG(dbgs() << "Clusterify finished. Total clusters: " << Cases.size()
<< ". Total compares: " << numCmps << '\n');
- numCmps = 0;
+ (void)numCmps;
// Get the Value to be switched on and default basic blocks, which will be
// inserted into CaseBlock records, representing basic blocks in the binary
// search tree.
- const Value *SV = SI.getOperand(0);
+ const Value *SV = SI.getCondition();
// Push the initial CaseRec onto the worklist
CaseRecVector WorkList;
@@ -2458,7 +2533,7 @@ void SelectionDAGBuilder::visitIndirectBr(const IndirectBrInst &I) {
void SelectionDAGBuilder::visitFSub(const User &I) {
// -0.0 - X --> fneg
- const Type *Ty = I.getType();
+ Type *Ty = I.getType();
if (isa<Constant>(I.getOperand(0)) &&
I.getOperand(0) == ConstantFP::getZeroValueForNegation(Ty)) {
SDValue Op2 = getValue(I.getOperand(1));
@@ -2562,10 +2637,12 @@ void SelectionDAGBuilder::visitSelect(const User &I) {
SDValue Cond = getValue(I.getOperand(0));
SDValue TrueVal = getValue(I.getOperand(1));
SDValue FalseVal = getValue(I.getOperand(2));
+ ISD::NodeType OpCode = Cond.getValueType().isVector() ?
+ ISD::VSELECT : ISD::SELECT;
for (unsigned i = 0; i != NumValues; ++i)
- Values[i] = DAG.getNode(ISD::SELECT, getCurDebugLoc(),
- TrueVal.getNode()->getValueType(TrueVal.getResNo()+i),
+ Values[i] = DAG.getNode(OpCode, getCurDebugLoc(),
+ TrueVal.getNode()->getValueType(TrueVal.getResNo()+i),
Cond,
SDValue(TrueVal.getNode(),
TrueVal.getResNo() + i),
@@ -2778,7 +2855,8 @@ void SelectionDAGBuilder::visitShuffleVector(const User &I) {
// Analyze the access pattern of the vector to see if we can extract
// two subvectors and do the shuffle. The analysis is done by calculating
// the range of elements the mask access on both vectors.
- int MinRange[2] = { SrcNumElts+1, SrcNumElts+1};
+ int MinRange[2] = { static_cast<int>(SrcNumElts+1),
+ static_cast<int>(SrcNumElts+1)};
int MaxRange[2] = {-1, -1};
for (unsigned i = 0; i != MaskNumElts; ++i) {
@@ -2886,8 +2964,8 @@ void SelectionDAGBuilder::visitShuffleVector(const User &I) {
void SelectionDAGBuilder::visitInsertValue(const InsertValueInst &I) {
const Value *Op0 = I.getOperand(0);
const Value *Op1 = I.getOperand(1);
- const Type *AggTy = I.getType();
- const Type *ValTy = Op1->getType();
+ Type *AggTy = I.getType();
+ Type *ValTy = Op1->getType();
bool IntoUndef = isa<UndefValue>(Op0);
bool FromUndef = isa<UndefValue>(Op1);
@@ -2927,8 +3005,8 @@ void SelectionDAGBuilder::visitInsertValue(const InsertValueInst &I) {
void SelectionDAGBuilder::visitExtractValue(const ExtractValueInst &I) {
const Value *Op0 = I.getOperand(0);
- const Type *AggTy = Op0->getType();
- const Type *ValTy = I.getType();
+ Type *AggTy = Op0->getType();
+ Type *ValTy = I.getType();
bool OutOfUndef = isa<UndefValue>(Op0);
unsigned LinearIndex = ComputeLinearIndex(AggTy, I.getIndices());
@@ -2961,12 +3039,12 @@ void SelectionDAGBuilder::visitExtractValue(const ExtractValueInst &I) {
void SelectionDAGBuilder::visitGetElementPtr(const User &I) {
SDValue N = getValue(I.getOperand(0));
- const Type *Ty = I.getOperand(0)->getType();
+ Type *Ty = I.getOperand(0)->getType();
for (GetElementPtrInst::const_op_iterator OI = I.op_begin()+1, E = I.op_end();
OI != E; ++OI) {
const Value *Idx = *OI;
- if (const StructType *StTy = dyn_cast<StructType>(Ty)) {
+ if (StructType *StTy = dyn_cast<StructType>(Ty)) {
unsigned Field = cast<ConstantInt>(Idx)->getZExtValue();
if (Field) {
// N = N + Offset
@@ -3037,7 +3115,7 @@ void SelectionDAGBuilder::visitAlloca(const AllocaInst &I) {
if (FuncInfo.StaticAllocaMap.count(&I))
return; // getValue will auto-populate this.
- const Type *Ty = I.getAllocatedType();
+ Type *Ty = I.getAllocatedType();
uint64_t TySize = TLI.getTargetData()->getTypeAllocSize(Ty);
unsigned Align =
std::max((unsigned)TLI.getTargetData()->getPrefTypeAlignment(Ty),
@@ -3084,10 +3162,13 @@ void SelectionDAGBuilder::visitAlloca(const AllocaInst &I) {
}
void SelectionDAGBuilder::visitLoad(const LoadInst &I) {
+ if (I.isAtomic())
+ return visitAtomicLoad(I);
+
const Value *SV = I.getOperand(0);
SDValue Ptr = getValue(SV);
- const Type *Ty = I.getType();
+ Type *Ty = I.getType();
bool isVolatile = I.isVolatile();
bool isNonTemporal = I.getMetadata("nontemporal") != 0;
@@ -3161,6 +3242,9 @@ void SelectionDAGBuilder::visitLoad(const LoadInst &I) {
}
void SelectionDAGBuilder::visitStore(const StoreInst &I) {
+ if (I.isAtomic())
+ return visitAtomicStore(I);
+
const Value *SrcV = I.getOperand(0);
const Value *PtrV = I.getOperand(1);
@@ -3211,6 +3295,179 @@ void SelectionDAGBuilder::visitStore(const StoreInst &I) {
DAG.setRoot(StoreNode);
}
+static SDValue InsertFenceForAtomic(SDValue Chain, AtomicOrdering Order,
+ SynchronizationScope Scope,
+ bool Before, DebugLoc dl,
+ SelectionDAG &DAG,
+ const TargetLowering &TLI) {
+ // Fence, if necessary
+ if (Before) {
+ if (Order == AcquireRelease || Order == SequentiallyConsistent)
+ Order = Release;
+ else if (Order == Acquire || Order == Monotonic)
+ return Chain;
+ } else {
+ if (Order == AcquireRelease)
+ Order = Acquire;
+ else if (Order == Release || Order == Monotonic)
+ return Chain;
+ }
+ SDValue Ops[3];
+ Ops[0] = Chain;
+ Ops[1] = DAG.getConstant(Order, TLI.getPointerTy());
+ Ops[2] = DAG.getConstant(Scope, TLI.getPointerTy());
+ return DAG.getNode(ISD::ATOMIC_FENCE, dl, MVT::Other, Ops, 3);
+}
+
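+
On targets that set getInsertFencesForAtomic(), the callers below demote the operation itself to Monotonic and rely on InsertFenceForAtomic to bracket it. For a sequentially consistent store, the result corresponds roughly to this C++-level pattern (an illustration of the fence placement, not the exact target lowering):

    #include <atomic>

    std::atomic<int> X;

    void seqCstStoreViaFences(int V) {
      std::atomic_thread_fence(std::memory_order_release); // "before" fence
      X.store(V, std::memory_order_relaxed);               // monotonic store
      std::atomic_thread_fence(std::memory_order_seq_cst); // "after" fence
    }
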
+void SelectionDAGBuilder::visitAtomicCmpXchg(const AtomicCmpXchgInst &I) {
+ DebugLoc dl = getCurDebugLoc();
+ AtomicOrdering Order = I.getOrdering();
+ SynchronizationScope Scope = I.getSynchScope();
+
+ SDValue InChain = getRoot();
+
+ if (TLI.getInsertFencesForAtomic())
+ InChain = InsertFenceForAtomic(InChain, Order, Scope, true, dl,
+ DAG, TLI);
+
+ SDValue L =
+ DAG.getAtomic(ISD::ATOMIC_CMP_SWAP, dl,
+ getValue(I.getCompareOperand()).getValueType().getSimpleVT(),
+ InChain,
+ getValue(I.getPointerOperand()),
+ getValue(I.getCompareOperand()),
+ getValue(I.getNewValOperand()),
+ MachinePointerInfo(I.getPointerOperand()), 0 /* Alignment */,
+ TLI.getInsertFencesForAtomic() ? Monotonic : Order,
+ Scope);
+
+ SDValue OutChain = L.getValue(1);
+
+ if (TLI.getInsertFencesForAtomic())
+ OutChain = InsertFenceForAtomic(OutChain, Order, Scope, false, dl,
+ DAG, TLI);
+
+ setValue(&I, L);
+ DAG.setRoot(OutChain);
+}
+
+void SelectionDAGBuilder::visitAtomicRMW(const AtomicRMWInst &I) {
+ DebugLoc dl = getCurDebugLoc();
+ ISD::NodeType NT;
+ switch (I.getOperation()) {
+ default: llvm_unreachable("Unknown atomicrmw operation"); return;
+ case AtomicRMWInst::Xchg: NT = ISD::ATOMIC_SWAP; break;
+ case AtomicRMWInst::Add: NT = ISD::ATOMIC_LOAD_ADD; break;
+ case AtomicRMWInst::Sub: NT = ISD::ATOMIC_LOAD_SUB; break;
+ case AtomicRMWInst::And: NT = ISD::ATOMIC_LOAD_AND; break;
+ case AtomicRMWInst::Nand: NT = ISD::ATOMIC_LOAD_NAND; break;
+ case AtomicRMWInst::Or: NT = ISD::ATOMIC_LOAD_OR; break;
+ case AtomicRMWInst::Xor: NT = ISD::ATOMIC_LOAD_XOR; break;
+ case AtomicRMWInst::Max: NT = ISD::ATOMIC_LOAD_MAX; break;
+ case AtomicRMWInst::Min: NT = ISD::ATOMIC_LOAD_MIN; break;
+ case AtomicRMWInst::UMax: NT = ISD::ATOMIC_LOAD_UMAX; break;
+ case AtomicRMWInst::UMin: NT = ISD::ATOMIC_LOAD_UMIN; break;
+ }
+ AtomicOrdering Order = I.getOrdering();
+ SynchronizationScope Scope = I.getSynchScope();
+
+ SDValue InChain = getRoot();
+
+ if (TLI.getInsertFencesForAtomic())
+ InChain = InsertFenceForAtomic(InChain, Order, Scope, true, dl,
+ DAG, TLI);
+
+ SDValue L =
+ DAG.getAtomic(NT, dl,
+ getValue(I.getValOperand()).getValueType().getSimpleVT(),
+ InChain,
+ getValue(I.getPointerOperand()),
+ getValue(I.getValOperand()),
+ I.getPointerOperand(), 0 /* Alignment */,
+ TLI.getInsertFencesForAtomic() ? Monotonic : Order,
+ Scope);
+
+ SDValue OutChain = L.getValue(1);
+
+ if (TLI.getInsertFencesForAtomic())
+ OutChain = InsertFenceForAtomic(OutChain, Order, Scope, false, dl,
+ DAG, TLI);
+
+ setValue(&I, L);
+ DAG.setRoot(OutChain);
+}
+
+void SelectionDAGBuilder::visitFence(const FenceInst &I) {
+ DebugLoc dl = getCurDebugLoc();
+ SDValue Ops[3];
+ Ops[0] = getRoot();
+ Ops[1] = DAG.getConstant(I.getOrdering(), TLI.getPointerTy());
+ Ops[2] = DAG.getConstant(I.getSynchScope(), TLI.getPointerTy());
+ DAG.setRoot(DAG.getNode(ISD::ATOMIC_FENCE, dl, MVT::Other, Ops, 3));
+}
+
+void SelectionDAGBuilder::visitAtomicLoad(const LoadInst &I) {
+ DebugLoc dl = getCurDebugLoc();
+ AtomicOrdering Order = I.getOrdering();
+ SynchronizationScope Scope = I.getSynchScope();
+
+ SDValue InChain = getRoot();
+
+ EVT VT = EVT::getEVT(I.getType());
+
+ if (I.getAlignment() * 8 < VT.getSizeInBits())
+ report_fatal_error("Cannot generate unaligned atomic load");
+
+ SDValue L =
+ DAG.getAtomic(ISD::ATOMIC_LOAD, dl, VT, VT, InChain,
+ getValue(I.getPointerOperand()),
+ I.getPointerOperand(), I.getAlignment(),
+ TLI.getInsertFencesForAtomic() ? Monotonic : Order,
+ Scope);
+
+ SDValue OutChain = L.getValue(1);
+
+ if (TLI.getInsertFencesForAtomic())
+ OutChain = InsertFenceForAtomic(OutChain, Order, Scope, false, dl,
+ DAG, TLI);
+
+ setValue(&I, L);
+ DAG.setRoot(OutChain);
+}
+
+void SelectionDAGBuilder::visitAtomicStore(const StoreInst &I) {
+ DebugLoc dl = getCurDebugLoc();
+
+ AtomicOrdering Order = I.getOrdering();
+ SynchronizationScope Scope = I.getSynchScope();
+
+ SDValue InChain = getRoot();
+
+ EVT VT = EVT::getEVT(I.getValueOperand()->getType());
+
+ if (I.getAlignment() * 8 < VT.getSizeInBits())
+ report_fatal_error("Cannot generate unaligned atomic store");
+
+ if (TLI.getInsertFencesForAtomic())
+ InChain = InsertFenceForAtomic(InChain, Order, Scope, true, dl,
+ DAG, TLI);
+
+ SDValue OutChain =
+ DAG.getAtomic(ISD::ATOMIC_STORE, dl, VT,
+ InChain,
+ getValue(I.getPointerOperand()),
+ getValue(I.getValueOperand()),
+ I.getPointerOperand(), I.getAlignment(),
+ TLI.getInsertFencesForAtomic() ? Monotonic : Order,
+ Scope);
+
+ if (TLI.getInsertFencesForAtomic())
+ OutChain = InsertFenceForAtomic(OutChain, Order, Scope, false, dl,
+ DAG, TLI);
+
+ DAG.setRoot(OutChain);
+}
+
/// visitTargetIntrinsic - Lower a call of a target intrinsic to an INTRINSIC
/// node.
void SelectionDAGBuilder::visitTargetIntrinsic(const CallInst &I,
@@ -3290,7 +3547,7 @@ void SelectionDAGBuilder::visitTargetIntrinsic(const CallInst &I,
}
if (!I.getType()->isVoidTy()) {
- if (const VectorType *PTy = dyn_cast<VectorType>(I.getType())) {
+ if (VectorType *PTy = dyn_cast<VectorType>(I.getType())) {
EVT VT = TLI.getValueType(PTy);
Result = DAG.getNode(ISD::BITCAST, getCurDebugLoc(), VT, Result);
}
@@ -3337,25 +3594,6 @@ getF32Constant(SelectionDAG &DAG, unsigned Flt) {
return DAG.getConstantFP(APFloat(APInt(32, Flt)), MVT::f32);
}
-/// Inlined utility function to implement binary input atomic intrinsics for
-/// visitIntrinsicCall: I is a call instruction
-/// Op is the associated NodeType for I
-const char *
-SelectionDAGBuilder::implVisitBinaryAtomic(const CallInst& I,
- ISD::NodeType Op) {
- SDValue Root = getRoot();
- SDValue L =
- DAG.getAtomic(Op, getCurDebugLoc(),
- getValue(I.getArgOperand(1)).getValueType().getSimpleVT(),
- Root,
- getValue(I.getArgOperand(0)),
- getValue(I.getArgOperand(1)),
- I.getArgOperand(0));
- setValue(&I, L);
- DAG.setRoot(L.getValue(1));
- return 0;
-}
-
// implVisitAluOverflow - Lower arithmetic overflow intrinsics.
const char *
SelectionDAGBuilder::implVisitAluOverflow(const CallInst &I, ISD::NodeType Op) {
@@ -4154,17 +4392,12 @@ SelectionDAGBuilder::EmitFuncArgumentDbgValue(const Value *V, MDNode *Variable,
return false;
unsigned Reg = 0;
- if (Arg->hasByValAttr()) {
- // Byval arguments' frame index is recorded during argument lowering.
- // Use this info directly.
- Reg = TRI->getFrameRegister(MF);
- Offset = FuncInfo.getByValArgumentFrameIndex(Arg);
- // If byval argument ofset is not recorded then ignore this.
- if (!Offset)
- Reg = 0;
- }
+ // Some arguments' frame index is recorded during argument lowering.
+ Offset = FuncInfo.getArgumentFrameIndex(Arg);
+ if (Offset)
+ Reg = TRI->getFrameRegister(MF);
- if (N.getNode()) {
+ if (!Reg && N.getNode()) {
if (N.getOpcode() == ISD::CopyFromReg)
Reg = cast<RegisterSDNode>(N.getOperand(1))->getReg();
else
@@ -4295,7 +4528,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
const DbgDeclareInst &DI = cast<DbgDeclareInst>(I);
MDNode *Variable = DI.getVariable();
const Value *Address = DI.getAddress();
- if (!Address || !DIVariable(DI.getVariable()).Verify())
+ if (!Address || !DIVariable(Variable).Verify())
return 0;
// Build an entry in DbgOrdering. Debug info input nodes get an SDNodeOrder
@@ -4385,7 +4618,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
// debug info exists.
++SDNodeOrder;
SDDbgValue *SDV;
- if (isa<ConstantInt>(V) || isa<ConstantFP>(V)) {
+ if (isa<ConstantInt>(V) || isa<ConstantFP>(V) || isa<UndefValue>(V)) {
SDV = DAG.getDbgValue(Variable, V, Offset, dl, SDNodeOrder);
DAG.AddDbgValue(SDV, 0, false);
} else {
@@ -4514,9 +4747,24 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
MMI.setCurrentCallSite(CI->getZExtValue());
return 0;
}
+ case Intrinsic::eh_sjlj_functioncontext: {
+ // Get and store the index of the function context.
+ MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
+ AllocaInst *FnCtx =
+ cast<AllocaInst>(I.getArgOperand(0)->stripPointerCasts());
+ int FI = FuncInfo.StaticAllocaMap[FnCtx];
+ MFI->setFunctionContextIndex(FI);
+ return 0;
+ }
case Intrinsic::eh_sjlj_setjmp: {
- setValue(&I, DAG.getNode(ISD::EH_SJLJ_SETJMP, dl, MVT::i32, getRoot(),
- getValue(I.getArgOperand(0))));
+ SDValue Ops[2];
+ Ops[0] = getRoot();
+ Ops[1] = getValue(I.getArgOperand(0));
+ SDValue Op = DAG.getNode(ISD::EH_SJLJ_SETJMP, dl,
+ DAG.getVTList(MVT::i32, MVT::Other),
+ Ops, 2);
+ setValue(&I, Op.getValue(0));
+ DAG.setRoot(Op.getValue(1));
return 0;
}
case Intrinsic::eh_sjlj_longjmp: {
@@ -4778,12 +5026,15 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
Ops[4] = DAG.getSrcValue(I.getArgOperand(0));
Ops[5] = DAG.getSrcValue(F);
- Res = DAG.getNode(ISD::TRAMPOLINE, dl,
- DAG.getVTList(TLI.getPointerTy(), MVT::Other),
- Ops, 6);
+ Res = DAG.getNode(ISD::INIT_TRAMPOLINE, dl, MVT::Other, Ops, 6);
- setValue(&I, Res);
- DAG.setRoot(Res.getValue(1));
+ DAG.setRoot(Res);
+ return 0;
+ }
+ case Intrinsic::adjust_trampoline: {
+ setValue(&I, DAG.getNode(ISD::ADJUST_TRAMPOLINE, dl,
+ TLI.getPointerTy(),
+ getValue(I.getArgOperand(0))));
return 0;
}
case Intrinsic::gcroot:
@@ -4857,51 +5108,6 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
rw==1)); /* write */
return 0;
}
- case Intrinsic::memory_barrier: {
- SDValue Ops[6];
- Ops[0] = getRoot();
- for (int x = 1; x < 6; ++x)
- Ops[x] = getValue(I.getArgOperand(x - 1));
-
- DAG.setRoot(DAG.getNode(ISD::MEMBARRIER, dl, MVT::Other, &Ops[0], 6));
- return 0;
- }
- case Intrinsic::atomic_cmp_swap: {
- SDValue Root = getRoot();
- SDValue L =
- DAG.getAtomic(ISD::ATOMIC_CMP_SWAP, getCurDebugLoc(),
- getValue(I.getArgOperand(1)).getValueType().getSimpleVT(),
- Root,
- getValue(I.getArgOperand(0)),
- getValue(I.getArgOperand(1)),
- getValue(I.getArgOperand(2)),
- MachinePointerInfo(I.getArgOperand(0)));
- setValue(&I, L);
- DAG.setRoot(L.getValue(1));
- return 0;
- }
- case Intrinsic::atomic_load_add:
- return implVisitBinaryAtomic(I, ISD::ATOMIC_LOAD_ADD);
- case Intrinsic::atomic_load_sub:
- return implVisitBinaryAtomic(I, ISD::ATOMIC_LOAD_SUB);
- case Intrinsic::atomic_load_or:
- return implVisitBinaryAtomic(I, ISD::ATOMIC_LOAD_OR);
- case Intrinsic::atomic_load_xor:
- return implVisitBinaryAtomic(I, ISD::ATOMIC_LOAD_XOR);
- case Intrinsic::atomic_load_and:
- return implVisitBinaryAtomic(I, ISD::ATOMIC_LOAD_AND);
- case Intrinsic::atomic_load_nand:
- return implVisitBinaryAtomic(I, ISD::ATOMIC_LOAD_NAND);
- case Intrinsic::atomic_load_max:
- return implVisitBinaryAtomic(I, ISD::ATOMIC_LOAD_MAX);
- case Intrinsic::atomic_load_min:
- return implVisitBinaryAtomic(I, ISD::ATOMIC_LOAD_MIN);
- case Intrinsic::atomic_load_umin:
- return implVisitBinaryAtomic(I, ISD::ATOMIC_LOAD_UMIN);
- case Intrinsic::atomic_load_umax:
- return implVisitBinaryAtomic(I, ISD::ATOMIC_LOAD_UMAX);
- case Intrinsic::atomic_swap:
- return implVisitBinaryAtomic(I, ISD::ATOMIC_SWAP);
case Intrinsic::invariant_start:
case Intrinsic::lifetime_start:
@@ -4918,9 +5124,9 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee,
bool isTailCall,
MachineBasicBlock *LandingPad) {
- const PointerType *PT = cast<PointerType>(CS.getCalledValue()->getType());
- const FunctionType *FTy = cast<FunctionType>(PT->getElementType());
- const Type *RetTy = FTy->getReturnType();
+ PointerType *PT = cast<PointerType>(CS.getCalledValue()->getType());
+ FunctionType *FTy = cast<FunctionType>(PT->getElementType());
+ Type *RetTy = FTy->getReturnType();
MachineModuleInfo &MMI = DAG.getMachineFunction().getMMI();
MCSymbol *BeginLabel = 0;
@@ -4949,7 +5155,7 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee,
FTy->getReturnType());
MachineFunction &MF = DAG.getMachineFunction();
DemoteStackIdx = MF.getFrameInfo()->CreateStackObject(TySize, Align, false);
- const Type *StackSlotPtrType = PointerType::getUnqual(FTy->getReturnType());
+ Type *StackSlotPtrType = PointerType::getUnqual(FTy->getReturnType());
DemoteStackSlot = DAG.getFrameIndex(DemoteStackIdx, TLI.getPointerTy());
Entry.Node = DemoteStackSlot;
@@ -4997,6 +5203,8 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee,
unsigned CallSiteIndex = MMI.getCurrentCallSite();
if (CallSiteIndex) {
MMI.setCallSiteBeginLabel(BeginLabel, CallSiteIndex);
+ LPadToCallSiteMap[LandingPad].push_back(CallSiteIndex);
+
// Now that the call site is handled, stop tracking it.
MMI.setCurrentCallSite(0);
}
@@ -5037,7 +5245,7 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee,
// The instruction result is the result of loading from the
// hidden sret parameter.
SmallVector<EVT, 1> PVTs;
- const Type *PtrRetTy = PointerType::getUnqual(FTy->getReturnType());
+ Type *PtrRetTy = PointerType::getUnqual(FTy->getReturnType());
ComputeValueVTs(TLI, PtrRetTy, PVTs);
assert(PVTs.size() == 1 && "Pointers should fit in one register");
@@ -5130,7 +5338,7 @@ static bool IsOnlyUsedInZeroEqualityComparison(const Value *V) {
}
static SDValue getMemCmpLoad(const Value *PtrVal, MVT LoadVT,
- const Type *LoadTy,
+ Type *LoadTy,
SelectionDAGBuilder &Builder) {
// Check to see if this load can be trivially constant folded, e.g. if the
@@ -5193,7 +5401,7 @@ bool SelectionDAGBuilder::visitMemCmpCall(const CallInst &I) {
if (Size && IsOnlyUsedInZeroEqualityComparison(&I)) {
bool ActuallyDoIt = true;
MVT LoadVT;
- const Type *LoadTy;
+ Type *LoadTy;
switch (Size->getZExtValue()) {
default:
LoadVT = MVT::Other;
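
For reference, the pattern visitMemCmpCall targets looks like this sketch: a
memcmp whose result is only tested against zero and whose size is a small
constant, so the libcall can be replaced by wide loads and a single setcc:

  #include <cstring>

  // Sketch: with size 4 and a zero-equality use, this becomes two i32 loads
  // plus one comparison instead of a call to memcmp.
  bool eq4(const char *A, const char *B) {
    return std::memcmp(A, B, 4) == 0;
  }
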
@@ -5261,14 +5469,14 @@ void SelectionDAGBuilder::visitCall(const CallInst &I) {
// See if any floating point values are being passed to this function. This is
// used to emit an undefined reference to fltused on Windows.
- const FunctionType *FT =
+ FunctionType *FT =
cast<FunctionType>(I.getCalledValue()->getType()->getContainedType(0));
MachineModuleInfo &MMI = DAG.getMachineFunction().getMMI();
if (FT->isVarArg() &&
!MMI.callsExternalVAFunctionWithFloatingPointArguments()) {
for (unsigned i = 0, e = I.getNumArgOperands(); i != e; ++i) {
- const Type* T = I.getArgOperand(i)->getType();
- for (po_iterator<const Type*> i = po_begin(T), e = po_end(T);
+ Type* T = I.getArgOperand(i)->getType();
+ for (po_iterator<Type*> i = po_begin(T), e = po_end(T);
i != e; ++i) {
if (!i->isFloatingPointTy()) continue;
MMI.setCallsExternalVAFunctionWithFloatingPointArguments(true);
@@ -5412,20 +5620,20 @@ public:
if (isa<BasicBlock>(CallOperandVal))
return TLI.getPointerTy();
- const llvm::Type *OpTy = CallOperandVal->getType();
+ llvm::Type *OpTy = CallOperandVal->getType();
// FIXME: code duplicated from TargetLowering::ParseConstraints().
// If this is an indirect operand, the operand is a pointer to the
// accessed type.
if (isIndirect) {
- const llvm::PointerType *PtrTy = dyn_cast<PointerType>(OpTy);
+ llvm::PointerType *PtrTy = dyn_cast<PointerType>(OpTy);
if (!PtrTy)
report_fatal_error("Indirect operand for inline asm not a pointer!");
OpTy = PtrTy->getElementType();
}
// Look for vector wrapped in a struct. e.g. { <16 x i8> }.
- if (const StructType *STy = dyn_cast<StructType>(OpTy))
+ if (StructType *STy = dyn_cast<StructType>(OpTy))
if (STy->getNumElements() == 1)
OpTy = STy->getElementType(0);
@@ -5637,9 +5845,8 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
// The return value of the call is this value. As such, there is no
// corresponding argument.
- assert(!CS.getType()->isVoidTy() &&
- "Bad inline asm!");
- if (const StructType *STy = dyn_cast<StructType>(CS.getType())) {
+ assert(!CS.getType()->isVoidTy() && "Bad inline asm!");
+ if (StructType *STy = dyn_cast<StructType>(CS.getType())) {
OpVT = TLI.getValueType(STy->getElementType(ResNo));
} else {
assert(ResNo == 0 && "Asm only has one result!");
@@ -5707,9 +5914,11 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
if (OpInfo.ConstraintVT != Input.ConstraintVT) {
std::pair<unsigned, const TargetRegisterClass*> MatchRC =
- TLI.getRegForInlineAsmConstraint(OpInfo.ConstraintCode, OpInfo.ConstraintVT);
+ TLI.getRegForInlineAsmConstraint(OpInfo.ConstraintCode,
+ OpInfo.ConstraintVT);
std::pair<unsigned, const TargetRegisterClass*> InputRC =
- TLI.getRegForInlineAsmConstraint(Input.ConstraintCode, Input.ConstraintVT);
+ TLI.getRegForInlineAsmConstraint(Input.ConstraintCode,
+ Input.ConstraintVT);
if ((OpInfo.ConstraintVT.isInteger() !=
Input.ConstraintVT.isInteger()) ||
(MatchRC.second != InputRC.second)) {
@@ -5750,7 +5959,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
} else {
// Otherwise, create a stack slot and emit a store to it before the
// asm.
- const Type *Ty = OpVal->getType();
+ Type *Ty = OpVal->getType();
uint64_t TySize = TLI.getTargetData()->getTypeAllocSize(Ty);
unsigned Align = TLI.getTargetData()->getPrefTypeAlignment(Ty);
MachineFunction &MF = DAG.getMachineFunction();
@@ -6111,7 +6320,7 @@ void SelectionDAGBuilder::visitVACopy(const CallInst &I) {
/// FIXME: When all targets are
/// migrated to using LowerCall, this hook should be integrated into SDISel.
std::pair<SDValue, SDValue>
-TargetLowering::LowerCallTo(SDValue Chain, const Type *RetTy,
+TargetLowering::LowerCallTo(SDValue Chain, Type *RetTy,
bool RetSExt, bool RetZExt, bool isVarArg,
bool isInreg, unsigned NumFixedArgs,
CallingConv::ID CallConv, bool isTailCall,
@@ -6128,7 +6337,7 @@ TargetLowering::LowerCallTo(SDValue Chain, const Type *RetTy,
for (unsigned Value = 0, NumValues = ValueVTs.size();
Value != NumValues; ++Value) {
EVT VT = ValueVTs[Value];
- const Type *ArgTy = VT.getTypeForEVT(RetTy->getContext());
+ Type *ArgTy = VT.getTypeForEVT(RetTy->getContext());
SDValue Op = SDValue(Args[i].Node.getNode(),
Args[i].Node.getResNo() + Value);
ISD::ArgFlagsTy Flags;
@@ -6145,8 +6354,8 @@ TargetLowering::LowerCallTo(SDValue Chain, const Type *RetTy,
Flags.setSRet();
if (Args[i].isByVal) {
Flags.setByVal();
- const PointerType *Ty = cast<PointerType>(Args[i].Ty);
- const Type *ElementTy = Ty->getElementType();
+ PointerType *Ty = cast<PointerType>(Args[i].Ty);
+ Type *ElementTy = Ty->getElementType();
Flags.setByValSize(getTargetData()->getTypeAllocSize(ElementTy));
// For ByVal, the alignment should come from the front end; the back end
// will guess if it is missing, but there are cases it cannot get right.
@@ -6356,7 +6565,7 @@ void SelectionDAGISel::LowerArguments(const BasicBlock *LLVMBB) {
for (unsigned Value = 0, NumValues = ValueVTs.size();
Value != NumValues; ++Value) {
EVT VT = ValueVTs[Value];
- const Type *ArgTy = VT.getTypeForEVT(*DAG.getContext());
+ Type *ArgTy = VT.getTypeForEVT(*DAG.getContext());
ISD::ArgFlagsTy Flags;
unsigned OriginalAlignment =
TD->getABITypeAlignment(ArgTy);
@@ -6371,8 +6580,8 @@ void SelectionDAGISel::LowerArguments(const BasicBlock *LLVMBB) {
Flags.setSRet();
if (F.paramHasAttr(Idx, Attribute::ByVal)) {
Flags.setByVal();
- const PointerType *Ty = cast<PointerType>(I->getType());
- const Type *ElementTy = Ty->getElementType();
+ PointerType *Ty = cast<PointerType>(I->getType());
+ Type *ElementTy = Ty->getElementType();
Flags.setByValSize(TD->getTypeAllocSize(ElementTy));
// For ByVal, the alignment should be passed from the front end; the back
// end will guess if it is missing, but there are cases it cannot get right.
@@ -6487,15 +6696,22 @@ void SelectionDAGISel::LowerArguments(const BasicBlock *LLVMBB) {
if (ArgValues.empty())
continue;
- // Note down frame index for byval arguments.
- if (I->hasByValAttr())
- if (FrameIndexSDNode *FI =
- dyn_cast<FrameIndexSDNode>(ArgValues[0].getNode()))
- FuncInfo->setByValArgumentFrameIndex(I, FI->getIndex());
+ // Note down frame index.
+ if (FrameIndexSDNode *FI =
+ dyn_cast<FrameIndexSDNode>(ArgValues[0].getNode()))
+ FuncInfo->setArgumentFrameIndex(I, FI->getIndex());
SDValue Res = DAG.getMergeValues(&ArgValues[0], NumValues,
SDB->getCurDebugLoc());
+
SDB->setValue(I, Res);
+ if (!EnableFastISel && Res.getOpcode() == ISD::BUILD_PAIR) {
+ if (LoadSDNode *LNode =
+ dyn_cast<LoadSDNode>(Res.getOperand(0).getNode()))
+ if (FrameIndexSDNode *FI =
+ dyn_cast<FrameIndexSDNode>(LNode->getBasePtr().getNode()))
+ FuncInfo->setArgumentFrameIndex(I, FI->getIndex());
+ }
// If this argument is live outside of the entry block, insert a copy from
// wherever we got it to the vreg that other BB's will reference it as.
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
index a0884eb..0a21ca3 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
@@ -132,10 +132,13 @@ private:
Constant* Low;
Constant* High;
MachineBasicBlock* BB;
+ uint32_t ExtraWeight;
+
+ Case() : Low(0), High(0), BB(0), ExtraWeight(0) { }
+ Case(Constant* low, Constant* high, MachineBasicBlock* bb,
+ uint32_t extraweight) : Low(low), High(high), BB(bb),
+ ExtraWeight(extraweight) { }
- Case() : Low(0), High(0), BB(0) { }
- Case(Constant* low, Constant* high, MachineBasicBlock* bb) :
- Low(low), High(high), BB(bb) { }
APInt size() const {
const APInt &rHigh = cast<ConstantInt>(High)->getValue();
const APInt &rLow = cast<ConstantInt>(Low)->getValue();
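
The ExtraWeight field threaded through Case here originates as IR profile
metadata, which BranchProbabilityInfo converts into the edge weights that
getEdgeWeight/addSuccessorWithWeight consume below. A hedged sketch of
producing such metadata for a conditional branch Br (the 64/4 numbers are
invented):

  // Sketch: tag a conditional branch with "branch_weights" profile data.
  LLVMContext &Ctx = Br->getContext();
  Value *Weights[] = {
    MDString::get(Ctx, "branch_weights"),
    ConstantInt::get(Type::getInt32Ty(Ctx), 64),   // taken edge (made up)
    ConstantInt::get(Type::getInt32Ty(Ctx), 4)     // fallthrough (made up)
  };
  Br->setMetadata("prof", MDNode::get(Ctx, Weights));
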
@@ -203,20 +206,30 @@ private:
CaseBlock(ISD::CondCode cc, const Value *cmplhs, const Value *cmprhs,
const Value *cmpmiddle,
MachineBasicBlock *truebb, MachineBasicBlock *falsebb,
- MachineBasicBlock *me)
+ MachineBasicBlock *me,
+ uint32_t trueweight = 0, uint32_t falseweight = 0)
: CC(cc), CmpLHS(cmplhs), CmpMHS(cmpmiddle), CmpRHS(cmprhs),
- TrueBB(truebb), FalseBB(falsebb), ThisBB(me) {}
+ TrueBB(truebb), FalseBB(falsebb), ThisBB(me),
+ TrueWeight(trueweight), FalseWeight(falseweight) { }
+
// CC - the condition code to use for the case block's setcc node
ISD::CondCode CC;
+
// CmpLHS/CmpRHS/CmpMHS - The LHS/MHS/RHS of the comparison to emit.
// Emit by default LHS op RHS. MHS is used for range comparisons:
// If MHS is not null: (LHS <= MHS) and (MHS <= RHS).
const Value *CmpLHS, *CmpMHS, *CmpRHS;
+
// TrueBB/FalseBB - the block to branch to if the setcc is true/false.
MachineBasicBlock *TrueBB, *FalseBB;
+
// ThisBB - the block into which to emit the code for the setcc and branches
MachineBasicBlock *ThisBB;
+
+ // TrueWeight/FalseWeight - branch weights.
+ uint32_t TrueWeight, FalseWeight;
};
+
struct JumpTable {
JumpTable(unsigned R, unsigned J, MachineBasicBlock *M,
MachineBasicBlock *D): Reg(R), JTI(J), MBB(M), Default(D) {}
@@ -307,6 +320,9 @@ public:
/// GFI - Garbage collection metadata for the function.
GCFunctionInfo *GFI;
+ /// LPadToCallSiteMap - Map a landing pad to the call site indexes.
+ DenseMap<MachineBasicBlock*, SmallVector<unsigned, 4> > LPadToCallSiteMap;
+
/// HasTailCall - This is set to true if a call in the current
/// block has been translated as a tail call. In this case,
/// no subsequent DAG nodes should be created.
@@ -436,7 +452,8 @@ private:
MachineBasicBlock *SwitchBB);
uint32_t getEdgeWeight(MachineBasicBlock *Src, MachineBasicBlock *Dst);
- void addSuccessorWithWeight(MachineBasicBlock *Src, MachineBasicBlock *Dst);
+ void addSuccessorWithWeight(MachineBasicBlock *Src, MachineBasicBlock *Dst,
+ uint32_t Weight = 0);
public:
void visitSwitchCase(CaseBlock &CB,
MachineBasicBlock *SwitchBB);
@@ -453,6 +470,7 @@ public:
private:
// These all get lowered before this pass.
void visitInvoke(const InvokeInst &I);
+ void visitResume(const ResumeInst &I);
void visitUnwind(const UnwindInst &I);
void visitBinary(const User &I, unsigned OpCode);
@@ -497,6 +515,7 @@ private:
void visitExtractValue(const ExtractValueInst &I);
void visitInsertValue(const InsertValueInst &I);
+ void visitLandingPad(const LandingPadInst &I);
void visitGetElementPtr(const User &I);
void visitSelect(const User &I);
@@ -504,10 +523,15 @@ private:
void visitAlloca(const AllocaInst &I);
void visitLoad(const LoadInst &I);
void visitStore(const StoreInst &I);
+ void visitAtomicCmpXchg(const AtomicCmpXchgInst &I);
+ void visitAtomicRMW(const AtomicRMWInst &I);
+ void visitFence(const FenceInst &I);
void visitPHI(const PHINode &I);
void visitCall(const CallInst &I);
bool visitMemCmpCall(const CallInst &I);
-
+ void visitAtomicLoad(const LoadInst &I);
+ void visitAtomicStore(const StoreInst &I);
+
void visitInlineAsm(ImmutableCallSite CS);
const char *visitIntrinsicCall(const CallInst &I, unsigned Intrinsic);
void visitTargetIntrinsic(const CallInst &I, unsigned Intrinsic);
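
The atomic visitors declared above receive ordinary loads and stores that
carry an ordering, which is how 3.0 expresses atomic memory operations. A
sketch of building them (Ptr, Val and InsertPt are assumed names):

  // Sketch: atomic load/store as they reach visitAtomicLoad/visitAtomicStore;
  // atomic memory operations need an explicit alignment to pass the verifier.
  LoadInst *LI = new LoadInst(Ptr, "val", /*isVolatile=*/false, InsertPt);
  LI->setAlignment(4);
  LI->setAtomic(Acquire);
  StoreInst *SI = new StoreInst(Val, Ptr, /*isVolatile=*/false, InsertPt);
  SI->setAlignment(4);
  SI->setAtomic(Release);
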
@@ -531,7 +555,6 @@ private:
llvm_unreachable("UserOp2 should not exist at instruction selection time!");
}
- const char *implVisitBinaryAtomic(const CallInst& I, ISD::NodeType Op);
const char *implVisitAluOverflow(const CallInst &I, ISD::NodeType Op);
void HandlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB);
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
index 87bb296..68b9146 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
@@ -177,6 +177,13 @@ TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
return 0;
}
+void TargetLowering::AdjustInstrPostInstrSelection(MachineInstr *MI,
+ SDNode *Node) const {
+ assert(!MI->getDesc().hasPostISelHook() &&
+ "If a target marks an instruction with 'hasPostISelHook', "
+ "it must implement TargetLowering::AdjustInstrPostInstrSelection!");
+}
+
//===----------------------------------------------------------------------===//
// SelectionDAGISel code
//===----------------------------------------------------------------------===//
@@ -463,6 +470,7 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {
GroupName = "Instruction Selection and Scheduling";
std::string BlockName;
int BlockNumber = -1;
+ (void)BlockNumber;
#ifdef NDEBUG
if (ViewDAGCombine1 || ViewLegalizeTypesDAGs || ViewLegalizeDAGs ||
ViewDAGCombine2 || ViewDAGCombineLT || ViewISelDAGs || ViewSchedDAGs ||
@@ -677,21 +685,26 @@ void SelectionDAGISel::DoInstructionSelection() {
/// PrepareEHLandingPad - Emit an EH_LABEL, set up live-in registers, and
/// do other setup for EH landing-pad blocks.
void SelectionDAGISel::PrepareEHLandingPad() {
+ MachineBasicBlock *MBB = FuncInfo->MBB;
+
// Add a label to mark the beginning of the landing pad. Deletion of the
// landing pad can thus be detected via the MachineModuleInfo.
- MCSymbol *Label = MF->getMMI().addLandingPad(FuncInfo->MBB);
+ MCSymbol *Label = MF->getMMI().addLandingPad(MBB);
+ // Assign the call site to the landing pad's begin label.
+ MF->getMMI().setCallSiteLandingPad(Label, SDB->LPadToCallSiteMap[MBB]);
+
const MCInstrDesc &II = TM.getInstrInfo()->get(TargetOpcode::EH_LABEL);
- BuildMI(*FuncInfo->MBB, FuncInfo->InsertPt, SDB->getCurDebugLoc(), II)
+ BuildMI(*MBB, FuncInfo->InsertPt, SDB->getCurDebugLoc(), II)
.addSym(Label);
// Mark exception register as live in.
unsigned Reg = TLI.getExceptionAddressRegister();
- if (Reg) FuncInfo->MBB->addLiveIn(Reg);
+ if (Reg) MBB->addLiveIn(Reg);
// Mark exception selector register as live in.
Reg = TLI.getExceptionSelectorRegister();
- if (Reg) FuncInfo->MBB->addLiveIn(Reg);
+ if (Reg) MBB->addLiveIn(Reg);
// FIXME: Hack around an exception handling flaw (PR1508): the personality
// function and list of typeids logically belong to the invoke (or, if you
@@ -704,7 +717,7 @@ void SelectionDAGISel::PrepareEHLandingPad() {
// in exceptions not being caught because no typeids are associated with
// the invoke. This may not be the only way things can go wrong, but it
// is the only way we try to work around for the moment.
- const BasicBlock *LLVMBB = FuncInfo->MBB->getBasicBlock();
+ const BasicBlock *LLVMBB = MBB->getBasicBlock();
const BranchInst *Br = dyn_cast<BranchInst>(LLVMBB->getTerminator());
if (Br && Br->isUnconditional()) { // Critical edge?
@@ -719,8 +732,6 @@ void SelectionDAGISel::PrepareEHLandingPad() {
}
}
-
-
/// TryToFoldFastISelLoad - We're checking to see if we can fold the specified
/// load into the specified FoldInst. Note that we could have a sequence where
/// multiple LLVM IR instructions are folded into the same machineinstr. For
@@ -741,7 +752,7 @@ bool SelectionDAGISel::TryToFoldFastISelLoad(const LoadInst *LI,
// isn't one of the folded instructions, then we can't succeed here. Handle
// this by scanning the single-use users of the load until we get to FoldInst.
unsigned MaxUsers = 6; // Don't scan down huge single-use chains of instrs.
-
+
const Instruction *TheUser = LI->use_back();
while (TheUser != FoldInst && // Scan up until we find FoldInst.
// Stay in the right block.
@@ -750,10 +761,15 @@ bool SelectionDAGISel::TryToFoldFastISelLoad(const LoadInst *LI,
// If there are multiple or no uses of this instruction, then bail out.
if (!TheUser->hasOneUse())
return false;
-
+
TheUser = TheUser->use_back();
}
-
+
+ // If we didn't find the fold instruction, then we failed to collapse the
+ // sequence.
+ if (TheUser != FoldInst)
+ return false;
+
// Don't try to fold volatile loads. Target has to deal with alignment
// constraints.
if (LI->isVolatile()) return false;
@@ -802,6 +818,7 @@ static bool isFoldedOrDeadInstruction(const Instruction *I,
return !I->mayWriteToMemory() && // Side-effecting instructions aren't folded.
!isa<TerminatorInst>(I) && // Terminators aren't folded.
!isa<DbgInfoIntrinsic>(I) && // Debug instructions aren't folded.
+ !isa<LandingPadInst>(I) && // Landingpad instructions aren't folded.
!FuncInfo->isExportedInst(I); // Exported instrs must be computed.
}
diff --git a/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index 2626ac3..907d8d9 100644
--- a/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -317,7 +317,7 @@ static void InitLibcallNames(const char **Names) {
Names[RTLIB::SYNC_FETCH_AND_OR_8] = "__sync_fetch_and_or_8";
Names[RTLIB::SYNC_FETCH_AND_XOR_1] = "__sync_fetch_and_xor_1";
Names[RTLIB::SYNC_FETCH_AND_XOR_2] = "__sync_fetch_and_xor_2";
- Names[RTLIB::SYNC_FETCH_AND_XOR_4] = "__sync_fetch_and-xor_4";
+ Names[RTLIB::SYNC_FETCH_AND_XOR_4] = "__sync_fetch_and_xor_4";
Names[RTLIB::SYNC_FETCH_AND_XOR_8] = "__sync_fetch_and_xor_8";
Names[RTLIB::SYNC_FETCH_AND_NAND_1] = "__sync_fetch_and_nand_1";
Names[RTLIB::SYNC_FETCH_AND_NAND_2] = "__sync_fetch_and_nand_2";
@@ -609,6 +609,7 @@ TargetLowering::TargetLowering(const TargetMachine &tm,
ExceptionPointerRegister = 0;
ExceptionSelectorRegister = 0;
BooleanContents = UndefinedBooleanContent;
+ BooleanVectorContents = UndefinedBooleanContent;
SchedPreferenceInfo = Sched::Latency;
JumpBufSize = 0;
JumpBufAlignment = 0;
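
BooleanVectorContents lets a target describe how vector setcc results are
encoded separately from scalar booleans; getBooleanContents correspondingly
grows an is-vector flag, visible in the SimplifySetCC change further down. A
target constructor might set both along these lines (a sketch; the enum
choices are illustrative, and setBooleanVectorContents is assumed to be the
matching setter):

  // Sketch of a target describing its boolean encodings (values invented).
  setBooleanContents(ZeroOrOneBooleanContent);               // scalar results
  setBooleanVectorContents(ZeroOrNegativeOneBooleanContent); // vector masks
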
@@ -617,6 +618,7 @@ TargetLowering::TargetLowering(const TargetMachine &tm,
PrefLoopAlignment = 0;
MinStackArgumentAlignment = 1;
ShouldFoldAtomicFences = false;
+ InsertFencesForAtomic = false;
InitLibcallNames(LibcallRoutineNames);
InitCmpLibcallCCs(CmpLibcallCCs);
@@ -914,7 +916,8 @@ const char *TargetLowering::getTargetNodeName(unsigned Opcode) const {
}
-MVT::SimpleValueType TargetLowering::getSetCCResultType(EVT VT) const {
+EVT TargetLowering::getSetCCResultType(EVT VT) const {
+ assert(!VT.isVector() && "No default SetCC type for vectors!");
return PointerTy.SimpleTy;
}
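
With getSetCCResultType returning a full EVT and asserting on vectors, a
target that supports vector compares must now answer for them itself. A
hypothetical override (a sketch; the vector branch assumes the
EVT::changeVectorElementTypeToInteger helper in this tree):

  // Hypothetical target override, not from this patch.
  EVT MyTargetLowering::getSetCCResultType(EVT VT) const {
    if (VT.isVector())                           // e.g. v4f32 -> v4i32 mask
      return VT.changeVectorElementTypeToInteger();
    return MVT::i32;                             // scalar compares yield i32
  }
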
@@ -996,7 +999,7 @@ unsigned TargetLowering::getVectorTypeBreakdown(LLVMContext &Context, EVT VT,
/// type of the given function. This does not require a DAG or a return value,
/// and is suitable for use before any DAGs for the function are constructed.
/// TODO: Move this out of TargetLowering.cpp.
-void llvm::GetReturnInfo(const Type* ReturnType, Attributes attr,
+void llvm::GetReturnInfo(Type* ReturnType, Attributes attr,
SmallVectorImpl<ISD::OutputArg> &Outs,
const TargetLowering &TLI,
SmallVectorImpl<uint64_t> *Offsets) {
@@ -1054,7 +1057,7 @@ void llvm::GetReturnInfo(const Type* ReturnType, Attributes attr,
/// getByValTypeAlignment - Return the desired alignment for ByVal aggregate
/// function arguments in the caller parameter area. This is the actual
/// alignment, not its logarithm.
-unsigned TargetLowering::getByValTypeAlignment(const Type *Ty) const {
+unsigned TargetLowering::getByValTypeAlignment(Type *Ty) const {
return TD->getCallFrameTypeAlignment(Ty);
}
@@ -1764,17 +1767,16 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
break;
}
case ISD::AssertZext: {
- // Demand all the bits of the input that are demanded in the output.
- // The low bits are obvious; the high bits are demanded because we're
- // asserting that they're zero here.
- if (SimplifyDemandedBits(Op.getOperand(0), NewMask,
+ // AssertZext demands all of the high bits, plus any of the low bits
+ // demanded by its users.
+ EVT VT = cast<VTSDNode>(Op.getOperand(1))->getVT();
+ APInt InMask = APInt::getLowBitsSet(BitWidth,
+ VT.getSizeInBits());
+ if (SimplifyDemandedBits(Op.getOperand(0), ~InMask | NewMask,
KnownZero, KnownOne, TLO, Depth+1))
return true;
assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
- EVT VT = cast<VTSDNode>(Op.getOperand(1))->getVT();
- APInt InMask = APInt::getLowBitsSet(BitWidth,
- VT.getSizeInBits());
KnownZero |= ~InMask & NewMask;
break;
}
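
A worked instance of the reordered AssertZext logic above, for an AssertZext
from i8 inside an i32 computation (the demanded low bit is invented):

  // Sketch: which bits get demanded through AssertZext i8 at BitWidth 32.
  APInt InMask = APInt::getLowBitsSet(32, 8);   // 0x000000FF
  APInt NewMask(32, 1);                         // the user wants only bit 0
  APInt Demanded = ~InMask | NewMask;           // 0xFFFFFF01
  // All 24 asserted-zero high bits stay demanded, plus the one live low bit.
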
@@ -2191,7 +2193,7 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
}
} else if (N1C->getAPIntValue() == 1 &&
(VT == MVT::i1 ||
- getBooleanContents() == ZeroOrOneBooleanContent)) {
+ getBooleanContents(false) == ZeroOrOneBooleanContent)) {
SDValue Op0 = N0;
if (Op0.getOpcode() == ISD::TRUNCATE)
Op0 = Op0.getOperand(0);
@@ -2758,16 +2760,8 @@ getRegForInlineAsmConstraint(const std::string &Constraint,
// If none of the value types for this register class are valid, we
// can't use it. For example, 64-bit reg classes on 32-bit targets.
- bool isLegal = false;
- for (TargetRegisterClass::vt_iterator I = RC->vt_begin(), E = RC->vt_end();
- I != E; ++I) {
- if (isTypeLegal(*I)) {
- isLegal = true;
- break;
- }
- }
-
- if (!isLegal) continue;
+ if (!isLegalRC(RC))
+ continue;
for (TargetRegisterClass::iterator I = RC->begin(), E = RC->end();
I != E; ++I) {
@@ -2840,7 +2834,7 @@ TargetLowering::AsmOperandInfoVector TargetLowering::ParseConstraints(
// corresponding argument.
assert(!CS.getType()->isVoidTy() &&
"Bad inline asm!");
- if (const StructType *STy = dyn_cast<StructType>(CS.getType())) {
+ if (StructType *STy = dyn_cast<StructType>(CS.getType())) {
OpInfo.ConstraintVT = getValueType(STy->getElementType(ResNo));
} else {
assert(ResNo == 0 && "Asm only has one result!");
@@ -2857,16 +2851,16 @@ TargetLowering::AsmOperandInfoVector TargetLowering::ParseConstraints(
}
if (OpInfo.CallOperandVal) {
- const llvm::Type *OpTy = OpInfo.CallOperandVal->getType();
+ llvm::Type *OpTy = OpInfo.CallOperandVal->getType();
if (OpInfo.isIndirect) {
- const llvm::PointerType *PtrTy = dyn_cast<PointerType>(OpTy);
+ llvm::PointerType *PtrTy = dyn_cast<PointerType>(OpTy);
if (!PtrTy)
report_fatal_error("Indirect operand for inline asm not a pointer!");
OpTy = PtrTy->getElementType();
}
// Look for vector wrapped in a struct. e.g. { <16 x i8> }.
- if (const StructType *STy = dyn_cast<StructType>(OpTy))
+ if (StructType *STy = dyn_cast<StructType>(OpTy))
if (STy->getNumElements() == 1)
OpTy = STy->getElementType(0);
@@ -3187,7 +3181,7 @@ void TargetLowering::ComputeConstraintToUse(AsmOperandInfo &OpInfo,
/// isLegalAddressingMode - Return true if the addressing mode represented
/// by AM is legal for this target, for a load/store of the specified type.
bool TargetLowering::isLegalAddressingMode(const AddrMode &AM,
- const Type *Ty) const {
+ Type *Ty) const {
// The default implementation of this implements a conservative RISCy, r+r and
// r+i addr mode.
diff --git a/lib/CodeGen/ShadowStackGC.cpp b/lib/CodeGen/ShadowStackGC.cpp
index 5a253a4..2609256 100644
--- a/lib/CodeGen/ShadowStackGC.cpp
+++ b/lib/CodeGen/ShadowStackGC.cpp
@@ -61,7 +61,7 @@ namespace {
private:
bool IsNullValue(Value *V);
Constant *GetFrameMap(Function &F);
- const Type* GetConcreteStackEntryType(Function &F);
+ Type* GetConcreteStackEntryType(Function &F);
void CollectRoots(Function &F);
static GetElementPtrInst *CreateGEP(LLVMContext &Context,
IRBuilder<> &B, Value *BasePtr,
@@ -109,13 +109,15 @@ namespace {
State = 1;
case 1:
- // Find all 'return' and 'unwind' instructions.
+ // Find all 'return', 'resume', and 'unwind' instructions.
while (StateBB != StateE) {
BasicBlock *CurBB = StateBB++;
- // Branches and invokes do not escape, only unwind and return do.
+ // Branches and invokes do not escape, only unwind, resume, and return
+ // do.
TerminatorInst *TI = CurBB->getTerminator();
- if (!isa<UnwindInst>(TI) && !isa<ReturnInst>(TI))
+ if (!isa<UnwindInst>(TI) && !isa<ReturnInst>(TI) &&
+ !isa<ResumeInst>(TI))
continue;
Builder.SetInsertPoint(TI->getParent(), TI);
@@ -139,9 +141,19 @@ namespace {
return 0;
// Create a cleanup block.
- BasicBlock *CleanupBB = BasicBlock::Create(F.getContext(),
- CleanupBBName, &F);
- UnwindInst *UI = new UnwindInst(F.getContext(), CleanupBB);
+ LLVMContext &C = F.getContext();
+ BasicBlock *CleanupBB = BasicBlock::Create(C, CleanupBBName, &F);
+ Type *ExnTy = StructType::get(Type::getInt8PtrTy(C),
+ Type::getInt32Ty(C), NULL);
+ Constant *PersFn =
+ F.getParent()->
+ getOrInsertFunction("__gcc_personality_v0",
+ FunctionType::get(Type::getInt32Ty(C), true));
+ LandingPadInst *LPad = LandingPadInst::Create(ExnTy, PersFn, 1,
+ "cleanup.lpad",
+ CleanupBB);
+ LPad->setCleanup(true);
+ ResumeInst *RI = ResumeInst::Create(LPad, CleanupBB);
// Transform the 'call' instructions into 'invoke's branching to the
// cleanup block. Go in reverse order to make prettier BB names.
@@ -172,7 +184,7 @@ namespace {
delete CI;
}
- Builder.SetInsertPoint(UI->getParent(), UI);
+ Builder.SetInsertPoint(RI->getParent(), RI);
return &Builder;
}
}
@@ -190,7 +202,7 @@ ShadowStackGC::ShadowStackGC() : Head(0), StackEntryTy(0) {
Constant *ShadowStackGC::GetFrameMap(Function &F) {
// doInitialization creates the abstract type of this value.
- const Type *VoidPtr = Type::getInt8PtrTy(F.getContext());
+ Type *VoidPtr = Type::getInt8PtrTy(F.getContext());
// Truncate the ShadowStackDescriptor if some metadata is null.
unsigned NumMeta = 0;
@@ -203,7 +215,7 @@ Constant *ShadowStackGC::GetFrameMap(Function &F) {
}
Metadata.resize(NumMeta);
- const Type *Int32Ty = Type::getInt32Ty(F.getContext());
+ Type *Int32Ty = Type::getInt32Ty(F.getContext());
Constant *BaseElts[] = {
ConstantInt::get(Int32Ty, Roots.size(), false),
@@ -216,7 +228,7 @@ Constant *ShadowStackGC::GetFrameMap(Function &F) {
};
Type *EltTys[] = { DescriptorElts[0]->getType(),DescriptorElts[1]->getType()};
- StructType *STy = StructType::createNamed("gc_map."+utostr(NumMeta), EltTys);
+ StructType *STy = StructType::create(EltTys, "gc_map."+utostr(NumMeta));
Constant *FrameMap = ConstantStruct::get(STy, DescriptorElts);
@@ -241,17 +253,17 @@ Constant *ShadowStackGC::GetFrameMap(Function &F) {
ConstantInt::get(Type::getInt32Ty(F.getContext()), 0),
ConstantInt::get(Type::getInt32Ty(F.getContext()), 0)
};
- return ConstantExpr::getGetElementPtr(GV, GEPIndices, 2);
+ return ConstantExpr::getGetElementPtr(GV, GEPIndices);
}
-const Type* ShadowStackGC::GetConcreteStackEntryType(Function &F) {
+Type* ShadowStackGC::GetConcreteStackEntryType(Function &F) {
// doInitialization creates the generic version of this type.
std::vector<Type*> EltTys;
EltTys.push_back(StackEntryTy);
for (size_t I = 0; I != Roots.size(); I++)
EltTys.push_back(Roots[I].second->getAllocatedType());
- return StructType::createNamed("gc_stackentry."+F.getName().str(), EltTys);
+ return StructType::create(EltTys, "gc_stackentry."+F.getName().str());
}
/// doInitialization - If this module uses the GC intrinsics, find them now. If
@@ -267,7 +279,7 @@ bool ShadowStackGC::initializeCustomLowering(Module &M) {
EltTys.push_back(Type::getInt32Ty(M.getContext()));
// Specifies length of variable length array.
EltTys.push_back(Type::getInt32Ty(M.getContext()));
- FrameMapTy = StructType::createNamed("gc_map", EltTys);
+ FrameMapTy = StructType::create(EltTys, "gc_map");
PointerType *FrameMapPtrTy = PointerType::getUnqual(FrameMapTy);
// struct StackEntry {
@@ -276,13 +288,13 @@ bool ShadowStackGC::initializeCustomLowering(Module &M) {
// void *Roots[]; // Stack roots (in-place array, so we pretend).
// };
- StackEntryTy = StructType::createNamed(M.getContext(), "gc_stackentry");
+ StackEntryTy = StructType::create(M.getContext(), "gc_stackentry");
EltTys.clear();
EltTys.push_back(PointerType::getUnqual(StackEntryTy));
EltTys.push_back(FrameMapPtrTy);
StackEntryTy->setBody(EltTys);
- const PointerType *StackEntryPtrTy = PointerType::getUnqual(StackEntryTy);
+ PointerType *StackEntryPtrTy = PointerType::getUnqual(StackEntryTy);
// Get the root chain if it already exists.
Head = M.getGlobalVariable("llvm_gc_root_chain");
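
The createNamed-to-create renames above reflect the 3.0 type-system rework:
identified structs are created (possibly opaque) and may be given a body
later, which is exactly what the self-referential gc_stackentry type needs.
In miniature (C and Elt are assumed names for a context and an element type):

  // Sketch of the 3.0 identified-struct idiom used above.
  StructType *Entry = StructType::create(C, "gc_stackentry"); // opaque so far
  Type *Body[] = { PointerType::getUnqual(Entry),             // self-reference
                   Elt };
  Entry->setBody(Body);                                       // now complete
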
@@ -340,7 +352,7 @@ ShadowStackGC::CreateGEP(LLVMContext &Context, IRBuilder<> &B, Value *BasePtr,
Value *Indices[] = { ConstantInt::get(Type::getInt32Ty(Context), 0),
ConstantInt::get(Type::getInt32Ty(Context), Idx),
ConstantInt::get(Type::getInt32Ty(Context), Idx2) };
- Value* Val = B.CreateGEP(BasePtr, Indices, Indices + 3, Name);
+ Value* Val = B.CreateGEP(BasePtr, Indices, Name);
assert(isa<GetElementPtrInst>(Val) && "Unexpected folded constant");
@@ -352,7 +364,7 @@ ShadowStackGC::CreateGEP(LLVMContext &Context, IRBuilder<> &B, Value *BasePtr,
int Idx, const char *Name) {
Value *Indices[] = { ConstantInt::get(Type::getInt32Ty(Context), 0),
ConstantInt::get(Type::getInt32Ty(Context), Idx) };
- Value *Val = B.CreateGEP(BasePtr, Indices, Indices + 2, Name);
+ Value *Val = B.CreateGEP(BasePtr, Indices, Name);
assert(isa<GetElementPtrInst>(Val) && "Unexpected folded constant");
@@ -373,7 +385,7 @@ bool ShadowStackGC::performCustomLowering(Function &F) {
// Build the constant map and figure the type of the shadow stack entry.
Value *FrameMap = GetFrameMap(F);
- const Type *ConcreteStackEntryTy = GetConcreteStackEntryType(F);
+ Type *ConcreteStackEntryTy = GetConcreteStackEntryType(F);
// Build the shadow stack entry at the very start of the function.
BasicBlock::iterator IP = F.getEntryBlock().begin();
diff --git a/lib/CodeGen/SjLjEHPrepare.cpp b/lib/CodeGen/SjLjEHPrepare.cpp
index 65a33da..ded2459d 100644
--- a/lib/CodeGen/SjLjEHPrepare.cpp
+++ b/lib/CodeGen/SjLjEHPrepare.cpp
@@ -21,26 +21,31 @@
#include "llvm/LLVMContext.h"
#include "llvm/Module.h"
#include "llvm/Pass.h"
-#include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/Passes.h"
-#include "llvm/Support/Debug.h"
+#include "llvm/Target/TargetData.h"
#include "llvm/Target/TargetLowering.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/IRBuilder.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
#include <set>
using namespace llvm;
+static cl::opt<bool> DisableOldSjLjEH("disable-old-sjlj-eh", cl::Hidden,
+ cl::desc("Disable the old SjLj EH preparation pass"));
+
STATISTIC(NumInvokes, "Number of invokes replaced");
STATISTIC(NumUnwinds, "Number of unwinds replaced");
STATISTIC(NumSpilled, "Number of registers live across unwind edges");
namespace {
class SjLjEHPass : public FunctionPass {
-
const TargetLowering *TLI;
-
- const Type *FunctionContextTy;
+ Type *FunctionContextTy;
Constant *RegisterFn;
Constant *UnregisterFn;
Constant *BuiltinSetjmpFn;
@@ -53,8 +58,9 @@ namespace {
Constant *ExceptionFn;
Constant *CallSiteFn;
Constant *DispatchSetupFn;
-
+ Constant *FuncCtxFn;
Value *CallSite;
+ DenseMap<InvokeInst*, BasicBlock*> LPadSuccMap;
public:
static char ID; // Pass identification, replacement for typeid
explicit SjLjEHPass(const TargetLowering *tli = NULL)
@@ -62,16 +68,22 @@ namespace {
bool doInitialization(Module &M);
bool runOnFunction(Function &F);
- virtual void getAnalysisUsage(AnalysisUsage &AU) const { }
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {}
const char *getPassName() const {
return "SJLJ Exception Handling preparation";
}
private:
+ bool setupEntryBlockAndCallSites(Function &F);
+ Value *setupFunctionContext(Function &F, ArrayRef<LandingPadInst*> LPads);
+ void lowerIncomingArguments(Function &F);
+ void lowerAcrossUnwindEdges(Function &F, ArrayRef<InvokeInst*> Invokes);
+
void insertCallSiteStore(Instruction *I, int Number, Value *CallSite);
void markInvokeCallSite(InvokeInst *II, int InvokeNo, Value *CallSite,
SwitchInst *CatchSwitch);
void splitLiveRangesAcrossInvokes(SmallVector<InvokeInst*,16> &Invokes);
+ void splitLandingPad(InvokeInst *II);
bool insertSjLjEHSupport(Function &F);
};
} // end anonymous namespace
@@ -116,6 +128,7 @@ bool SjLjEHPass::doInitialization(Module &M) {
CallSiteFn = Intrinsic::getDeclaration(&M, Intrinsic::eh_sjlj_callsite);
DispatchSetupFn
= Intrinsic::getDeclaration(&M, Intrinsic::eh_sjlj_dispatch_setup);
+ FuncCtxFn = Intrinsic::getDeclaration(&M, Intrinsic::eh_sjlj_functioncontext);
PersonalityFn = 0;
return true;
@@ -131,6 +144,42 @@ void SjLjEHPass::insertCallSiteStore(Instruction *I, int Number,
new StoreInst(CallSiteNoC, CallSite, true, I); // volatile
}
+/// splitLandingPad - Split a landing pad. This takes considerable care because
+/// of PHIs and other nasties. The problem is that the jump table needs to jump
+/// to the landing pad block. However, the landing pad block can be jumped to
+/// only by an invoke instruction. So we clone the landingpad instruction into
+/// its own basic block and have the invoke jump there. The successor of the
+/// landingpad instruction's block then becomes the target for the jump table.
+///
+/// But because of PHI nodes, we need to create another basic block for the jump
+/// table to jump to. This is definitely a hack, because the values for the PHI
+/// nodes may not be defined on the edge from the jump table. But that's okay,
+/// because the jump table is simply a construct to mimic what is happening in
+/// the CFG. So the values are mysteriously there, even though there is no value
+/// for the PHI from the jump table's edge (hence calling this a hack).
+void SjLjEHPass::splitLandingPad(InvokeInst *II) {
+ SmallVector<BasicBlock*, 2> NewBBs;
+ SplitLandingPadPredecessors(II->getUnwindDest(), II->getParent(),
+ ".1", ".2", this, NewBBs);
+
+ // Create an empty block so that the jump table has something to jump to
+ // which doesn't have any PHI nodes.
+ BasicBlock *LPad = NewBBs[0];
+ BasicBlock *Succ = *succ_begin(LPad);
+ BasicBlock *JumpTo = BasicBlock::Create(II->getContext(), "jt.land",
+ LPad->getParent(), Succ);
+ LPad->getTerminator()->eraseFromParent();
+ BranchInst::Create(JumpTo, LPad);
+ BranchInst::Create(Succ, JumpTo);
+ LPadSuccMap[II] = JumpTo;
+
+ for (BasicBlock::iterator I = Succ->begin(); isa<PHINode>(I); ++I) {
+ PHINode *PN = cast<PHINode>(I);
+ Value *Val = PN->removeIncomingValue(LPad, false);
+ PN->addIncoming(Val, JumpTo);
+ }
+}
+
/// markInvokeCallSite - Insert code to mark the call_site for this invoke
void SjLjEHPass::markInvokeCallSite(InvokeInst *II, int InvokeNo,
Value *CallSite,
@@ -140,11 +189,15 @@ void SjLjEHPass::markInvokeCallSite(InvokeInst *II, int InvokeNo,
// The runtime comes back to the dispatcher with the call_site - 1 in
// the context. Odd, but there it is.
ConstantInt *SwitchValC = ConstantInt::get(Type::getInt32Ty(II->getContext()),
- InvokeNo - 1);
+ InvokeNo - 1);
// If the unwind edge has phi nodes, split the edge.
if (isa<PHINode>(II->getUnwindDest()->begin())) {
- SplitCriticalEdge(II, 1, this);
+ // FIXME: New EH - This if-condition will be always true in the new scheme.
+ if (II->getUnwindDest()->isLandingPad())
+ splitLandingPad(II);
+ else
+ SplitCriticalEdge(II, 1, this);
// If there are any phi nodes left, they must have a single predecessor.
while (PHINode *PN = dyn_cast<PHINode>(II->getUnwindDest()->begin())) {
@@ -161,7 +214,12 @@ void SjLjEHPass::markInvokeCallSite(InvokeInst *II, int InvokeNo,
CallInst::Create(CallSiteFn, CallSiteNoC, "", II);
// Add a switch case to our unwind block.
- CatchSwitch->addCase(SwitchValC, II->getUnwindDest());
+ if (BasicBlock *SuccBB = LPadSuccMap[II]) {
+ CatchSwitch->addCase(SwitchValC, SuccBB);
+ } else {
+ CatchSwitch->addCase(SwitchValC, II->getUnwindDest());
+ }
+
// We still want this to look like an invoke so we emit the LSDA properly,
// so we don't transform the invoke into a call here.
}
@@ -187,10 +245,16 @@ splitLiveRangesAcrossInvokes(SmallVector<InvokeInst*,16> &Invokes) {
for (unsigned i = 0, e = Invokes.size(); i != e; ++i) {
InvokeInst *II = Invokes[i];
SplitCriticalEdge(II, 0, this);
- SplitCriticalEdge(II, 1, this);
+
+ // FIXME: New EH - This if-condition will be always true in the new scheme.
+ if (II->getUnwindDest()->isLandingPad())
+ splitLandingPad(II);
+ else
+ SplitCriticalEdge(II, 1, this);
+
assert(!isa<PHINode>(II->getNormalDest()) &&
!isa<PHINode>(II->getUnwindDest()) &&
- "critical edge splitting left single entry phi nodes?");
+ "Critical edge splitting left single entry phi nodes?");
}
Function *F = Invokes.back()->getParent()->getParent();
@@ -204,7 +268,7 @@ splitLiveRangesAcrossInvokes(SmallVector<InvokeInst*,16> &Invokes) {
++AfterAllocaInsertPt;
for (Function::arg_iterator AI = F->arg_begin(), E = F->arg_end();
AI != E; ++AI) {
- const Type *Ty = AI->getType();
+ Type *Ty = AI->getType();
// Aggregate types can't be cast, but are legal argument types, so we have
// to handle them differently. We use an extract/insert pair as a
// lightweight method to achieve the same goal.
@@ -283,9 +347,8 @@ splitLiveRangesAcrossInvokes(SmallVector<InvokeInst*,16> &Invokes) {
bool NeedsSpill = false;
for (unsigned i = 0, e = Invokes.size(); i != e; ++i) {
BasicBlock *UnwindBlock = Invokes[i]->getUnwindDest();
- if (UnwindBlock != BB && LiveBBs.count(UnwindBlock)) {
+ if (UnwindBlock != BB && LiveBBs.count(UnwindBlock))
NeedsSpill = true;
- }
}
// If we decided we need a spill, do it.
@@ -299,6 +362,44 @@ splitLiveRangesAcrossInvokes(SmallVector<InvokeInst*,16> &Invokes) {
}
}
+/// CreateLandingPadLoad - Load the exception handling values and insert them
+/// into a structure.
+static Instruction *CreateLandingPadLoad(Function &F, Value *ExnAddr,
+ Value *SelAddr,
+ BasicBlock::iterator InsertPt) {
+ Value *Exn = new LoadInst(ExnAddr, "exn", false,
+ InsertPt);
+ Type *Ty = Type::getInt8PtrTy(F.getContext());
+ Exn = CastInst::Create(Instruction::IntToPtr, Exn, Ty, "", InsertPt);
+ Value *Sel = new LoadInst(SelAddr, "sel", false, InsertPt);
+
+ Ty = StructType::get(Exn->getType(), Sel->getType(), NULL);
+ InsertValueInst *LPadVal = InsertValueInst::Create(llvm::UndefValue::get(Ty),
+ Exn, 0,
+ "lpad.val", InsertPt);
+ return InsertValueInst::Create(LPadVal, Sel, 1, "lpad.val", InsertPt);
+}
+
+/// ReplaceLandingPadVal - Replace the landingpad instruction's value with a
+/// load from the stored values (via CreateLandingPadLoad). This looks through
+/// PHI nodes, and removes them if they are dead.
+static void ReplaceLandingPadVal(Function &F, Instruction *Inst, Value *ExnAddr,
+ Value *SelAddr) {
+ if (Inst->use_empty()) return;
+
+ while (!Inst->use_empty()) {
+ Instruction *I = cast<Instruction>(Inst->use_back());
+
+ if (PHINode *PN = dyn_cast<PHINode>(I)) {
+ ReplaceLandingPadVal(F, PN, ExnAddr, SelAddr);
+ if (PN->use_empty()) PN->eraseFromParent();
+ continue;
+ }
+
+ I->replaceUsesOfWith(Inst, CreateLandingPadLoad(F, ExnAddr, SelAddr, I));
+ }
+}
+
bool SjLjEHPass::insertSjLjEHSupport(Function &F) {
SmallVector<ReturnInst*,16> Returns;
SmallVector<UnwindInst*,16> Unwinds;
@@ -337,10 +438,23 @@ bool SjLjEHPass::insertSjLjEHSupport(Function &F) {
SmallVector<CallInst*,16> EH_Exceptions;
SmallVector<Instruction*,16> JmpbufUpdatePoints;
- // Note: Skip the entry block since there's nothing there that interests
- // us. eh.selector and eh.exception shouldn't ever be there, and we
- // want to disregard any allocas that are there.
- for (Function::iterator BB = F.begin(), E = F.end(); ++BB != E;) {
+ for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) {
+ // Note: Skip the entry block since there's nothing there that interests
+ // us. eh.selector and eh.exception shouldn't ever be there, and we
+ // want to disregard any allocas that are there.
+ //
+ // FIXME: This is awkward. The new EH scheme won't need to skip the entry
+ // block.
+ if (BB == F.begin()) {
+ if (InvokeInst *II = dyn_cast<InvokeInst>(F.begin()->getTerminator())) {
+ // FIXME: This will be always non-NULL in the new EH.
+ if (LandingPadInst *LPI = II->getUnwindDest()->getLandingPadInst())
+ if (!PersonalityFn) PersonalityFn = LPI->getPersonalityFn();
+ }
+
+ continue;
+ }
+
for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) {
if (CallInst *CI = dyn_cast<CallInst>(I)) {
if (CI->getCalledFunction() == SelectorFn) {
@@ -353,6 +467,10 @@ bool SjLjEHPass::insertSjLjEHSupport(Function &F) {
}
} else if (AllocaInst *AI = dyn_cast<AllocaInst>(I)) {
JmpbufUpdatePoints.push_back(AI);
+ } else if (InvokeInst *II = dyn_cast<InvokeInst>(I)) {
+ // FIXME: This will be always non-NULL in the new EH.
+ if (LandingPadInst *LPI = II->getUnwindDest()->getLandingPadInst())
+ if (!PersonalityFn) PersonalityFn = LPI->getPersonalityFn();
}
}
}
@@ -371,6 +489,16 @@ bool SjLjEHPass::insertSjLjEHSupport(Function &F) {
// invoke's.
splitLiveRangesAcrossInvokes(Invokes);
+
+ SmallVector<LandingPadInst*, 16> LandingPads;
+ for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) {
+ if (InvokeInst *II = dyn_cast<InvokeInst>(BB->getTerminator()))
+ // FIXME: This will be always non-NULL in the new EH.
+ if (LandingPadInst *LPI = II->getUnwindDest()->getLandingPadInst())
+ LandingPads.push_back(LPI);
+ }
+
+
BasicBlock *EntryBB = F.begin();
// Create an alloca for the incoming jump buffer ptr and the new jump buffer
// that needs to be restored on all exits from the function. This is an
@@ -381,27 +509,25 @@ bool SjLjEHPass::insertSjLjEHSupport(Function &F) {
"fcn_context", F.begin()->begin());
Value *Idxs[2];
- const Type *Int32Ty = Type::getInt32Ty(F.getContext());
+ Type *Int32Ty = Type::getInt32Ty(F.getContext());
Value *Zero = ConstantInt::get(Int32Ty, 0);
// We need to also keep around a reference to the call_site field
Idxs[0] = Zero;
Idxs[1] = ConstantInt::get(Int32Ty, 1);
- CallSite = GetElementPtrInst::Create(FunctionContext, Idxs, Idxs+2,
- "call_site",
+ CallSite = GetElementPtrInst::Create(FunctionContext, Idxs, "call_site",
EntryBB->getTerminator());
// The exception selector comes back in context->data[1]
Idxs[1] = ConstantInt::get(Int32Ty, 2);
- Value *FCData = GetElementPtrInst::Create(FunctionContext, Idxs, Idxs+2,
- "fc_data",
+ Value *FCData = GetElementPtrInst::Create(FunctionContext, Idxs, "fc_data",
EntryBB->getTerminator());
Idxs[1] = ConstantInt::get(Int32Ty, 1);
- Value *SelectorAddr = GetElementPtrInst::Create(FCData, Idxs, Idxs+2,
+ Value *SelectorAddr = GetElementPtrInst::Create(FCData, Idxs,
"exc_selector_gep",
EntryBB->getTerminator());
// The exception value comes back in context->data[0]
Idxs[1] = Zero;
- Value *ExceptionAddr = GetElementPtrInst::Create(FCData, Idxs, Idxs+2,
+ Value *ExceptionAddr = GetElementPtrInst::Create(FCData, Idxs,
"exception_gep",
EntryBB->getTerminator());
@@ -423,13 +549,16 @@ bool SjLjEHPass::insertSjLjEHSupport(Function &F) {
// instruction hasn't already been removed.
if (!I->getParent()) continue;
Value *Val = new LoadInst(ExceptionAddr, "exception", true, I);
- const Type *Ty = Type::getInt8PtrTy(F.getContext());
+ Type *Ty = Type::getInt8PtrTy(F.getContext());
Val = CastInst::Create(Instruction::IntToPtr, Val, Ty, "", I);
I->replaceAllUsesWith(Val);
I->eraseFromParent();
}
+ for (unsigned i = 0, e = LandingPads.size(); i != e; ++i)
+ ReplaceLandingPadVal(F, LandingPads[i], ExceptionAddr, SelectorAddr);
+
// The entry block changes to have the eh.sjlj.setjmp, with a conditional
// branch to a dispatch block for non-zero returns. If we return normally,
// we're not handling an exception and just register the function context and
@@ -466,8 +595,7 @@ bool SjLjEHPass::insertSjLjEHSupport(Function &F) {
Idxs[0] = Zero;
Idxs[1] = ConstantInt::get(Int32Ty, 4);
Value *LSDAFieldPtr =
- GetElementPtrInst::Create(FunctionContext, Idxs, Idxs+2,
- "lsda_gep",
+ GetElementPtrInst::Create(FunctionContext, Idxs, "lsda_gep",
EntryBB->getTerminator());
Value *LSDA = CallInst::Create(LSDAAddrFn, "lsda_addr",
EntryBB->getTerminator());
@@ -475,8 +603,7 @@ bool SjLjEHPass::insertSjLjEHSupport(Function &F) {
Idxs[1] = ConstantInt::get(Int32Ty, 3);
Value *PersonalityFieldPtr =
- GetElementPtrInst::Create(FunctionContext, Idxs, Idxs+2,
- "lsda_gep",
+ GetElementPtrInst::Create(FunctionContext, Idxs, "lsda_gep",
EntryBB->getTerminator());
new StoreInst(PersonalityFn, PersonalityFieldPtr, true,
EntryBB->getTerminator());
@@ -484,12 +611,11 @@ bool SjLjEHPass::insertSjLjEHSupport(Function &F) {
// Save the frame pointer.
Idxs[1] = ConstantInt::get(Int32Ty, 5);
Value *JBufPtr
- = GetElementPtrInst::Create(FunctionContext, Idxs, Idxs+2,
- "jbuf_gep",
+ = GetElementPtrInst::Create(FunctionContext, Idxs, "jbuf_gep",
EntryBB->getTerminator());
Idxs[1] = ConstantInt::get(Int32Ty, 0);
Value *FramePtr =
- GetElementPtrInst::Create(JBufPtr, Idxs, Idxs+2, "jbuf_fp_gep",
+ GetElementPtrInst::Create(JBufPtr, Idxs, "jbuf_fp_gep",
EntryBB->getTerminator());
Value *Val = CallInst::Create(FrameAddrFn,
@@ -501,7 +627,7 @@ bool SjLjEHPass::insertSjLjEHSupport(Function &F) {
// Save the stack pointer.
Idxs[1] = ConstantInt::get(Int32Ty, 2);
Value *StackPtr =
- GetElementPtrInst::Create(JBufPtr, Idxs, Idxs+2, "jbuf_sp_gep",
+ GetElementPtrInst::Create(JBufPtr, Idxs, "jbuf_sp_gep",
EntryBB->getTerminator());
Val = CallInst::Create(StackAddrFn, "sp", EntryBB->getTerminator());
@@ -513,7 +639,7 @@ bool SjLjEHPass::insertSjLjEHSupport(Function &F) {
Type::getInt8PtrTy(F.getContext()), "",
EntryBB->getTerminator());
Value *DispatchVal = CallInst::Create(BuiltinSetjmpFn, SetjmpArg,
- "dispatch",
+ "",
EntryBB->getTerminator());
// Add a call to dispatch_setup after the setjmp call. This is expanded to any
@@ -554,6 +680,8 @@ bool SjLjEHPass::insertSjLjEHSupport(Function &F) {
if (Callee != SelectorFn && Callee != ExceptionFn
&& !CI->doesNotThrow())
insertCallSiteStore(CI, -1, CallSite);
+ } else if (ResumeInst *RI = dyn_cast<ResumeInst>(I)) {
+ insertCallSiteStore(RI, -1, CallSite);
}
}
@@ -582,7 +710,317 @@ bool SjLjEHPass::insertSjLjEHSupport(Function &F) {
return true;
}
+/// setupFunctionContext - Allocate the function context on the stack and fill
+/// it with all of the data that we know at this point.
+Value *SjLjEHPass::
+setupFunctionContext(Function &F, ArrayRef<LandingPadInst*> LPads) {
+ BasicBlock *EntryBB = F.begin();
+
+ // Create an alloca for the incoming jump buffer ptr and the new jump buffer
+ // that needs to be restored on all exits from the function. This is an alloca
+ // because the value needs to be added to the global context list.
+ unsigned Align =
+ TLI->getTargetData()->getPrefTypeAlignment(FunctionContextTy);
+ AllocaInst *FuncCtx =
+ new AllocaInst(FunctionContextTy, 0, Align, "fn_context", EntryBB->begin());
+
+ // Fill in the function context structure.
+ Value *Idxs[2];
+ Type *Int32Ty = Type::getInt32Ty(F.getContext());
+ Value *Zero = ConstantInt::get(Int32Ty, 0);
+ Value *One = ConstantInt::get(Int32Ty, 1);
+
+ // Keep around a reference to the call_site field.
+ Idxs[0] = Zero;
+ Idxs[1] = One;
+ CallSite = GetElementPtrInst::Create(FuncCtx, Idxs, "call_site",
+ EntryBB->getTerminator());
+
+ // Reference the __data field.
+ Idxs[1] = ConstantInt::get(Int32Ty, 2);
+ Value *FCData = GetElementPtrInst::Create(FuncCtx, Idxs, "__data",
+ EntryBB->getTerminator());
+
+ // The exception value comes back in context->__data[0].
+ Idxs[1] = Zero;
+ Value *ExceptionAddr = GetElementPtrInst::Create(FCData, Idxs,
+ "exception_gep",
+ EntryBB->getTerminator());
+
+ // The exception selector comes back in context->__data[1].
+ Idxs[1] = One;
+ Value *SelectorAddr = GetElementPtrInst::Create(FCData, Idxs,
+ "exn_selector_gep",
+ EntryBB->getTerminator());
+
+ for (unsigned I = 0, E = LPads.size(); I != E; ++I) {
+ LandingPadInst *LPI = LPads[I];
+ IRBuilder<> Builder(LPI->getParent()->getFirstInsertionPt());
+
+ Value *ExnVal = Builder.CreateLoad(ExceptionAddr, true, "exn_val");
+ ExnVal = Builder.CreateIntToPtr(ExnVal, Type::getInt8PtrTy(F.getContext()));
+ Value *SelVal = Builder.CreateLoad(SelectorAddr, true, "exn_selector_val");
+
+ Type *LPadType = LPI->getType();
+ Value *LPadVal = UndefValue::get(LPadType);
+ LPadVal = Builder.CreateInsertValue(LPadVal, ExnVal, 0, "lpad.val");
+ LPadVal = Builder.CreateInsertValue(LPadVal, SelVal, 1, "lpad.val");
+
+ LPI->replaceAllUsesWith(LPadVal);
+ }
+
+ // Personality function
+ Idxs[1] = ConstantInt::get(Int32Ty, 3);
+ if (!PersonalityFn)
+ PersonalityFn = LPads[0]->getPersonalityFn();
+ Value *PersonalityFieldPtr =
+ GetElementPtrInst::Create(FuncCtx, Idxs, "pers_fn_gep",
+ EntryBB->getTerminator());
+ new StoreInst(PersonalityFn, PersonalityFieldPtr, true,
+ EntryBB->getTerminator());
+
+ // LSDA address
+ Idxs[1] = ConstantInt::get(Int32Ty, 4);
+ Value *LSDAFieldPtr = GetElementPtrInst::Create(FuncCtx, Idxs, "lsda_gep",
+ EntryBB->getTerminator());
+ Value *LSDA = CallInst::Create(LSDAAddrFn, "lsda_addr",
+ EntryBB->getTerminator());
+ new StoreInst(LSDA, LSDAFieldPtr, true, EntryBB->getTerminator());
+
+ return FuncCtx;
+}
+
+/// lowerIncomingArguments - To avoid having to handle incoming arguments
+/// specially, we lower each arg to a copy instruction in the entry block. This
+/// ensures that the argument value itself cannot be live out of the entry
+/// block.
+void SjLjEHPass::lowerIncomingArguments(Function &F) {
+ BasicBlock::iterator AfterAllocaInsPt = F.begin()->begin();
+ while (isa<AllocaInst>(AfterAllocaInsPt) &&
+ isa<ConstantInt>(cast<AllocaInst>(AfterAllocaInsPt)->getArraySize()))
+ ++AfterAllocaInsPt;
+
+ for (Function::arg_iterator
+ AI = F.arg_begin(), AE = F.arg_end(); AI != AE; ++AI) {
+ Type *Ty = AI->getType();
+
+ // Aggregate types can't be cast, but are legal argument types, so we have
+ // to handle them differently. We use an extract/insert pair as a
+ // lightweight method to achieve the same goal.
+ if (isa<StructType>(Ty) || isa<ArrayType>(Ty) || isa<VectorType>(Ty)) {
+ Instruction *EI = ExtractValueInst::Create(AI, 0, "", AfterAllocaInsPt);
+ Instruction *NI = InsertValueInst::Create(AI, EI, 0);
+ NI->insertAfter(EI);
+ AI->replaceAllUsesWith(NI);
+
+ // Set the operand of the instructions back to the AllocaInst.
+ EI->setOperand(0, AI);
+ NI->setOperand(0, AI);
+ } else {
+ // This is always a no-op cast because we're casting AI to AI->getType()
+ // so src and destination types are identical. BitCast is the only
+ // possibility.
+ CastInst *NC =
+ new BitCastInst(AI, AI->getType(), AI->getName() + ".tmp",
+ AfterAllocaInsPt);
+ AI->replaceAllUsesWith(NC);
+
+ // Set the operand of the cast instruction back to the AllocaInst.
+ // Normally it's forbidden to replace a CastInst's operand because it
+ // could cause the opcode to reflect an illegal conversion. However, we're
+ // replacing it here with the same value it was constructed with. We do
+ // this because the above replaceAllUsesWith() clobbered the operand, but
+ // we want this one to remain.
+ NC->setOperand(0, AI);
+ }
+ }
+}
+
+/// lowerAcrossUnwindEdges - Find all variables which are alive across an unwind
+/// edge and spill them.
+void SjLjEHPass::lowerAcrossUnwindEdges(Function &F,
+ ArrayRef<InvokeInst*> Invokes) {
+ // Finally, scan the code looking for instructions with bad live ranges.
+ for (Function::iterator
+ BB = F.begin(), BBE = F.end(); BB != BBE; ++BB) {
+ for (BasicBlock::iterator
+ II = BB->begin(), IIE = BB->end(); II != IIE; ++II) {
+ // Ignore obvious cases we don't have to handle. In particular, most
+ // instructions either have no uses or only have a single use inside the
+ // current block. Ignore them quickly.
+ Instruction *Inst = II;
+ if (Inst->use_empty()) continue;
+ if (Inst->hasOneUse() &&
+ cast<Instruction>(Inst->use_back())->getParent() == BB &&
+ !isa<PHINode>(Inst->use_back())) continue;
+
+ // If this is an alloca in the entry block, it's not a real register
+ // value.
+ if (AllocaInst *AI = dyn_cast<AllocaInst>(Inst))
+ if (isa<ConstantInt>(AI->getArraySize()) && BB == F.begin())
+ continue;
+
+ // Avoid iterator invalidation by copying users to a temporary vector.
+ SmallVector<Instruction*, 16> Users;
+ for (Value::use_iterator
+ UI = Inst->use_begin(), E = Inst->use_end(); UI != E; ++UI) {
+ Instruction *User = cast<Instruction>(*UI);
+ if (User->getParent() != BB || isa<PHINode>(User))
+ Users.push_back(User);
+ }
+
+ // Find all of the blocks that this value is live in.
+ std::set<BasicBlock*> LiveBBs;
+ LiveBBs.insert(Inst->getParent());
+ while (!Users.empty()) {
+ Instruction *U = Users.back();
+ Users.pop_back();
+
+ if (!isa<PHINode>(U)) {
+ MarkBlocksLiveIn(U->getParent(), LiveBBs);
+ } else {
+ // Uses for a PHI node occur in their predecessor block.
+ PHINode *PN = cast<PHINode>(U);
+ for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
+ if (PN->getIncomingValue(i) == Inst)
+ MarkBlocksLiveIn(PN->getIncomingBlock(i), LiveBBs);
+ }
+ }
+
+ // Now that we know all of the blocks that this thing is live in, see if
+ // it includes any of the unwind locations.
+ bool NeedsSpill = false;
+ for (unsigned i = 0, e = Invokes.size(); i != e; ++i) {
+ BasicBlock *UnwindBlock = Invokes[i]->getUnwindDest();
+ if (UnwindBlock != BB && LiveBBs.count(UnwindBlock)) {
+ NeedsSpill = true;
+ }
+ }
+
+ // If we decided we need a spill, do it.
+ // FIXME: Spilling this way is overkill, as it forces all uses of
+ // the value to be reloaded from the stack slot, even those that aren't
+ // in the unwind blocks. We should be more selective.
+ if (NeedsSpill) {
+ ++NumSpilled;
+ DemoteRegToStack(*Inst, true);
+ }
+ }
+ }
+}
+
+/// setupEntryBlockAndCallSites - Setup the entry block by creating and filling
+/// the function context and marking the call sites with the appropriate
+/// values. These values are used by the DWARF EH emitter.
+bool SjLjEHPass::setupEntryBlockAndCallSites(Function &F) {
+ SmallVector<ReturnInst*, 16> Returns;
+ SmallVector<InvokeInst*, 16> Invokes;
+ SmallVector<LandingPadInst*, 16> LPads;
+
+ // Look through the terminators of the basic blocks to find invokes.
+ for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB)
+ if (InvokeInst *II = dyn_cast<InvokeInst>(BB->getTerminator())) {
+ Invokes.push_back(II);
+ LPads.push_back(II->getUnwindDest()->getLandingPadInst());
+ } else if (ReturnInst *RI = dyn_cast<ReturnInst>(BB->getTerminator())) {
+ Returns.push_back(RI);
+ }
+
+ if (Invokes.empty()) return false;
+
+ lowerIncomingArguments(F);
+ lowerAcrossUnwindEdges(F, Invokes);
+
+ Value *FuncCtx = setupFunctionContext(F, LPads);
+ BasicBlock *EntryBB = F.begin();
+ Type *Int32Ty = Type::getInt32Ty(F.getContext());
+
+ Value *Idxs[2] = {
+ ConstantInt::get(Int32Ty, 0), 0
+ };
+
+ // Get a reference to the jump buffer.
+ Idxs[1] = ConstantInt::get(Int32Ty, 5);
+ Value *JBufPtr = GetElementPtrInst::Create(FuncCtx, Idxs, "jbuf_gep",
+ EntryBB->getTerminator());
+
+ // Save the frame pointer.
+ Idxs[1] = ConstantInt::get(Int32Ty, 0);
+ Value *FramePtr = GetElementPtrInst::Create(JBufPtr, Idxs, "jbuf_fp_gep",
+ EntryBB->getTerminator());
+
+ Value *Val = CallInst::Create(FrameAddrFn,
+ ConstantInt::get(Int32Ty, 0),
+ "fp",
+ EntryBB->getTerminator());
+ new StoreInst(Val, FramePtr, true, EntryBB->getTerminator());
+
+ // Save the stack pointer.
+ Idxs[1] = ConstantInt::get(Int32Ty, 2);
+ Value *StackPtr = GetElementPtrInst::Create(JBufPtr, Idxs, "jbuf_sp_gep",
+ EntryBB->getTerminator());
+
+ Val = CallInst::Create(StackAddrFn, "sp", EntryBB->getTerminator());
+ new StoreInst(Val, StackPtr, true, EntryBB->getTerminator());
+
+ // Call the setjmp intrinsic. It fills in the rest of the jmpbuf.
+ Value *SetjmpArg = CastInst::Create(Instruction::BitCast, JBufPtr,
+ Type::getInt8PtrTy(F.getContext()), "",
+ EntryBB->getTerminator());
+ CallInst::Create(BuiltinSetjmpFn, SetjmpArg, "", EntryBB->getTerminator());
+
+ // Store a pointer to the function context so that the back-end will know
+ // where to look for it.
+ Value *FuncCtxArg = CastInst::Create(Instruction::BitCast, FuncCtx,
+ Type::getInt8PtrTy(F.getContext()), "",
+ EntryBB->getTerminator());
+ CallInst::Create(FuncCtxFn, FuncCtxArg, "", EntryBB->getTerminator());
+
+ // At this point, we are all set up, update the invoke instructions to mark
+ // their call_site values.
+ for (unsigned I = 0, E = Invokes.size(); I != E; ++I) {
+ insertCallSiteStore(Invokes[I], I + 1, CallSite);
+
+ ConstantInt *CallSiteNum =
+ ConstantInt::get(Type::getInt32Ty(F.getContext()), I + 1);
+
+ // Record the call site value for the back end so it stays associated with
+ // the invoke.
+ CallInst::Create(CallSiteFn, CallSiteNum, "", Invokes[I]);
+ }
+
+ // Mark call instructions that aren't nounwind as no-action (call_site ==
+ // -1). Skip the entry block: at that point no function context has been
+ // created for this function yet, so any unexpected exception unwinds
+ // directly to the caller's context, which is exactly what we want, and
+ // there is nothing to do here.
+ for (Function::iterator BB = F.begin(), E = F.end(); ++BB != E;)
+ for (BasicBlock::iterator I = BB->begin(), end = BB->end(); I != end; ++I)
+ if (CallInst *CI = dyn_cast<CallInst>(I)) {
+ if (!CI->doesNotThrow())
+ insertCallSiteStore(CI, -1, CallSite);
+ } else if (ResumeInst *RI = dyn_cast<ResumeInst>(I)) {
+ insertCallSiteStore(RI, -1, CallSite);
+ }
+
+ // Register the function context and make sure it's known not to throw.
+ CallInst *Register = CallInst::Create(RegisterFn, FuncCtx, "",
+ EntryBB->getTerminator());
+ Register->setDoesNotThrow();
+
+ // Finally, for any returns from this function, if this function contains an
+ // invoke, add a call to unregister the function context.
+ for (unsigned I = 0, E = Returns.size(); I != E; ++I)
+ CallInst::Create(UnregisterFn, FuncCtx, "", Returns[I]);
+
+ return true;
+}
+
bool SjLjEHPass::runOnFunction(Function &F) {
- bool Res = insertSjLjEHSupport(F);
+ bool Res = false;
+ if (!DisableOldSjLjEH)
+ Res = insertSjLjEHSupport(F);
+ else
+ Res = setupEntryBlockAndCallSites(F);
return Res;
}
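For orientation, the GEP indices above address fixed slots of the function context: index 5 selects the jump buffer, and within it index 0 holds the frame pointer and index 2 the stack pointer. A rough C++ picture of that layout is sketched below; it is an assumption for illustration only, since the authoritative layout is whatever setupFunctionContext and the SjLj runtime agree on, and the fields preceding JBuf are elided.

  struct SjLjFunctionContext {   // hypothetical sketch, not the real type
    // ... five runtime bookkeeping fields (indices 0-4) elided ...
    void *JBuf[5];               // index 5: JBuf[0] = frame pointer,
                                 // JBuf[2] = stack pointer, remaining
                                 // slots filled by llvm.eh.sjlj.setjmp
  };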
diff --git a/lib/CodeGen/SpillPlacement.cpp b/lib/CodeGen/SpillPlacement.cpp
index 6949618..6f33f54 100644
--- a/lib/CodeGen/SpillPlacement.cpp
+++ b/lib/CodeGen/SpillPlacement.cpp
@@ -220,6 +220,7 @@ void SpillPlacement::addConstraints(ArrayRef<BlockConstraint> LiveBlocks) {
0, // DontCare,
1, // PrefReg,
-1, // PrefSpill
+ 0, // PrefBoth
-HUGE_VALF // MustSpill
};
@@ -239,6 +240,22 @@ void SpillPlacement::addConstraints(ArrayRef<BlockConstraint> LiveBlocks) {
}
}
+/// addPrefSpill - Same as addConstraints(PrefSpill).
+void SpillPlacement::addPrefSpill(ArrayRef<unsigned> Blocks, bool Strong) {
+ for (ArrayRef<unsigned>::iterator I = Blocks.begin(), E = Blocks.end();
+ I != E; ++I) {
+ float Freq = getBlockFrequency(*I);
+ if (Strong)
+ Freq += Freq;
+ unsigned ib = bundles->getBundle(*I, 0);
+ unsigned ob = bundles->getBundle(*I, 1);
+ activate(ib);
+ activate(ob);
+ nodes[ib].addBias(-Freq, 1);
+ nodes[ob].addBias(-Freq, 0);
+ }
+}
+
void SpillPlacement::addLinks(ArrayRef<unsigned> Links) {
for (ArrayRef<unsigned>::iterator I = Links.begin(), E = Links.end(); I != E;
++I) {
diff --git a/lib/CodeGen/SpillPlacement.h b/lib/CodeGen/SpillPlacement.h
index 6952ad8..fc412f8 100644
--- a/lib/CodeGen/SpillPlacement.h
+++ b/lib/CodeGen/SpillPlacement.h
@@ -71,6 +71,7 @@ public:
DontCare, ///< Block doesn't care / variable not live.
PrefReg, ///< Block entry/exit prefers a register.
PrefSpill, ///< Block entry/exit prefers a stack slot.
+ PrefBoth, ///< Block entry prefers both register and stack.
MustSpill ///< A register is impossible, variable must be spilled.
};
@@ -79,6 +80,11 @@ public:
unsigned Number; ///< Basic block number (from MBB::getNumber()).
BorderConstraint Entry : 8; ///< Constraint on block entry.
BorderConstraint Exit : 8; ///< Constraint on block exit.
+
+ /// True when this block changes the value of the live range. This means
+ /// the block has a non-PHI def. When this is false, a live-in value on
+ /// the stack can be live-out on the stack without inserting a spill.
+ bool ChangesValue;
};
/// prepare - Reset state and prepare for a new spill placement computation.
@@ -96,6 +102,14 @@ public:
/// live out.
void addConstraints(ArrayRef<BlockConstraint> LiveBlocks);
+ /// addPrefSpill - Add PrefSpill constraints to all blocks listed. This is
+ /// equivalent to calling addConstraint with identical BlockConstraints with
+ /// Entry = Exit = PrefSpill, and ChangesValue = false.
+ ///
+ /// @param Blocks Array of block numbers that prefer to spill in and out.
+ /// @param Strong When true, double the negative bias for these blocks.
+ void addPrefSpill(ArrayRef<unsigned> Blocks, bool Strong);
+
/// addLinks - Add transparent blocks with the given numbers.
void addLinks(ArrayRef<unsigned> Links);
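A usage sketch for the new entry point (the block numbers and the SpillPlacer handle are hypothetical; Strong = true applies the doubled bias from the .cpp hunk above):

  SmallVector<unsigned, 8> SpillBlocks;
  SpillBlocks.push_back(4);      // blocks that should prefer stack in/out
  SpillBlocks.push_back(7);
  SpillBlocks.push_back(9);
  SpillPlacer->addPrefSpill(SpillBlocks, /*Strong=*/true);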
diff --git a/lib/CodeGen/SplitKit.cpp b/lib/CodeGen/SplitKit.cpp
index 761cab7..6362780 100644
--- a/lib/CodeGen/SplitKit.cpp
+++ b/lib/CodeGen/SplitKit.cpp
@@ -20,6 +20,7 @@
#include "llvm/CodeGen/LiveIntervalAnalysis.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
@@ -178,45 +179,55 @@ bool SplitAnalysis::calcLiveBlockInfo() {
return false;
} else {
// This block has uses. Find the first and last uses in the block.
- BI.FirstUse = *UseI;
- assert(BI.FirstUse >= Start);
+ BI.FirstInstr = *UseI;
+ assert(BI.FirstInstr >= Start);
do ++UseI;
while (UseI != UseE && *UseI < Stop);
- BI.LastUse = UseI[-1];
- assert(BI.LastUse < Stop);
+ BI.LastInstr = UseI[-1];
+ assert(BI.LastInstr < Stop);
// LVI is the first live segment overlapping MBB.
BI.LiveIn = LVI->start <= Start;
+ // When not live in, the first use should be a def.
+ if (!BI.LiveIn) {
+ assert(LVI->start == LVI->valno->def && "Dangling LiveRange start");
+ assert(LVI->start == BI.FirstInstr && "First instr should be a def");
+ BI.FirstDef = BI.FirstInstr;
+ }
+
// Look for gaps in the live range.
BI.LiveOut = true;
while (LVI->end < Stop) {
SlotIndex LastStop = LVI->end;
if (++LVI == LVE || LVI->start >= Stop) {
BI.LiveOut = false;
- BI.LastUse = LastStop;
+ BI.LastInstr = LastStop;
break;
}
+
if (LastStop < LVI->start) {
// There is a gap in the live range. Create duplicate entries for the
// live-in snippet and the live-out snippet.
++NumGapBlocks;
// Push the Live-in part.
- BI.LiveThrough = false;
BI.LiveOut = false;
UseBlocks.push_back(BI);
- UseBlocks.back().LastUse = LastStop;
+ UseBlocks.back().LastInstr = LastStop;
// Set up BI for the live-out part.
BI.LiveIn = false;
BI.LiveOut = true;
- BI.FirstUse = LVI->start;
+ BI.FirstInstr = BI.FirstDef = LVI->start;
}
+
+ // A LiveRange that starts in the middle of the block must be a def.
+ assert(LVI->start == LVI->valno->def && "Dangling LiveRange start");
+ if (!BI.FirstDef)
+ BI.FirstDef = LVI->start;
}
- // Don't set LiveThrough when the block has a gap.
- BI.LiveThrough = BI.LiveIn && BI.LiveOut;
UseBlocks.push_back(BI);
// LVI is now at LVE or LVI->end >= Stop.
@@ -299,17 +310,21 @@ SplitEditor::SplitEditor(SplitAnalysis &sa,
TRI(*vrm.getMachineFunction().getTarget().getRegisterInfo()),
Edit(0),
OpenIdx(0),
+ SpillMode(SM_Partition),
RegAssign(Allocator)
{}
-void SplitEditor::reset(LiveRangeEdit &lre) {
- Edit = &lre;
+void SplitEditor::reset(LiveRangeEdit &LRE, ComplementSpillMode SM) {
+ Edit = &LRE;
+ SpillMode = SM;
OpenIdx = 0;
RegAssign.clear();
Values.clear();
- // We don't need to clear LiveOutCache, only LiveOutSeen entries are read.
- LiveOutSeen.clear();
+ // Reset the LiveRangeCalc instances needed for this spill mode.
+ LRCalc[0].reset(&VRM.getMachineFunction());
+ if (SpillMode)
+ LRCalc[1].reset(&VRM.getMachineFunction());
// We don't need an AliasAnalysis since we will only be performing
// cheap-as-a-copy remats anyway.
@@ -340,7 +355,8 @@ VNInfo *SplitEditor::defValue(unsigned RegIdx,
// Use insert for lookup, so we can add missing values with a second lookup.
std::pair<ValueMap::iterator, bool> InsP =
- Values.insert(std::make_pair(std::make_pair(RegIdx, ParentVNI->id), VNI));
+ Values.insert(std::make_pair(std::make_pair(RegIdx, ParentVNI->id),
+ ValueForcePair(VNI, false)));
// This was the first time (RegIdx, ParentVNI) was mapped.
// Keep it as a simple def without any liveness.
@@ -348,11 +364,11 @@ VNInfo *SplitEditor::defValue(unsigned RegIdx,
return VNI;
// If the previous value was a simple mapping, add liveness for it now.
- if (VNInfo *OldVNI = InsP.first->second) {
+ if (VNInfo *OldVNI = InsP.first->second.getPointer()) {
SlotIndex Def = OldVNI->def;
LI->addRange(LiveRange(Def, Def.getNextSlot(), OldVNI));
- // No longer a simple mapping.
- InsP.first->second = 0;
+ // No longer a simple mapping. Switch to a complex, non-forced mapping.
+ InsP.first->second = ValueForcePair();
}
// This is a complex mapping, add liveness for VNI
@@ -362,230 +378,24 @@ VNInfo *SplitEditor::defValue(unsigned RegIdx,
return VNI;
}
-void SplitEditor::markComplexMapped(unsigned RegIdx, const VNInfo *ParentVNI) {
+void SplitEditor::forceRecompute(unsigned RegIdx, const VNInfo *ParentVNI) {
assert(ParentVNI && "Mapping NULL value");
- VNInfo *&VNI = Values[std::make_pair(RegIdx, ParentVNI->id)];
+ ValueForcePair &VFP = Values[std::make_pair(RegIdx, ParentVNI->id)];
+ VNInfo *VNI = VFP.getPointer();
- // ParentVNI was either unmapped or already complex mapped. Either way.
- if (!VNI)
+ // ParentVNI was either unmapped or already complex mapped. Either way, just
+ // set the force bit.
+ if (!VNI) {
+ VFP.setInt(true);
return;
+ }
// This was previously a single mapping. Make sure the old def is represented
// by a trivial live range.
SlotIndex Def = VNI->def;
Edit->get(RegIdx)->addRange(LiveRange(Def, Def.getNextSlot(), VNI));
- VNI = 0;
-}
-
-// extendRange - Extend the live range to reach Idx.
-// Potentially create phi-def values.
-void SplitEditor::extendRange(unsigned RegIdx, SlotIndex Idx) {
- assert(Idx.isValid() && "Invalid SlotIndex");
- MachineBasicBlock *IdxMBB = LIS.getMBBFromIndex(Idx);
- assert(IdxMBB && "No MBB at Idx");
- LiveInterval *LI = Edit->get(RegIdx);
-
- // Is there a def in the same MBB we can extend?
- if (LI->extendInBlock(LIS.getMBBStartIdx(IdxMBB), Idx))
- return;
-
- // Now for the fun part. We know that ParentVNI potentially has multiple defs,
- // and we may need to create even more phi-defs to preserve VNInfo SSA form.
- // Perform a search for all predecessor blocks where we know the dominating
- // VNInfo.
- VNInfo *VNI = findReachingDefs(LI, IdxMBB, Idx.getNextSlot());
-
- // When there were multiple different values, we may need new PHIs.
- if (!VNI)
- return updateSSA();
-
- // Poor man's SSA update for the single-value case.
- LiveOutPair LOP(VNI, MDT[LIS.getMBBFromIndex(VNI->def)]);
- for (SmallVectorImpl<LiveInBlock>::iterator I = LiveInBlocks.begin(),
- E = LiveInBlocks.end(); I != E; ++I) {
- MachineBasicBlock *MBB = I->DomNode->getBlock();
- SlotIndex Start = LIS.getMBBStartIdx(MBB);
- if (I->Kill.isValid())
- LI->addRange(LiveRange(Start, I->Kill, VNI));
- else {
- LiveOutCache[MBB] = LOP;
- LI->addRange(LiveRange(Start, LIS.getMBBEndIdx(MBB), VNI));
- }
- }
-}
-
-/// findReachingDefs - Search the CFG for known live-out values.
-/// Add required live-in blocks to LiveInBlocks.
-VNInfo *SplitEditor::findReachingDefs(LiveInterval *LI,
- MachineBasicBlock *KillMBB,
- SlotIndex Kill) {
- // Initialize the live-out cache the first time it is needed.
- if (LiveOutSeen.empty()) {
- unsigned N = VRM.getMachineFunction().getNumBlockIDs();
- LiveOutSeen.resize(N);
- LiveOutCache.resize(N);
- }
-
- // Blocks where LI should be live-in.
- SmallVector<MachineBasicBlock*, 16> WorkList(1, KillMBB);
-
- // Remember if we have seen more than one value.
- bool UniqueVNI = true;
- VNInfo *TheVNI = 0;
-
- // Using LiveOutCache as a visited set, perform a BFS for all reaching defs.
- for (unsigned i = 0; i != WorkList.size(); ++i) {
- MachineBasicBlock *MBB = WorkList[i];
- assert(!MBB->pred_empty() && "Value live-in to entry block?");
- for (MachineBasicBlock::pred_iterator PI = MBB->pred_begin(),
- PE = MBB->pred_end(); PI != PE; ++PI) {
- MachineBasicBlock *Pred = *PI;
- LiveOutPair &LOP = LiveOutCache[Pred];
-
- // Is this a known live-out block?
- if (LiveOutSeen.test(Pred->getNumber())) {
- if (VNInfo *VNI = LOP.first) {
- if (TheVNI && TheVNI != VNI)
- UniqueVNI = false;
- TheVNI = VNI;
- }
- continue;
- }
-
- // First time. LOP is garbage and must be cleared below.
- LiveOutSeen.set(Pred->getNumber());
-
- // Does Pred provide a live-out value?
- SlotIndex Start, Last;
- tie(Start, Last) = LIS.getSlotIndexes()->getMBBRange(Pred);
- Last = Last.getPrevSlot();
- VNInfo *VNI = LI->extendInBlock(Start, Last);
- LOP.first = VNI;
- if (VNI) {
- LOP.second = MDT[LIS.getMBBFromIndex(VNI->def)];
- if (TheVNI && TheVNI != VNI)
- UniqueVNI = false;
- TheVNI = VNI;
- continue;
- }
- LOP.second = 0;
-
- // No, we need a live-in value for Pred as well
- if (Pred != KillMBB)
- WorkList.push_back(Pred);
- else
- // Loopback to KillMBB, so value is really live through.
- Kill = SlotIndex();
- }
- }
-
- // Transfer WorkList to LiveInBlocks in reverse order.
- // This ordering works best with updateSSA().
- LiveInBlocks.clear();
- LiveInBlocks.reserve(WorkList.size());
- while(!WorkList.empty())
- LiveInBlocks.push_back(MDT[WorkList.pop_back_val()]);
-
- // The kill block may not be live-through.
- assert(LiveInBlocks.back().DomNode->getBlock() == KillMBB);
- LiveInBlocks.back().Kill = Kill;
-
- return UniqueVNI ? TheVNI : 0;
-}
-
-void SplitEditor::updateSSA() {
- // This is essentially the same iterative algorithm that SSAUpdater uses,
- // except we already have a dominator tree, so we don't have to recompute it.
- unsigned Changes;
- do {
- Changes = 0;
- // Propagate live-out values down the dominator tree, inserting phi-defs
- // when necessary.
- for (SmallVectorImpl<LiveInBlock>::iterator I = LiveInBlocks.begin(),
- E = LiveInBlocks.end(); I != E; ++I) {
- MachineDomTreeNode *Node = I->DomNode;
- // Skip block if the live-in value has already been determined.
- if (!Node)
- continue;
- MachineBasicBlock *MBB = Node->getBlock();
- MachineDomTreeNode *IDom = Node->getIDom();
- LiveOutPair IDomValue;
-
- // We need a live-in value to a block with no immediate dominator?
- // This is probably an unreachable block that has survived somehow.
- bool needPHI = !IDom || !LiveOutSeen.test(IDom->getBlock()->getNumber());
-
- // IDom dominates all of our predecessors, but it may not be their
- // immediate dominator. Check if any of them have live-out values that are
- // properly dominated by IDom. If so, we need a phi-def here.
- if (!needPHI) {
- IDomValue = LiveOutCache[IDom->getBlock()];
- for (MachineBasicBlock::pred_iterator PI = MBB->pred_begin(),
- PE = MBB->pred_end(); PI != PE; ++PI) {
- LiveOutPair Value = LiveOutCache[*PI];
- if (!Value.first || Value.first == IDomValue.first)
- continue;
- // This predecessor is carrying something other than IDomValue.
- // It could be because IDomValue hasn't propagated yet, or it could be
- // because MBB is in the dominance frontier of that value.
- if (MDT.dominates(IDom, Value.second)) {
- needPHI = true;
- break;
- }
- }
- }
-
- // The value may be live-through even if Kill is set, as can happen when
- // we are called from extendRange. In that case LiveOutSeen is true, and
- // LiveOutCache indicates a foreign or missing value.
- LiveOutPair &LOP = LiveOutCache[MBB];
-
- // Create a phi-def if required.
- if (needPHI) {
- ++Changes;
- SlotIndex Start = LIS.getMBBStartIdx(MBB);
- unsigned RegIdx = RegAssign.lookup(Start);
- LiveInterval *LI = Edit->get(RegIdx);
- VNInfo *VNI = LI->getNextValue(Start, 0, LIS.getVNInfoAllocator());
- VNI->setIsPHIDef(true);
- I->Value = VNI;
- // This block is done, we know the final value.
- I->DomNode = 0;
- if (I->Kill.isValid())
- LI->addRange(LiveRange(Start, I->Kill, VNI));
- else {
- LI->addRange(LiveRange(Start, LIS.getMBBEndIdx(MBB), VNI));
- LOP = LiveOutPair(VNI, Node);
- }
- } else if (IDomValue.first) {
- // No phi-def here. Remember incoming value.
- I->Value = IDomValue.first;
- if (I->Kill.isValid())
- continue;
- // Propagate IDomValue if needed:
- // MBB is live-out and doesn't define its own value.
- if (LOP.second != Node && LOP.first != IDomValue.first) {
- ++Changes;
- LOP = IDomValue;
- }
- }
- }
- } while (Changes);
-
- // The values in LiveInBlocks are now accurate. No more phi-defs are needed
- // for these blocks, so we can color the live ranges.
- for (SmallVectorImpl<LiveInBlock>::iterator I = LiveInBlocks.begin(),
- E = LiveInBlocks.end(); I != E; ++I) {
- if (!I->DomNode)
- continue;
- assert(I->Value && "No live-in value found");
- MachineBasicBlock *MBB = I->DomNode->getBlock();
- SlotIndex Start = LIS.getMBBStartIdx(MBB);
- unsigned RegIdx = RegAssign.lookup(Start);
- LiveInterval *LI = Edit->get(RegIdx);
- LI->addRange(LiveRange(Start, I->Kill.isValid() ?
- I->Kill : LIS.getMBBEndIdx(MBB), I->Value));
- }
+ // Mark as complex mapped, forced.
+ VFP = ValueForcePair(0, true);
}
VNInfo *SplitEditor::defFromParent(unsigned RegIdx,
@@ -710,17 +520,28 @@ SlotIndex SplitEditor::leaveIntvAfter(SlotIndex Idx) {
DEBUG(dbgs() << " leaveIntvAfter " << Idx);
// The interval must be live beyond the instruction at Idx.
- Idx = Idx.getBoundaryIndex();
- VNInfo *ParentVNI = Edit->getParent().getVNInfoAt(Idx);
+ SlotIndex Boundary = Idx.getBoundaryIndex();
+ VNInfo *ParentVNI = Edit->getParent().getVNInfoAt(Boundary);
if (!ParentVNI) {
DEBUG(dbgs() << ": not live\n");
- return Idx.getNextSlot();
+ return Boundary.getNextSlot();
}
DEBUG(dbgs() << ": valno " << ParentVNI->id << '\n');
-
- MachineInstr *MI = LIS.getInstructionFromIndex(Idx);
+ MachineInstr *MI = LIS.getInstructionFromIndex(Boundary);
assert(MI && "No instruction at index");
- VNInfo *VNI = defFromParent(0, ParentVNI, Idx, *MI->getParent(),
+
+ // In spill mode, make live ranges as short as possible by inserting the copy
+ // before MI. This is only possible if that instruction doesn't redefine the
+ // value. The inserted COPY is not a kill, and we don't need to recompute
+ // the source live range. The spiller also won't try to hoist this copy.
+ if (SpillMode && !SlotIndex::isSameInstr(ParentVNI->def, Idx) &&
+ MI->readsVirtualRegister(Edit->getReg())) {
+ forceRecompute(0, ParentVNI);
+ defFromParent(0, ParentVNI, Idx, *MI->getParent(), MI);
+ return Idx;
+ }
+
+ VNInfo *VNI = defFromParent(0, ParentVNI, Boundary, *MI->getParent(),
llvm::next(MachineBasicBlock::iterator(MI)));
return VNI->def;
}
@@ -730,7 +551,7 @@ SlotIndex SplitEditor::leaveIntvBefore(SlotIndex Idx) {
DEBUG(dbgs() << " leaveIntvBefore " << Idx);
// The interval must be live into the instruction at Idx.
- Idx = Idx.getBoundaryIndex();
+ Idx = Idx.getBaseIndex();
VNInfo *ParentVNI = Edit->getParent().getVNInfoAt(Idx);
if (!ParentVNI) {
DEBUG(dbgs() << ": not live\n");
@@ -770,19 +591,219 @@ void SplitEditor::overlapIntv(SlotIndex Start, SlotIndex End) {
assert(LIS.getMBBFromIndex(Start) == LIS.getMBBFromIndex(End) &&
"Range cannot span basic blocks");
- // The complement interval will be extended as needed by extendRange().
+ // The complement interval will be extended as needed by LRCalc.extend().
if (ParentVNI)
- markComplexMapped(0, ParentVNI);
+ forceRecompute(0, ParentVNI);
DEBUG(dbgs() << " overlapIntv [" << Start << ';' << End << "):");
RegAssign.insert(Start, End, OpenIdx);
DEBUG(dump());
}
+//===----------------------------------------------------------------------===//
+// Spill modes
+//===----------------------------------------------------------------------===//
+
+void SplitEditor::removeBackCopies(SmallVectorImpl<VNInfo*> &Copies) {
+ LiveInterval *LI = Edit->get(0);
+ DEBUG(dbgs() << "Removing " << Copies.size() << " back-copies.\n");
+ RegAssignMap::iterator AssignI;
+ AssignI.setMap(RegAssign);
+
+ for (unsigned i = 0, e = Copies.size(); i != e; ++i) {
+ VNInfo *VNI = Copies[i];
+ SlotIndex Def = VNI->def;
+ MachineInstr *MI = LIS.getInstructionFromIndex(Def);
+ assert(MI && "No instruction for back-copy");
+
+ MachineBasicBlock *MBB = MI->getParent();
+ MachineBasicBlock::iterator MBBI(MI);
+ bool AtBegin;
+ do AtBegin = MBBI == MBB->begin();
+ while (!AtBegin && (--MBBI)->isDebugValue());
+
+ DEBUG(dbgs() << "Removing " << Def << '\t' << *MI);
+ LI->removeValNo(VNI);
+ LIS.RemoveMachineInstrFromMaps(MI);
+ MI->eraseFromParent();
+
+ // Adjust RegAssign if a register assignment is killed at VNI->def. We
+ // want to avoid calculating the live range of the source register if
+ // possible.
+ AssignI.find(VNI->def.getPrevSlot());
+ if (!AssignI.valid() || AssignI.start() >= Def)
+ continue;
+ // If MI doesn't kill the assigned register, just leave it.
+ if (AssignI.stop() != Def)
+ continue;
+ unsigned RegIdx = AssignI.value();
+ if (AtBegin || !MBBI->readsVirtualRegister(Edit->getReg())) {
+ DEBUG(dbgs() << " cannot find simple kill of RegIdx " << RegIdx << '\n');
+ forceRecompute(RegIdx, Edit->getParent().getVNInfoAt(Def));
+ } else {
+ SlotIndex Kill = LIS.getInstructionIndex(MBBI).getDefIndex();
+ DEBUG(dbgs() << " move kill to " << Kill << '\t' << *MBBI);
+ AssignI.setStop(Kill);
+ }
+ }
+}
+
+MachineBasicBlock*
+SplitEditor::findShallowDominator(MachineBasicBlock *MBB,
+ MachineBasicBlock *DefMBB) {
+ if (MBB == DefMBB)
+ return MBB;
+ assert(MDT.dominates(DefMBB, MBB) && "MBB must be dominated by the def.");
+
+ const MachineLoopInfo &Loops = SA.Loops;
+ const MachineLoop *DefLoop = Loops.getLoopFor(DefMBB);
+ MachineDomTreeNode *DefDomNode = MDT[DefMBB];
+
+ // Best candidate so far.
+ MachineBasicBlock *BestMBB = MBB;
+ unsigned BestDepth = UINT_MAX;
+
+ for (;;) {
+ const MachineLoop *Loop = Loops.getLoopFor(MBB);
+
+ // MBB isn't in a loop, so it doesn't get any better. All dominators have a
+ // higher frequency by definition.
+ if (!Loop) {
+ DEBUG(dbgs() << "Def in BB#" << DefMBB->getNumber() << " dominates BB#"
+ << MBB->getNumber() << " at depth 0\n");
+ return MBB;
+ }
+
+ // We'll never be able to exit the DefLoop.
+ if (Loop == DefLoop) {
+ DEBUG(dbgs() << "Def in BB#" << DefMBB->getNumber() << " dominates BB#"
+ << MBB->getNumber() << " in the same loop\n");
+ return MBB;
+ }
+
+ // Least busy dominator seen so far.
+ unsigned Depth = Loop->getLoopDepth();
+ if (Depth < BestDepth) {
+ BestMBB = MBB;
+ BestDepth = Depth;
+ DEBUG(dbgs() << "Def in BB#" << DefMBB->getNumber() << " dominates BB#"
+ << MBB->getNumber() << " at depth " << Depth << '\n');
+ }
+
+ // Leave the loop by going to the immediate dominator of the loop header.
+ // This is a bigger stride than simply walking up the dominator tree.
+ MachineDomTreeNode *IDom = MDT[Loop->getHeader()]->getIDom();
+
+ // Too far up the dominator tree?
+ if (!IDom || !MDT.dominates(DefDomNode, IDom))
+ return BestMBB;
+
+ MBB = IDom->getBlock();
+ }
+}
+
+void SplitEditor::hoistCopiesForSize() {
+ // Get the complement interval, always RegIdx 0.
+ LiveInterval *LI = Edit->get(0);
+ LiveInterval *Parent = &Edit->getParent();
+
+ // Track the nearest common dominator for all back-copies for each ParentVNI,
+ // indexed by ParentVNI->id.
+ typedef std::pair<MachineBasicBlock*, SlotIndex> DomPair;
+ SmallVector<DomPair, 8> NearestDom(Parent->getNumValNums());
+
+ // Find the nearest common dominator for parent values with multiple
+ // back-copies. If a single back-copy dominates, put it in DomPair.second.
+ for (LiveInterval::vni_iterator VI = LI->vni_begin(), VE = LI->vni_end();
+ VI != VE; ++VI) {
+ VNInfo *VNI = *VI;
+ VNInfo *ParentVNI = Edit->getParent().getVNInfoAt(VNI->def);
+ assert(ParentVNI && "Parent not live at complement def");
+
+ // Don't hoist remats. The complement is probably going to disappear
+ // completely anyway.
+ if (Edit->didRematerialize(ParentVNI))
+ continue;
+
+ MachineBasicBlock *ValMBB = LIS.getMBBFromIndex(VNI->def);
+ DomPair &Dom = NearestDom[ParentVNI->id];
+
+ // Keep directly defined parent values. This is either a PHI or an
+ // instruction in the complement range. All other copies of ParentVNI
+ // should be eliminated.
+ if (VNI->def == ParentVNI->def) {
+ DEBUG(dbgs() << "Direct complement def at " << VNI->def << '\n');
+ Dom = DomPair(ValMBB, VNI->def);
+ continue;
+ }
+ // Skip the singly mapped values. There is nothing to gain from hoisting a
+ // single back-copy.
+ if (Values.lookup(std::make_pair(0, ParentVNI->id)).getPointer()) {
+ DEBUG(dbgs() << "Single complement def at " << VNI->def << '\n');
+ continue;
+ }
+
+ if (!Dom.first) {
+ // First time we see ParentVNI. VNI dominates itself.
+ Dom = DomPair(ValMBB, VNI->def);
+ } else if (Dom.first == ValMBB) {
+ // Two defs in the same block. Pick the earlier def.
+ if (!Dom.second.isValid() || VNI->def < Dom.second)
+ Dom.second = VNI->def;
+ } else {
+ // Different basic blocks. Check if one dominates.
+ MachineBasicBlock *Near =
+ MDT.findNearestCommonDominator(Dom.first, ValMBB);
+ if (Near == ValMBB)
+ // Def ValMBB dominates.
+ Dom = DomPair(ValMBB, VNI->def);
+ else if (Near != Dom.first)
+ // Neither dominates. Hoist to the common dominator; a new def is needed.
+ Dom = DomPair(Near, SlotIndex());
+ }
+
+ DEBUG(dbgs() << "Multi-mapped complement " << VNI->id << '@' << VNI->def
+ << " for parent " << ParentVNI->id << '@' << ParentVNI->def
+ << " hoist to BB#" << Dom.first->getNumber() << ' '
+ << Dom.second << '\n');
+ }
+
+ // Insert the hoisted copies.
+ for (unsigned i = 0, e = Parent->getNumValNums(); i != e; ++i) {
+ DomPair &Dom = NearestDom[i];
+ if (!Dom.first || Dom.second.isValid())
+ continue;
+ // This value needs a hoisted copy inserted at the end of Dom.first.
+ VNInfo *ParentVNI = Parent->getValNumInfo(i);
+ MachineBasicBlock *DefMBB = LIS.getMBBFromIndex(ParentVNI->def);
+ // Get a less loopy dominator than Dom.first.
+ Dom.first = findShallowDominator(Dom.first, DefMBB);
+ SlotIndex Last = LIS.getMBBEndIdx(Dom.first).getPrevSlot();
+ Dom.second =
+ defFromParent(0, ParentVNI, Last, *Dom.first,
+ LIS.getLastSplitPoint(Edit->getParent(), Dom.first))->def;
+ }
+
+ // Remove redundant back-copies that are now known to be dominated by another
+ // def with the same value.
+ SmallVector<VNInfo*, 8> BackCopies;
+ for (LiveInterval::vni_iterator VI = LI->vni_begin(), VE = LI->vni_end();
+ VI != VE; ++VI) {
+ VNInfo *VNI = *VI;
+ VNInfo *ParentVNI = Edit->getParent().getVNInfoAt(VNI->def);
+ const DomPair &Dom = NearestDom[ParentVNI->id];
+ if (!Dom.first || Dom.second == VNI->def)
+ continue;
+ BackCopies.push_back(VNI);
+ forceRecompute(0, ParentVNI);
+ }
+ removeBackCopies(BackCopies);
+}
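The per-value merge that fills NearestDom can be restated as a small standalone helper; the types are simplified stand-ins (block ids and def indices as ints, -1 meaning "invalid"), and findNCD stands in for MachineDominatorTree::findNearestCommonDominator:

  struct ToyDomPair { int MBB; int Def; };   // -1 = invalid

  void mergeBackCopy(ToyDomPair &Dom, int ValMBB, int Def,
                     int (*findNCD)(int, int)) {
    if (Dom.MBB < 0) {                       // first copy dominates itself
      Dom.MBB = ValMBB; Dom.Def = Def;
    } else if (Dom.MBB == ValMBB) {          // same block: keep earlier def
      if (Dom.Def < 0 || Def < Dom.Def)
        Dom.Def = Def;
    } else {
      int Near = findNCD(Dom.MBB, ValMBB);
      if (Near == ValMBB) {                  // new copy dominates the old
        Dom.MBB = ValMBB; Dom.Def = Def;
      } else if (Near != Dom.MBB) {          // neither dominates: hoist,
        Dom.MBB = Near; Dom.Def = -1;        // a new def will be inserted
      }                                      // else the old pair already wins
    }
  }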
+
+
/// transferValues - Transfer all possible values to the new live ranges.
-/// Values that were rematerialized are left alone, they need extendRange().
+/// Values that were rematerialized are left alone, they need LRCalc.extend().
bool SplitEditor::transferValues() {
bool Skipped = false;
- LiveInBlocks.clear();
RegAssignMap::const_iterator AssignI = RegAssign.begin();
for (LiveInterval::const_iterator ParentI = Edit->getParent().begin(),
ParentE = Edit->getParent().end(); ParentI != ParentE; ++ParentI) {
@@ -812,28 +833,23 @@ bool SplitEditor::transferValues() {
LiveInterval *LI = Edit->get(RegIdx);
// Check for a simply defined value that can be blitted directly.
- if (VNInfo *VNI = Values.lookup(std::make_pair(RegIdx, ParentVNI->id))) {
+ ValueForcePair VFP = Values.lookup(std::make_pair(RegIdx, ParentVNI->id));
+ if (VNInfo *VNI = VFP.getPointer()) {
DEBUG(dbgs() << ':' << VNI->id);
LI->addRange(LiveRange(Start, End, VNI));
Start = End;
continue;
}
- // Skip rematerialized values, we need to use extendRange() and
- // extendPHIKillRanges() to completely recompute the live ranges.
- if (Edit->didRematerialize(ParentVNI)) {
- DEBUG(dbgs() << "(remat)");
+ // Skip values with forced recomputation.
+ if (VFP.getInt()) {
+ DEBUG(dbgs() << "(recalc)");
Skipped = true;
Start = End;
continue;
}
- // Initialize the live-out cache the first time it is needed.
- if (LiveOutSeen.empty()) {
- unsigned N = VRM.getMachineFunction().getNumBlockIDs();
- LiveOutSeen.resize(N);
- LiveOutCache.resize(N);
- }
+ LiveRangeCalc &LRC = getLRCalc(RegIdx);
// This value has multiple defs in RegIdx, but it wasn't rematerialized,
// so the live range is accurate. Add live-in blocks in [Start;End) to the
@@ -844,15 +860,13 @@ bool SplitEditor::transferValues() {
// The first block may be live-in, or it may have its own def.
if (Start != BlockStart) {
- VNInfo *VNI = LI->extendInBlock(BlockStart,
- std::min(BlockEnd, End).getPrevSlot());
+ VNInfo *VNI = LI->extendInBlock(BlockStart, std::min(BlockEnd, End));
assert(VNI && "Missing def for complex mapped value");
DEBUG(dbgs() << ':' << VNI->id << "*BB#" << MBB->getNumber());
// MBB has its own def. Is it also live-out?
- if (BlockEnd <= End) {
- LiveOutSeen.set(MBB->getNumber());
- LiveOutCache[MBB] = LiveOutPair(VNI, MDT[MBB]);
- }
+ if (BlockEnd <= End)
+ LRC.setLiveOutValue(MBB, VNI);
+
// Skip to the next block for live-in.
++MBB;
BlockStart = BlockEnd;
@@ -866,25 +880,19 @@ bool SplitEditor::transferValues() {
if (BlockStart == ParentVNI->def) {
// This block has the def of a parent PHI, so it isn't live-in.
assert(ParentVNI->isPHIDef() && "Non-phi defined at block start?");
- VNInfo *VNI = LI->extendInBlock(BlockStart,
- std::min(BlockEnd, End).getPrevSlot());
+ VNInfo *VNI = LI->extendInBlock(BlockStart, std::min(BlockEnd, End));
assert(VNI && "Missing def for complex mapped parent PHI");
- if (End >= BlockEnd) {
- // Live-out as well.
- LiveOutSeen.set(MBB->getNumber());
- LiveOutCache[MBB] = LiveOutPair(VNI, MDT[MBB]);
- }
+ if (End >= BlockEnd)
+ LRC.setLiveOutValue(MBB, VNI); // Live-out as well.
} else {
- // This block needs a live-in value.
- LiveInBlocks.push_back(MDT[MBB]);
- // The last block covered may not be live-out.
+ // This block needs a live-in value. The last block covered may not
+ // be live-out.
if (End < BlockEnd)
- LiveInBlocks.back().Kill = End;
+ LRC.addLiveInBlock(LI, MDT[MBB], End);
else {
- // Live-out, but we need updateSSA to tell us the value.
- LiveOutSeen.set(MBB->getNumber());
- LiveOutCache[MBB] = LiveOutPair((VNInfo*)0,
- (MachineDomTreeNode*)0);
+ // Live-through, and we don't know the value.
+ LRC.addLiveInBlock(LI, MDT[MBB]);
+ LRC.setLiveOutValue(MBB, 0);
}
}
BlockStart = BlockEnd;
@@ -895,8 +903,11 @@ bool SplitEditor::transferValues() {
DEBUG(dbgs() << '\n');
}
- if (!LiveInBlocks.empty())
- updateSSA();
+ LRCalc[0].calculateValues(LIS.getSlotIndexes(), &MDT,
+ &LIS.getVNInfoAllocator());
+ if (SpillMode)
+ LRCalc[1].calculateValues(LIS.getSlotIndexes(), &MDT,
+ &LIS.getVNInfoAllocator());
return Skipped;
}
@@ -909,16 +920,20 @@ void SplitEditor::extendPHIKillRanges() {
if (PHIVNI->isUnused() || !PHIVNI->isPHIDef())
continue;
unsigned RegIdx = RegAssign.lookup(PHIVNI->def);
+ LiveInterval *LI = Edit->get(RegIdx);
+ LiveRangeCalc &LRC = getLRCalc(RegIdx);
MachineBasicBlock *MBB = LIS.getMBBFromIndex(PHIVNI->def);
for (MachineBasicBlock::pred_iterator PI = MBB->pred_begin(),
PE = MBB->pred_end(); PI != PE; ++PI) {
- SlotIndex End = LIS.getMBBEndIdx(*PI).getPrevSlot();
+ SlotIndex End = LIS.getMBBEndIdx(*PI);
+ SlotIndex LastUse = End.getPrevSlot();
// The predecessor may not have a live-out value. That is OK, like an
// undef PHI operand.
- if (Edit->getParent().liveAt(End)) {
- assert(RegAssign.lookup(End) == RegIdx &&
+ if (Edit->getParent().liveAt(LastUse)) {
+ assert(RegAssign.lookup(LastUse) == RegIdx &&
"Different register assignment in phi predecessor");
- extendRange(RegIdx, End);
+ LRC.extend(LI, End,
+ LIS.getSlotIndexes(), &MDT, &LIS.getVNInfoAllocator());
}
}
}
@@ -938,25 +953,22 @@ void SplitEditor::rewriteAssigned(bool ExtendRanges) {
continue;
}
- // <undef> operands don't really read the register, so just assign them to
- // the complement.
- if (MO.isUse() && MO.isUndef()) {
- MO.setReg(Edit->get(0)->reg);
- continue;
- }
-
+ // <undef> operands don't really read the register, so it doesn't matter
+ // which register we choose. When the use operand is tied to a def, we must
+ // use the same register as the def, so just do that always.
SlotIndex Idx = LIS.getInstructionIndex(MI);
- if (MO.isDef())
+ if (MO.isDef() || MO.isUndef())
Idx = MO.isEarlyClobber() ? Idx.getUseIndex() : Idx.getDefIndex();
// Rewrite to the mapped register at Idx.
unsigned RegIdx = RegAssign.lookup(Idx);
- MO.setReg(Edit->get(RegIdx)->reg);
+ LiveInterval *LI = Edit->get(RegIdx);
+ MO.setReg(LI->reg);
DEBUG(dbgs() << " rewr BB#" << MI->getParent()->getNumber() << '\t'
<< Idx << ':' << RegIdx << '\t' << *MI);
// Extend liveness to Idx if the instruction reads reg.
- if (!ExtendRanges)
+ if (!ExtendRanges || MO.isUndef())
continue;
// Skip instructions that don't read Reg.
@@ -971,7 +983,8 @@ void SplitEditor::rewriteAssigned(bool ExtendRanges) {
} else
Idx = Idx.getUseIndex();
- extendRange(RegIdx, Idx);
+ getLRCalc(RegIdx).extend(LI, Idx.getNextSlot(), LIS.getSlotIndexes(),
+ &MDT, &LIS.getVNInfoAllocator());
}
}
@@ -1019,11 +1032,24 @@ void SplitEditor::finish(SmallVectorImpl<unsigned> *LRMap) {
VNI->setIsPHIDef(ParentVNI->isPHIDef());
VNI->setCopy(ParentVNI->getCopy());
- // Mark rematted values as complex everywhere to force liveness computation.
+ // Force rematted values to be recomputed everywhere.
// The new live ranges may be truncated.
if (Edit->didRematerialize(ParentVNI))
for (unsigned i = 0, e = Edit->size(); i != e; ++i)
- markComplexMapped(i, ParentVNI);
+ forceRecompute(i, ParentVNI);
+ }
+
+ // Hoist back-copies to the complement interval when in spill mode.
+ switch (SpillMode) {
+ case SM_Partition:
+ // Leave all back-copies as is.
+ break;
+ case SM_Size:
+ hoistCopiesForSize();
+ break;
+ case SM_Speed:
+ llvm_unreachable("Spill mode 'speed' not implemented yet");
+ break;
}
// Transfer the simply mapped values, check if any are skipped.
@@ -1081,50 +1107,39 @@ void SplitEditor::finish(SmallVectorImpl<unsigned> *LRMap) {
// Single Block Splitting
//===----------------------------------------------------------------------===//
-/// getMultiUseBlocks - if CurLI has more than one use in a basic block, it
-/// may be an advantage to split CurLI for the duration of the block.
-bool SplitAnalysis::getMultiUseBlocks(BlockPtrSet &Blocks) {
- // If CurLI is local to one block, there is no point to splitting it.
- if (UseBlocks.size() <= 1)
+bool SplitAnalysis::shouldSplitSingleBlock(const BlockInfo &BI,
+ bool SingleInstrs) const {
+ // Always split for multiple instructions.
+ if (!BI.isOneInstr())
+ return true;
+ // Don't split for single instructions unless explicitly requested.
+ if (!SingleInstrs)
return false;
- // Add blocks with multiple uses.
- for (unsigned i = 0, e = UseBlocks.size(); i != e; ++i) {
- const BlockInfo &BI = UseBlocks[i];
- if (BI.FirstUse == BI.LastUse)
- continue;
- Blocks.insert(BI.MBB);
- }
- return !Blocks.empty();
+ // Splitting a live-through range always makes progress.
+ if (BI.LiveIn && BI.LiveOut)
+ return true;
+ // No point in isolating a copy. It has no register class constraints.
+ if (LIS.getInstructionFromIndex(BI.FirstInstr)->isCopyLike())
+ return false;
+ // Finally, don't isolate an end point that was created by earlier splits.
+ return isOriginalEndpoint(BI.FirstInstr);
}
void SplitEditor::splitSingleBlock(const SplitAnalysis::BlockInfo &BI) {
openIntv();
SlotIndex LastSplitPoint = SA.getLastSplitPoint(BI.MBB->getNumber());
- SlotIndex SegStart = enterIntvBefore(std::min(BI.FirstUse,
+ SlotIndex SegStart = enterIntvBefore(std::min(BI.FirstInstr,
LastSplitPoint));
- if (!BI.LiveOut || BI.LastUse < LastSplitPoint) {
- useIntv(SegStart, leaveIntvAfter(BI.LastUse));
+ if (!BI.LiveOut || BI.LastInstr < LastSplitPoint) {
+ useIntv(SegStart, leaveIntvAfter(BI.LastInstr));
} else {
// The last use is after the last valid split point.
SlotIndex SegStop = leaveIntvBefore(LastSplitPoint);
useIntv(SegStart, SegStop);
- overlapIntv(SegStop, BI.LastUse);
+ overlapIntv(SegStop, BI.LastInstr);
}
}
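With splitSingleBlocks removed below, the per-block decision moves to the caller. A plausible driver pattern, hedged (SA and SE stand for the SplitAnalysis and SplitEditor instances a register allocator already holds):

  ArrayRef<SplitAnalysis::BlockInfo> UseBlocks = SA.getUseBlocks();
  for (unsigned i = 0; i != UseBlocks.size(); ++i)
    if (SA.shouldSplitSingleBlock(UseBlocks[i], /*SingleInstrs=*/false))
      SE.splitSingleBlock(UseBlocks[i]);
  SE.finish();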
-/// splitSingleBlocks - Split CurLI into a separate live interval inside each
-/// basic block in Blocks.
-void SplitEditor::splitSingleBlocks(const SplitAnalysis::BlockPtrSet &Blocks) {
- DEBUG(dbgs() << " splitSingleBlocks for " << Blocks.size() << " blocks.\n");
- ArrayRef<SplitAnalysis::BlockInfo> UseBlocks = SA.getUseBlocks();
- for (unsigned i = 0; i != UseBlocks.size(); ++i) {
- const SplitAnalysis::BlockInfo &BI = UseBlocks[i];
- if (Blocks.count(BI.MBB))
- splitSingleBlock(BI);
- }
- finish();
-}
-
//===----------------------------------------------------------------------===//
// Global Live Range Splitting Support
@@ -1149,6 +1164,12 @@ void SplitEditor::splitLiveThroughBlock(unsigned MBBNum,
assert((IntvIn || IntvOut) && "Use splitSingleBlock for isolated blocks");
+ assert((!LeaveBefore || LeaveBefore < Stop) && "Interference after block");
+ assert((!IntvIn || !LeaveBefore || LeaveBefore > Start) && "Impossible intf");
+ assert((!EnterAfter || EnterAfter >= Start) && "Interference before block");
+
+ MachineBasicBlock *MBB = VRM.getMachineFunction().getBlockNumbered(MBBNum);
+
if (!IntvOut) {
DEBUG(dbgs() << ", spill on entry.\n");
//
@@ -1157,7 +1178,6 @@ void SplitEditor::splitLiveThroughBlock(unsigned MBBNum,
// -____________ Spill on entry.
//
selectIntv(IntvIn);
- MachineBasicBlock *MBB = VRM.getMachineFunction().getBlockNumbered(MBBNum);
SlotIndex Idx = leaveIntvAtTop(*MBB);
assert((!LeaveBefore || Idx <= LeaveBefore) && "Interference");
(void)Idx;
@@ -1172,7 +1192,6 @@ void SplitEditor::splitLiveThroughBlock(unsigned MBBNum,
// ___________-- Reload on exit.
//
selectIntv(IntvOut);
- MachineBasicBlock *MBB = VRM.getMachineFunction().getBlockNumbered(MBBNum);
SlotIndex Idx = enterIntvAtEnd(*MBB);
assert((!EnterAfter || Idx >= EnterAfter) && "Interference");
(void)Idx;
@@ -1192,6 +1211,7 @@ void SplitEditor::splitLiveThroughBlock(unsigned MBBNum,
// We cannot legally insert splits after LSP.
SlotIndex LSP = SA.getLastSplitPoint(MBBNum);
+ assert((!IntvOut || !EnterAfter || EnterAfter < LSP) && "Impossible intf");
if (IntvIn != IntvOut && (!LeaveBefore || !EnterAfter ||
LeaveBefore.getBaseIndex() > EnterAfter.getBoundaryIndex())) {
@@ -1201,10 +1221,14 @@ void SplitEditor::splitLiveThroughBlock(unsigned MBBNum,
// |-----------| Live through.
// ------======= Switch intervals between interference.
//
- SlotIndex Cut = (LeaveBefore && LeaveBefore < LSP) ? LeaveBefore : LSP;
selectIntv(IntvOut);
- SlotIndex Idx = enterIntvBefore(Cut);
- useIntv(Idx, Stop);
+ SlotIndex Idx;
+ if (LeaveBefore && LeaveBefore < LSP) {
+ Idx = enterIntvBefore(LeaveBefore);
+ useIntv(Idx, Stop);
+ } else {
+ Idx = enterIntvAtEnd(*MBB);
+ }
selectIntv(IntvIn);
useIntv(Start, Idx);
assert((!LeaveBefore || Idx <= LeaveBefore) && "Interference");
@@ -1238,7 +1262,7 @@ void SplitEditor::splitRegInBlock(const SplitAnalysis::BlockInfo &BI,
tie(Start, Stop) = LIS.getSlotIndexes()->getMBBRange(BI.MBB);
DEBUG(dbgs() << "BB#" << BI.MBB->getNumber() << " [" << Start << ';' << Stop
- << "), uses " << BI.FirstUse << '-' << BI.LastUse
+ << "), uses " << BI.FirstInstr << '-' << BI.LastInstr
<< ", reg-in " << IntvIn << ", leave before " << LeaveBefore
<< (BI.LiveOut ? ", stack-out" : ", killed in block"));
@@ -1246,7 +1270,7 @@ void SplitEditor::splitRegInBlock(const SplitAnalysis::BlockInfo &BI,
assert(BI.LiveIn && "Must be live-in");
assert((!LeaveBefore || LeaveBefore > Start) && "Bad interference");
- if (!BI.LiveOut && (!LeaveBefore || LeaveBefore >= BI.LastUse)) {
+ if (!BI.LiveOut && (!LeaveBefore || LeaveBefore >= BI.LastInstr)) {
DEBUG(dbgs() << " before interference.\n");
//
// <<< Interference after kill.
@@ -1254,13 +1278,13 @@ void SplitEditor::splitRegInBlock(const SplitAnalysis::BlockInfo &BI,
// ========= Use IntvIn everywhere.
//
selectIntv(IntvIn);
- useIntv(Start, BI.LastUse);
+ useIntv(Start, BI.LastInstr);
return;
}
SlotIndex LSP = SA.getLastSplitPoint(BI.MBB->getNumber());
- if (!LeaveBefore || LeaveBefore > BI.LastUse.getBoundaryIndex()) {
+ if (!LeaveBefore || LeaveBefore > BI.LastInstr.getBoundaryIndex()) {
//
// <<< Possible interference after last use.
// |---o---o---| Live-out on stack.
@@ -1271,17 +1295,17 @@ void SplitEditor::splitRegInBlock(const SplitAnalysis::BlockInfo &BI,
// ============ Copy to stack after LSP, overlap IntvIn.
// \_____ Stack interval is live-out.
//
- if (BI.LastUse < LSP) {
+ if (BI.LastInstr < LSP) {
DEBUG(dbgs() << ", spill after last use before interference.\n");
selectIntv(IntvIn);
- SlotIndex Idx = leaveIntvAfter(BI.LastUse);
+ SlotIndex Idx = leaveIntvAfter(BI.LastInstr);
useIntv(Start, Idx);
assert((!LeaveBefore || Idx <= LeaveBefore) && "Interference");
} else {
DEBUG(dbgs() << ", spill before last split point.\n");
selectIntv(IntvIn);
SlotIndex Idx = leaveIntvBefore(LSP);
- overlapIntv(Idx, BI.LastUse);
+ overlapIntv(Idx, BI.LastInstr);
useIntv(Start, Idx);
assert((!LeaveBefore || Idx <= LeaveBefore) && "Interference");
}
@@ -1295,13 +1319,13 @@ void SplitEditor::splitRegInBlock(const SplitAnalysis::BlockInfo &BI,
(void)LocalIntv;
DEBUG(dbgs() << ", creating local interval " << LocalIntv << ".\n");
- if (!BI.LiveOut || BI.LastUse < LSP) {
+ if (!BI.LiveOut || BI.LastInstr < LSP) {
//
// <<<<<<< Interference overlapping uses.
// |---o---o---| Live-out on stack.
// =====----____ Leave IntvIn before interference, then spill.
//
- SlotIndex To = leaveIntvAfter(BI.LastUse);
+ SlotIndex To = leaveIntvAfter(BI.LastInstr);
SlotIndex From = enterIntvBefore(LeaveBefore);
useIntv(From, To);
selectIntv(IntvIn);
@@ -1316,7 +1340,7 @@ void SplitEditor::splitRegInBlock(const SplitAnalysis::BlockInfo &BI,
// \_____ Stack interval is live-out.
//
SlotIndex To = leaveIntvBefore(LSP);
- overlapIntv(To, BI.LastUse);
+ overlapIntv(To, BI.LastInstr);
SlotIndex From = enterIntvBefore(std::min(To, LeaveBefore));
useIntv(From, To);
selectIntv(IntvIn);
@@ -1330,7 +1354,7 @@ void SplitEditor::splitRegOutBlock(const SplitAnalysis::BlockInfo &BI,
tie(Start, Stop) = LIS.getSlotIndexes()->getMBBRange(BI.MBB);
DEBUG(dbgs() << "BB#" << BI.MBB->getNumber() << " [" << Start << ';' << Stop
- << "), uses " << BI.FirstUse << '-' << BI.LastUse
+ << "), uses " << BI.FirstInstr << '-' << BI.LastInstr
<< ", reg-out " << IntvOut << ", enter after " << EnterAfter
<< (BI.LiveIn ? ", stack-in" : ", defined in block"));
@@ -1340,7 +1364,7 @@ void SplitEditor::splitRegOutBlock(const SplitAnalysis::BlockInfo &BI,
assert(BI.LiveOut && "Must be live-out");
assert((!EnterAfter || EnterAfter < LSP) && "Bad interference");
- if (!BI.LiveIn && (!EnterAfter || EnterAfter <= BI.FirstUse)) {
+ if (!BI.LiveIn && (!EnterAfter || EnterAfter <= BI.FirstInstr)) {
DEBUG(dbgs() << " after interference.\n");
//
// >>>> Interference before def.
@@ -1348,11 +1372,11 @@ void SplitEditor::splitRegOutBlock(const SplitAnalysis::BlockInfo &BI,
// ========= Use IntvOut everywhere.
//
selectIntv(IntvOut);
- useIntv(BI.FirstUse, Stop);
+ useIntv(BI.FirstInstr, Stop);
return;
}
- if (!EnterAfter || EnterAfter < BI.FirstUse.getBaseIndex()) {
+ if (!EnterAfter || EnterAfter < BI.FirstInstr.getBaseIndex()) {
DEBUG(dbgs() << ", reload after interference.\n");
//
// >>>> Interference before def.
@@ -1360,7 +1384,7 @@ void SplitEditor::splitRegOutBlock(const SplitAnalysis::BlockInfo &BI,
// ____========= Enter IntvOut before first use.
//
selectIntv(IntvOut);
- SlotIndex Idx = enterIntvBefore(std::min(LSP, BI.FirstUse));
+ SlotIndex Idx = enterIntvBefore(std::min(LSP, BI.FirstInstr));
useIntv(Idx, Stop);
assert((!EnterAfter || Idx >= EnterAfter) && "Interference");
return;
@@ -1381,6 +1405,6 @@ void SplitEditor::splitRegOutBlock(const SplitAnalysis::BlockInfo &BI,
assert((!EnterAfter || Idx >= EnterAfter) && "Interference");
openIntv();
- SlotIndex From = enterIntvBefore(std::min(Idx, BI.FirstUse));
+ SlotIndex From = enterIntvBefore(std::min(Idx, BI.FirstInstr));
useIntv(From, Idx);
}
diff --git a/lib/CodeGen/SplitKit.h b/lib/CodeGen/SplitKit.h
index 7948b72..d8fc212 100644
--- a/lib/CodeGen/SplitKit.h
+++ b/lib/CodeGen/SplitKit.h
@@ -15,13 +15,11 @@
#ifndef LLVM_CODEGEN_SPLITKIT_H
#define LLVM_CODEGEN_SPLITKIT_H
+#include "LiveRangeCalc.h"
#include "llvm/ADT/ArrayRef.h"
-#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/IndexedMap.h"
#include "llvm/ADT/IntervalMap.h"
#include "llvm/ADT/SmallPtrSet.h"
-#include "llvm/CodeGen/SlotIndexes.h"
namespace llvm {
@@ -38,12 +36,6 @@ class VirtRegMap;
class VNInfo;
class raw_ostream;
-/// At some point we should just include MachineDominators.h:
-class MachineDominatorTree;
-template <class NodeT> class DomTreeNodeBase;
-typedef DomTreeNodeBase<MachineBasicBlock> MachineDomTreeNode;
-
-
/// SplitAnalysis - Analyze a LiveInterval, looking for live range splitting
/// opportunities.
class SplitAnalysis {
@@ -76,16 +68,16 @@ public:
///
struct BlockInfo {
MachineBasicBlock *MBB;
- SlotIndex FirstUse; ///< First instr using current reg.
- SlotIndex LastUse; ///< Last instr using current reg.
- bool LiveThrough; ///< Live in whole block (Templ 5. above).
+ SlotIndex FirstInstr; ///< First instr accessing current reg.
+ SlotIndex LastInstr; ///< Last instr accessing current reg.
+ SlotIndex FirstDef; ///< First non-phi valno->def, or SlotIndex().
bool LiveIn; ///< Current reg is live in.
bool LiveOut; ///< Current reg is live out.
/// isOneInstr - Returns true when this BlockInfo describes a single
/// instruction.
bool isOneInstr() const {
- return SlotIndex::isSameInstr(FirstUse, LastUse);
+ return SlotIndex::isSameInstr(FirstInstr, LastInstr);
}
};
@@ -185,10 +177,15 @@ public:
typedef SmallPtrSet<const MachineBasicBlock*, 16> BlockPtrSet;
- /// getMultiUseBlocks - Add basic blocks to Blocks that may benefit from
- /// having CurLI split to a new live interval. Return true if Blocks can be
- /// passed to SplitEditor::splitSingleBlocks.
- bool getMultiUseBlocks(BlockPtrSet &Blocks);
+ /// shouldSplitSingleBlock - Returns true if it would help to create a local
+ /// live range for the instructions in BI. There is normally no benefit to
+ /// creating a live range for a single instruction, but it does enable
+ /// register class inflation if the instruction has a restricted register
+ /// class.
+ ///
+ /// @param BI The block to be isolated.
+ /// @param SingleInstrs True when single instructions should be isolated.
+ bool shouldSplitSingleBlock(const BlockInfo &BI, bool SingleInstrs) const;
};
@@ -212,6 +209,36 @@ class SplitEditor {
const TargetInstrInfo &TII;
const TargetRegisterInfo &TRI;
+public:
+
+ /// ComplementSpillMode - Select how the complement live range should be
+ /// created. SplitEditor automatically creates interval 0 to contain
+ /// anything that isn't added to another interval. This complement interval
+ /// can get quite complicated, and it can sometimes be an advantage to allow
+ /// it to overlap the other intervals. If it is going to spill anyway, no
+ /// registers are wasted by keeping a value in two places at the same time.
+ enum ComplementSpillMode {
+ /// SM_Partition (Default) - Try to create the complement interval so it
+ /// doesn't overlap any other intervals, and the original interval is
+ /// partitioned. This may require a large number of back copies and extra
+ /// PHI-defs. Only segments marked with overlapIntv will be overlapping.
+ SM_Partition,
+
+ /// SM_Size - Overlap intervals to minimize the number of inserted COPY
+ /// instructions. Copies to the complement interval are hoisted to their
+ /// common dominator, so only one COPY is required per value in the
+ /// complement interval. This also means that no extra PHI-defs need to be
+ /// inserted in the complement interval.
+ SM_Size,
+
+ /// SM_Speed - Overlap intervals to minimize the expected execution
+ /// frequency of the inserted copies. This is very similar to SM_Size, but
+ /// the complement interval may get some extra PHI-defs.
+ SM_Speed
+ };
+
+private:
+
/// Edit - The current parent register and new intervals created.
LiveRangeEdit *Edit;
@@ -220,6 +247,9 @@ class SplitEditor {
/// openIntv will be 1.
unsigned OpenIdx;
+ /// The current spill mode, selected by reset().
+ ComplementSpillMode SpillMode;
+
typedef IntervalMap<SlotIndex, unsigned> RegAssignMap;
/// Allocator for the interval map. This will eventually be shared with
@@ -231,65 +261,34 @@ class SplitEditor {
/// Idx.
RegAssignMap RegAssign;
- typedef DenseMap<std::pair<unsigned, unsigned>, VNInfo*> ValueMap;
+ typedef PointerIntPair<VNInfo*, 1> ValueForcePair;
+ typedef DenseMap<std::pair<unsigned, unsigned>, ValueForcePair> ValueMap;
/// Values - keep track of the mapping from parent values to values in the new
/// intervals. Given a pair (RegIdx, ParentVNI->id), Values contains:
///
/// 1. No entry - the value is not mapped to Edit.get(RegIdx).
- /// 2. Null - the value is mapped to multiple values in Edit.get(RegIdx).
- /// Each value is represented by a minimal live range at its def.
- /// 3. A non-null VNInfo - the value is mapped to a single new value.
+ /// 2. (Null, false) - the value is mapped to multiple values in
+ /// Edit.get(RegIdx). Each value is represented by a minimal live range at
+ /// its def. The full live range can be inferred exactly from the range
+ /// of RegIdx in RegAssign.
+ /// 3. (Null, true). As above, but the ranges in RegAssign are too large, and
+ /// the live range must be recomputed using LiveRangeCalc::extend().
+ /// 4. (VNI, false) The value is mapped to a single new value.
/// The new value has no live ranges anywhere.
ValueMap Values;
- typedef std::pair<VNInfo*, MachineDomTreeNode*> LiveOutPair;
- typedef IndexedMap<LiveOutPair, MBB2NumberFunctor> LiveOutMap;
-
- // LiveOutCache - Map each basic block where a new register is live out to the
- // live-out value and its defining block.
- // One of these conditions shall be true:
- //
- // 1. !LiveOutCache.count(MBB)
- // 2. LiveOutCache[MBB].second.getNode() == MBB
- // 3. forall P in preds(MBB): LiveOutCache[P] == LiveOutCache[MBB]
- //
- // This is only a cache, the values can be computed as:
- //
- // VNI = Edit.get(RegIdx)->getVNInfoAt(LIS.getMBBEndIdx(MBB))
- // Node = mbt_[LIS.getMBBFromIndex(VNI->def)]
- //
- // The cache is also used as a visited set by extendRange(). It can be shared
- // by all the new registers because at most one is live out of each block.
- LiveOutMap LiveOutCache;
-
- // LiveOutSeen - Indexed by MBB->getNumber(), a bit is set for each valid
- // entry in LiveOutCache.
- BitVector LiveOutSeen;
-
- /// LiveInBlock - Info for updateSSA() about a block where a register is
- /// live-in.
- /// The updateSSA caller provides DomNode and Kill inside MBB, updateSSA()
- /// adds the computed live-in value.
- struct LiveInBlock {
- // Dominator tree node for the block.
- // Cleared by updateSSA when the final value has been determined.
- MachineDomTreeNode *DomNode;
-
- // Live-in value filled in by updateSSA once it is known.
- VNInfo *Value;
-
- // Position in block where the live-in range ends, or SlotIndex() if the
- // range passes through the block.
- SlotIndex Kill;
-
- LiveInBlock(MachineDomTreeNode *node) : DomNode(node), Value(0) {}
- };
+ /// LRCalc - Cache for computing live ranges and SSA update. Each instance
+ /// can only handle non-overlapping live ranges, so use a separate
+ /// LiveRangeCalc instance for the complement interval when in spill mode.
+ LiveRangeCalc LRCalc[2];
- /// LiveInBlocks - List of live-in blocks used by findReachingDefs() and
- /// updateSSA(). This list is usually empty, it exists here to avoid frequent
- /// reallocations.
- SmallVector<LiveInBlock, 16> LiveInBlocks;
+ /// getLRCalc - Return the LRCalc to use for RegIdx. In spill mode, the
+ /// complement interval can overlap the other intervals, so it gets its own
+ /// LRCalc instance. When not in spill mode, all intervals can share one.
+ LiveRangeCalc &getLRCalc(unsigned RegIdx) {
+ return LRCalc[SpillMode != SM_Partition && RegIdx != 0];
+ }
/// defValue - define a value in RegIdx from ParentVNI at Idx.
/// Idx does not have to be ParentVNI->def, but it must be contained within
@@ -298,9 +297,11 @@ class SplitEditor {
/// Return the new LI value.
VNInfo *defValue(unsigned RegIdx, const VNInfo *ParentVNI, SlotIndex Idx);
- /// markComplexMapped - Mark ParentVNI as complex mapped in RegIdx regardless
- /// of the number of defs.
- void markComplexMapped(unsigned RegIdx, const VNInfo *ParentVNI);
+ /// forceRecompute - Force the live range of ParentVNI in RegIdx to be
+ /// recomputed by LiveRangeCalc::extend regardless of the number of defs.
+ /// This is used for values whose live range doesn't match RegAssign exactly.
+ /// They could have been rematerialized, or back-copies may have been moved.
+ void forceRecompute(unsigned RegIdx, const VNInfo *ParentVNI);
/// defFromParent - Define Reg from ParentVNI at UseIdx using either
/// rematerialization or a COPY from parent. Return the new value.
@@ -310,22 +311,18 @@ class SplitEditor {
MachineBasicBlock &MBB,
MachineBasicBlock::iterator I);
- /// extendRange - Extend the live range of Edit.get(RegIdx) so it reaches Idx.
- /// Insert PHIDefs as needed to preserve SSA form.
- void extendRange(unsigned RegIdx, SlotIndex Idx);
+ /// removeBackCopies - Remove the copy instructions that define the given
+ /// values in the complement interval.
+ void removeBackCopies(SmallVectorImpl<VNInfo*> &Copies);
- /// findReachingDefs - Starting from MBB, add blocks to LiveInBlocks until all
- /// reaching defs for LI are found.
- /// @param LI Live interval whose value is needed.
- /// @param MBB Block where LI should be live-in.
- /// @param Kill Kill point in MBB.
- /// @return Unique value seen, or NULL.
- VNInfo *findReachingDefs(LiveInterval *LI, MachineBasicBlock *MBB,
- SlotIndex Kill);
+ /// findShallowDominator - Returns the least busy dominator of MBB that is
+ /// also dominated by DefMBB. Busy is measured by loop depth.
+ MachineBasicBlock *findShallowDominator(MachineBasicBlock *MBB,
+ MachineBasicBlock *DefMBB);
- /// updateSSA - Compute and insert PHIDefs such that all blocks in
- // LiveInBlocks get a known live-in value. Add live ranges to the blocks.
- void updateSSA();
+ /// hoistCopiesForSize - Hoist back-copies to the complement interval in a
+ /// way that minimizes code size. This implements the SM_Size spill mode.
+ void hoistCopiesForSize();
/// transferValues - Transfer values to the new ranges.
/// Return true if any ranges were skipped.
@@ -348,7 +345,7 @@ public:
MachineDominatorTree&);
/// reset - Prepare for a new split.
- void reset(LiveRangeEdit&);
+ void reset(LiveRangeEdit&, ComplementSpillMode = SM_Partition);
/// Create a new virtual register and live interval.
/// Return the interval index, starting from 1. Interval index 0 is the
@@ -423,10 +420,6 @@ public:
/// split, and doesn't call finish().
void splitSingleBlock(const SplitAnalysis::BlockInfo &BI);
- /// splitSingleBlocks - Split CurLI into a separate live interval inside each
- /// basic block in Blocks.
- void splitSingleBlocks(const SplitAnalysis::BlockPtrSet &Blocks);
-
/// splitLiveThroughBlock - Split CurLI in the given block such that it
/// enters the block in IntvIn and leaves it in IntvOut. There may be uses in
/// the block, but they will be ignored when placing split points.
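A minimal sketch of selecting a spill mode through the extended reset() (SA, LIS, VRM, MDT and the LiveRangeEdit LRE are assumed to be set up as usual by the caller):

  SplitEditor SE(SA, LIS, VRM, MDT);
  SE.reset(LRE, SplitEditor::SM_Size);  // omit the mode to keep SM_Partition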
diff --git a/lib/CodeGen/Splitter.cpp b/lib/CodeGen/Splitter.cpp
index ec75df4..77973b7 100644
--- a/lib/CodeGen/Splitter.cpp
+++ b/lib/CodeGen/Splitter.cpp
@@ -11,7 +11,6 @@
#include "Splitter.h"
-#include "RegisterCoalescer.h"
#include "llvm/Module.h"
#include "llvm/CodeGen/CalcSpillWeights.h"
#include "llvm/CodeGen/LiveIntervalAnalysis.h"
@@ -20,6 +19,7 @@
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/SlotIndexes.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
@@ -262,7 +262,7 @@ namespace llvm {
au.addPreserved<MachineDominatorTree>();
au.addRequired<MachineLoopInfo>();
au.addPreserved<MachineLoopInfo>();
- au.addPreserved<RegisterCoalescer>();
+ au.addPreservedID(RegisterCoalescerPassID);
au.addPreserved<CalculateSpillWeights>();
au.addPreserved<LiveStacks>();
au.addRequired<SlotIndexes>();
diff --git a/lib/CodeGen/StackProtector.cpp b/lib/CodeGen/StackProtector.cpp
index d3cbd15..1f0e5a2 100644
--- a/lib/CodeGen/StackProtector.cpp
+++ b/lib/CodeGen/StackProtector.cpp
@@ -123,7 +123,7 @@ bool StackProtector::RequiresStackProtector() const {
// protectors.
return true;
- if (const ArrayType *AT = dyn_cast<ArrayType>(AI->getAllocatedType())) {
+ if (ArrayType *AT = dyn_cast<ArrayType>(AI->getAllocatedType())) {
// We apparently only care about character arrays.
if (!AT->getElementType()->isIntegerTy(8))
continue;
@@ -165,7 +165,7 @@ bool StackProtector::InsertStackProtectors() {
// StackGuard = load __stack_chk_guard
// call void @llvm.stackprotect.create(StackGuard, StackGuardSlot)
//
- const PointerType *PtrTy = Type::getInt8PtrTy(RI->getContext());
+ PointerType *PtrTy = Type::getInt8PtrTy(RI->getContext());
unsigned AddressSpace, Offset;
if (TLI->getStackCookieLocation(AddressSpace, Offset)) {
Constant *OffsetVal =
diff --git a/lib/CodeGen/StrongPHIElimination.cpp b/lib/CodeGen/StrongPHIElimination.cpp
index 227eb47..260cc0e 100644
--- a/lib/CodeGen/StrongPHIElimination.cpp
+++ b/lib/CodeGen/StrongPHIElimination.cpp
@@ -47,6 +47,7 @@
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Support/Debug.h"
using namespace llvm;
@@ -673,7 +674,7 @@ void StrongPHIElimination::InsertCopiesForPHI(MachineInstr *PHI,
if (PHIColor && SrcColor == PHIColor) {
LiveInterval &SrcInterval = LI->getInterval(SrcReg);
SlotIndex PredIndex = LI->getMBBEndIdx(PredBB);
- VNInfo *SrcVNI = SrcInterval.getVNInfoAt(PredIndex.getPrevIndex());
+ VNInfo *SrcVNI = SrcInterval.getVNInfoBefore(PredIndex);
assert(SrcVNI);
SrcVNI->setHasPHIKill(true);
continue;
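getVNInfoBefore(Idx) names the value live immediately before Idx, which is the live-out query the old getPrevIndex() arithmetic spelled by hand. A hedged sketch of the equivalence at a block boundary, for a register that is live out of PredBB as SrcReg is here:

    SlotIndex End = LI->getMBBEndIdx(PredBB);
    // Old spelling: step back one instruction index, then query there.
    VNInfo *OldWay = SrcInterval.getVNInfoAt(End.getPrevIndex());
    // New spelling: ask directly for the value live just before End.
    VNInfo *NewWay = SrcInterval.getVNInfoBefore(End);
    assert(OldWay == NewWay && "same live-out value of PredBB");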
diff --git a/lib/CodeGen/TailDuplication.cpp b/lib/CodeGen/TailDuplication.cpp
index 6b801cb..3a6211a 100644
--- a/lib/CodeGen/TailDuplication.cpp
+++ b/lib/CodeGen/TailDuplication.cpp
@@ -25,6 +25,7 @@
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/Statistic.h"
diff --git a/lib/CodeGen/TargetInstrInfoImpl.cpp b/lib/CodeGen/TargetInstrInfoImpl.cpp
index 86e71d8..f32678f 100644
--- a/lib/CodeGen/TargetInstrInfoImpl.cpp
+++ b/lib/CodeGen/TargetInstrInfoImpl.cpp
@@ -74,23 +74,25 @@ MachineInstr *TargetInstrInfoImpl::commuteInstruction(MachineInstr *MI,
assert(MI->getOperand(Idx1).isReg() && MI->getOperand(Idx2).isReg() &&
"This only knows how to commute register operands so far");
+ unsigned Reg0 = HasDef ? MI->getOperand(0).getReg() : 0;
unsigned Reg1 = MI->getOperand(Idx1).getReg();
unsigned Reg2 = MI->getOperand(Idx2).getReg();
bool Reg1IsKill = MI->getOperand(Idx1).isKill();
bool Reg2IsKill = MI->getOperand(Idx2).isKill();
- bool ChangeReg0 = false;
- if (HasDef && MI->getOperand(0).getReg() == Reg1) {
- // Must be two address instruction!
- assert(MI->getDesc().getOperandConstraint(0, MCOI::TIED_TO) &&
- "Expecting a two-address instruction!");
+ // If the destination is tied to either of the commuted source registers,
+ // then it must be updated.
+ if (HasDef && Reg0 == Reg1 &&
+ MI->getDesc().getOperandConstraint(Idx1, MCOI::TIED_TO) == 0) {
Reg2IsKill = false;
- ChangeReg0 = true;
+ Reg0 = Reg2;
+ } else if (HasDef && Reg0 == Reg2 &&
+ MI->getDesc().getOperandConstraint(Idx2, MCOI::TIED_TO) == 0) {
+ Reg1IsKill = false;
+ Reg0 = Reg1;
}
if (NewMI) {
// Create a new instruction.
- unsigned Reg0 = HasDef
- ? (ChangeReg0 ? Reg2 : MI->getOperand(0).getReg()) : 0;
bool Reg0IsDead = HasDef ? MI->getOperand(0).isDead() : false;
MachineFunction &MF = *MI->getParent()->getParent();
if (HasDef)
@@ -104,8 +106,8 @@ MachineInstr *TargetInstrInfoImpl::commuteInstruction(MachineInstr *MI,
.addReg(Reg1, getKillRegState(Reg2IsKill));
}
- if (ChangeReg0)
- MI->getOperand(0).setReg(Reg2);
+ if (HasDef)
+ MI->getOperand(0).setReg(Reg0);
MI->getOperand(Idx2).setReg(Reg1);
MI->getOperand(Idx1).setReg(Reg2);
MI->getOperand(Idx2).setIsKill(Reg1IsKill);
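The rewrite generalizes the old ChangeReg0 flag: previously the code assumed the def could only be tied to operand Idx1, whereas the new code consults the TIED_TO constraint for both Idx1 and Idx2 and retargets the def accordingly, so the two-address constraint survives whichever source is tied. Callers are unaffected; typical in-place use remains (a hedged sketch with MI and MF assumed in scope):

    const TargetInstrInfo *TII = MF.getTarget().getInstrInfo();
    // In-place commute (NewMI == false): returns the commuted instruction,
    // or 0 if the operands cannot be commuted.
    if (MachineInstr *Commuted = TII->commuteInstruction(MI, /*NewMI=*/false))
      MI = Commuted; // For in-place commutes this is MI itself.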
@@ -160,6 +162,42 @@ bool TargetInstrInfoImpl::PredicateInstruction(MachineInstr *MI,
return MadeChange;
}
+bool TargetInstrInfoImpl::hasLoadFromStackSlot(const MachineInstr *MI,
+ const MachineMemOperand *&MMO,
+ int &FrameIndex) const {
+ for (MachineInstr::mmo_iterator o = MI->memoperands_begin(),
+ oe = MI->memoperands_end();
+ o != oe;
+ ++o) {
+ if ((*o)->isLoad() && (*o)->getValue())
+ if (const FixedStackPseudoSourceValue *Value =
+ dyn_cast<const FixedStackPseudoSourceValue>((*o)->getValue())) {
+ FrameIndex = Value->getFrameIndex();
+ MMO = *o;
+ return true;
+ }
+ }
+ return false;
+}
+
+bool TargetInstrInfoImpl::hasStoreToStackSlot(const MachineInstr *MI,
+ const MachineMemOperand *&MMO,
+ int &FrameIndex) const {
+ for (MachineInstr::mmo_iterator o = MI->memoperands_begin(),
+ oe = MI->memoperands_end();
+ o != oe;
+ ++o) {
+ if ((*o)->isStore() && (*o)->getValue())
+ if (const FixedStackPseudoSourceValue *Value =
+ dyn_cast<const FixedStackPseudoSourceValue>((*o)->getValue())) {
+ FrameIndex = Value->getFrameIndex();
+ MMO = *o;
+ return true;
+ }
+ }
+ return false;
+}
+
void TargetInstrInfoImpl::reMaterialize(MachineBasicBlock &MBB,
MachineBasicBlock::iterator I,
unsigned DestReg,
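These default implementations give every target a way to recognize loads and stores of fixed stack slots from an instruction's attached memory operands, rather than requiring opcode-specific overrides. A hedged usage sketch (MI and TII assumed in scope; dbgs() comes from llvm/Support/Debug.h):

    const MachineMemOperand *MMO = 0;
    int FrameIndex;
    if (TII->hasLoadFromStackSlot(MI, MMO, FrameIndex))
      dbgs() << "loads " << MMO->getSize() << " bytes from frame index "
             << FrameIndex << '\n';

Only memory operands carrying a FixedStackPseudoSourceValue, i.e. a known frame index, are matched by these defaults.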
@@ -324,6 +362,19 @@ isReallyTriviallyReMaterializableGeneric(const MachineInstr *MI,
const TargetInstrInfo &TII = *TM.getInstrInfo();
const TargetRegisterInfo &TRI = *TM.getRegisterInfo();
+ // Remat clients assume operand 0 is the defined register.
+ if (!MI->getNumOperands() || !MI->getOperand(0).isReg())
+ return false;
+ unsigned DefReg = MI->getOperand(0).getReg();
+
+ // A sub-register definition can only be rematerialized if the instruction
+ // doesn't read the other parts of the register. Otherwise it is really a
+ // read-modify-write operation on the full virtual register, which cannot be
+ // moved safely.
+ if (TargetRegisterInfo::isVirtualRegister(DefReg) &&
+ MI->getOperand(0).getSubReg() && MI->readsVirtualRegister(DefReg))
+ return false;
+
// A load from a fixed stack slot can be rematerialized. This may be
// redundant with subsequent checks, but it's target-independent,
// simple, and a common case.
@@ -383,8 +434,9 @@ isReallyTriviallyReMaterializableGeneric(const MachineInstr *MI,
continue;
}
- // Only allow one virtual-register def, and that in the first operand.
- if (MO.isDef() != (i == 0))
+ // Only allow one virtual-register def. There may be multiple defs of the
+ // same virtual register, though.
+ if (MO.isDef() && Reg != DefReg)
return false;
// Don't allow any virtual-register uses. Rematting an instruction with
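The new sub-register guard matters because a partial def leaves the other lanes of the register flowing through the instruction. A hedged illustration (the opcode and register names are made up):

    // %vreg2:ssub_0<def> = VLDRS <fi#1>, 0
    //
    // Only the low lane is written; the high lane of %vreg2 is implicitly
    // read and preserved. Rematerializing this load elsewhere would splice
    // it into a possibly different value of that high lane, hence:
    if (MI->getOperand(0).getSubReg() && MI->readsVirtualRegister(DefReg))
      return false; // read-modify-write, not trivially rematerializable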
diff --git a/lib/CodeGen/TargetLoweringObjectFileImpl.cpp b/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
index a3c5620..fb87154 100644
--- a/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
+++ b/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
@@ -43,153 +43,6 @@ using namespace dwarf;
// ELF
//===----------------------------------------------------------------------===//
-TargetLoweringObjectFileELF::TargetLoweringObjectFileELF()
- : TargetLoweringObjectFile(),
- TLSDataSection(0),
- TLSBSSSection(0),
- DataRelSection(0),
- DataRelLocalSection(0),
- DataRelROSection(0),
- DataRelROLocalSection(0),
- MergeableConst4Section(0),
- MergeableConst8Section(0),
- MergeableConst16Section(0) {
-}
-
-void TargetLoweringObjectFileELF::Initialize(MCContext &Ctx,
- const TargetMachine &TM) {
- TargetLoweringObjectFile::Initialize(Ctx, TM);
-
- BSSSection =
- getContext().getELFSection(".bss", ELF::SHT_NOBITS,
- ELF::SHF_WRITE |ELF::SHF_ALLOC,
- SectionKind::getBSS());
-
- TextSection =
- getContext().getELFSection(".text", ELF::SHT_PROGBITS,
- ELF::SHF_EXECINSTR |
- ELF::SHF_ALLOC,
- SectionKind::getText());
-
- DataSection =
- getContext().getELFSection(".data", ELF::SHT_PROGBITS,
- ELF::SHF_WRITE |ELF::SHF_ALLOC,
- SectionKind::getDataRel());
-
- ReadOnlySection =
- getContext().getELFSection(".rodata", ELF::SHT_PROGBITS,
- ELF::SHF_ALLOC,
- SectionKind::getReadOnly());
-
- TLSDataSection =
- getContext().getELFSection(".tdata", ELF::SHT_PROGBITS,
- ELF::SHF_ALLOC | ELF::SHF_TLS |
- ELF::SHF_WRITE,
- SectionKind::getThreadData());
-
- TLSBSSSection =
- getContext().getELFSection(".tbss", ELF::SHT_NOBITS,
- ELF::SHF_ALLOC | ELF::SHF_TLS |
- ELF::SHF_WRITE,
- SectionKind::getThreadBSS());
-
- DataRelSection =
- getContext().getELFSection(".data.rel", ELF::SHT_PROGBITS,
- ELF::SHF_ALLOC |ELF::SHF_WRITE,
- SectionKind::getDataRel());
-
- DataRelLocalSection =
- getContext().getELFSection(".data.rel.local", ELF::SHT_PROGBITS,
- ELF::SHF_ALLOC |ELF::SHF_WRITE,
- SectionKind::getDataRelLocal());
-
- DataRelROSection =
- getContext().getELFSection(".data.rel.ro", ELF::SHT_PROGBITS,
- ELF::SHF_ALLOC |ELF::SHF_WRITE,
- SectionKind::getReadOnlyWithRel());
-
- DataRelROLocalSection =
- getContext().getELFSection(".data.rel.ro.local", ELF::SHT_PROGBITS,
- ELF::SHF_ALLOC |ELF::SHF_WRITE,
- SectionKind::getReadOnlyWithRelLocal());
-
- MergeableConst4Section =
- getContext().getELFSection(".rodata.cst4", ELF::SHT_PROGBITS,
- ELF::SHF_ALLOC |ELF::SHF_MERGE,
- SectionKind::getMergeableConst4());
-
- MergeableConst8Section =
- getContext().getELFSection(".rodata.cst8", ELF::SHT_PROGBITS,
- ELF::SHF_ALLOC |ELF::SHF_MERGE,
- SectionKind::getMergeableConst8());
-
- MergeableConst16Section =
- getContext().getELFSection(".rodata.cst16", ELF::SHT_PROGBITS,
- ELF::SHF_ALLOC |ELF::SHF_MERGE,
- SectionKind::getMergeableConst16());
-
- StaticCtorSection =
- getContext().getELFSection(".ctors", ELF::SHT_PROGBITS,
- ELF::SHF_ALLOC |ELF::SHF_WRITE,
- SectionKind::getDataRel());
-
- StaticDtorSection =
- getContext().getELFSection(".dtors", ELF::SHT_PROGBITS,
- ELF::SHF_ALLOC |ELF::SHF_WRITE,
- SectionKind::getDataRel());
-
- // Exception Handling Sections.
-
- // FIXME: We're emitting LSDA info into a readonly section on ELF, even though
- // it contains relocatable pointers. In PIC mode, this is probably a big
- // runtime hit for C++ apps. Either the contents of the LSDA need to be
- // adjusted or this should be a data section.
- LSDASection =
- getContext().getELFSection(".gcc_except_table", ELF::SHT_PROGBITS,
- ELF::SHF_ALLOC,
- SectionKind::getReadOnly());
- // Debug Info Sections.
- DwarfAbbrevSection =
- getContext().getELFSection(".debug_abbrev", ELF::SHT_PROGBITS, 0,
- SectionKind::getMetadata());
- DwarfInfoSection =
- getContext().getELFSection(".debug_info", ELF::SHT_PROGBITS, 0,
- SectionKind::getMetadata());
- DwarfLineSection =
- getContext().getELFSection(".debug_line", ELF::SHT_PROGBITS, 0,
- SectionKind::getMetadata());
- DwarfFrameSection =
- getContext().getELFSection(".debug_frame", ELF::SHT_PROGBITS, 0,
- SectionKind::getMetadata());
- DwarfPubNamesSection =
- getContext().getELFSection(".debug_pubnames", ELF::SHT_PROGBITS, 0,
- SectionKind::getMetadata());
- DwarfPubTypesSection =
- getContext().getELFSection(".debug_pubtypes", ELF::SHT_PROGBITS, 0,
- SectionKind::getMetadata());
- DwarfStrSection =
- getContext().getELFSection(".debug_str", ELF::SHT_PROGBITS, 0,
- SectionKind::getMetadata());
- DwarfLocSection =
- getContext().getELFSection(".debug_loc", ELF::SHT_PROGBITS, 0,
- SectionKind::getMetadata());
- DwarfARangesSection =
- getContext().getELFSection(".debug_aranges", ELF::SHT_PROGBITS, 0,
- SectionKind::getMetadata());
- DwarfRangesSection =
- getContext().getELFSection(".debug_ranges", ELF::SHT_PROGBITS, 0,
- SectionKind::getMetadata());
- DwarfMacroInfoSection =
- getContext().getELFSection(".debug_macinfo", ELF::SHT_PROGBITS, 0,
- SectionKind::getMetadata());
-}
-
-const MCSection *TargetLoweringObjectFileELF::getEHFrameSection() const {
- return getContext().getELFSection(".eh_frame", ELF::SHT_PROGBITS,
- ELF::SHF_ALLOC,
- SectionKind::getDataRel());
-}
-
MCSymbol *
TargetLoweringObjectFileELF::getCFIPersonalitySymbol(const GlobalValue *GV,
Mangler *Mang,
@@ -493,221 +346,6 @@ getExprForDwarfGlobalReference(const GlobalValue *GV, Mangler *Mang,
// MachO
//===----------------------------------------------------------------------===//
-TargetLoweringObjectFileMachO::TargetLoweringObjectFileMachO()
- : TargetLoweringObjectFile(),
- TLSDataSection(0),
- TLSBSSSection(0),
- TLSTLVSection(0),
- TLSThreadInitSection(0),
- CStringSection(0),
- UStringSection(0),
- TextCoalSection(0),
- ConstTextCoalSection(0),
- ConstDataSection(0),
- DataCoalSection(0),
- DataCommonSection(0),
- DataBSSSection(0),
- FourByteConstantSection(0),
- EightByteConstantSection(0),
- SixteenByteConstantSection(0),
- LazySymbolPointerSection(0),
- NonLazySymbolPointerSection(0) {
-}
-
-void TargetLoweringObjectFileMachO::Initialize(MCContext &Ctx,
- const TargetMachine &TM) {
- IsFunctionEHFrameSymbolPrivate = false;
- SupportsWeakOmittedEHFrame = false;
-
- // .comm doesn't support alignment before Leopard.
- Triple T(((LLVMTargetMachine&)TM).getTargetTriple());
- if (T.isMacOSX() && T.isMacOSXVersionLT(10, 5))
- CommDirectiveSupportsAlignment = false;
-
- TargetLoweringObjectFile::Initialize(Ctx, TM);
-
- TextSection // .text
- = getContext().getMachOSection("__TEXT", "__text",
- MCSectionMachO::S_ATTR_PURE_INSTRUCTIONS,
- SectionKind::getText());
- DataSection // .data
- = getContext().getMachOSection("__DATA", "__data", 0,
- SectionKind::getDataRel());
-
- TLSDataSection // .tdata
- = getContext().getMachOSection("__DATA", "__thread_data",
- MCSectionMachO::S_THREAD_LOCAL_REGULAR,
- SectionKind::getDataRel());
- TLSBSSSection // .tbss
- = getContext().getMachOSection("__DATA", "__thread_bss",
- MCSectionMachO::S_THREAD_LOCAL_ZEROFILL,
- SectionKind::getThreadBSS());
-
- // TODO: Verify datarel below.
- TLSTLVSection // .tlv
- = getContext().getMachOSection("__DATA", "__thread_vars",
- MCSectionMachO::S_THREAD_LOCAL_VARIABLES,
- SectionKind::getDataRel());
-
- TLSThreadInitSection
- = getContext().getMachOSection("__DATA", "__thread_init",
- MCSectionMachO::S_THREAD_LOCAL_INIT_FUNCTION_POINTERS,
- SectionKind::getDataRel());
-
- CStringSection // .cstring
- = getContext().getMachOSection("__TEXT", "__cstring",
- MCSectionMachO::S_CSTRING_LITERALS,
- SectionKind::getMergeable1ByteCString());
- UStringSection
- = getContext().getMachOSection("__TEXT","__ustring", 0,
- SectionKind::getMergeable2ByteCString());
- FourByteConstantSection // .literal4
- = getContext().getMachOSection("__TEXT", "__literal4",
- MCSectionMachO::S_4BYTE_LITERALS,
- SectionKind::getMergeableConst4());
- EightByteConstantSection // .literal8
- = getContext().getMachOSection("__TEXT", "__literal8",
- MCSectionMachO::S_8BYTE_LITERALS,
- SectionKind::getMergeableConst8());
-
- // ld_classic doesn't support .literal16 in 32-bit mode, and ld64 falls back
- // to using it in -static mode.
- SixteenByteConstantSection = 0;
- if (TM.getRelocationModel() != Reloc::Static &&
- TM.getTargetData()->getPointerSize() == 32)
- SixteenByteConstantSection = // .literal16
- getContext().getMachOSection("__TEXT", "__literal16",
- MCSectionMachO::S_16BYTE_LITERALS,
- SectionKind::getMergeableConst16());
-
- ReadOnlySection // .const
- = getContext().getMachOSection("__TEXT", "__const", 0,
- SectionKind::getReadOnly());
-
- TextCoalSection
- = getContext().getMachOSection("__TEXT", "__textcoal_nt",
- MCSectionMachO::S_COALESCED |
- MCSectionMachO::S_ATTR_PURE_INSTRUCTIONS,
- SectionKind::getText());
- ConstTextCoalSection
- = getContext().getMachOSection("__TEXT", "__const_coal",
- MCSectionMachO::S_COALESCED,
- SectionKind::getReadOnly());
- ConstDataSection // .const_data
- = getContext().getMachOSection("__DATA", "__const", 0,
- SectionKind::getReadOnlyWithRel());
- DataCoalSection
- = getContext().getMachOSection("__DATA","__datacoal_nt",
- MCSectionMachO::S_COALESCED,
- SectionKind::getDataRel());
- DataCommonSection
- = getContext().getMachOSection("__DATA","__common",
- MCSectionMachO::S_ZEROFILL,
- SectionKind::getBSS());
- DataBSSSection
- = getContext().getMachOSection("__DATA","__bss", MCSectionMachO::S_ZEROFILL,
- SectionKind::getBSS());
-
-
- LazySymbolPointerSection
- = getContext().getMachOSection("__DATA", "__la_symbol_ptr",
- MCSectionMachO::S_LAZY_SYMBOL_POINTERS,
- SectionKind::getMetadata());
- NonLazySymbolPointerSection
- = getContext().getMachOSection("__DATA", "__nl_symbol_ptr",
- MCSectionMachO::S_NON_LAZY_SYMBOL_POINTERS,
- SectionKind::getMetadata());
-
- if (TM.getRelocationModel() == Reloc::Static) {
- StaticCtorSection
- = getContext().getMachOSection("__TEXT", "__constructor", 0,
- SectionKind::getDataRel());
- StaticDtorSection
- = getContext().getMachOSection("__TEXT", "__destructor", 0,
- SectionKind::getDataRel());
- } else {
- StaticCtorSection
- = getContext().getMachOSection("__DATA", "__mod_init_func",
- MCSectionMachO::S_MOD_INIT_FUNC_POINTERS,
- SectionKind::getDataRel());
- StaticDtorSection
- = getContext().getMachOSection("__DATA", "__mod_term_func",
- MCSectionMachO::S_MOD_TERM_FUNC_POINTERS,
- SectionKind::getDataRel());
- }
-
- // Exception Handling.
- LSDASection = getContext().getMachOSection("__TEXT", "__gcc_except_tab", 0,
- SectionKind::getReadOnlyWithRel());
-
- if (T.isMacOSX() && !T.isMacOSXVersionLT(10, 6))
- CompactUnwindSection =
- getContext().getMachOSection("__LD", "__compact_unwind",
- MCSectionMachO::S_ATTR_DEBUG,
- SectionKind::getReadOnly());
-
- // Debug Information.
- DwarfAbbrevSection =
- getContext().getMachOSection("__DWARF", "__debug_abbrev",
- MCSectionMachO::S_ATTR_DEBUG,
- SectionKind::getMetadata());
- DwarfInfoSection =
- getContext().getMachOSection("__DWARF", "__debug_info",
- MCSectionMachO::S_ATTR_DEBUG,
- SectionKind::getMetadata());
- DwarfLineSection =
- getContext().getMachOSection("__DWARF", "__debug_line",
- MCSectionMachO::S_ATTR_DEBUG,
- SectionKind::getMetadata());
- DwarfFrameSection =
- getContext().getMachOSection("__DWARF", "__debug_frame",
- MCSectionMachO::S_ATTR_DEBUG,
- SectionKind::getMetadata());
- DwarfPubNamesSection =
- getContext().getMachOSection("__DWARF", "__debug_pubnames",
- MCSectionMachO::S_ATTR_DEBUG,
- SectionKind::getMetadata());
- DwarfPubTypesSection =
- getContext().getMachOSection("__DWARF", "__debug_pubtypes",
- MCSectionMachO::S_ATTR_DEBUG,
- SectionKind::getMetadata());
- DwarfStrSection =
- getContext().getMachOSection("__DWARF", "__debug_str",
- MCSectionMachO::S_ATTR_DEBUG,
- SectionKind::getMetadata());
- DwarfLocSection =
- getContext().getMachOSection("__DWARF", "__debug_loc",
- MCSectionMachO::S_ATTR_DEBUG,
- SectionKind::getMetadata());
- DwarfARangesSection =
- getContext().getMachOSection("__DWARF", "__debug_aranges",
- MCSectionMachO::S_ATTR_DEBUG,
- SectionKind::getMetadata());
- DwarfRangesSection =
- getContext().getMachOSection("__DWARF", "__debug_ranges",
- MCSectionMachO::S_ATTR_DEBUG,
- SectionKind::getMetadata());
- DwarfMacroInfoSection =
- getContext().getMachOSection("__DWARF", "__debug_macinfo",
- MCSectionMachO::S_ATTR_DEBUG,
- SectionKind::getMetadata());
- DwarfDebugInlineSection =
- getContext().getMachOSection("__DWARF", "__debug_inlined",
- MCSectionMachO::S_ATTR_DEBUG,
- SectionKind::getMetadata());
-
- TLSExtraDataSection = TLSTLVSection;
-}
-
-const MCSection *TargetLoweringObjectFileMachO::getEHFrameSection() const {
- return getContext().getMachOSection("__TEXT", "__eh_frame",
- MCSectionMachO::S_COALESCED |
- MCSectionMachO::S_ATTR_NO_TOC |
- MCSectionMachO::S_ATTR_STRIP_STATIC_SYMS |
- MCSectionMachO::S_ATTR_LIVE_SUPPORT,
- SectionKind::getReadOnly());
-}
-
const MCSection *TargetLoweringObjectFileMachO::
getExplicitSectionGlobal(const GlobalValue *GV, SectionKind Kind,
Mangler *Mang, const TargetMachine &TM) const {
@@ -905,183 +543,10 @@ getCFIPersonalitySymbol(const GlobalValue *GV, Mangler *Mang,
return SSym;
}
-unsigned TargetLoweringObjectFileMachO::getPersonalityEncoding() const {
- return DW_EH_PE_indirect | DW_EH_PE_pcrel | DW_EH_PE_sdata4;
-}
-
-unsigned TargetLoweringObjectFileMachO::getLSDAEncoding() const {
- return DW_EH_PE_pcrel;
-}
-
-unsigned TargetLoweringObjectFileMachO::getFDEEncoding(bool CFI) const {
- return DW_EH_PE_pcrel;
-}
-
-unsigned TargetLoweringObjectFileMachO::getTTypeEncoding() const {
- return DW_EH_PE_indirect | DW_EH_PE_pcrel | DW_EH_PE_sdata4;
-}
-
//===----------------------------------------------------------------------===//
// COFF
//===----------------------------------------------------------------------===//
-TargetLoweringObjectFileCOFF::TargetLoweringObjectFileCOFF()
- : TargetLoweringObjectFile(),
- DrectveSection(0),
- PDataSection(0),
- XDataSection(0) {
-}
-
-void TargetLoweringObjectFileCOFF::Initialize(MCContext &Ctx,
- const TargetMachine &TM) {
- TargetLoweringObjectFile::Initialize(Ctx, TM);
- TextSection =
- getContext().getCOFFSection(".text",
- COFF::IMAGE_SCN_CNT_CODE |
- COFF::IMAGE_SCN_MEM_EXECUTE |
- COFF::IMAGE_SCN_MEM_READ,
- SectionKind::getText());
- DataSection =
- getContext().getCOFFSection(".data",
- COFF::IMAGE_SCN_CNT_INITIALIZED_DATA |
- COFF::IMAGE_SCN_MEM_READ |
- COFF::IMAGE_SCN_MEM_WRITE,
- SectionKind::getDataRel());
- ReadOnlySection =
- getContext().getCOFFSection(".rdata",
- COFF::IMAGE_SCN_CNT_INITIALIZED_DATA |
- COFF::IMAGE_SCN_MEM_READ,
- SectionKind::getReadOnly());
- StaticCtorSection =
- getContext().getCOFFSection(".ctors",
- COFF::IMAGE_SCN_CNT_INITIALIZED_DATA |
- COFF::IMAGE_SCN_MEM_READ |
- COFF::IMAGE_SCN_MEM_WRITE,
- SectionKind::getDataRel());
- StaticDtorSection =
- getContext().getCOFFSection(".dtors",
- COFF::IMAGE_SCN_CNT_INITIALIZED_DATA |
- COFF::IMAGE_SCN_MEM_READ |
- COFF::IMAGE_SCN_MEM_WRITE,
- SectionKind::getDataRel());
-
- // FIXME: We're emitting LSDA info into a readonly section on COFF, even
- // though it contains relocatable pointers. In PIC mode, this is probably a
- // big runtime hit for C++ apps. Either the contents of the LSDA need to be
- // adjusted or this should be a data section.
- LSDASection =
- getContext().getCOFFSection(".gcc_except_table",
- COFF::IMAGE_SCN_CNT_INITIALIZED_DATA |
- COFF::IMAGE_SCN_MEM_READ,
- SectionKind::getReadOnly());
- // Debug info.
- DwarfAbbrevSection =
- getContext().getCOFFSection(".debug_abbrev",
- COFF::IMAGE_SCN_MEM_DISCARDABLE |
- COFF::IMAGE_SCN_MEM_READ,
- SectionKind::getMetadata());
- DwarfInfoSection =
- getContext().getCOFFSection(".debug_info",
- COFF::IMAGE_SCN_MEM_DISCARDABLE |
- COFF::IMAGE_SCN_MEM_READ,
- SectionKind::getMetadata());
- DwarfLineSection =
- getContext().getCOFFSection(".debug_line",
- COFF::IMAGE_SCN_MEM_DISCARDABLE |
- COFF::IMAGE_SCN_MEM_READ,
- SectionKind::getMetadata());
- DwarfFrameSection =
- getContext().getCOFFSection(".debug_frame",
- COFF::IMAGE_SCN_MEM_DISCARDABLE |
- COFF::IMAGE_SCN_MEM_READ,
- SectionKind::getMetadata());
- DwarfPubNamesSection =
- getContext().getCOFFSection(".debug_pubnames",
- COFF::IMAGE_SCN_MEM_DISCARDABLE |
- COFF::IMAGE_SCN_MEM_READ,
- SectionKind::getMetadata());
- DwarfPubTypesSection =
- getContext().getCOFFSection(".debug_pubtypes",
- COFF::IMAGE_SCN_MEM_DISCARDABLE |
- COFF::IMAGE_SCN_MEM_READ,
- SectionKind::getMetadata());
- DwarfStrSection =
- getContext().getCOFFSection(".debug_str",
- COFF::IMAGE_SCN_MEM_DISCARDABLE |
- COFF::IMAGE_SCN_MEM_READ,
- SectionKind::getMetadata());
- DwarfLocSection =
- getContext().getCOFFSection(".debug_loc",
- COFF::IMAGE_SCN_MEM_DISCARDABLE |
- COFF::IMAGE_SCN_MEM_READ,
- SectionKind::getMetadata());
- DwarfARangesSection =
- getContext().getCOFFSection(".debug_aranges",
- COFF::IMAGE_SCN_MEM_DISCARDABLE |
- COFF::IMAGE_SCN_MEM_READ,
- SectionKind::getMetadata());
- DwarfRangesSection =
- getContext().getCOFFSection(".debug_ranges",
- COFF::IMAGE_SCN_MEM_DISCARDABLE |
- COFF::IMAGE_SCN_MEM_READ,
- SectionKind::getMetadata());
- DwarfMacroInfoSection =
- getContext().getCOFFSection(".debug_macinfo",
- COFF::IMAGE_SCN_MEM_DISCARDABLE |
- COFF::IMAGE_SCN_MEM_READ,
- SectionKind::getMetadata());
-
- DrectveSection =
- getContext().getCOFFSection(".drectve",
- COFF::IMAGE_SCN_LNK_INFO,
- SectionKind::getMetadata());
-
- PDataSection =
- getContext().getCOFFSection(".pdata",
- COFF::IMAGE_SCN_CNT_INITIALIZED_DATA |
- COFF::IMAGE_SCN_MEM_READ |
- COFF::IMAGE_SCN_MEM_WRITE,
- SectionKind::getDataRel());
-
- XDataSection =
- getContext().getCOFFSection(".xdata",
- COFF::IMAGE_SCN_CNT_INITIALIZED_DATA |
- COFF::IMAGE_SCN_MEM_READ |
- COFF::IMAGE_SCN_MEM_WRITE,
- SectionKind::getDataRel());
-}
-
-const MCSection *TargetLoweringObjectFileCOFF::getEHFrameSection() const {
- return getContext().getCOFFSection(".eh_frame",
- COFF::IMAGE_SCN_CNT_INITIALIZED_DATA |
- COFF::IMAGE_SCN_MEM_READ |
- COFF::IMAGE_SCN_MEM_WRITE,
- SectionKind::getDataRel());
-}
-
-const MCSection *TargetLoweringObjectFileCOFF::getWin64EHFuncTableSection(
- StringRef suffix) const {
- if (suffix == "")
- return PDataSection;
- return getContext().getCOFFSection((".pdata"+suffix).str(),
- COFF::IMAGE_SCN_CNT_INITIALIZED_DATA |
- COFF::IMAGE_SCN_MEM_READ |
- COFF::IMAGE_SCN_MEM_WRITE,
- SectionKind::getDataRel());
-}
-
-const MCSection *TargetLoweringObjectFileCOFF::getWin64EHTableSection(
- StringRef suffix) const {
- if (suffix == "")
- return XDataSection;
- return getContext().getCOFFSection((".xdata"+suffix).str(),
- COFF::IMAGE_SCN_CNT_INITIALIZED_DATA |
- COFF::IMAGE_SCN_MEM_READ |
- COFF::IMAGE_SCN_MEM_WRITE,
- SectionKind::getDataRel());
-}
-
-
static unsigned
getCOFFSectionFlags(SectionKind K) {
unsigned Flags = 0;
diff --git a/lib/CodeGen/TwoAddressInstructionPass.cpp b/lib/CodeGen/TwoAddressInstructionPass.cpp
index 6d6244e..d879378 100644
--- a/lib/CodeGen/TwoAddressInstructionPass.cpp
+++ b/lib/CodeGen/TwoAddressInstructionPass.cpp
@@ -177,6 +177,10 @@ char &llvm::TwoAddressInstructionPassID = TwoAddressInstructionPass::ID;
bool TwoAddressInstructionPass::Sink3AddrInstruction(MachineBasicBlock *MBB,
MachineInstr *MI, unsigned SavedReg,
MachineBasicBlock::iterator OldPos) {
+ // FIXME: Shouldn't we be trying to do this before we three-addressify the
+ // instruction? After this transformation is done, we no longer need
+ // the instruction to be in three-address form.
+
// Check if it's safe to move this instruction.
bool SeenStore = true; // Be conservative.
if (!MI->isSafeToMove(TII, AA, SeenStore))
@@ -217,7 +221,11 @@ bool TwoAddressInstructionPass::Sink3AddrInstruction(MachineBasicBlock *MBB,
break;
}
- if (!KillMI || KillMI->getParent() != MBB || KillMI == MI)
+ // If we find the instruction that kills SavedReg, and it is in an
+ // appropriate location, we can try to sink the current instruction
+ // past it.
+ if (!KillMI || KillMI->getParent() != MBB || KillMI == MI ||
+ KillMI->getDesc().isTerminator())
return false;
// If any of the definitions are used by another instruction between the
@@ -1041,6 +1049,9 @@ bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &MF) {
DEBUG(dbgs() << "********** Function: "
<< MF.getFunction()->getName() << '\n');
+ // This pass takes the function out of SSA form.
+ MRI->leaveSSA();
+
// ReMatRegs - Keep track of the registers whose def's are remat'ed.
BitVector ReMatRegs(MRI->getNumVirtRegs());
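Two-address conversion rewrites defs in place (a = a op b), so virtual registers acquire multiple definitions and the function can no longer claim SSA form; the new leaveSSA() call records that explicitly. A hedged sketch of the observable effect (MRI assumed in scope):

    MRI->leaveSSA();
    // Downstream passes can now query the flag instead of guessing:
    assert(!MRI->isSSA() && "two-address conversion leaves SSA form");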
diff --git a/lib/CodeGen/VirtRegMap.cpp b/lib/CodeGen/VirtRegMap.cpp
index 7557979..8a1cdc0 100644
--- a/lib/CodeGen/VirtRegMap.cpp
+++ b/lib/CodeGen/VirtRegMap.cpp
@@ -41,8 +41,8 @@
#include <algorithm>
using namespace llvm;
-STATISTIC(NumSpills , "Number of register spills");
-STATISTIC(NumIdCopies, "Number of identity moves eliminated after rewriting");
+STATISTIC(NumSpillSlots, "Number of spill slots allocated");
+STATISTIC(NumIdCopies, "Number of identity moves eliminated after rewriting");
//===----------------------------------------------------------------------===//
// VirtRegMap implementation
@@ -111,6 +111,7 @@ unsigned VirtRegMap::createSpillSlot(const TargetRegisterClass *RC) {
unsigned Idx = SS-LowSpillSlot;
while (Idx >= SpillSlotToUsesMap.size())
SpillSlotToUsesMap.resize(SpillSlotToUsesMap.size()*2);
+ ++NumSpillSlots;
return SS;
}
@@ -130,7 +131,6 @@ int VirtRegMap::assignVirt2StackSlot(unsigned virtReg) {
assert(Virt2StackSlotMap[virtReg] == NO_STACK_SLOT &&
"attempt to assign stack slot to already spilled register");
const TargetRegisterClass* RC = MF->getRegInfo().getRegClass(virtReg);
- ++NumSpills;
return Virt2StackSlotMap[virtReg] = createSpillSlot(RC);
}
@@ -285,14 +285,24 @@ void VirtRegMap::rewrite(SlotIndexes *Indexes) {
// Preserve semantics of sub-register operands.
if (MO.getSubReg()) {
// A virtual register kill refers to the whole register, so we may
- // have to add <imp-use,kill> operands for the super-register.
- if (MO.isUse()) {
- if (MO.isKill() && !MO.isUndef())
- SuperKills.push_back(PhysReg);
- } else if (MO.isDead())
- SuperDeads.push_back(PhysReg);
- else
- SuperDefs.push_back(PhysReg);
+ // have to add <imp-use,kill> operands for the super-register. A
+ // partial redef always kills and redefines the super-register.
+ if (MO.readsReg() && (MO.isDef() || MO.isKill()))
+ SuperKills.push_back(PhysReg);
+
+ if (MO.isDef()) {
+ // The <def,undef> flag only makes sense for sub-register defs, and
+ // we are substituting a full physreg. An <imp-use,kill> operand
+ // from the SuperKills list will represent the partial read of the
+ // super-register.
+ MO.setIsUndef(false);
+
+ // Also add implicit defs for the super-register.
+ if (MO.isDead())
+ SuperDeads.push_back(PhysReg);
+ else
+ SuperDefs.push_back(PhysReg);
+ }
// PhysReg operands cannot have subregister indexes.
PhysReg = TRI->getSubReg(PhysReg, MO.getSubReg());
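For context, this hunk sits inside the operand-rewriting loop of VirtRegMap::rewrite(). A simplified sketch of the substitution around it, ignoring the kill/dead bookkeeping the patch adjusts (VRM, TRI, VirtReg, and MO assumed in scope):

    unsigned PhysReg = VRM.getPhys(VirtReg);       // e.g. %vreg7 -> %Q0
    if (unsigned SubIdx = MO.getSubReg()) {
      // Fold the subregister index into the physical register...
      PhysReg = TRI->getSubReg(PhysReg, SubIdx);   // e.g. %Q0:ssub_1 -> %S1
      MO.setSubReg(0);                             // ...physregs carry none.
    }
    MO.setReg(PhysReg);

The change itself is that a partial redef of the virtual register must now also kill and redefine the super-register, which the added <imp-use,kill> and implicit def operands express.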
diff --git a/lib/CompilerDriver/Action.cpp b/lib/CompilerDriver/Action.cpp
deleted file mode 100644
index a8d625c..0000000
--- a/lib/CompilerDriver/Action.cpp
+++ /dev/null
@@ -1,134 +0,0 @@
-//===--- Action.cpp - The LLVM Compiler Driver ------------------*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open
-// Source License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// Action class - implementation and auxiliary functions.
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/CompilerDriver/Action.h"
-#include "llvm/CompilerDriver/BuiltinOptions.h"
-#include "llvm/CompilerDriver/Error.h"
-#include "llvm/CompilerDriver/Main.h"
-
-#include "llvm/Support/raw_ostream.h"
-#include "llvm/Support/SystemUtils.h"
-#include "llvm/Support/Program.h"
-#include "llvm/Support/TimeValue.h"
-
-#include <stdexcept>
-#include <string>
-
-using namespace llvm;
-using namespace llvmc;
-
-namespace llvmc {
-
-extern const char* ProgramName;
-
-}
-
-namespace {
-
- void PrintString (const std::string& str) {
- errs() << str << ' ';
- }
-
- void PrintCommand (const std::string& Cmd, const StrVector& Args) {
- errs() << Cmd << ' ';
- std::for_each(Args.begin(), Args.end(), &PrintString);
- errs() << '\n';
- }
-
- bool IsSegmentationFault (int returnCode) {
-#ifdef LLVM_ON_WIN32
- return (returnCode >= 0xc0000000UL);
-#else
- return (returnCode < 0);
-#endif
- }
-
- int ExecuteProgram (const std::string& name, const StrVector& args) {
- sys::Path prog(name);
-
- if (sys::path::is_relative(prog.str())) {
- prog = PrependMainExecutablePath(name, ProgramName,
- (void *)(intptr_t)&Main);
-
- if (!prog.canExecute()) {
- prog = sys::Program::FindProgramByName(name);
- if (prog.isEmpty()) {
- PrintError("Can't find program '" + name + "'");
- return -1;
- }
- }
- }
- if (!prog.canExecute()) {
- PrintError("Program '" + name + "' is not executable.");
- return -1;
- }
-
- // Build the command line vector and the redirects array.
- const sys::Path* redirects[3] = {0,0,0};
- sys::Path stdout_redirect;
-
- std::vector<const char*> argv;
- argv.reserve((args.size()+2));
- argv.push_back(name.c_str());
-
- for (StrVector::const_iterator B = args.begin(), E = args.end();
- B!=E; ++B) {
- if (*B == ">") {
- ++B;
- stdout_redirect.set(*B);
- redirects[1] = &stdout_redirect;
- }
- else {
- argv.push_back((*B).c_str());
- }
- }
- argv.push_back(0); // null terminate list.
-
- // Invoke the program.
- int ret = sys::Program::ExecuteAndWait(prog, &argv[0], 0, &redirects[0]);
-
- if (IsSegmentationFault(ret)) {
- errs() << "Segmentation fault: ";
- PrintCommand(name, args);
- }
-
- return ret;
- }
-}
-
-namespace llvmc {
- void AppendToGlobalTimeLog (const std::string& cmd, double time);
-}
-
-int llvmc::Action::Execute () const {
- if (DryRun || VerboseMode)
- PrintCommand(Command_, Args_);
-
- if (!DryRun) {
- if (Time) {
- sys::TimeValue now = sys::TimeValue::now();
- int ret = ExecuteProgram(Command_, Args_);
- sys::TimeValue now2 = sys::TimeValue::now();
- now2 -= now;
- double elapsed = now2.seconds() + now2.microseconds() / 1000000.0;
- AppendToGlobalTimeLog(Command_, elapsed);
-
- return ret;
- }
- else {
- return ExecuteProgram(Command_, Args_);
- }
- }
-
- return 0;
-}
diff --git a/lib/CompilerDriver/BuiltinOptions.cpp b/lib/CompilerDriver/BuiltinOptions.cpp
deleted file mode 100644
index 3844203..0000000
--- a/lib/CompilerDriver/BuiltinOptions.cpp
+++ /dev/null
@@ -1,61 +0,0 @@
-//===--- BuiltinOptions.cpp - The LLVM Compiler Driver ----------*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open
-// Source License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// Definitions of all global command-line option variables.
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/CompilerDriver/BuiltinOptions.h"
-
-#ifdef ENABLE_LLVMC_DYNAMIC_PLUGINS
-#include "llvm/Support/PluginLoader.h"
-#endif
-
-namespace cl = llvm::cl;
-
-namespace llvmc {
-
-cl::list<std::string> InputFilenames(cl::Positional, cl::desc("<input file>"),
- cl::ZeroOrMore);
-cl::opt<std::string> OutputFilename("o", cl::desc("Output file name"),
- cl::value_desc("file"), cl::Prefix);
-cl::opt<std::string> TempDirname("temp-dir", cl::desc("Temp dir name"),
- cl::value_desc("<directory>"), cl::Prefix);
-cl::list<std::string> Languages("x",
- cl::desc("Specify the language of the following input files"),
- cl::ZeroOrMore);
-
-cl::opt<bool> DryRun("dry-run",
- cl::desc("Only pretend to run commands"));
-cl::opt<bool> Time("time", cl::desc("Time individual commands"));
-cl::opt<bool> VerboseMode("v",
- cl::desc("Enable verbose mode"));
-
-cl::opt<bool> CheckGraph("check-graph",
- cl::desc("Check the compilation graph for errors"),
- cl::Hidden);
-cl::opt<bool> WriteGraph("write-graph",
- cl::desc("Write compilation-graph.dot file"),
- cl::Hidden);
-cl::opt<bool> ViewGraph("view-graph",
- cl::desc("Show compilation graph in GhostView"),
- cl::Hidden);
-
-cl::opt<SaveTempsEnum::Values> SaveTemps
-("save-temps", cl::desc("Keep temporary files"),
- cl::init(SaveTempsEnum::Unset),
- cl::values(clEnumValN(SaveTempsEnum::Obj, "obj",
- "Save files in the directory specified with -o"),
- clEnumValN(SaveTempsEnum::Cwd, "cwd",
- "Use current working directory"),
- clEnumValN(SaveTempsEnum::Obj, "", "Same as 'cwd'"),
- clEnumValEnd),
- cl::ValueOptional);
-
-} // End namespace llvmc.
diff --git a/lib/CompilerDriver/CMakeLists.txt b/lib/CompilerDriver/CMakeLists.txt
deleted file mode 100644
index a12b337..0000000
--- a/lib/CompilerDriver/CMakeLists.txt
+++ /dev/null
@@ -1,12 +0,0 @@
-set(LLVM_LINK_COMPONENTS support)
-
-# We don't want this library to appear in `llvm-config --libs` output,
-# so its name doesn't start with "LLVM".
-
-add_llvm_library(CompilerDriver
- Action.cpp
- BuiltinOptions.cpp
- CompilationGraph.cpp
- Main.cpp
- Tool.cpp
- )
diff --git a/lib/CompilerDriver/CompilationGraph.cpp b/lib/CompilerDriver/CompilationGraph.cpp
deleted file mode 100644
index 33c6566..0000000
--- a/lib/CompilerDriver/CompilationGraph.cpp
+++ /dev/null
@@ -1,655 +0,0 @@
-//===--- CompilationGraph.cpp - The LLVM Compiler Driver --------*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open
-// Source License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// Compilation graph - implementation.
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/CompilerDriver/BuiltinOptions.h"
-#include "llvm/CompilerDriver/CompilationGraph.h"
-#include "llvm/CompilerDriver/Error.h"
-
-#include "llvm/ADT/STLExtras.h"
-#include "llvm/Support/DOTGraphTraits.h"
-#include "llvm/Support/GraphWriter.h"
-#include "llvm/Support/raw_ostream.h"
-
-#include <algorithm>
-#include <cstring>
-#include <iterator>
-#include <limits>
-#include <queue>
-
-using namespace llvm;
-using namespace llvmc;
-
-namespace llvmc {
-
- const std::string* LanguageMap::GetLanguage(const sys::Path& File) const {
- // Remove the '.'.
- StringRef suf = sys::path::extension(File.str()).substr(1);
- LanguageMap::const_iterator Lang =
- this->find(suf.empty() ? "*empty*" : suf);
- if (Lang == this->end()) {
- PrintError("File '" + File.str() + "' has unknown suffix '"
- + suf.str() + '\'');
- return 0;
- }
- return &Lang->second;
- }
-}
-
-namespace {
-
- /// ChooseEdge - Return the edge with the maximum weight. Returns 0 on error.
- template <class C>
- const Edge* ChooseEdge(const C& EdgesContainer,
- const InputLanguagesSet& InLangs,
- const std::string& NodeName = "root") {
- const Edge* MaxEdge = 0;
- int MaxWeight = 0;
- bool SingleMax = true;
-
- // TODO: fix calculation of SingleMax.
- for (typename C::const_iterator B = EdgesContainer.begin(),
- E = EdgesContainer.end(); B != E; ++B) {
- const Edge* e = B->getPtr();
- int EW = e->Weight(InLangs);
- if (EW < 0) {
- // (error) invocation in TableGen -> we don't need to print an error
- // message.
- return 0;
- }
- if (EW > MaxWeight) {
- MaxEdge = e;
- MaxWeight = EW;
- SingleMax = true;
- } else if (EW == MaxWeight) {
- SingleMax = false;
- }
- }
-
- if (!SingleMax) {
- PrintError("Node " + NodeName + ": multiple maximal outward edges found!"
- " Most probably a specification error.");
- return 0;
- }
- if (!MaxEdge) {
- PrintError("Node " + NodeName + ": no maximal outward edge found!"
- " Most probably a specification error.");
- return 0;
- }
- return MaxEdge;
- }
-
-}
-
-void Node::AddEdge(Edge* Edg) {
- // If there already was an edge between two nodes, modify it instead
- // of adding a new edge.
- const std::string& ToolName = Edg->ToolName();
- for (container_type::iterator B = OutEdges.begin(), E = OutEdges.end();
- B != E; ++B) {
- if ((*B)->ToolName() == ToolName) {
- llvm::IntrusiveRefCntPtr<Edge>(Edg).swap(*B);
- return;
- }
- }
- OutEdges.push_back(llvm::IntrusiveRefCntPtr<Edge>(Edg));
-}
-
-CompilationGraph::CompilationGraph() {
- NodesMap["root"] = Node(this);
-}
-
-Node* CompilationGraph::getNode(const std::string& ToolName) {
- nodes_map_type::iterator I = NodesMap.find(ToolName);
- if (I == NodesMap.end()) {
- PrintError("Node " + ToolName + " is not in the graph");
- return 0;
- }
- return &I->second;
-}
-
-const Node* CompilationGraph::getNode(const std::string& ToolName) const {
- nodes_map_type::const_iterator I = NodesMap.find(ToolName);
- if (I == NodesMap.end()) {
- PrintError("Node " + ToolName + " is not in the graph!");
- return 0;
- }
- return &I->second;
-}
-
-// Find the tools list corresponding to the given language name.
-const CompilationGraph::tools_vector_type*
-CompilationGraph::getToolsVector(const std::string& LangName) const
-{
- tools_map_type::const_iterator I = ToolsMap.find(LangName);
- if (I == ToolsMap.end()) {
- PrintError("No tool corresponding to the language " + LangName + " found");
- return 0;
- }
- return &I->second;
-}
-
-void CompilationGraph::insertNode(Tool* V) {
- if (NodesMap.count(V->Name()) == 0)
- NodesMap[V->Name()] = Node(this, V);
-}
-
-int CompilationGraph::insertEdge(const std::string& A, Edge* Edg) {
- Node* B = getNode(Edg->ToolName());
- if (B == 0)
- return 1;
-
- if (A == "root") {
- const char** InLangs = B->ToolPtr->InputLanguages();
- for (;*InLangs; ++InLangs)
- ToolsMap[*InLangs].push_back(IntrusiveRefCntPtr<Edge>(Edg));
- NodesMap["root"].AddEdge(Edg);
- }
- else {
- Node* N = getNode(A);
- if (N == 0)
- return 1;
-
- N->AddEdge(Edg);
- }
- // Increase the inward edge counter.
- B->IncrInEdges();
-
- return 0;
-}
-
-// Pass input file through the chain until we bump into a Join node or
-// a node that says that it is the last.
-int CompilationGraph::PassThroughGraph (const sys::Path& InFile,
- const Node* StartNode,
- const InputLanguagesSet& InLangs,
- const sys::Path& TempDir,
- const LanguageMap& LangMap) const {
- sys::Path In = InFile;
- const Node* CurNode = StartNode;
-
- while(true) {
- Tool* CurTool = CurNode->ToolPtr.getPtr();
-
- if (CurTool->IsJoin()) {
- JoinTool& JT = static_cast<JoinTool&>(*CurTool);
- JT.AddToJoinList(In);
- break;
- }
-
- Action CurAction;
- if (int ret = CurTool->GenerateAction(CurAction, In, CurNode->HasChildren(),
- TempDir, InLangs, LangMap)) {
- return ret;
- }
-
- if (int ret = CurAction.Execute())
- return ret;
-
- if (CurAction.StopCompilation())
- return 0;
-
- const Edge* Edg = ChooseEdge(CurNode->OutEdges, InLangs, CurNode->Name());
- if (Edg == 0)
- return 1;
-
- CurNode = getNode(Edg->ToolName());
- if (CurNode == 0)
- return 1;
-
- In = CurAction.OutFile();
- }
-
- return 0;
-}
-
-// Find the head of the toolchain corresponding to the given file.
-// Also, insert an input language into InLangs.
-const Node* CompilationGraph::
-FindToolChain(const sys::Path& In, const std::string* ForceLanguage,
- InputLanguagesSet& InLangs, const LanguageMap& LangMap) const {
-
- // Determine the input language.
- const std::string* InLang = (ForceLanguage ? ForceLanguage
- : LangMap.GetLanguage(In));
- if (InLang == 0)
- return 0;
- const std::string& InLanguage = *InLang;
-
- // Add the current input language to the input language set.
- InLangs.insert(InLanguage);
-
- // Find the toolchain for the input language.
- const tools_vector_type* pTV = getToolsVector(InLanguage);
- if (pTV == 0)
- return 0;
-
- const tools_vector_type& TV = *pTV;
- if (TV.empty()) {
- PrintError("No toolchain corresponding to language "
- + InLanguage + " found");
- return 0;
- }
-
- const Edge* Edg = ChooseEdge(TV, InLangs);
- if (Edg == 0)
- return 0;
-
- return getNode(Edg->ToolName());
-}
-
-// Helper function used by Build().
-// Traverses initial portions of the toolchains (up to the first Join node).
-// This function is also responsible for handling the -x option.
-int CompilationGraph::BuildInitial (InputLanguagesSet& InLangs,
- const sys::Path& TempDir,
- const LanguageMap& LangMap) {
- // This is related to -x option handling.
- cl::list<std::string>::const_iterator xIter = Languages.begin(),
- xBegin = xIter, xEnd = Languages.end();
- bool xEmpty = true;
- const std::string* xLanguage = 0;
- unsigned xPos = 0, xPosNext = 0, filePos = 0;
-
- if (xIter != xEnd) {
- xEmpty = false;
- xPos = Languages.getPosition(xIter - xBegin);
- cl::list<std::string>::const_iterator xNext = llvm::next(xIter);
- xPosNext = (xNext == xEnd) ? std::numeric_limits<unsigned>::max()
- : Languages.getPosition(xNext - xBegin);
- xLanguage = (*xIter == "none") ? 0 : &(*xIter);
- }
-
- // For each input file:
- for (cl::list<std::string>::const_iterator B = InputFilenames.begin(),
- CB = B, E = InputFilenames.end(); B != E; ++B) {
- sys::Path In = sys::Path(*B);
-
- // Code for handling the -x option.
- // Output: std::string* xLanguage (can be NULL).
- if (!xEmpty) {
- filePos = InputFilenames.getPosition(B - CB);
-
- if (xPos < filePos) {
- if (filePos < xPosNext) {
- xLanguage = (*xIter == "none") ? 0 : &(*xIter);
- }
- else { // filePos >= xPosNext
- // Skip xIters while filePos > xPosNext
- while (filePos > xPosNext) {
- ++xIter;
- xPos = xPosNext;
-
- cl::list<std::string>::const_iterator xNext = llvm::next(xIter);
- if (xNext == xEnd)
- xPosNext = std::numeric_limits<unsigned>::max();
- else
- xPosNext = Languages.getPosition(xNext - xBegin);
- xLanguage = (*xIter == "none") ? 0 : &(*xIter);
- }
- }
- }
- }
-
- // Find the toolchain corresponding to this file.
- const Node* N = FindToolChain(In, xLanguage, InLangs, LangMap);
- if (N == 0)
- return 1;
- // Pass file through the chain starting at head.
- if (int ret = PassThroughGraph(In, N, InLangs, TempDir, LangMap))
- return ret;
- }
-
- return 0;
-}
-
-// Sort the nodes in topological order.
-int CompilationGraph::TopologicalSort(std::vector<const Node*>& Out) {
- std::queue<const Node*> Q;
-
- Node* Root = getNode("root");
- if (Root == 0)
- return 1;
-
- Q.push(Root);
-
- while (!Q.empty()) {
- const Node* A = Q.front();
- Q.pop();
- Out.push_back(A);
- for (Node::const_iterator EB = A->EdgesBegin(), EE = A->EdgesEnd();
- EB != EE; ++EB) {
- Node* B = getNode((*EB)->ToolName());
- if (B == 0)
- return 1;
-
- B->DecrInEdges();
- if (B->HasNoInEdges())
- Q.push(B);
- }
- }
-
- return 0;
-}
-
-namespace {
- bool NotJoinNode(const Node* N) {
- return N->ToolPtr ? !N->ToolPtr->IsJoin() : true;
- }
-}
-
-// Call TopologicalSort and filter the resulting list to include
-// only Join nodes.
-int CompilationGraph::
-TopologicalSortFilterJoinNodes(std::vector<const Node*>& Out) {
- std::vector<const Node*> TopSorted;
- if (int ret = TopologicalSort(TopSorted))
- return ret;
- std::remove_copy_if(TopSorted.begin(), TopSorted.end(),
- std::back_inserter(Out), NotJoinNode);
-
- return 0;
-}
-
-int CompilationGraph::Build (const sys::Path& TempDir,
- const LanguageMap& LangMap) {
- InputLanguagesSet InLangs;
- bool WasSomeActionGenerated = !InputFilenames.empty();
-
- // Traverse initial parts of the toolchains and fill in InLangs.
- if (int ret = BuildInitial(InLangs, TempDir, LangMap))
- return ret;
-
- std::vector<const Node*> JTV;
- if (int ret = TopologicalSortFilterJoinNodes(JTV))
- return ret;
-
- // For all join nodes in topological order:
- for (std::vector<const Node*>::iterator B = JTV.begin(), E = JTV.end();
- B != E; ++B) {
-
- const Node* CurNode = *B;
- JoinTool* JT = &static_cast<JoinTool&>(*CurNode->ToolPtr.getPtr());
-
- // Are there any files in the join list?
- if (JT->JoinListEmpty() && !(JT->WorksOnEmpty() && InputFilenames.empty()))
- continue;
-
- WasSomeActionGenerated = true;
- Action CurAction;
- if (int ret = JT->GenerateAction(CurAction, CurNode->HasChildren(),
- TempDir, InLangs, LangMap)) {
- return ret;
- }
-
- if (int ret = CurAction.Execute())
- return ret;
-
- if (CurAction.StopCompilation())
- return 0;
-
- const Edge* Edg = ChooseEdge(CurNode->OutEdges, InLangs, CurNode->Name());
- if (Edg == 0)
- return 1;
-
- const Node* NextNode = getNode(Edg->ToolName());
- if (NextNode == 0)
- return 1;
-
- if (int ret = PassThroughGraph(sys::Path(CurAction.OutFile()), NextNode,
- InLangs, TempDir, LangMap)) {
- return ret;
- }
- }
-
- if (!WasSomeActionGenerated) {
- PrintError("no input files");
- return 1;
- }
-
- return 0;
-}
-
-int CompilationGraph::CheckLanguageNames() const {
- int ret = 0;
-
- // Check that names for output and input languages on all edges do match.
- for (const_nodes_iterator B = this->NodesMap.begin(),
- E = this->NodesMap.end(); B != E; ++B) {
-
- const Node & N1 = B->second;
- if (N1.ToolPtr) {
- for (Node::const_iterator EB = N1.EdgesBegin(), EE = N1.EdgesEnd();
- EB != EE; ++EB) {
- const Node* N2 = this->getNode((*EB)->ToolName());
- if (N2 == 0)
- return 1;
-
- if (!N2->ToolPtr) {
- ++ret;
- errs() << "Error: there is an edge from '" << N1.ToolPtr->Name()
- << "' back to the root!\n\n";
- continue;
- }
-
- const char** OutLangs = N1.ToolPtr->OutputLanguages();
- const char** InLangs = N2->ToolPtr->InputLanguages();
- bool eq = false;
- const char* OutLang = 0;
- for (;*OutLangs; ++OutLangs) {
- OutLang = *OutLangs;
- for (;*InLangs; ++InLangs) {
- if (std::strcmp(OutLang, *InLangs) == 0) {
- eq = true;
- break;
- }
- }
- }
-
- if (!eq) {
- ++ret;
- errs() << "Error: Output->input language mismatch in the edge '"
- << N1.ToolPtr->Name() << "' -> '" << N2->ToolPtr->Name()
- << "'!\n"
- << "Expected one of { ";
-
- InLangs = N2->ToolPtr->InputLanguages();
- for (;*InLangs; ++InLangs) {
- errs() << '\'' << *InLangs << (*(InLangs+1) ? "', " : "'");
- }
-
- errs() << " }, but got '" << OutLang << "'!\n\n";
- }
-
- }
- }
- }
-
- return ret;
-}
-
-int CompilationGraph::CheckMultipleDefaultEdges() const {
- int ret = 0;
- InputLanguagesSet Dummy;
-
- // For all nodes, just iterate over the outgoing edges and check if there is
- // more than one edge with maximum weight.
- for (const_nodes_iterator B = this->NodesMap.begin(),
- E = this->NodesMap.end(); B != E; ++B) {
- const Node& N = B->second;
- int MaxWeight = -1024;
-
- // Ignore the root node.
- if (!N.ToolPtr)
- continue;
-
- for (Node::const_iterator EB = N.EdgesBegin(), EE = N.EdgesEnd();
- EB != EE; ++EB) {
- int EdgeWeight = (*EB)->Weight(Dummy);
- if (EdgeWeight > MaxWeight) {
- MaxWeight = EdgeWeight;
- }
- else if (EdgeWeight == MaxWeight) {
- ++ret;
- errs() << "Error: there are multiple maximal edges stemming from the '"
- << N.ToolPtr->Name() << "' node!\n\n";
- break;
- }
- }
- }
-
- return ret;
-}
-
-int CompilationGraph::CheckCycles() {
- unsigned deleted = 0;
- std::queue<Node*> Q;
-
- Node* Root = getNode("root");
- if (Root == 0)
- return 1;
-
- Q.push(Root);
-
- // Try to delete all nodes that have no ingoing edges, starting from the
- // root. If there are any nodes left after this operation, then we have a
- // cycle. This relies on '--check-graph' not performing the topological sort.
- while (!Q.empty()) {
- Node* A = Q.front();
- Q.pop();
- ++deleted;
-
- for (Node::iterator EB = A->EdgesBegin(), EE = A->EdgesEnd();
- EB != EE; ++EB) {
- Node* B = getNode((*EB)->ToolName());
- if (B == 0)
- return 1;
-
- B->DecrInEdges();
- if (B->HasNoInEdges())
- Q.push(B);
- }
- }
-
- if (deleted != NodesMap.size()) {
- errs() << "Error: there are cycles in the compilation graph!\n"
- << "Try inspecting the diagram produced by "
- << "'llvmc --view-graph'.\n\n";
- return 1;
- }
-
- return 0;
-}
-
-int CompilationGraph::Check () {
- // We try to catch as many errors as we can in one go.
- int errs = 0;
- int ret = 0;
-
- // Check that output/input language names match.
- ret = this->CheckLanguageNames();
- if (ret < 0)
- return 1;
- errs += ret;
-
- // Check for multiple default edges.
- ret = this->CheckMultipleDefaultEdges();
- if (ret < 0)
- return 1;
- errs += ret;
-
- // Check for cycles.
- ret = this->CheckCycles();
- if (ret < 0)
- return 1;
- errs += ret;
-
- return errs;
-}
-
-// Code related to graph visualization.
-
-namespace {
-
-std::string SquashStrArray (const char** StrArr) {
- std::string ret;
-
- for (; *StrArr; ++StrArr) {
- if (*(StrArr + 1)) {
- ret += *StrArr;
- ret += ", ";
- }
- else {
- ret += *StrArr;
- }
- }
-
- return ret;
-}
-
-} // End anonymous namespace.
-
-namespace llvm {
- template <>
- struct DOTGraphTraits<llvmc::CompilationGraph*>
- : public DefaultDOTGraphTraits
- {
- DOTGraphTraits (bool isSimple=false) : DefaultDOTGraphTraits(isSimple) {}
-
- template<typename GraphType>
- static std::string getNodeLabel(const Node* N, const GraphType&)
- {
- if (N->ToolPtr)
- if (N->ToolPtr->IsJoin())
- return N->Name() + "\n (join" +
- (N->HasChildren() ? ")"
- : std::string(": ") +
- SquashStrArray(N->ToolPtr->OutputLanguages()) + ')');
- else
- return N->Name();
- else
- return "root";
- }
-
- template<typename EdgeIter>
- static std::string getEdgeSourceLabel(const Node* N, EdgeIter I) {
- if (N->ToolPtr) {
- return SquashStrArray(N->ToolPtr->OutputLanguages());
- }
- else {
- return SquashStrArray(I->ToolPtr->InputLanguages());
- }
- }
- };
-
-} // End namespace llvm
-
-int CompilationGraph::writeGraph(const std::string& OutputFilename) {
- std::string ErrorInfo;
- raw_fd_ostream O(OutputFilename.c_str(), ErrorInfo);
-
- if (ErrorInfo.empty()) {
- errs() << "Writing '"<< OutputFilename << "' file...";
- llvm::WriteGraph(O, this);
- errs() << "done.\n";
- }
- else {
- PrintError("Error opening file '" + OutputFilename + "' for writing!");
- return 1;
- }
-
- return 0;
-}
-
-void CompilationGraph::viewGraph() {
- llvm::ViewGraph(this, "compilation-graph");
-}
diff --git a/lib/CompilerDriver/Main.cpp b/lib/CompilerDriver/Main.cpp
deleted file mode 100644
index 7120027..0000000
--- a/lib/CompilerDriver/Main.cpp
+++ /dev/null
@@ -1,146 +0,0 @@
-//===--- Main.cpp - The LLVM Compiler Driver --------------------*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open
-// Source License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// llvmc::Main function - driver entry point.
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/CompilerDriver/AutoGenerated.h"
-#include "llvm/CompilerDriver/BuiltinOptions.h"
-#include "llvm/CompilerDriver/CompilationGraph.h"
-#include "llvm/CompilerDriver/Error.h"
-
-#include "llvm/Support/FileSystem.h"
-#include "llvm/Support/raw_ostream.h"
-#include "llvm/Support/Path.h"
-
-#include <sstream>
-#include <string>
-
-namespace cl = llvm::cl;
-namespace sys = llvm::sys;
-using namespace llvmc;
-
-namespace {
-
- std::stringstream* GlobalTimeLog;
-
- /// GetTempDir - Get the temporary directory location. Returns non-zero value
- /// on error.
- int GetTempDir(sys::Path& tempDir) {
- // The --temp-dir option.
- if (!TempDirname.empty()) {
- tempDir = TempDirname;
- }
- // GCC 4.5-style -save-temps handling.
- else if (SaveTemps == SaveTempsEnum::Unset) {
- tempDir = sys::Path::GetTemporaryDirectory();
- return 0;
- }
- else if (SaveTemps == SaveTempsEnum::Obj && !OutputFilename.empty()) {
- tempDir = sys::path::parent_path(OutputFilename);
- }
- else {
- // SaveTemps == Cwd --> use current dir (leave tempDir empty).
- return 0;
- }
-
- bool Exists;
- if (llvm::sys::fs::exists(tempDir.str(), Exists) || !Exists) {
- std::string ErrMsg;
- if (tempDir.createDirectoryOnDisk(true, &ErrMsg)) {
- PrintError(ErrMsg);
- return 1;
- }
- }
-
- return 0;
- }
-
- /// BuildTargets - A small wrapper for CompilationGraph::Build. Returns
- /// non-zero value in case of error.
- int BuildTargets(CompilationGraph& graph, const LanguageMap& langMap) {
- int ret;
- sys::Path tempDir;
- bool toDelete = (SaveTemps == SaveTempsEnum::Unset);
-
- if (int ret = GetTempDir(tempDir))
- return ret;
-
- ret = graph.Build(tempDir, langMap);
-
- if (toDelete)
- tempDir.eraseFromDisk(true);
-
- return ret;
- }
-}
-
-namespace llvmc {
-
-// Used to implement -time option. External linkage is intentional.
-void AppendToGlobalTimeLog(const std::string& cmd, double time) {
- *GlobalTimeLog << "# " << cmd << ' ' << time << '\n';
-}
-
-// Sometimes user code wants to access the argv[0] value.
-const char* ProgramName;
-
-int Main(int argc, char** argv) {
- int ret = 0;
- LanguageMap langMap;
- CompilationGraph graph;
-
- ProgramName = argv[0];
-
- cl::ParseCommandLineOptions
- (argc, argv,
- /* Overview = */ "LLVM Compiler Driver (Work In Progress)",
- /* ReadResponseFiles = */ false);
-
- if (int ret = autogenerated::RunInitialization(langMap, graph))
- return ret;
-
- if (CheckGraph) {
- ret = graph.Check();
- if (!ret)
- llvm::errs() << "check-graph: no errors found.\n";
-
- return ret;
- }
-
- if (ViewGraph) {
- graph.viewGraph();
- if (!WriteGraph)
- return 0;
- }
-
- if (WriteGraph) {
- const std::string& Out = (OutputFilename.empty()
- ? std::string("compilation-graph.dot")
- : OutputFilename);
- return graph.writeGraph(Out);
- }
-
- if (Time) {
- GlobalTimeLog = new std::stringstream;
- GlobalTimeLog->precision(2);
- }
-
- ret = BuildTargets(graph, langMap);
-
- if (Time) {
- llvm::errs() << GlobalTimeLog->str();
- delete GlobalTimeLog;
- }
-
- return ret;
-}
-
-} // end namespace llvmc
diff --git a/lib/CompilerDriver/Makefile b/lib/CompilerDriver/Makefile
deleted file mode 100644
index 10cfa4f..0000000
--- a/lib/CompilerDriver/Makefile
+++ /dev/null
@@ -1,20 +0,0 @@
-##===- lib/CompilerDriver/Makefile -------------------------*- Makefile -*-===##
-#
-# The LLVM Compiler Infrastructure
-#
-# This file is distributed under the University of Illinois Open
-# Source License. See LICENSE.TXT for details.
-#
-##===----------------------------------------------------------------------===##
-
-LEVEL = ../..
-
-# We don't want this library to appear in `llvm-config --libs` output, so its
-# name doesn't start with "LLVM" and NO_LLVM_CONFIG is set.
-
-LIBRARYNAME = CompilerDriver
-LINK_COMPONENTS = support
-NO_LLVM_CONFIG = 1
-
-
-include $(LEVEL)/Makefile.common
diff --git a/lib/CompilerDriver/Tool.cpp b/lib/CompilerDriver/Tool.cpp
deleted file mode 100644
index 876759a..0000000
--- a/lib/CompilerDriver/Tool.cpp
+++ /dev/null
@@ -1,95 +0,0 @@
-//===--- Tool.cpp - The LLVM Compiler Driver --------------------*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open
-// Source License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// Tool base class - implementation details.
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/CompilerDriver/BuiltinOptions.h"
-#include "llvm/CompilerDriver/Tool.h"
-
-#include "llvm/ADT/StringExtras.h"
-#include "llvm/Support/Path.h"
-
-#include <algorithm>
-
-using namespace llvm;
-using namespace llvmc;
-
-namespace {
- sys::Path MakeTempFile(const sys::Path& TempDir, const std::string& BaseName,
- const std::string& Suffix) {
- sys::Path Out;
-
- // Make sure we don't end up with path names like '/file.o' if the
- // TempDir is empty.
- if (TempDir.empty()) {
- Out.set(BaseName);
- }
- else {
- Out = TempDir;
- Out.appendComponent(BaseName);
- }
- Out.appendSuffix(Suffix);
- // NOTE: makeUnique always *creates* a unique temporary file,
- // which is good, since there will be no races. However, some
- // tools do not like it when the output file already exists, so
- // they need to be placated with -f or something like that.
- Out.makeUnique(true, NULL);
- return Out;
- }
-}
-
-sys::Path Tool::OutFilename(const sys::Path& In,
- const sys::Path& TempDir,
- bool StopCompilation,
- const char* OutputSuffix) const {
- sys::Path Out;
-
- if (StopCompilation) {
- if (!OutputFilename.empty()) {
- Out.set(OutputFilename);
- }
- else if (IsJoin()) {
- Out.set("a");
- Out.appendSuffix(OutputSuffix);
- }
- else {
- Out.set(sys::path::stem(In.str()));
- Out.appendSuffix(OutputSuffix);
- }
- }
- else {
- if (IsJoin())
- Out = MakeTempFile(TempDir, "tmp", OutputSuffix);
- else
- Out = MakeTempFile(TempDir, sys::path::stem(In.str()), OutputSuffix);
- }
- return Out;
-}
-
-namespace {
- template <class A, class B>
- bool CompareFirst (std::pair<A,B> p1, std::pair<A,B> p2) {
- return std::less<A>()(p1.first, p2.first);
- }
-}
-
-StrVector Tool::SortArgs(ArgsVector& Args) const {
- StrVector Out;
-
- // HACK: this won't be needed when we'll migrate away from CommandLine.
- std::stable_sort(Args.begin(), Args.end(),
- &CompareFirst<unsigned, std::string>);
- for (ArgsVector::iterator B = Args.begin(), E = Args.end(); B != E; ++B) {
- Out.push_back(B->second);
- }
-
- return Out;
-}
diff --git a/lib/DebugInfo/CMakeLists.txt b/lib/DebugInfo/CMakeLists.txt
new file mode 100644
index 0000000..fdffcb6
--- /dev/null
+++ b/lib/DebugInfo/CMakeLists.txt
@@ -0,0 +1,16 @@
+add_llvm_library(LLVMDebugInfo
+ DIContext.cpp
+ DWARFAbbreviationDeclaration.cpp
+ DWARFCompileUnit.cpp
+ DWARFContext.cpp
+ DWARFDebugAbbrev.cpp
+ DWARFDebugArangeSet.cpp
+ DWARFDebugAranges.cpp
+ DWARFDebugInfoEntry.cpp
+ DWARFDebugLine.cpp
+ DWARFFormValue.cpp
+ )
+
+add_llvm_library_dependencies(LLVMDebugInfo
+ LLVMSupport
+ )
diff --git a/lib/DebugInfo/DIContext.cpp b/lib/DebugInfo/DIContext.cpp
new file mode 100644
index 0000000..e2fd55f
--- /dev/null
+++ b/lib/DebugInfo/DIContext.cpp
@@ -0,0 +1,24 @@
+//===-- DIContext.cpp -----------------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/DebugInfo/DIContext.h"
+#include "DWARFContext.h"
+using namespace llvm;
+
+DIContext::~DIContext() {}
+
+DIContext *DIContext::getDWARFContext(bool isLittleEndian,
+ StringRef infoSection,
+ StringRef abbrevSection,
+ StringRef aRangeSection,
+ StringRef lineSection,
+ StringRef stringSection) {
+ return new DWARFContextInMemory(isLittleEndian, infoSection, abbrevSection,
+ aRangeSection, lineSection, stringSection);
+}
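
This factory is the only public entry point into the new DWARF parser. A minimal sketch of a caller, assuming the five section contents have already been extracted from an object file into StringRefs; the symbolize() wrapper and its parameters are illustrative, not part of the API:

    #include "llvm/DebugInfo/DIContext.h"
    using namespace llvm;

    // Illustrative wrapper: map an address to file:line using raw sections.
    DILineInfo symbolize(uint64_t Address, bool IsLittleEndian,
                         StringRef Info, StringRef Abbrev, StringRef ARanges,
                         StringRef Line, StringRef Str) {
      // DWARFContextInMemory keeps the StringRefs without copying, so the
      // backing buffers must outlive the context.
      DIContext *Ctx = DIContext::getDWARFContext(IsLittleEndian, Info, Abbrev,
                                                  ARanges, Line, Str);
      DILineInfo Result = Ctx->getLineInfoForAddress(Address);
      delete Ctx;
      return Result;
    }
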
diff --git a/lib/DebugInfo/DWARFAbbreviationDeclaration.cpp b/lib/DebugInfo/DWARFAbbreviationDeclaration.cpp
new file mode 100644
index 0000000..0df692c
--- /dev/null
+++ b/lib/DebugInfo/DWARFAbbreviationDeclaration.cpp
@@ -0,0 +1,83 @@
+//===-- DWARFAbbreviationDeclaration.cpp ----------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "DWARFAbbreviationDeclaration.h"
+#include "llvm/Support/Dwarf.h"
+#include "llvm/Support/Format.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+using namespace dwarf;
+
+bool
+DWARFAbbreviationDeclaration::extract(DataExtractor data, uint32_t* offset_ptr){
+ return extract(data, offset_ptr, data.getULEB128(offset_ptr));
+}
+
+bool
+DWARFAbbreviationDeclaration::extract(DataExtractor data, uint32_t* offset_ptr,
+ uint32_t code) {
+ Code = code;
+ Attributes.clear();
+ if (Code) {
+ Tag = data.getULEB128(offset_ptr);
+ HasChildren = data.getU8(offset_ptr);
+
+ while (data.isValidOffset(*offset_ptr)) {
+ uint16_t attr = data.getULEB128(offset_ptr);
+ uint16_t form = data.getULEB128(offset_ptr);
+
+ if (attr && form)
+ Attributes.push_back(DWARFAttribute(attr, form));
+ else
+ break;
+ }
+
+ return Tag != 0;
+ } else {
+ Tag = 0;
+ HasChildren = false;
+ }
+
+ return false;
+}
+
+void DWARFAbbreviationDeclaration::dump(raw_ostream &OS) const {
+ const char *tagString = TagString(getTag());
+ OS << '[' << getCode() << "] ";
+ if (tagString)
+ OS << tagString;
+ else
+ OS << format("DW_TAG_Unknown_%x", getTag());
+ OS << "\tDW_CHILDREN_" << (hasChildren() ? "yes" : "no") << '\n';
+ for (unsigned i = 0, e = Attributes.size(); i != e; ++i) {
+ OS << '\t';
+ const char *attrString = AttributeString(Attributes[i].getAttribute());
+ if (attrString)
+ OS << attrString;
+ else
+ OS << format("DW_AT_Unknown_%x", Attributes[i].getAttribute());
+ OS << '\t';
+ const char *formString = FormEncodingString(Attributes[i].getForm());
+ if (formString)
+ OS << formString;
+ else
+ OS << format("DW_FORM_Unknown_%x", Attributes[i].getForm());
+ OS << '\n';
+ }
+ OS << '\n';
+}
+
+uint32_t
+DWARFAbbreviationDeclaration::findAttributeIndex(uint16_t attr) const {
+ for (uint32_t i = 0, e = Attributes.size(); i != e; ++i) {
+ if (Attributes[i].getAttribute() == attr)
+ return i;
+ }
+ return -1U;
+}
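
The extractor walks a ULEB128 stream: abbreviation code, tag, has-children byte, then (attribute, form) pairs terminated by a (0, 0) pair. A sketch that decodes a hand-assembled record (every value below fits in one ULEB128 byte, so the bytes can be written literally; the buffer is made up for illustration):

    #include "DWARFAbbreviationDeclaration.h"
    #include "llvm/Support/raw_ostream.h"
    using namespace llvm;

    void decodeOneAbbrev() {
      // Abbrev code 1: DW_TAG_compile_unit (0x11), DW_CHILDREN_yes (0x01),
      // one attribute: DW_AT_name (0x03) / DW_FORM_string (0x08), then (0,0).
      static const char Bytes[] = { 0x01, 0x11, 0x01, 0x03, 0x08, 0x00, 0x00 };
      DataExtractor Data(StringRef(Bytes, sizeof(Bytes)),
                         /*isLittleEndian=*/true, /*AddrSize=*/4);
      DWARFAbbreviationDeclaration Decl;
      uint32_t Offset = 0;
      if (Decl.extract(Data, &Offset))
        Decl.dump(outs()); // [1] DW_TAG_compile_unit DW_CHILDREN_yes ...
    }
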
diff --git a/lib/DebugInfo/DWARFAbbreviationDeclaration.h b/lib/DebugInfo/DWARFAbbreviationDeclaration.h
new file mode 100644
index 0000000..2463a3c
--- /dev/null
+++ b/lib/DebugInfo/DWARFAbbreviationDeclaration.h
@@ -0,0 +1,54 @@
+//===-- DWARFAbbreviationDeclaration.h --------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_DEBUGINFO_DWARFABBREVIATIONDECLARATION_H
+#define LLVM_DEBUGINFO_DWARFABBREVIATIONDECLARATION_H
+
+#include "DWARFAttribute.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/Support/DataExtractor.h"
+
+namespace llvm {
+
+class raw_ostream;
+
+class DWARFAbbreviationDeclaration {
+ uint32_t Code;
+ uint32_t Tag;
+ bool HasChildren;
+ SmallVector<DWARFAttribute, 8> Attributes;
+public:
+ enum { InvalidCode = 0 };
+ DWARFAbbreviationDeclaration()
+ : Code(InvalidCode), Tag(0), HasChildren(0) {}
+
+ uint32_t getCode() const { return Code; }
+ uint32_t getTag() const { return Tag; }
+ bool hasChildren() const { return HasChildren; }
+ uint32_t getNumAttributes() const { return Attributes.size(); }
+ uint16_t getAttrByIndex(uint32_t idx) const {
+ return Attributes.size() > idx ? Attributes[idx].getAttribute() : 0;
+ }
+ uint16_t getFormByIndex(uint32_t idx) const {
+ return Attributes.size() > idx ? Attributes[idx].getForm() : 0;
+ }
+
+ uint32_t findAttributeIndex(uint16_t attr) const;
+ bool extract(DataExtractor data, uint32_t* offset_ptr);
+ bool extract(DataExtractor data, uint32_t* offset_ptr, uint32_t code);
+ bool isValid() const { return Code != 0 && Tag != 0; }
+ void dump(raw_ostream &OS) const;
+ const SmallVectorImpl<DWARFAttribute> &getAttributes() const {
+ return Attributes;
+ }
+};
+
+}
+
+#endif
diff --git a/lib/DebugInfo/DWARFAttribute.h b/lib/DebugInfo/DWARFAttribute.h
new file mode 100644
index 0000000..6f49b63
--- /dev/null
+++ b/lib/DebugInfo/DWARFAttribute.h
@@ -0,0 +1,30 @@
+//===-- DWARFAttribute.h ----------------------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_DEBUGINFO_DWARFATTRIBUTE_H
+#define LLVM_DEBUGINFO_DWARFATTRIBUTE_H
+
+#include "llvm/Support/DataTypes.h"
+
+namespace llvm {
+
+class DWARFAttribute {
+ uint16_t Attribute;
+ uint16_t Form;
+ public:
+ DWARFAttribute(uint16_t attr, uint16_t form)
+ : Attribute(attr), Form(form) {}
+
+ uint16_t getAttribute() const { return Attribute; }
+ uint16_t getForm() const { return Form; }
+};
+
+}
+
+#endif
diff --git a/lib/DebugInfo/DWARFCompileUnit.cpp b/lib/DebugInfo/DWARFCompileUnit.cpp
new file mode 100644
index 0000000..24bf97f
--- /dev/null
+++ b/lib/DebugInfo/DWARFCompileUnit.cpp
@@ -0,0 +1,238 @@
+//===-- DWARFCompileUnit.cpp ----------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "DWARFCompileUnit.h"
+#include "DWARFContext.h"
+#include "DWARFFormValue.h"
+#include "llvm/Support/Dwarf.h"
+#include "llvm/Support/Format.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+using namespace dwarf;
+
+DataExtractor DWARFCompileUnit::getDebugInfoExtractor() const {
+ return DataExtractor(Context.getInfoSection(),
+ Context.isLittleEndian(), getAddressByteSize());
+}
+
+bool DWARFCompileUnit::extract(DataExtractor debug_info, uint32_t *offset_ptr) {
+ clear();
+
+ Offset = *offset_ptr;
+
+ if (debug_info.isValidOffset(*offset_ptr)) {
+ uint64_t abbrOffset;
+ const DWARFDebugAbbrev *abbr = Context.getDebugAbbrev();
+ Length = debug_info.getU32(offset_ptr);
+ Version = debug_info.getU16(offset_ptr);
+ abbrOffset = debug_info.getU32(offset_ptr);
+ AddrSize = debug_info.getU8(offset_ptr);
+
+ bool lengthOK = debug_info.isValidOffset(getNextCompileUnitOffset()-1);
+ bool versionOK = DWARFContext::isSupportedVersion(Version);
+ bool abbrOffsetOK = Context.getAbbrevSection().size() > abbrOffset;
+ bool addrSizeOK = AddrSize == 4 || AddrSize == 8;
+
+ if (lengthOK && versionOK && addrSizeOK && abbrOffsetOK && abbr != NULL) {
+ Abbrevs = abbr->getAbbreviationDeclarationSet(abbrOffset);
+ return true;
+ }
+
+ // reset the offset to where we tried to parse from if anything went wrong
+ *offset_ptr = Offset;
+ }
+
+ return false;
+}
+
+uint32_t
+DWARFCompileUnit::extract(uint32_t offset, DataExtractor debug_info_data,
+ const DWARFAbbreviationDeclarationSet *abbrevs) {
+ clear();
+
+ Offset = offset;
+
+ if (debug_info_data.isValidOffset(offset)) {
+ Length = debug_info_data.getU32(&offset);
+ Version = debug_info_data.getU16(&offset);
+ bool abbrevsOK = debug_info_data.getU32(&offset) == abbrevs->getOffset();
+ Abbrevs = abbrevs;
+    AddrSize = debug_info_data.getU8(&offset);
+
+ bool versionOK = DWARFContext::isSupportedVersion(Version);
+ bool addrSizeOK = AddrSize == 4 || AddrSize == 8;
+
+ if (versionOK && addrSizeOK && abbrevsOK &&
+ debug_info_data.isValidOffset(offset))
+ return offset;
+ }
+ return 0;
+}
+
+void DWARFCompileUnit::clear() {
+ Offset = 0;
+ Length = 0;
+ Version = 0;
+ Abbrevs = 0;
+ AddrSize = 0;
+ BaseAddr = 0;
+ DieArray.clear();
+}
+
+void DWARFCompileUnit::dump(raw_ostream &OS) {
+ OS << format("0x%08x", Offset) << ": Compile Unit:"
+ << " length = " << format("0x%08x", Length)
+ << " version = " << format("0x%04x", Version)
+ << " abbr_offset = " << format("0x%04x", Abbrevs->getOffset())
+ << " addr_size = " << format("0x%02x", AddrSize)
+ << " (next CU at " << format("0x%08x", getNextCompileUnitOffset())
+ << ")\n";
+
+ getCompileUnitDIE(false)->dump(OS, this, -1U);
+}
+
+void DWARFCompileUnit::setDIERelations() {
+ if (DieArray.empty())
+ return;
+ DWARFDebugInfoEntryMinimal *die_array_begin = &DieArray.front();
+ DWARFDebugInfoEntryMinimal *die_array_end = &DieArray.back();
+ DWARFDebugInfoEntryMinimal *curr_die;
+  // We purposely skip the last element of the array in the loop below
+  // so that we always have a valid next item.
+ for (curr_die = die_array_begin; curr_die < die_array_end; ++curr_die) {
+ // Since our loop doesn't include the last element, we can always
+ // safely access the next die in the array.
+ DWARFDebugInfoEntryMinimal *next_die = curr_die + 1;
+
+ const DWARFAbbreviationDeclaration *curr_die_abbrev =
+ curr_die->getAbbreviationDeclarationPtr();
+
+ if (curr_die_abbrev) {
+ // Normal DIE
+ if (curr_die_abbrev->hasChildren())
+ next_die->setParent(curr_die);
+ else
+ curr_die->setSibling(next_die);
+ } else {
+ // NULL DIE that terminates a sibling chain
+ DWARFDebugInfoEntryMinimal *parent = curr_die->getParent();
+ if (parent)
+ parent->setSibling(next_die);
+ }
+ }
+
+ // Since we skipped the last element, we need to fix it up!
+ if (die_array_begin < die_array_end)
+ curr_die->setParent(die_array_begin);
+}
+
+size_t DWARFCompileUnit::extractDIEsIfNeeded(bool cu_die_only) {
+ const size_t initial_die_array_size = DieArray.size();
+ if ((cu_die_only && initial_die_array_size > 0) ||
+ initial_die_array_size > 1)
+ return 0; // Already parsed
+
+ // Set the offset to that of the first DIE and calculate the start of the
+ // next compilation unit header.
+ uint32_t offset = getFirstDIEOffset();
+ uint32_t next_cu_offset = getNextCompileUnitOffset();
+
+ DWARFDebugInfoEntryMinimal die;
+  // Keep a flat array of the DIEs for binary lookup by DIE offset.
+ uint32_t depth = 0;
+ // We are in our compile unit, parse starting at the offset
+ // we were told to parse
+
+ const uint8_t *fixed_form_sizes =
+ DWARFFormValue::getFixedFormSizesForAddressSize(getAddressByteSize());
+
+ while (offset < next_cu_offset &&
+ die.extractFast(this, fixed_form_sizes, &offset)) {
+
+ if (depth == 0) {
+ uint64_t base_addr =
+ die.getAttributeValueAsUnsigned(this, DW_AT_low_pc, -1U);
+ if (base_addr == -1U)
+ base_addr = die.getAttributeValueAsUnsigned(this, DW_AT_entry_pc, 0);
+ setBaseAddress(base_addr);
+ }
+
+ if (cu_die_only) {
+ addDIE(die);
+ return 1;
+ }
+ else if (depth == 0 && initial_die_array_size == 1) {
+ // Don't append the CU die as we already did that
+ } else {
+      addDIE(die);
+ }
+
+ const DWARFAbbreviationDeclaration *abbrDecl =
+ die.getAbbreviationDeclarationPtr();
+ if (abbrDecl) {
+ // Normal DIE
+ if (abbrDecl->hasChildren())
+ ++depth;
+ } else {
+ // NULL DIE.
+ if (depth > 0)
+ --depth;
+ if (depth == 0)
+ break; // We are done with this compile unit!
+ }
+
+ }
+
+ // Give a little bit of info if we encounter corrupt DWARF (our offset
+ // should always terminate at or before the start of the next compilation
+ // unit header).
+ if (offset > next_cu_offset) {
+    fprintf(stderr, "warning: DWARF compile unit extends beyond its bounds cu 0x%8.8x at 0x%8.8x\n", getOffset(), offset);
+ }
+
+ setDIERelations();
+ return DieArray.size();
+}
+
+void DWARFCompileUnit::clearDIEs(bool keep_compile_unit_die) {
+ if (DieArray.size() > 1) {
+    // std::vector never gets any smaller when resized to a smaller size,
+    // or when clear() or erase() is called: size() will report the smaller
+    // value, but the memory allocated remains intact (call capacity() to
+    // see this). So we need to create a temporary vector and swap its
+    // contents, which swaps just the internal pointers, so that when
+    // "tmpArray" goes out of scope it will destroy the old
+    // contents.
+
+ // Save at least the compile unit DIE
+ std::vector<DWARFDebugInfoEntryMinimal> tmpArray;
+ DieArray.swap(tmpArray);
+ if (keep_compile_unit_die)
+ DieArray.push_back(tmpArray.front());
+ }
+}
+
+void
+DWARFCompileUnit::buildAddressRangeTable(DWARFDebugAranges *debug_aranges,
+ bool clear_dies_if_already_not_parsed){
+  // This function is usually called when there is no .debug_aranges section
+  // in order to produce an accurate compile unit level set of address
+  // ranges. If the DIEs weren't already parsed, we don't want the DIEs for
+  // all compile units to stay loaded when they aren't needed, so we can
+  // end up parsing the DWARF and then throwing the DIEs away to keep
+  // memory usage down.
+ const bool clear_dies = extractDIEsIfNeeded(false) > 1;
+
+ DieArray[0].buildAddressRangeTable(this, debug_aranges);
+
+  // Keep memory usage down by clearing the DIEs if this function
+  // caused them to be parsed.
+ if (clear_dies)
+ clearDIEs(true);
+}
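
Once the DIEs are extracted and setDIERelations() has stitched up the pointers, the flat DieArray can be walked as a tree. A sketch of a depth-first visit built only on the accessors this code already uses (getFirstChild()/getSibling()); the visit() helper is illustrative:

    #include "DWARFCompileUnit.h"
    using namespace llvm;

    static void visit(const DWARFDebugInfoEntryMinimal *Die, unsigned Depth) {
      if (!Die)
        return;
      // ... inspect Die->getTag() at nesting level Depth here ...
      for (const DWARFDebugInfoEntryMinimal *Child = Die->getFirstChild();
           Child; Child = Child->getSibling())
        visit(Child, Depth + 1);
    }

    void walkUnit(DWARFCompileUnit &CU) {
      // Request a full parse (not just the compile unit DIE).
      visit(CU.getCompileUnitDIE(/*extract_cu_die_only=*/false), 0);
    }
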
diff --git a/lib/DebugInfo/DWARFCompileUnit.h b/lib/DebugInfo/DWARFCompileUnit.h
new file mode 100644
index 0000000..d916729
--- /dev/null
+++ b/lib/DebugInfo/DWARFCompileUnit.h
@@ -0,0 +1,111 @@
+//===-- DWARFCompileUnit.h --------------------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_DEBUGINFO_DWARFCOMPILEUNIT_H
+#define LLVM_DEBUGINFO_DWARFCOMPILEUNIT_H
+
+#include "DWARFDebugAbbrev.h"
+#include "DWARFDebugInfoEntry.h"
+#include <vector>
+
+namespace llvm {
+
+class DWARFContext;
+class raw_ostream;
+
+class DWARFCompileUnit {
+ DWARFContext &Context;
+
+ uint32_t Offset;
+ uint32_t Length;
+ uint16_t Version;
+ const DWARFAbbreviationDeclarationSet *Abbrevs;
+ uint8_t AddrSize;
+ uint64_t BaseAddr;
+  // The DIEs for this compile unit; element 0 is the compile unit DIE.
+ std::vector<DWARFDebugInfoEntryMinimal> DieArray;
+public:
+ DWARFCompileUnit(DWARFContext &context) : Context(context) {
+ clear();
+ }
+
+ DWARFContext &getContext() const { return Context; }
+ DataExtractor getDebugInfoExtractor() const;
+
+ bool extract(DataExtractor debug_info, uint32_t* offset_ptr);
+ uint32_t extract(uint32_t offset, DataExtractor debug_info_data,
+ const DWARFAbbreviationDeclarationSet *abbrevs);
+
+ /// extractDIEsIfNeeded - Parses a compile unit and indexes its DIEs if it
+ /// hasn't already been done.
+ size_t extractDIEsIfNeeded(bool cu_die_only);
+ void clear();
+ void dump(raw_ostream &OS);
+ uint32_t getOffset() const { return Offset; }
+ /// Size in bytes of the compile unit header.
+ uint32_t getSize() const { return 11; }
+ bool containsDIEOffset(uint32_t die_offset) const {
+ return die_offset >= getFirstDIEOffset() &&
+ die_offset < getNextCompileUnitOffset();
+ }
+ uint32_t getFirstDIEOffset() const { return Offset + getSize(); }
+ uint32_t getNextCompileUnitOffset() const { return Offset + Length + 4; }
+ /// Size in bytes of the .debug_info data associated with this compile unit.
+ size_t getDebugInfoSize() const { return Length + 4 - getSize(); }
+ uint32_t getLength() const { return Length; }
+ uint16_t getVersion() const { return Version; }
+ const DWARFAbbreviationDeclarationSet *getAbbreviations() const {
+ return Abbrevs;
+ }
+ uint8_t getAddressByteSize() const { return AddrSize; }
+ uint64_t getBaseAddress() const { return BaseAddr; }
+
+ void setBaseAddress(uint64_t base_addr) {
+ BaseAddr = base_addr;
+ }
+
+ const DWARFDebugInfoEntryMinimal *
+ getCompileUnitDIE(bool extract_cu_die_only = true) {
+ extractDIEsIfNeeded(extract_cu_die_only);
+ if (DieArray.empty())
+ return NULL;
+ return &DieArray[0];
+ }
+
+ /// setDIERelations - We read in all of the DIE entries into our flat list
+ /// of DIE entries and now we need to go back through all of them and set the
+ /// parent, sibling and child pointers for quick DIE navigation.
+ void setDIERelations();
+
+ void addDIE(DWARFDebugInfoEntryMinimal &die) {
+    // The average size per DIE entry has been seen to be around
+    // 14-20 bytes, so let's pre-reserve the needed memory for
+ // our DIE entries accordingly. Search forward for "Compute
+ // average bytes per DIE" to see #if'ed out code that does
+ // that determination.
+
+    // Only reserve the memory when the array is still empty, i.e.
+    // when we are about to add the compile unit DIE. The compile
+    // unit DIE is always the first entry, so reserving at that
+    // point covers it and all of the child DIEs that follow
+    // it.
+ if (DieArray.empty())
+ DieArray.reserve(getDebugInfoSize() / 14);
+ DieArray.push_back(die);
+ }
+
+ void clearDIEs(bool keep_compile_unit_die);
+
+ void buildAddressRangeTable(DWARFDebugAranges *debug_aranges,
+ bool clear_dies_if_already_not_parsed);
+};
+
+}
+
+#endif
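
The offset arithmetic above encodes the DWARF32 compile unit header layout: a 4-byte length (which does not count itself), a 2-byte version, a 4-byte abbreviations offset and a 1-byte address size, hence getSize() == 11. Worked through for a hypothetical unit at offset 0x0 whose length field reads 0x92:

    getFirstDIEOffset()        == 0x0  + 11       == 0x0b
    getNextCompileUnitOffset() == 0x0  + 0x92 + 4 == 0x96
    getDebugInfoSize()         == 0x92 + 4 - 11   == 0x8b
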
diff --git a/lib/DebugInfo/DWARFContext.cpp b/lib/DebugInfo/DWARFContext.cpp
new file mode 100644
index 0000000..e1ac398
--- /dev/null
+++ b/lib/DebugInfo/DWARFContext.cpp
@@ -0,0 +1,167 @@
+//===-- DWARFContext.cpp --------------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "DWARFContext.h"
+#include "llvm/Support/Dwarf.h"
+#include "llvm/Support/Format.h"
+#include "llvm/Support/raw_ostream.h"
+#include <algorithm>
+using namespace llvm;
+using namespace dwarf;
+
+void DWARFContext::dump(raw_ostream &OS) {
+ OS << ".debug_abbrev contents:\n";
+ getDebugAbbrev()->dump(OS);
+
+ OS << "\n.debug_info contents:\n";
+ for (unsigned i = 0, e = getNumCompileUnits(); i != e; ++i)
+ getCompileUnitAtIndex(i)->dump(OS);
+
+ OS << "\n.debug_aranges contents:\n";
+ DataExtractor arangesData(getARangeSection(), isLittleEndian(), 0);
+ uint32_t offset = 0;
+ DWARFDebugArangeSet set;
+ while (set.extract(arangesData, &offset))
+ set.dump(OS);
+
+ OS << "\n.debug_lines contents:\n";
+ for (unsigned i = 0, e = getNumCompileUnits(); i != e; ++i) {
+ DWARFCompileUnit *cu = getCompileUnitAtIndex(i);
+ unsigned stmtOffset =
+ cu->getCompileUnitDIE()->getAttributeValueAsUnsigned(cu, DW_AT_stmt_list,
+ -1U);
+ if (stmtOffset != -1U) {
+ DataExtractor lineData(getLineSection(), isLittleEndian(),
+ cu->getAddressByteSize());
+ DWARFDebugLine::DumpingState state(OS);
+ DWARFDebugLine::parseStatementTable(lineData, &stmtOffset, state);
+ }
+ }
+
+ OS << "\n.debug_str contents:\n";
+ DataExtractor strData(getStringSection(), isLittleEndian(), 0);
+ offset = 0;
+ uint32_t lastOffset = 0;
+ while (const char *s = strData.getCStr(&offset)) {
+ OS << format("0x%8.8x: \"%s\"\n", lastOffset, s);
+ lastOffset = offset;
+ }
+}
+
+const DWARFDebugAbbrev *DWARFContext::getDebugAbbrev() {
+ if (Abbrev)
+ return Abbrev.get();
+
+ DataExtractor abbrData(getAbbrevSection(), isLittleEndian(), 0);
+
+ Abbrev.reset(new DWARFDebugAbbrev());
+ Abbrev->parse(abbrData);
+ return Abbrev.get();
+}
+
+const DWARFDebugAranges *DWARFContext::getDebugAranges() {
+ if (Aranges)
+ return Aranges.get();
+
+ DataExtractor arangesData(getARangeSection(), isLittleEndian(), 0);
+
+ Aranges.reset(new DWARFDebugAranges());
+ Aranges->extract(arangesData);
+ if (Aranges->isEmpty()) // No aranges in file, generate them from the DIEs.
+ Aranges->generate(this);
+ return Aranges.get();
+}
+
+const DWARFDebugLine::LineTable *
+DWARFContext::getLineTableForCompileUnit(DWARFCompileUnit *cu) {
+ if (!Line)
+ Line.reset(new DWARFDebugLine());
+
+ unsigned stmtOffset =
+ cu->getCompileUnitDIE()->getAttributeValueAsUnsigned(cu, DW_AT_stmt_list,
+ -1U);
+ if (stmtOffset == -1U)
+ return 0; // No line table for this compile unit.
+
+ // See if the line table is cached.
+ if (const DWARFDebugLine::LineTable *lt = Line->getLineTable(stmtOffset))
+ return lt;
+
+ // We have to parse it first.
+ DataExtractor lineData(getLineSection(), isLittleEndian(),
+ cu->getAddressByteSize());
+ return Line->getOrParseLineTable(lineData, stmtOffset);
+}
+
+void DWARFContext::parseCompileUnits() {
+ uint32_t offset = 0;
+ const DataExtractor &debug_info_data = DataExtractor(getInfoSection(),
+ isLittleEndian(), 0);
+ while (debug_info_data.isValidOffset(offset)) {
+ CUs.push_back(DWARFCompileUnit(*this));
+ if (!CUs.back().extract(debug_info_data, &offset)) {
+ CUs.pop_back();
+ break;
+ }
+
+ offset = CUs.back().getNextCompileUnitOffset();
+ }
+}
+
+namespace {
+ struct OffsetComparator {
+ bool operator()(const DWARFCompileUnit &LHS,
+ const DWARFCompileUnit &RHS) const {
+ return LHS.getOffset() < RHS.getOffset();
+ }
+ bool operator()(const DWARFCompileUnit &LHS, uint32_t RHS) const {
+ return LHS.getOffset() < RHS;
+ }
+ bool operator()(uint32_t LHS, const DWARFCompileUnit &RHS) const {
+ return LHS < RHS.getOffset();
+ }
+ };
+}
+
+DWARFCompileUnit *DWARFContext::getCompileUnitForOffset(uint32_t offset) {
+ if (CUs.empty())
+ parseCompileUnits();
+
+ DWARFCompileUnit *i = std::lower_bound(CUs.begin(), CUs.end(), offset,
+ OffsetComparator());
+ if (i != CUs.end())
+ return &*i;
+ return 0;
+}
+
+DILineInfo DWARFContext::getLineInfoForAddress(uint64_t address) {
+ // First, get the offset of the compile unit.
+ uint32_t cuOffset = getDebugAranges()->findAddress(address);
+ // Retrieve the compile unit.
+ DWARFCompileUnit *cu = getCompileUnitForOffset(cuOffset);
+ if (!cu)
+ return DILineInfo("<invalid>", 0, 0);
+ // Get the line table for this compile unit.
+ const DWARFDebugLine::LineTable *lineTable = getLineTableForCompileUnit(cu);
+ if (!lineTable)
+ return DILineInfo("<invalid>", 0, 0);
+ // Get the index of the row we're looking for in the line table.
+ uint64_t hiPC =
+ cu->getCompileUnitDIE()->getAttributeValueAsUnsigned(cu, DW_AT_high_pc,
+ -1ULL);
+ uint32_t rowIndex = lineTable->lookupAddress(address, hiPC);
+ if (rowIndex == -1U)
+ return DILineInfo("<invalid>", 0, 0);
+
+  // From here, construct the DILineInfo.
+ const DWARFDebugLine::Row &row = lineTable->Rows[rowIndex];
+ const std::string &fileName = lineTable->Prologue.FileNames[row.File-1].Name;
+
+ return DILineInfo(fileName.c_str(), row.Line, row.Column);
+}
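
getLineInfoForAddress() chains three lookups: .debug_aranges (address to CU offset), the CU list (offset to unit), and the unit's line table (address to row). A sketch of a caller; it assumes the DILineInfo accessors getFileName()/getLine() declared in DIContext.h, and treats line 0 as the not-found sentinel seen above:

    #include "DWARFContext.h" // in-tree use; external clients go via DIContext
    #include "llvm/Support/raw_ostream.h"
    using namespace llvm;

    void printLocation(DWARFContext &Ctx, uint64_t PC) {
      DILineInfo Info = Ctx.getLineInfoForAddress(PC);
      // Failed lookups come back as ("<invalid>", 0, 0) rather than an error.
      if (Info.getLine() == 0)
        errs() << "no line info for address " << PC << '\n';
      else
        outs() << Info.getFileName() << ':' << Info.getLine() << '\n';
    }
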
diff --git a/lib/DebugInfo/DWARFContext.h b/lib/DebugInfo/DWARFContext.h
new file mode 100644
index 0000000..746a463
--- /dev/null
+++ b/lib/DebugInfo/DWARFContext.h
@@ -0,0 +1,118 @@
+//===-- DWARFContext.h ------------------------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_DEBUGINFO_DWARFCONTEXT_H
+#define LLVM_DEBUGINFO_DWARFCONTEXT_H
+
+#include "DWARFCompileUnit.h"
+#include "DWARFDebugAranges.h"
+#include "DWARFDebugLine.h"
+#include "llvm/DebugInfo/DIContext.h"
+#include "llvm/ADT/OwningPtr.h"
+#include "llvm/ADT/SmallVector.h"
+
+namespace llvm {
+
+/// DWARFContext
+/// This data structure is the top-level entity that deals with DWARF debug
+/// information parsing. The actual data is supplied through pure virtual
+/// methods that a concrete implementation provides.
+class DWARFContext : public DIContext {
+ bool IsLittleEndian;
+
+ SmallVector<DWARFCompileUnit, 1> CUs;
+ OwningPtr<DWARFDebugAbbrev> Abbrev;
+ OwningPtr<DWARFDebugAranges> Aranges;
+ OwningPtr<DWARFDebugLine> Line;
+
+ DWARFContext(DWARFContext &); // = delete
+ DWARFContext &operator=(DWARFContext &); // = delete
+
+ /// Read compile units from the debug_info section and store them in CUs.
+ void parseCompileUnits();
+protected:
+ DWARFContext(bool isLittleEndian) : IsLittleEndian(isLittleEndian) {}
+public:
+ virtual void dump(raw_ostream &OS);
+ /// Get the number of compile units in this context.
+ unsigned getNumCompileUnits() {
+ if (CUs.empty())
+ parseCompileUnits();
+ return CUs.size();
+ }
+ /// Get the compile unit at the specified index for this compile unit.
+ DWARFCompileUnit *getCompileUnitAtIndex(unsigned index) {
+ if (CUs.empty())
+ parseCompileUnits();
+ return &CUs[index];
+ }
+
+ /// Return the compile unit that includes an offset (relative to .debug_info).
+ DWARFCompileUnit *getCompileUnitForOffset(uint32_t offset);
+
+ /// Get a pointer to the parsed DebugAbbrev object.
+ const DWARFDebugAbbrev *getDebugAbbrev();
+
+ /// Get a pointer to the parsed DebugAranges object.
+ const DWARFDebugAranges *getDebugAranges();
+
+ /// Get a pointer to a parsed line table corresponding to a compile unit.
+ const DWARFDebugLine::LineTable *
+ getLineTableForCompileUnit(DWARFCompileUnit *cu);
+
+ virtual DILineInfo getLineInfoForAddress(uint64_t address);
+
+ bool isLittleEndian() const { return IsLittleEndian; }
+
+ virtual StringRef getInfoSection() = 0;
+ virtual StringRef getAbbrevSection() = 0;
+ virtual StringRef getARangeSection() = 0;
+ virtual StringRef getLineSection() = 0;
+ virtual StringRef getStringSection() = 0;
+
+ static bool isSupportedVersion(unsigned version) {
+ return version == 2 || version == 3;
+ }
+};
+
+
+/// DWARFContextInMemory is the simplest possible implementation of a
+/// DWARFContext. It assumes all content is available in memory and stores
+/// pointers to it.
+class DWARFContextInMemory : public DWARFContext {
+ StringRef InfoSection;
+ StringRef AbbrevSection;
+ StringRef ARangeSection;
+ StringRef LineSection;
+ StringRef StringSection;
+public:
+ DWARFContextInMemory(bool isLittleEndian,
+ StringRef infoSection,
+ StringRef abbrevSection,
+ StringRef aRangeSection,
+ StringRef lineSection,
+ StringRef stringSection)
+ : DWARFContext(isLittleEndian),
+ InfoSection(infoSection),
+ AbbrevSection(abbrevSection),
+ ARangeSection(aRangeSection),
+ LineSection(lineSection),
+ StringSection(stringSection)
+ {}
+
+ virtual StringRef getInfoSection() { return InfoSection; }
+ virtual StringRef getAbbrevSection() { return AbbrevSection; }
+ virtual StringRef getARangeSection() { return ARangeSection; }
+ virtual StringRef getLineSection() { return LineSection; }
+ virtual StringRef getStringSection() { return StringSection; }
+};
+
+}
+
+#endif
diff --git a/lib/DebugInfo/DWARFDebugAbbrev.cpp b/lib/DebugInfo/DWARFDebugAbbrev.cpp
new file mode 100644
index 0000000..a11ae3f
--- /dev/null
+++ b/lib/DebugInfo/DWARFDebugAbbrev.cpp
@@ -0,0 +1,106 @@
+//===-- DWARFDebugAbbrev.cpp ----------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "DWARFDebugAbbrev.h"
+#include "llvm/Support/Format.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+bool DWARFAbbreviationDeclarationSet::extract(DataExtractor data,
+ uint32_t* offset_ptr) {
+ const uint32_t beginOffset = *offset_ptr;
+ Offset = beginOffset;
+ clear();
+ DWARFAbbreviationDeclaration abbrevDeclaration;
+  uint32_t prevAbbrCode = 0;
+ while (abbrevDeclaration.extract(data, offset_ptr)) {
+ Decls.push_back(abbrevDeclaration);
+ if (IdxOffset == 0) {
+ IdxOffset = abbrevDeclaration.getCode();
+ } else {
+      if (prevAbbrCode + 1 != abbrevDeclaration.getCode())
+        IdxOffset = UINT32_MAX; // Out-of-order indexes; can't do O(1) lookups.
+ }
+    prevAbbrCode = abbrevDeclaration.getCode();
+ }
+ return beginOffset != *offset_ptr;
+}
+
+void DWARFAbbreviationDeclarationSet::dump(raw_ostream &OS) const {
+ for (unsigned i = 0, e = Decls.size(); i != e; ++i)
+ Decls[i].dump(OS);
+}
+
+const DWARFAbbreviationDeclaration*
+DWARFAbbreviationDeclarationSet::getAbbreviationDeclaration(uint32_t abbrCode)
+ const {
+ if (IdxOffset == UINT32_MAX) {
+ DWARFAbbreviationDeclarationCollConstIter pos;
+ DWARFAbbreviationDeclarationCollConstIter end = Decls.end();
+ for (pos = Decls.begin(); pos != end; ++pos) {
+ if (pos->getCode() == abbrCode)
+ return &(*pos);
+ }
+ } else {
+ uint32_t idx = abbrCode - IdxOffset;
+ if (idx < Decls.size())
+ return &Decls[idx];
+ }
+ return NULL;
+}
+
+DWARFDebugAbbrev::DWARFDebugAbbrev() :
+ AbbrevCollMap(),
+ PrevAbbrOffsetPos(AbbrevCollMap.end()) {}
+
+
+void DWARFDebugAbbrev::parse(DataExtractor data) {
+ uint32_t offset = 0;
+
+ while (data.isValidOffset(offset)) {
+ uint32_t initial_cu_offset = offset;
+ DWARFAbbreviationDeclarationSet abbrevDeclSet;
+
+ if (abbrevDeclSet.extract(data, &offset))
+ AbbrevCollMap[initial_cu_offset] = abbrevDeclSet;
+ else
+ break;
+ }
+ PrevAbbrOffsetPos = AbbrevCollMap.end();
+}
+
+void DWARFDebugAbbrev::dump(raw_ostream &OS) const {
+ if (AbbrevCollMap.empty()) {
+ OS << "< EMPTY >\n";
+ return;
+ }
+
+ DWARFAbbreviationDeclarationCollMapConstIter pos;
+ for (pos = AbbrevCollMap.begin(); pos != AbbrevCollMap.end(); ++pos) {
+ OS << format("Abbrev table for offset: 0x%8.8x\n", pos->first);
+ pos->second.dump(OS);
+ }
+}
+
+const DWARFAbbreviationDeclarationSet*
+DWARFDebugAbbrev::getAbbreviationDeclarationSet(uint64_t cu_abbr_offset) const {
+ DWARFAbbreviationDeclarationCollMapConstIter end = AbbrevCollMap.end();
+ DWARFAbbreviationDeclarationCollMapConstIter pos;
+ if (PrevAbbrOffsetPos != end &&
+ PrevAbbrOffsetPos->first == cu_abbr_offset) {
+ return &(PrevAbbrOffsetPos->second);
+ } else {
+ pos = AbbrevCollMap.find(cu_abbr_offset);
+ PrevAbbrOffsetPos = pos;
+ }
+
+ if (pos != AbbrevCollMap.end())
+ return &(pos->second);
+ return NULL;
+}
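
PrevAbbrOffsetPos is a one-entry cache: consecutive compile units usually point at the same abbreviation table, so repeat lookups skip the map search. A sketch of the parse-then-lookup flow, assuming little-endian input already sliced into a StringRef; dumpOneAbbrev() is illustrative:

    #include "DWARFDebugAbbrev.h"
    #include "llvm/Support/raw_ostream.h"
    using namespace llvm;

    void dumpOneAbbrev(StringRef DebugAbbrevSection, uint64_t CUAbbrevOffset,
                       uint32_t AbbrCode) {
      DWARFDebugAbbrev Abbrev;
      // Address size is irrelevant here; .debug_abbrev holds no addresses.
      Abbrev.parse(DataExtractor(DebugAbbrevSection, /*isLittleEndian=*/true, 0));
      // The first lookup warms PrevAbbrOffsetPos; repeats at this offset hit it.
      if (const DWARFAbbreviationDeclarationSet *Set =
              Abbrev.getAbbreviationDeclarationSet(CUAbbrevOffset))
        if (const DWARFAbbreviationDeclaration *Decl =
                Set->getAbbreviationDeclaration(AbbrCode))
          Decl->dump(outs());
    }
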
diff --git a/lib/DebugInfo/DWARFDebugAbbrev.h b/lib/DebugInfo/DWARFDebugAbbrev.h
new file mode 100644
index 0000000..03189b1
--- /dev/null
+++ b/lib/DebugInfo/DWARFDebugAbbrev.h
@@ -0,0 +1,73 @@
+//===-- DWARFDebugAbbrev.h --------------------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_DEBUGINFO_DWARFDEBUGABBREV_H
+#define LLVM_DEBUGINFO_DWARFDEBUGABBREV_H
+
+#include "DWARFAbbreviationDeclaration.h"
+#include <list>
+#include <map>
+#include <vector>
+
+namespace llvm {
+
+typedef std::vector<DWARFAbbreviationDeclaration>
+ DWARFAbbreviationDeclarationColl;
+typedef DWARFAbbreviationDeclarationColl::iterator
+ DWARFAbbreviationDeclarationCollIter;
+typedef DWARFAbbreviationDeclarationColl::const_iterator
+ DWARFAbbreviationDeclarationCollConstIter;
+
+class DWARFAbbreviationDeclarationSet {
+ uint64_t Offset;
+ uint32_t IdxOffset;
+ std::vector<DWARFAbbreviationDeclaration> Decls;
+ public:
+ DWARFAbbreviationDeclarationSet()
+ : Offset(0), IdxOffset(0) {}
+
+ DWARFAbbreviationDeclarationSet(uint64_t offset, uint32_t idxOffset)
+ : Offset(offset), IdxOffset(idxOffset) {}
+
+ void clear() {
+ IdxOffset = 0;
+ Decls.clear();
+ }
+ uint64_t getOffset() const { return Offset; }
+ void dump(raw_ostream &OS) const;
+ bool extract(DataExtractor data, uint32_t* offset_ptr);
+
+ const DWARFAbbreviationDeclaration *
+ getAbbreviationDeclaration(uint32_t abbrCode) const;
+};
+
+class DWARFDebugAbbrev {
+public:
+ typedef std::map<uint64_t, DWARFAbbreviationDeclarationSet>
+ DWARFAbbreviationDeclarationCollMap;
+ typedef DWARFAbbreviationDeclarationCollMap::iterator
+ DWARFAbbreviationDeclarationCollMapIter;
+ typedef DWARFAbbreviationDeclarationCollMap::const_iterator
+ DWARFAbbreviationDeclarationCollMapConstIter;
+
+private:
+ DWARFAbbreviationDeclarationCollMap AbbrevCollMap;
+ mutable DWARFAbbreviationDeclarationCollMapConstIter PrevAbbrOffsetPos;
+
+public:
+ DWARFDebugAbbrev();
+ const DWARFAbbreviationDeclarationSet *
+ getAbbreviationDeclarationSet(uint64_t cu_abbr_offset) const;
+ void dump(raw_ostream &OS) const;
+ void parse(DataExtractor data);
+};
+
+}
+
+#endif
diff --git a/lib/DebugInfo/DWARFDebugArangeSet.cpp b/lib/DebugInfo/DWARFDebugArangeSet.cpp
new file mode 100644
index 0000000..b0c0354
--- /dev/null
+++ b/lib/DebugInfo/DWARFDebugArangeSet.cpp
@@ -0,0 +1,150 @@
+//===-- DWARFDebugArangeSet.cpp -------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "DWARFDebugArangeSet.h"
+#include "llvm/Support/Format.h"
+#include "llvm/Support/raw_ostream.h"
+#include <algorithm>
+#include <cassert>
+using namespace llvm;
+
+void DWARFDebugArangeSet::clear() {
+ Offset = -1U;
+ std::memset(&Header, 0, sizeof(Header));
+ ArangeDescriptors.clear();
+}
+
+void DWARFDebugArangeSet::compact() {
+ if (ArangeDescriptors.empty())
+ return;
+
+ // Iterate through all arange descriptors and combine any ranges that
+ // overlap or have matching boundaries. The ArangeDescriptors are assumed
+ // to be in ascending order.
+ uint32_t i = 0;
+ while (i + 1 < ArangeDescriptors.size()) {
+ if (ArangeDescriptors[i].getEndAddress() >= ArangeDescriptors[i+1].Address){
+ // The current range ends at or exceeds the start of the next address
+ // range. Compute the max end address between the two and use that to
+ // make the new length.
+ const uint64_t max_end_addr =
+ std::max(ArangeDescriptors[i].getEndAddress(),
+ ArangeDescriptors[i+1].getEndAddress());
+ ArangeDescriptors[i].Length = max_end_addr - ArangeDescriptors[i].Address;
+ // Now remove the next entry as it was just combined with the previous one
+ ArangeDescriptors.erase(ArangeDescriptors.begin()+i+1);
+ } else {
+ // Discontiguous address range, just proceed to the next one.
+ ++i;
+ }
+ }
+}
+
+bool
+DWARFDebugArangeSet::extract(DataExtractor data, uint32_t *offset_ptr) {
+ if (data.isValidOffset(*offset_ptr)) {
+ ArangeDescriptors.clear();
+ Offset = *offset_ptr;
+
+ // 7.20 Address Range Table
+ //
+ // Each set of entries in the table of address ranges contained in
+ // the .debug_aranges section begins with a header consisting of: a
+ // 4-byte length containing the length of the set of entries for this
+ // compilation unit, not including the length field itself; a 2-byte
+ // version identifier containing the value 2 for DWARF Version 2; a
+    // 4-byte offset into the .debug_info section; a 1-byte unsigned integer
+ // containing the size in bytes of an address (or the offset portion of
+ // an address for segmented addressing) on the target system; and a
+ // 1-byte unsigned integer containing the size in bytes of a segment
+ // descriptor on the target system. This header is followed by a series
+ // of tuples. Each tuple consists of an address and a length, each in
+ // the size appropriate for an address on the target architecture.
+ Header.Length = data.getU32(offset_ptr);
+ Header.Version = data.getU16(offset_ptr);
+ Header.CuOffset = data.getU32(offset_ptr);
+ Header.AddrSize = data.getU8(offset_ptr);
+ Header.SegSize = data.getU8(offset_ptr);
+
+ // Perform basic validation of the header fields.
+ if (!data.isValidOffsetForDataOfSize(Offset, Header.Length) ||
+ (Header.AddrSize != 4 && Header.AddrSize != 8)) {
+ clear();
+ return false;
+ }
+
+ // The first tuple following the header in each set begins at an offset
+ // that is a multiple of the size of a single tuple (that is, twice the
+ // size of an address). The header is padded, if necessary, to the
+ // appropriate boundary.
+ const uint32_t header_size = *offset_ptr - Offset;
+ const uint32_t tuple_size = Header.AddrSize * 2;
+ uint32_t first_tuple_offset = 0;
+ while (first_tuple_offset < header_size)
+ first_tuple_offset += tuple_size;
+
+ *offset_ptr = Offset + first_tuple_offset;
+
+ Descriptor arangeDescriptor;
+
+ assert(sizeof(arangeDescriptor.Address) == sizeof(arangeDescriptor.Length));
+ assert(sizeof(arangeDescriptor.Address) >= Header.AddrSize);
+
+ while (data.isValidOffset(*offset_ptr)) {
+ arangeDescriptor.Address = data.getUnsigned(offset_ptr, Header.AddrSize);
+ arangeDescriptor.Length = data.getUnsigned(offset_ptr, Header.AddrSize);
+
+ // Each set of tuples is terminated by a 0 for the address and 0
+ // for the length.
+ if (arangeDescriptor.Address || arangeDescriptor.Length)
+ ArangeDescriptors.push_back(arangeDescriptor);
+ else
+ break; // We are done if we get a zero address and length
+ }
+
+ return !ArangeDescriptors.empty();
+ }
+ return false;
+}
+
+void DWARFDebugArangeSet::dump(raw_ostream &OS) const {
+ OS << format("Address Range Header: length = 0x%8.8x, version = 0x%4.4x, ",
+ Header.Length, Header.Version)
+ << format("cu_offset = 0x%8.8x, addr_size = 0x%2.2x, seg_size = 0x%2.2x\n",
+ Header.CuOffset, Header.AddrSize, Header.SegSize);
+
+ const uint32_t hex_width = Header.AddrSize * 2;
+ for (DescriptorConstIter pos = ArangeDescriptors.begin(),
+ end = ArangeDescriptors.end(); pos != end; ++pos)
+ OS << format("[0x%*.*llx -", hex_width, hex_width, pos->Address)
+ << format(" 0x%*.*llx)\n", hex_width, hex_width, pos->getEndAddress());
+}
+
+
+namespace {
+ class DescriptorContainsAddress {
+ const uint64_t Address;
+ public:
+ DescriptorContainsAddress(uint64_t address) : Address(address) {}
+ bool operator()(const DWARFDebugArangeSet::Descriptor &desc) const {
+ return Address >= desc.Address && Address < (desc.Address + desc.Length);
+ }
+ };
+}
+
+uint32_t DWARFDebugArangeSet::findAddress(uint64_t address) const {
+ DescriptorConstIter end = ArangeDescriptors.end();
+ DescriptorConstIter pos =
+ std::find_if(ArangeDescriptors.begin(), end, // Range
+ DescriptorContainsAddress(address)); // Predicate
+ if (pos != end)
+ return Header.CuOffset;
+
+ return -1U;
+}
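
The padding loop in extract() just rounds the fixed 12-byte header (4 + 2 + 4 + 1 + 1) up to the next multiple of the tuple size, as the spec quote above requires. Worked through for the two accepted address sizes:

    AddrSize == 4: tuple_size = 8,  first_tuple_offset = 16
    AddrSize == 8: tuple_size = 16, first_tuple_offset = 16
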
diff --git a/lib/DebugInfo/DWARFDebugArangeSet.h b/lib/DebugInfo/DWARFDebugArangeSet.h
new file mode 100644
index 0000000..9a2a6d0
--- /dev/null
+++ b/lib/DebugInfo/DWARFDebugArangeSet.h
@@ -0,0 +1,75 @@
+//===-- DWARFDebugArangeSet.h -----------------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_DEBUGINFO_DWARFDEBUGARANGESET_H
+#define LLVM_DEBUGINFO_DWARFDEBUGARANGESET_H
+
+#include "llvm/Support/DataExtractor.h"
+#include <vector>
+
+namespace llvm {
+
+class raw_ostream;
+
+class DWARFDebugArangeSet {
+public:
+ struct Header {
+ // The total length of the entries for that set, not including the length
+ // field itself.
+ uint32_t Length;
+ // The offset from the beginning of the .debug_info section of the
+ // compilation unit entry referenced by the table.
+ uint32_t CuOffset;
+ // The DWARF version number.
+ uint16_t Version;
+ // The size in bytes of an address on the target architecture. For segmented
+ // addressing, this is the size of the offset portion of the address.
+ uint8_t AddrSize;
+ // The size in bytes of a segment descriptor on the target architecture.
+ // If the target system uses a flat address space, this value is 0.
+ uint8_t SegSize;
+ };
+
+ struct Descriptor {
+ uint64_t Address;
+ uint64_t Length;
+ uint64_t getEndAddress() const { return Address + Length; }
+ };
+
+private:
+ typedef std::vector<Descriptor> DescriptorColl;
+ typedef DescriptorColl::iterator DescriptorIter;
+ typedef DescriptorColl::const_iterator DescriptorConstIter;
+
+ uint32_t Offset;
+ Header Header;
+ DescriptorColl ArangeDescriptors;
+
+public:
+ DWARFDebugArangeSet() { clear(); }
+ void clear();
+ void compact();
+ bool extract(DataExtractor data, uint32_t *offset_ptr);
+ void dump(raw_ostream &OS) const;
+
+ uint32_t getCompileUnitDIEOffset() const { return Header.CuOffset; }
+ uint32_t getOffsetOfNextEntry() const { return Offset + Header.Length + 4; }
+ uint32_t findAddress(uint64_t address) const;
+ uint32_t getNumDescriptors() const { return ArangeDescriptors.size(); }
+ const struct Header &getHeader() const { return Header; }
+ const Descriptor *getDescriptor(uint32_t i) const {
+ if (i < ArangeDescriptors.size())
+ return &ArangeDescriptors[i];
+ return NULL;
+ }
+};
+
+}
+
+#endif
diff --git a/lib/DebugInfo/DWARFDebugAranges.cpp b/lib/DebugInfo/DWARFDebugAranges.cpp
new file mode 100644
index 0000000..576d37d
--- /dev/null
+++ b/lib/DebugInfo/DWARFDebugAranges.cpp
@@ -0,0 +1,223 @@
+//===-- DWARFDebugAranges.cpp -----------------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "DWARFDebugAranges.h"
+#include "DWARFCompileUnit.h"
+#include "DWARFContext.h"
+#include "llvm/Support/Format.h"
+#include "llvm/Support/raw_ostream.h"
+#include <algorithm>
+#include <cassert>
+using namespace llvm;
+
+// Comparison function for DWARFDebugAranges::Range structures.
+static bool RangeLessThan(const DWARFDebugAranges::Range &range1,
+ const DWARFDebugAranges::Range &range2) {
+ return range1.LoPC < range2.LoPC;
+}
+
+namespace {
+ class CountArangeDescriptors {
+ public:
+ CountArangeDescriptors(uint32_t &count_ref) : Count(count_ref) {}
+ void operator()(const DWARFDebugArangeSet &set) {
+ Count += set.getNumDescriptors();
+ }
+ uint32_t &Count;
+ };
+
+ class AddArangeDescriptors {
+ public:
+ AddArangeDescriptors(DWARFDebugAranges::RangeColl &ranges)
+ : RangeCollection(ranges) {}
+ void operator()(const DWARFDebugArangeSet& set) {
+ const DWARFDebugArangeSet::Descriptor* arange_desc_ptr;
+ DWARFDebugAranges::Range range;
+ range.Offset = set.getCompileUnitDIEOffset();
+
+ for (uint32_t i=0; (arange_desc_ptr = set.getDescriptor(i)) != NULL; ++i){
+ range.LoPC = arange_desc_ptr->Address;
+ range.Length = arange_desc_ptr->Length;
+
+ // Insert each item in increasing address order so binary searching
+ // can later be done!
+ DWARFDebugAranges::RangeColl::iterator insert_pos =
+ std::lower_bound(RangeCollection.begin(), RangeCollection.end(),
+ range, RangeLessThan);
+ RangeCollection.insert(insert_pos, range);
+ }
+ }
+ DWARFDebugAranges::RangeColl& RangeCollection;
+ };
+}
+
+bool DWARFDebugAranges::extract(DataExtractor debug_aranges_data) {
+ if (debug_aranges_data.isValidOffset(0)) {
+ uint32_t offset = 0;
+
+ typedef std::vector<DWARFDebugArangeSet> SetCollection;
+ typedef SetCollection::const_iterator SetCollectionIter;
+ SetCollection sets;
+
+ DWARFDebugArangeSet set;
+ Range range;
+ while (set.extract(debug_aranges_data, &offset))
+ sets.push_back(set);
+
+ uint32_t count = 0;
+
+ std::for_each(sets.begin(), sets.end(), CountArangeDescriptors(count));
+
+ if (count > 0) {
+ Aranges.reserve(count);
+ AddArangeDescriptors range_adder(Aranges);
+ std::for_each(sets.begin(), sets.end(), range_adder);
+ }
+ }
+ return false;
+}
+
+bool DWARFDebugAranges::generate(DWARFContext *ctx) {
+ clear();
+ if (ctx) {
+ const uint32_t num_compile_units = ctx->getNumCompileUnits();
+ for (uint32_t cu_idx = 0; cu_idx < num_compile_units; ++cu_idx) {
+ DWARFCompileUnit *cu = ctx->getCompileUnitAtIndex(cu_idx);
+ if (cu)
+ cu->buildAddressRangeTable(this, true);
+ }
+ }
+ return !isEmpty();
+}
+
+void DWARFDebugAranges::dump(raw_ostream &OS) const {
+ const uint32_t num_ranges = getNumRanges();
+ for (uint32_t i = 0; i < num_ranges; ++i) {
+ const Range &range = Aranges[i];
+ OS << format("0x%8.8x: [0x%8.8llx - 0x%8.8llx)\n", range.Offset,
+ (uint64_t)range.LoPC, (uint64_t)range.HiPC());
+ }
+}
+
+void DWARFDebugAranges::Range::dump(raw_ostream &OS) const {
+ OS << format("{0x%8.8x}: [0x%8.8llx - 0x%8.8llx)\n", Offset, LoPC, HiPC());
+}
+
+void DWARFDebugAranges::appendRange(uint32_t offset, uint64_t low_pc,
+ uint64_t high_pc) {
+ if (!Aranges.empty()) {
+ if (Aranges.back().Offset == offset && Aranges.back().HiPC() == low_pc) {
+ Aranges.back().setHiPC(high_pc);
+ return;
+ }
+ }
+ Aranges.push_back(Range(low_pc, high_pc, offset));
+}
+
+void DWARFDebugAranges::sort(bool minimize, uint32_t n) {
+ const size_t orig_arange_size = Aranges.size();
+ // Size of one? If so, no sorting is needed
+ if (orig_arange_size <= 1)
+ return;
+ // Sort our address range entries
+ std::stable_sort(Aranges.begin(), Aranges.end(), RangeLessThan);
+
+ if (!minimize)
+ return;
+
+ // Most address ranges are contiguous from function to function
+  // so our new ranges will likely be smaller. We calculate the size
+  // of the new ranges up front because, although std::vector objects
+  // can be resized, they will never reduce their allocated block size
+  // or free any excess memory, so we might as well start a brand new
+  // collection so it is as small as possible.
+
+ // First calculate the size of the new minimal arange vector
+ // so we don't have to do a bunch of re-allocations as we
+ // copy the new minimal stuff over to the new collection.
+ size_t minimal_size = 1;
+ for (size_t i = 1; i < orig_arange_size; ++i) {
+ if (!Range::SortedOverlapCheck(Aranges[i-1], Aranges[i], n))
+ ++minimal_size;
+ }
+
+ // If the sizes are the same, then no consecutive aranges can be
+ // combined, we are done.
+ if (minimal_size == orig_arange_size)
+ return;
+
+ // Else, make a new RangeColl that _only_ contains what we need.
+ RangeColl minimal_aranges;
+ minimal_aranges.resize(minimal_size);
+ uint32_t j = 0;
+ minimal_aranges[j] = Aranges[0];
+ for (size_t i = 1; i < orig_arange_size; ++i) {
+    if (Range::SortedOverlapCheck(minimal_aranges[j], Aranges[i], n)) {
+      minimal_aranges[j].setHiPC(Aranges[i].HiPC());
+ } else {
+ // Only increment j if we aren't merging
+ minimal_aranges[++j] = Aranges[i];
+ }
+ }
+  assert(j + 1 == minimal_size);
+
+ // Now swap our new minimal aranges into place. The local
+  // minimal_aranges will then contain the old big collection
+ // which will get freed.
+ minimal_aranges.swap(Aranges);
+}
+
+uint32_t DWARFDebugAranges::findAddress(uint64_t address) const {
+ if (!Aranges.empty()) {
+ Range range(address);
+ RangeCollIterator begin = Aranges.begin();
+ RangeCollIterator end = Aranges.end();
+ RangeCollIterator pos = lower_bound(begin, end, range, RangeLessThan);
+
+ if (pos != end && pos->LoPC <= address && address < pos->HiPC()) {
+ return pos->Offset;
+ } else if (pos != begin) {
+ --pos;
+ if (pos->LoPC <= address && address < pos->HiPC())
+ return (*pos).Offset;
+ }
+ }
+ return -1U;
+}
+
+bool
+DWARFDebugAranges::allRangesAreContiguous(uint64_t &LoPC, uint64_t &HiPC) const{
+ if (Aranges.empty())
+ return false;
+
+ uint64_t next_addr = 0;
+ RangeCollIterator begin = Aranges.begin();
+ for (RangeCollIterator pos = begin, end = Aranges.end(); pos != end;
+ ++pos) {
+ if (pos != begin && pos->LoPC != next_addr)
+ return false;
+ next_addr = pos->HiPC();
+ }
+ // We checked for empty at the start of function so front() will be valid.
+ LoPC = Aranges.front().LoPC;
+ // We checked for empty at the start of function so back() will be valid.
+ HiPC = Aranges.back().HiPC();
+ return true;
+}
+
+bool DWARFDebugAranges::getMaxRange(uint64_t &LoPC, uint64_t &HiPC) const {
+ if (Aranges.empty())
+ return false;
+ // We checked for empty at the start of function so front() will be valid.
+ LoPC = Aranges.front().LoPC;
+ // We checked for empty at the start of function so back() will be valid.
+ HiPC = Aranges.back().HiPC();
+ return true;
+}
+
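
buildAddressRangeTable() drives this class through the two-phase pattern the header comments describe: many appendRange() calls, then one sort(). A standalone sketch with made-up pc values and CU offset:

    #include "DWARFDebugAranges.h"
    using namespace llvm;

    void buildExample() {
      DWARFDebugAranges Aranges;
      // Phase 1: append; an append whose low_pc equals the previous range's
      // HiPC() for the same CU offset is merged immediately.
      Aranges.appendRange(/*cu_offset=*/0x0b, /*low_pc=*/0x1000,
                          /*high_pc=*/0x1020);
      Aranges.appendRange(0x0b, 0x1020, 0x1044); // merged into [0x1000, 0x1044)
      Aranges.appendRange(0x0b, 0x2000, 0x2010);
      // Phase 2: sort by LoPC and coalesce ranges separated by at most n bytes.
      Aranges.sort(/*minimize=*/true, /*n=*/0);
      uint32_t CUOffset = Aranges.findAddress(0x1030); // yields 0x0b
      (void)CUOffset;
    }
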
diff --git a/lib/DebugInfo/DWARFDebugAranges.h b/lib/DebugInfo/DWARFDebugAranges.h
new file mode 100644
index 0000000..12afb60
--- /dev/null
+++ b/lib/DebugInfo/DWARFDebugAranges.h
@@ -0,0 +1,98 @@
+//===-- DWARFDebugAranges.h -------------------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_DEBUGINFO_DWARFDEBUGARANGES_H
+#define LLVM_DEBUGINFO_DWARFDEBUGARANGES_H
+
+#include "DWARFDebugArangeSet.h"
+#include <list>
+
+namespace llvm {
+
+class DWARFContext;
+
+class DWARFDebugAranges {
+public:
+ struct Range {
+ explicit Range(uint64_t lo = -1ULL, uint64_t hi = -1ULL,
+ uint32_t off = -1U)
+ : LoPC(lo), Length(hi-lo), Offset(off) {}
+
+ void clear() {
+ LoPC = -1ULL;
+ Length = 0;
+ Offset = -1U;
+ }
+
+ void setHiPC(uint64_t HiPC) {
+ if (HiPC == -1ULL || HiPC <= LoPC)
+ Length = 0;
+ else
+ Length = HiPC - LoPC;
+ }
+ uint64_t HiPC() const {
+ if (Length)
+ return LoPC + Length;
+ return -1ULL;
+ }
+ bool isValidRange() const { return Length > 0; }
+
+ static bool SortedOverlapCheck(const Range &curr_range,
+ const Range &next_range, uint32_t n) {
+ if (curr_range.Offset != next_range.Offset)
+ return false;
+ return curr_range.HiPC() + n >= next_range.LoPC;
+ }
+
+ bool contains(const Range &range) const {
+ return LoPC <= range.LoPC && range.HiPC() <= HiPC();
+ }
+
+ void dump(raw_ostream &OS) const;
+ uint64_t LoPC; // Start of address range
+    uint32_t Length; // Length of address range in bytes
+ uint32_t Offset; // Offset of the compile unit or die
+ };
+
+ void clear() { Aranges.clear(); }
+ bool allRangesAreContiguous(uint64_t& LoPC, uint64_t& HiPC) const;
+ bool getMaxRange(uint64_t& LoPC, uint64_t& HiPC) const;
+ bool extract(DataExtractor debug_aranges_data);
+ bool generate(DWARFContext *ctx);
+
+  // Use appendRange() multiple times and then call sort().
+ void appendRange(uint32_t cu_offset, uint64_t low_pc, uint64_t high_pc);
+ void sort(bool minimize, uint32_t n);
+
+ const Range *rangeAtIndex(uint32_t idx) const {
+ if (idx < Aranges.size())
+ return &Aranges[idx];
+ return NULL;
+ }
+ void dump(raw_ostream &OS) const;
+ uint32_t findAddress(uint64_t address) const;
+ bool isEmpty() const { return Aranges.empty(); }
+ uint32_t getNumRanges() const { return Aranges.size(); }
+
+ uint32_t offsetAtIndex(uint32_t idx) const {
+ if (idx < Aranges.size())
+ return Aranges[idx].Offset;
+ return -1U;
+ }
+
+ typedef std::vector<Range> RangeColl;
+ typedef RangeColl::const_iterator RangeCollIterator;
+
+private:
+ RangeColl Aranges;
+};
+
+}
+
+#endif
diff --git a/lib/DebugInfo/DWARFDebugInfoEntry.cpp b/lib/DebugInfo/DWARFDebugInfoEntry.cpp
new file mode 100644
index 0000000..1b089ad
--- /dev/null
+++ b/lib/DebugInfo/DWARFDebugInfoEntry.cpp
@@ -0,0 +1,444 @@
+//===-- DWARFDebugInfoEntry.cpp --------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "DWARFDebugInfoEntry.h"
+#include "DWARFCompileUnit.h"
+#include "DWARFContext.h"
+#include "DWARFDebugAbbrev.h"
+#include "DWARFFormValue.h"
+#include "llvm/Support/Dwarf.h"
+#include "llvm/Support/Format.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+using namespace dwarf;
+
+void DWARFDebugInfoEntryMinimal::dump(raw_ostream &OS,
+ const DWARFCompileUnit *cu,
+ unsigned recurseDepth,
+ unsigned indent) const {
+ DataExtractor debug_info_data = cu->getDebugInfoExtractor();
+ uint32_t offset = Offset;
+
+ if (debug_info_data.isValidOffset(offset)) {
+ uint64_t abbrCode = debug_info_data.getULEB128(&offset);
+
+ OS << format("\n0x%8.8x: ", Offset);
+ if (abbrCode) {
+ if (AbbrevDecl) {
+ const char *tagString = TagString(getTag());
+ if (tagString)
+ OS.indent(indent) << tagString;
+ else
+ OS.indent(indent) << format("DW_TAG_Unknown_%x", getTag());
+ OS << format(" [%u] %c\n", abbrCode,
+ AbbrevDecl->hasChildren() ? '*' : ' ');
+
+ // Dump all data in the .debug_info for the attributes
+ const uint32_t numAttributes = AbbrevDecl->getNumAttributes();
+ for (uint32_t i = 0; i != numAttributes; ++i) {
+ uint16_t attr = AbbrevDecl->getAttrByIndex(i);
+ uint16_t form = AbbrevDecl->getFormByIndex(i);
+ dumpAttribute(OS, cu, &offset, attr, form, indent);
+ }
+
+ const DWARFDebugInfoEntryMinimal *child = getFirstChild();
+ if (recurseDepth > 0 && child) {
+ while (child) {
+ child->dump(OS, cu, recurseDepth-1, indent+2);
+ child = child->getSibling();
+ }
+ }
+ } else {
+ OS << "Abbreviation code not found in 'debug_abbrev' class for code: "
+ << abbrCode << '\n';
+ }
+ } else {
+ OS.indent(indent) << "NULL\n";
+ }
+ }
+}
+
+void DWARFDebugInfoEntryMinimal::dumpAttribute(raw_ostream &OS,
+ const DWARFCompileUnit *cu,
+ uint32_t* offset_ptr,
+ uint16_t attr,
+ uint16_t form,
+ unsigned indent) const {
+ OS << format("0x%8.8x: ", *offset_ptr);
+ OS.indent(indent+2);
+ const char *attrString = AttributeString(attr);
+ if (attrString)
+ OS << attrString;
+ else
+ OS << format("DW_AT_Unknown_%x", attr);
+ const char *formString = FormEncodingString(form);
+ if (formString)
+ OS << " [" << formString << ']';
+ else
+ OS << format(" [DW_FORM_Unknown_%x]", form);
+
+ DWARFFormValue formValue(form);
+
+ if (!formValue.extractValue(cu->getDebugInfoExtractor(), offset_ptr, cu))
+ return;
+
+ OS << "\t(";
+ formValue.dump(OS, cu);
+ OS << ")\n";
+}
+
+bool DWARFDebugInfoEntryMinimal::extractFast(const DWARFCompileUnit *cu,
+ const uint8_t *fixed_form_sizes,
+ uint32_t *offset_ptr) {
+ Offset = *offset_ptr;
+
+ DataExtractor debug_info_data = cu->getDebugInfoExtractor();
+ uint64_t abbrCode = debug_info_data.getULEB128(offset_ptr);
+
+ assert (fixed_form_sizes); // For best performance this should be specified!
+
+ if (abbrCode) {
+ uint32_t offset = *offset_ptr;
+
+ AbbrevDecl = cu->getAbbreviations()->getAbbreviationDeclaration(abbrCode);
+
+ // Skip all data in the .debug_info for the attributes
+ const uint32_t numAttributes = AbbrevDecl->getNumAttributes();
+ uint32_t i;
+ uint16_t form;
+ for (i=0; i<numAttributes; ++i) {
+ form = AbbrevDecl->getFormByIndex(i);
+
+ const uint8_t fixed_skip_size = fixed_form_sizes[form];
+ if (fixed_skip_size)
+ offset += fixed_skip_size;
+ else {
+ bool form_is_indirect = false;
+ do {
+ form_is_indirect = false;
+ uint32_t form_size = 0;
+ switch (form) {
+          // Blocks of inlined data that have a length field and the data bytes
+          // inlined in the .debug_info.
+ case DW_FORM_block:
+ form_size = debug_info_data.getULEB128(&offset);
+ break;
+ case DW_FORM_block1:
+ form_size = debug_info_data.getU8(&offset);
+ break;
+ case DW_FORM_block2:
+ form_size = debug_info_data.getU16(&offset);
+ break;
+ case DW_FORM_block4:
+ form_size = debug_info_data.getU32(&offset);
+ break;
+
+ // Inlined NULL terminated C-strings
+ case DW_FORM_string:
+ debug_info_data.getCStr(&offset);
+ break;
+
+ // Compile unit address sized values
+ case DW_FORM_addr:
+ case DW_FORM_ref_addr:
+ form_size = cu->getAddressByteSize();
+ break;
+
+ // 1 byte values
+ case DW_FORM_data1:
+ case DW_FORM_flag:
+ case DW_FORM_ref1:
+ form_size = 1;
+ break;
+
+ // 2 byte values
+ case DW_FORM_data2:
+ case DW_FORM_ref2:
+ form_size = 2;
+ break;
+
+ // 4 byte values
+ case DW_FORM_strp:
+ case DW_FORM_data4:
+ case DW_FORM_ref4:
+ form_size = 4;
+ break;
+
+ // 8 byte values
+ case DW_FORM_data8:
+ case DW_FORM_ref8:
+ form_size = 8;
+ break;
+
+ // signed or unsigned LEB 128 values
+ case DW_FORM_sdata:
+ case DW_FORM_udata:
+ case DW_FORM_ref_udata:
+ debug_info_data.getULEB128(&offset);
+ break;
+
+ case DW_FORM_indirect:
+ form_is_indirect = true;
+ form = debug_info_data.getULEB128(&offset);
+ break;
+
+ default:
+ *offset_ptr = Offset;
+ return false;
+ }
+ offset += form_size;
+
+ } while (form_is_indirect);
+ }
+ }
+ *offset_ptr = offset;
+ return true;
+ } else {
+ AbbrevDecl = NULL;
+ return true; // NULL debug tag entry
+ }
+
+ return false;
+}
+
+bool
+DWARFDebugInfoEntryMinimal::extract(const DWARFCompileUnit *cu,
+ uint32_t *offset_ptr) {
+ DataExtractor debug_info_data = cu->getDebugInfoExtractor();
+ const uint32_t cu_end_offset = cu->getNextCompileUnitOffset();
+ const uint8_t cu_addr_size = cu->getAddressByteSize();
+ uint32_t offset = *offset_ptr;
+ if ((offset < cu_end_offset) && debug_info_data.isValidOffset(offset)) {
+ Offset = offset;
+
+ uint64_t abbrCode = debug_info_data.getULEB128(&offset);
+
+ if (abbrCode) {
+ AbbrevDecl = cu->getAbbreviations()->getAbbreviationDeclaration(abbrCode);
+
+ if (AbbrevDecl) {
+ uint16_t tag = AbbrevDecl->getTag();
+
+ bool isCompileUnitTag = tag == DW_TAG_compile_unit;
+      if (cu && isCompileUnitTag)
+ const_cast<DWARFCompileUnit*>(cu)->setBaseAddress(0);
+
+ // Skip all data in the .debug_info for the attributes
+ const uint32_t numAttributes = AbbrevDecl->getNumAttributes();
+ for (uint32_t i = 0; i != numAttributes; ++i) {
+ uint16_t attr = AbbrevDecl->getAttrByIndex(i);
+ uint16_t form = AbbrevDecl->getFormByIndex(i);
+
+ if (isCompileUnitTag &&
+ ((attr == DW_AT_entry_pc) || (attr == DW_AT_low_pc))) {
+ DWARFFormValue form_value(form);
+ if (form_value.extractValue(debug_info_data, &offset, cu)) {
+ if (attr == DW_AT_low_pc || attr == DW_AT_entry_pc)
+ const_cast<DWARFCompileUnit*>(cu)
+ ->setBaseAddress(form_value.getUnsigned());
+ }
+ } else {
+ bool form_is_indirect = false;
+ do {
+ form_is_indirect = false;
+            uint32_t form_size = 0;
+            switch (form) {
+            // Blocks of inlined data that have a length field and the data
+            // bytes inlined in the .debug_info
+ case DW_FORM_block:
+ form_size = debug_info_data.getULEB128(&offset);
+ break;
+ case DW_FORM_block1:
+ form_size = debug_info_data.getU8(&offset);
+ break;
+ case DW_FORM_block2:
+ form_size = debug_info_data.getU16(&offset);
+ break;
+ case DW_FORM_block4:
+ form_size = debug_info_data.getU32(&offset);
+ break;
+
+ // Inlined NULL terminated C-strings
+ case DW_FORM_string:
+ debug_info_data.getCStr(&offset);
+ break;
+
+ // Compile unit address sized values
+ case DW_FORM_addr:
+ case DW_FORM_ref_addr:
+ form_size = cu_addr_size;
+ break;
+
+ // 1 byte values
+ case DW_FORM_data1:
+ case DW_FORM_flag:
+ case DW_FORM_ref1:
+ form_size = 1;
+ break;
+
+ // 2 byte values
+ case DW_FORM_data2:
+ case DW_FORM_ref2:
+ form_size = 2;
+ break;
+
+ // 4 byte values
+ case DW_FORM_strp:
+ form_size = 4;
+ break;
+
+ case DW_FORM_data4:
+ case DW_FORM_ref4:
+ form_size = 4;
+ break;
+
+ // 8 byte values
+ case DW_FORM_data8:
+ case DW_FORM_ref8:
+ form_size = 8;
+ break;
+
+ // signed or unsigned LEB 128 values
+ case DW_FORM_sdata:
+ case DW_FORM_udata:
+ case DW_FORM_ref_udata:
+ debug_info_data.getULEB128(&offset);
+ break;
+
+ case DW_FORM_indirect:
+ form = debug_info_data.getULEB128(&offset);
+ form_is_indirect = true;
+ break;
+
+ default:
+ *offset_ptr = offset;
+ return false;
+ }
+
+ offset += form_size;
+ } while (form_is_indirect);
+ }
+ }
+ *offset_ptr = offset;
+ return true;
+ }
+ } else {
+ AbbrevDecl = NULL;
+ *offset_ptr = offset;
+ return true; // NULL debug tag entry
+ }
+ }
+
+ return false;
+}
+
+uint32_t
+DWARFDebugInfoEntryMinimal::getAttributeValue(const DWARFCompileUnit *cu,
+ const uint16_t attr,
+ DWARFFormValue &form_value,
+ uint32_t *end_attr_offset_ptr)
+ const {
+ if (AbbrevDecl) {
+ uint32_t attr_idx = AbbrevDecl->findAttributeIndex(attr);
+
+ if (attr_idx != -1U) {
+ uint32_t offset = getOffset();
+
+ DataExtractor debug_info_data = cu->getDebugInfoExtractor();
+
+ // Skip the abbreviation code so we are at the data for the attributes
+ debug_info_data.getULEB128(&offset);
+
+ uint32_t idx = 0;
+ while (idx < attr_idx)
+ DWARFFormValue::skipValue(AbbrevDecl->getFormByIndex(idx++),
+ debug_info_data, &offset, cu);
+
+ const uint32_t attr_offset = offset;
+ form_value = DWARFFormValue(AbbrevDecl->getFormByIndex(idx));
+ if (form_value.extractValue(debug_info_data, &offset, cu)) {
+ if (end_attr_offset_ptr)
+ *end_attr_offset_ptr = offset;
+ return attr_offset;
+ }
+ }
+ }
+
+ return 0;
+}
+
+const char*
+DWARFDebugInfoEntryMinimal::getAttributeValueAsString(
+ const DWARFCompileUnit* cu,
+ const uint16_t attr,
+ const char* fail_value) const {
+ DWARFFormValue form_value;
+ if (getAttributeValue(cu, attr, form_value)) {
+ DataExtractor stringExtractor(cu->getContext().getStringSection(),
+ false, 0);
+ return form_value.getAsCString(&stringExtractor);
+ }
+ return fail_value;
+}
+
+uint64_t
+DWARFDebugInfoEntryMinimal::getAttributeValueAsUnsigned(
+ const DWARFCompileUnit* cu,
+ const uint16_t attr,
+ uint64_t fail_value) const {
+ DWARFFormValue form_value;
+ if (getAttributeValue(cu, attr, form_value))
+ return form_value.getUnsigned();
+ return fail_value;
+}
+
+int64_t
+DWARFDebugInfoEntryMinimal::getAttributeValueAsSigned(
+ const DWARFCompileUnit* cu,
+ const uint16_t attr,
+ int64_t fail_value) const {
+ DWARFFormValue form_value;
+ if (getAttributeValue(cu, attr, form_value))
+ return form_value.getSigned();
+ return fail_value;
+}
+
+uint64_t
+DWARFDebugInfoEntryMinimal::getAttributeValueAsReference(
+ const DWARFCompileUnit* cu,
+ const uint16_t attr,
+ uint64_t fail_value) const {
+ DWARFFormValue form_value;
+ if (getAttributeValue(cu, attr, form_value))
+ return form_value.getReference(cu);
+ return fail_value;
+}
+
+void
+DWARFDebugInfoEntryMinimal::buildAddressRangeTable(const DWARFCompileUnit *cu,
+ DWARFDebugAranges *debug_aranges)
+ const {
+ if (AbbrevDecl) {
+ uint16_t tag = AbbrevDecl->getTag();
+ if (tag == DW_TAG_subprogram) {
+ uint64_t hi_pc = -1ULL;
+ uint64_t lo_pc = getAttributeValueAsUnsigned(cu, DW_AT_low_pc, -1ULL);
+ if (lo_pc != -1ULL)
+ hi_pc = getAttributeValueAsUnsigned(cu, DW_AT_high_pc, -1ULL);
+ if (hi_pc != -1ULL)
+ debug_aranges->appendRange(cu->getOffset(), lo_pc, hi_pc);
+ }
+
+ const DWARFDebugInfoEntryMinimal *child = getFirstChild();
+ while (child) {
+ child->buildAddressRangeTable(cu, debug_aranges);
+ child = child->getSibling();
+ }
+ }
+}
diff --git a/lib/DebugInfo/DWARFDebugInfoEntry.h b/lib/DebugInfo/DWARFDebugInfoEntry.h
new file mode 100644
index 0000000..aff2e85
--- /dev/null
+++ b/lib/DebugInfo/DWARFDebugInfoEntry.h
@@ -0,0 +1,135 @@
+//===-- DWARFDebugInfoEntry.h -----------------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_DEBUGINFO_DWARFDEBUGINFOENTRY_H
+#define LLVM_DEBUGINFO_DWARFDEBUGINFOENTRY_H
+
+#include "DWARFAbbreviationDeclaration.h"
+#include "llvm/Support/DataTypes.h"
+
+namespace llvm {
+
+class DWARFDebugAranges;
+class DWARFCompileUnit;
+class DWARFContext;
+class DWARFFormValue;
+
+/// DWARFDebugInfoEntryMinimal - A DIE with only the minimum required data.
+class DWARFDebugInfoEntryMinimal {
+ /// Offset within the .debug_info of the start of this entry.
+ uint64_t Offset;
+
+ /// How many to subtract from "this" to get the parent.
+ /// If zero this die has no parent.
+ uint32_t ParentIdx;
+
+ /// How many to add to "this" to get the sibling.
+ uint32_t SiblingIdx;
+
+ const DWARFAbbreviationDeclaration *AbbrevDecl;
+public:
+ DWARFDebugInfoEntryMinimal()
+ : Offset(0), ParentIdx(0), SiblingIdx(0), AbbrevDecl(0) {}
+
+ void dump(raw_ostream &OS, const DWARFCompileUnit *cu,
+ unsigned recurseDepth, unsigned indent = 0) const;
+ void dumpAttribute(raw_ostream &OS, const DWARFCompileUnit *cu,
+ uint32_t *offset_ptr, uint16_t attr, uint16_t form,
+ unsigned indent = 0) const;
+
+ bool extractFast(const DWARFCompileUnit *cu, const uint8_t *fixed_form_sizes,
+ uint32_t *offset_ptr);
+
+ /// Extract a debug info entry for a given compile unit from the
+ /// .debug_info and .debug_abbrev data starting at the given offset.
+ bool extract(const DWARFCompileUnit *cu, uint32_t *offset_ptr);
+
+ uint32_t getTag() const { return AbbrevDecl ? AbbrevDecl->getTag() : 0; }
+ bool isNULL() const { return AbbrevDecl == 0; }
+ uint64_t getOffset() const { return Offset; }
+ uint32_t getNumAttributes() const {
+ return !isNULL() ? AbbrevDecl->getNumAttributes() : 0;
+ }
+ bool hasChildren() const { return !isNULL() && AbbrevDecl->hasChildren(); }
+
+ // We know we are kept in a vector of contiguous entries, so we know
+ // our parent will be some index behind "this".
+ DWARFDebugInfoEntryMinimal *getParent() {
+ return ParentIdx > 0 ? this - ParentIdx : 0;
+ }
+ const DWARFDebugInfoEntryMinimal *getParent() const {
+ return ParentIdx > 0 ? this - ParentIdx : 0;
+ }
+ // We know we are kept in a vector of contiguous entries, so we know
+ // our sibling will be some index after "this".
+ DWARFDebugInfoEntryMinimal *getSibling() {
+ return SiblingIdx > 0 ? this + SiblingIdx : 0;
+ }
+ const DWARFDebugInfoEntryMinimal *getSibling() const {
+ return SiblingIdx > 0 ? this + SiblingIdx : 0;
+ }
+ // We know we are kept in a vector of contiguous entries, so we know
+ // we don't need to store our child pointer, if we have a child it will
+ // be the next entry in the list...
+ DWARFDebugInfoEntryMinimal *getFirstChild() {
+ return hasChildren() ? this + 1 : 0;
+ }
+ const DWARFDebugInfoEntryMinimal *getFirstChild() const {
+ return hasChildren() ? this + 1 : 0;
+ }
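+  // An illustrative flattened layout (tags and indexes assumed for the
+  // example):
+  //   [0] DW_TAG_compile_unit  ParentIdx=0 SiblingIdx=0  (child is [1])
+  //   [1] DW_TAG_base_type     ParentIdx=1 SiblingIdx=1  (sibling is [2])
+  //   [2] DW_TAG_subprogram    ParentIdx=2 SiblingIdx=0
+  //   [3] NULL                 (ends the children of [0])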
+
+ void setParent(DWARFDebugInfoEntryMinimal *parent) {
+ if (parent) {
+ // We know we are kept in a vector of contiguous entries, so we know
+ // our parent will be some index behind "this".
+ ParentIdx = this - parent;
+ } else
+ ParentIdx = 0;
+ }
+ void setSibling(DWARFDebugInfoEntryMinimal *sibling) {
+ if (sibling) {
+ // We know we are kept in a vector of contiguous entries, so we know
+ // our sibling will be some index after "this".
+ SiblingIdx = sibling - this;
+ sibling->setParent(getParent());
+ } else
+ SiblingIdx = 0;
+ }
+
+ const DWARFAbbreviationDeclaration *getAbbreviationDeclarationPtr() const {
+ return AbbrevDecl;
+ }
+
+ uint32_t getAttributeValue(const DWARFCompileUnit *cu,
+ const uint16_t attr, DWARFFormValue &formValue,
+ uint32_t *end_attr_offset_ptr = 0) const;
+
+ const char* getAttributeValueAsString(const DWARFCompileUnit* cu,
+ const uint16_t attr,
+ const char *fail_value) const;
+
+ uint64_t getAttributeValueAsUnsigned(const DWARFCompileUnit *cu,
+ const uint16_t attr,
+ uint64_t fail_value) const;
+
+ uint64_t getAttributeValueAsReference(const DWARFCompileUnit *cu,
+ const uint16_t attr,
+ uint64_t fail_value) const;
+
+ int64_t getAttributeValueAsSigned(const DWARFCompileUnit* cu,
+ const uint16_t attr,
+ int64_t fail_value) const;
+
+ void buildAddressRangeTable(const DWARFCompileUnit *cu,
+ DWARFDebugAranges *debug_aranges) const;
+};
+
+}
+
+#endif
diff --git a/lib/DebugInfo/DWARFDebugLine.cpp b/lib/DebugInfo/DWARFDebugLine.cpp
new file mode 100644
index 0000000..fe1ef78
--- /dev/null
+++ b/lib/DebugInfo/DWARFDebugLine.cpp
@@ -0,0 +1,475 @@
+//===-- DWARFDebugLine.cpp ------------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "DWARFDebugLine.h"
+#include "llvm/Support/Dwarf.h"
+#include "llvm/Support/Format.h"
+#include "llvm/Support/raw_ostream.h"
+#include <algorithm>
+using namespace llvm;
+using namespace dwarf;
+
+void DWARFDebugLine::Prologue::dump(raw_ostream &OS) const {
+ OS << "Line table prologue:\n"
+ << format(" total_length: 0x%8.8x\n", TotalLength)
+ << format(" version: %u\n", Version)
+ << format("prologue_length: 0x%8.8x\n", PrologueLength)
+ << format("min_inst_length: %u\n", MinInstLength)
+ << format("default_is_stmt: %u\n", DefaultIsStmt)
+ << format(" line_base: %i\n", LineBase)
+ << format(" line_range: %u\n", LineRange)
+ << format(" opcode_base: %u\n", OpcodeBase);
+
+ for (uint32_t i = 0; i < StandardOpcodeLengths.size(); ++i)
+ OS << format("standard_opcode_lengths[%s] = %u\n", LNStandardString(i+1),
+ StandardOpcodeLengths[i]);
+
+ if (!IncludeDirectories.empty())
+ for (uint32_t i = 0; i < IncludeDirectories.size(); ++i)
+ OS << format("include_directories[%3u] = '", i+1)
+ << IncludeDirectories[i] << "'\n";
+
+ if (!FileNames.empty()) {
+ OS << " Dir Mod Time File Len File Name\n"
+ << " ---- ---------- ---------- -----------"
+ "----------------\n";
+ for (uint32_t i = 0; i < FileNames.size(); ++i) {
+ const FileNameEntry& fileEntry = FileNames[i];
+ OS << format("file_names[%3u] %4u ", i+1, fileEntry.DirIdx)
+ << format("0x%8.8x 0x%8.8x ", fileEntry.ModTime, fileEntry.Length)
+ << fileEntry.Name << '\n';
+ }
+ }
+}
+
+void DWARFDebugLine::Row::postAppend() {
+ BasicBlock = false;
+ PrologueEnd = false;
+ EpilogueBegin = false;
+}
+
+void DWARFDebugLine::Row::reset(bool default_is_stmt) {
+ Address = 0;
+ Line = 1;
+ Column = 0;
+ File = 1;
+ Isa = 0;
+ IsStmt = default_is_stmt;
+ BasicBlock = false;
+ EndSequence = false;
+ PrologueEnd = false;
+ EpilogueBegin = false;
+}
+
+void DWARFDebugLine::Row::dump(raw_ostream &OS) const {
+ OS << format("0x%16.16llx %6u %6u", Address, Line, Column)
+ << format(" %6u %3u ", File, Isa)
+ << (IsStmt ? " is_stmt" : "")
+ << (BasicBlock ? " basic_block" : "")
+ << (PrologueEnd ? " prologue_end" : "")
+ << (EpilogueBegin ? " epilogue_begin" : "")
+ << (EndSequence ? " end_sequence" : "")
+ << '\n';
+}
+
+void DWARFDebugLine::LineTable::dump(raw_ostream &OS) const {
+ Prologue.dump(OS);
+ OS << '\n';
+
+ if (!Rows.empty()) {
+ OS << "Address Line Column File ISA Flags\n"
+ << "------------------ ------ ------ ------ --- -------------\n";
+ for (std::vector<Row>::const_iterator pos = Rows.begin(),
+ end = Rows.end(); pos != end; ++pos)
+ pos->dump(OS);
+ }
+}
+
+DWARFDebugLine::State::~State() {}
+
+void DWARFDebugLine::State::appendRowToMatrix(uint32_t offset) {
+ ++row; // Increase the row number.
+ LineTable::appendRow(*this);
+ Row::postAppend();
+}
+
+DWARFDebugLine::DumpingState::~DumpingState() {}
+
+void DWARFDebugLine::DumpingState::finalize(uint32_t offset) {
+ LineTable::dump(OS);
+}
+
+const DWARFDebugLine::LineTable *
+DWARFDebugLine::getLineTable(uint32_t offset) const {
+ LineTableConstIter pos = LineTableMap.find(offset);
+ if (pos != LineTableMap.end())
+ return &pos->second;
+ return 0;
+}
+
+const DWARFDebugLine::LineTable *
+DWARFDebugLine::getOrParseLineTable(DataExtractor debug_line_data,
+ uint32_t offset) {
+ std::pair<LineTableIter, bool> pos =
+ LineTableMap.insert(LineTableMapTy::value_type(offset, LineTable()));
+ if (pos.second) {
+    // Parse and cache the line table at this offset.
+ State state;
+ if (!parseStatementTable(debug_line_data, &offset, state))
+ return 0;
+ pos.first->second = state;
+ }
+ return &pos.first->second;
+}
+
+bool
+DWARFDebugLine::parsePrologue(DataExtractor debug_line_data,
+ uint32_t *offset_ptr, Prologue *prologue) {
+ const uint32_t prologue_offset = *offset_ptr;
+
+ prologue->clear();
+ prologue->TotalLength = debug_line_data.getU32(offset_ptr);
+ prologue->Version = debug_line_data.getU16(offset_ptr);
+ if (prologue->Version != 2)
+ return false;
+
+ prologue->PrologueLength = debug_line_data.getU32(offset_ptr);
+ const uint32_t end_prologue_offset = prologue->PrologueLength + *offset_ptr;
+ prologue->MinInstLength = debug_line_data.getU8(offset_ptr);
+ prologue->DefaultIsStmt = debug_line_data.getU8(offset_ptr);
+ prologue->LineBase = debug_line_data.getU8(offset_ptr);
+ prologue->LineRange = debug_line_data.getU8(offset_ptr);
+ prologue->OpcodeBase = debug_line_data.getU8(offset_ptr);
+
+ prologue->StandardOpcodeLengths.reserve(prologue->OpcodeBase-1);
+ for (uint32_t i = 1; i < prologue->OpcodeBase; ++i) {
+ uint8_t op_len = debug_line_data.getU8(offset_ptr);
+ prologue->StandardOpcodeLengths.push_back(op_len);
+ }
+
+ while (*offset_ptr < end_prologue_offset) {
+ const char *s = debug_line_data.getCStr(offset_ptr);
+ if (s && s[0])
+ prologue->IncludeDirectories.push_back(s);
+ else
+ break;
+ }
+
+ while (*offset_ptr < end_prologue_offset) {
+ const char *name = debug_line_data.getCStr(offset_ptr);
+ if (name && name[0]) {
+ FileNameEntry fileEntry;
+ fileEntry.Name = name;
+ fileEntry.DirIdx = debug_line_data.getULEB128(offset_ptr);
+ fileEntry.ModTime = debug_line_data.getULEB128(offset_ptr);
+ fileEntry.Length = debug_line_data.getULEB128(offset_ptr);
+ prologue->FileNames.push_back(fileEntry);
+ } else {
+ break;
+ }
+ }
+
+ if (*offset_ptr != end_prologue_offset) {
+    fprintf(stderr, "warning: parsing line table prologue at 0x%8.8x should"
+            " have ended at 0x%8.8x but it ended at 0x%8.8x\n",
+ prologue_offset, end_prologue_offset, *offset_ptr);
+ }
+  return true;
+}
+
+bool
+DWARFDebugLine::parseStatementTable(DataExtractor debug_line_data,
+ uint32_t *offset_ptr, State &state) {
+ const uint32_t debug_line_offset = *offset_ptr;
+
+ Prologue *prologue = &state.Prologue;
+
+ if (!parsePrologue(debug_line_data, offset_ptr, prologue)) {
+ // Restore our offset and return false to indicate failure!
+ *offset_ptr = debug_line_offset;
+ return false;
+ }
+
+ const uint32_t end_offset = debug_line_offset + prologue->TotalLength +
+ sizeof(prologue->TotalLength);
+
+ state.reset();
+
+ while (*offset_ptr < end_offset) {
+ uint8_t opcode = debug_line_data.getU8(offset_ptr);
+
+ if (opcode == 0) {
+ // Extended Opcodes always start with a zero opcode followed by
+ // a uleb128 length so you can skip ones you don't know about
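+      // For example (bytes assumed for illustration), DW_LNE_set_address
+      // on a target with 8-byte addresses is encoded as:
+      //   0x00              extended opcode marker
+      //   0x09              ULEB128 length (sub-opcode plus 8 operand bytes)
+      //   0x02              DW_LNE_set_address
+      //   <8 address bytes>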
+      uint64_t len = debug_line_data.getULEB128(offset_ptr);
+      uint32_t arg_size = len - 1; // len includes the sub-opcode byte.
+
+ uint8_t sub_opcode = debug_line_data.getU8(offset_ptr);
+ switch (sub_opcode) {
+ case DW_LNE_end_sequence:
+ // Set the end_sequence register of the state machine to true and
+ // append a row to the matrix using the current values of the
+ // state-machine registers. Then reset the registers to the initial
+ // values specified above. Every statement program sequence must end
+ // with a DW_LNE_end_sequence instruction which creates a row whose
+ // address is that of the byte after the last target machine instruction
+ // of the sequence.
+ state.EndSequence = true;
+ state.appendRowToMatrix(*offset_ptr);
+ state.reset();
+ break;
+
+ case DW_LNE_set_address:
+ // Takes a single relocatable address as an operand. The size of the
+ // operand is the size appropriate to hold an address on the target
+ // machine. Set the address register to the value given by the
+ // relocatable address. All of the other statement program opcodes
+ // that affect the address register add a delta to it. This instruction
+ // stores a relocatable value into it instead.
+ state.Address = debug_line_data.getAddress(offset_ptr);
+ break;
+
+ case DW_LNE_define_file:
+ // Takes 4 arguments. The first is a null terminated string containing
+ // a source file name. The second is an unsigned LEB128 number
+ // representing the directory index of the directory in which the file
+ // was found. The third is an unsigned LEB128 number representing the
+ // time of last modification of the file. The fourth is an unsigned
+ // LEB128 number representing the length in bytes of the file. The time
+ // and length fields may contain LEB128(0) if the information is not
+ // available.
+ //
+ // The directory index represents an entry in the include_directories
+ // section of the statement program prologue. The index is LEB128(0)
+ // if the file was found in the current directory of the compilation,
+ // LEB128(1) if it was found in the first directory in the
+ // include_directories section, and so on. The directory index is
+ // ignored for file names that represent full path names.
+ //
+ // The files are numbered, starting at 1, in the order in which they
+ // appear; the names in the prologue come before names defined by
+      // the DW_LNE_define_file instruction. These numbers are used in
+ // the file register of the state machine.
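+      // Encoding sketch (operand bytes assumed): 0x00 <uleb len> 0x03
+      // "a.c\0" <uleb dir index> <uleb mod time> <uleb file length>,
+      // where 0x03 is DW_LNE_define_file.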
+ {
+ FileNameEntry fileEntry;
+ fileEntry.Name = debug_line_data.getCStr(offset_ptr);
+ fileEntry.DirIdx = debug_line_data.getULEB128(offset_ptr);
+ fileEntry.ModTime = debug_line_data.getULEB128(offset_ptr);
+ fileEntry.Length = debug_line_data.getULEB128(offset_ptr);
+ prologue->FileNames.push_back(fileEntry);
+ }
+ break;
+
+ default:
+      // The length doesn't include the zero opcode byte or the length
+      // field itself, but it does include the sub_opcode byte, which
+      // arg_size has already excluded.
+ (*offset_ptr) += arg_size;
+ break;
+ }
+ } else if (opcode < prologue->OpcodeBase) {
+ switch (opcode) {
+ // Standard Opcodes
+ case DW_LNS_copy:
+ // Takes no arguments. Append a row to the matrix using the
+ // current values of the state-machine registers. Then set
+ // the basic_block register to false.
+ state.appendRowToMatrix(*offset_ptr);
+ break;
+
+ case DW_LNS_advance_pc:
+ // Takes a single unsigned LEB128 operand, multiplies it by the
+ // min_inst_length field of the prologue, and adds the
+ // result to the address register of the state machine.
+ state.Address += debug_line_data.getULEB128(offset_ptr) *
+ prologue->MinInstLength;
+ break;
+
+ case DW_LNS_advance_line:
+ // Takes a single signed LEB128 operand and adds that value to
+ // the line register of the state machine.
+ state.Line += debug_line_data.getSLEB128(offset_ptr);
+ break;
+
+ case DW_LNS_set_file:
+ // Takes a single unsigned LEB128 operand and stores it in the file
+ // register of the state machine.
+ state.File = debug_line_data.getULEB128(offset_ptr);
+ break;
+
+ case DW_LNS_set_column:
+ // Takes a single unsigned LEB128 operand and stores it in the
+ // column register of the state machine.
+ state.Column = debug_line_data.getULEB128(offset_ptr);
+ break;
+
+ case DW_LNS_negate_stmt:
+ // Takes no arguments. Set the is_stmt register of the state
+ // machine to the logical negation of its current value.
+ state.IsStmt = !state.IsStmt;
+ break;
+
+ case DW_LNS_set_basic_block:
+ // Takes no arguments. Set the basic_block register of the
+ // state machine to true
+ state.BasicBlock = true;
+ break;
+
+ case DW_LNS_const_add_pc:
+ // Takes no arguments. Add to the address register of the state
+ // machine the address increment value corresponding to special
+ // opcode 255. The motivation for DW_LNS_const_add_pc is this:
+ // when the statement program needs to advance the address by a
+ // small amount, it can use a single special opcode, which occupies
+ // a single byte. When it needs to advance the address by up to
+ // twice the range of the last special opcode, it can use
+ // DW_LNS_const_add_pc followed by a special opcode, for a total
+ // of two bytes. Only if it needs to advance the address by more
+ // than twice that range will it need to use both DW_LNS_advance_pc
+ // and a special opcode, requiring three or more bytes.
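+      // Worked example, assuming opcode_base = 13, line_range = 14 and
+      // min_inst_length = 1: adjust_opcode = 255 - 13 = 242, so the
+      // address register advances by (242 / 14) * 1 = 17 bytes and the
+      // line register is unchanged.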
+ {
+ uint8_t adjust_opcode = 255 - prologue->OpcodeBase;
+ uint64_t addr_offset = (adjust_opcode / prologue->LineRange) *
+ prologue->MinInstLength;
+ state.Address += addr_offset;
+ }
+ break;
+
+ case DW_LNS_fixed_advance_pc:
+ // Takes a single uhalf operand. Add to the address register of
+ // the state machine the value of the (unencoded) operand. This
+      // is the only standard opcode that takes an argument that is not
+ // a variable length number. The motivation for DW_LNS_fixed_advance_pc
+ // is this: existing assemblers cannot emit DW_LNS_advance_pc or
+ // special opcodes because they cannot encode LEB128 numbers or
+ // judge when the computation of a special opcode overflows and
+ // requires the use of DW_LNS_advance_pc. Such assemblers, however,
+ // can use DW_LNS_fixed_advance_pc instead, sacrificing compression.
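+      // Encoding sketch (operand bytes assumed): 0x09 0x34 0x12 advances
+      // the address register by 0x1234 on a little-endian target.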
+ state.Address += debug_line_data.getU16(offset_ptr);
+ break;
+
+ case DW_LNS_set_prologue_end:
+ // Takes no arguments. Set the prologue_end register of the
+ // state machine to true
+ state.PrologueEnd = true;
+ break;
+
+ case DW_LNS_set_epilogue_begin:
+      // Takes no arguments. Set the epilogue_begin register of the
+ // state machine to true
+ state.EpilogueBegin = true;
+ break;
+
+ case DW_LNS_set_isa:
+ // Takes a single unsigned LEB128 operand and stores it in the
+      // isa register of the state machine.
+ state.Isa = debug_line_data.getULEB128(offset_ptr);
+ break;
+
+ default:
+ // Handle any unknown standard opcodes here. We know the lengths
+ // of such opcodes because they are specified in the prologue
+ // as a multiple of LEB128 operands for each opcode.
+ {
+ assert(opcode - 1U < prologue->StandardOpcodeLengths.size());
+ uint8_t opcode_length = prologue->StandardOpcodeLengths[opcode - 1];
+ for (uint8_t i=0; i<opcode_length; ++i)
+ debug_line_data.getULEB128(offset_ptr);
+ }
+ break;
+ }
+ } else {
+ // Special Opcodes
+
+ // A special opcode value is chosen based on the amount that needs
+ // to be added to the line and address registers. The maximum line
+ // increment for a special opcode is the value of the line_base
+ // field in the header, plus the value of the line_range field,
+ // minus 1 (line base + line range - 1). If the desired line
+ // increment is greater than the maximum line increment, a standard
+ // opcode must be used instead of a special opcode. The "address
+ // advance" is calculated by dividing the desired address increment
+ // by the minimum_instruction_length field from the header. The
+ // special opcode is then calculated using the following formula:
+ //
+ // opcode = (desired line increment - line_base) +
+ // (line_range * address advance) + opcode_base
+ //
+ // If the resulting opcode is greater than 255, a standard opcode
+ // must be used instead.
+ //
+ // To decode a special opcode, subtract the opcode_base from the
+ // opcode itself to give the adjusted opcode. The amount to
+ // increment the address register is the result of the adjusted
+ // opcode divided by the line_range multiplied by the
+ // minimum_instruction_length field from the header. That is:
+ //
+ // address increment = (adjusted opcode / line_range) *
+ // minimum_instruction_length
+ //
+ // The amount to increment the line register is the line_base plus
+ // the result of the adjusted opcode modulo the line_range. That is:
+ //
+ // line increment = line_base + (adjusted opcode % line_range)
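+      // Worked example, assuming opcode_base = 13, line_base = -5,
+      // line_range = 14 and min_inst_length = 1: special opcode 20 gives
+      // adjusted opcode 20 - 13 = 7, so the address advances by
+      // (7 / 14) * 1 = 0 bytes and the line advances by -5 + (7 % 14) = 2.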
+
+ uint8_t adjust_opcode = opcode - prologue->OpcodeBase;
+ uint64_t addr_offset = (adjust_opcode / prologue->LineRange) *
+ prologue->MinInstLength;
+ int32_t line_offset = prologue->LineBase +
+ (adjust_opcode % prologue->LineRange);
+ state.Line += line_offset;
+ state.Address += addr_offset;
+ state.appendRowToMatrix(*offset_ptr);
+ }
+ }
+
+ state.finalize(*offset_ptr);
+
+  return true;
+}
+
+static bool findMatchingAddress(const DWARFDebugLine::Row& row1,
+ const DWARFDebugLine::Row& row2) {
+ return row1.Address < row2.Address;
+}
+
+uint32_t
+DWARFDebugLine::LineTable::lookupAddress(uint64_t address,
+ uint64_t cu_high_pc) const {
+ uint32_t index = UINT32_MAX;
+ if (!Rows.empty()) {
+ // Use the lower_bound algorithm to perform a binary search since we know
+ // that our line table data is ordered by address.
+ DWARFDebugLine::Row row;
+ row.Address = address;
+ typedef std::vector<Row>::const_iterator iterator;
+ iterator begin_pos = Rows.begin();
+ iterator end_pos = Rows.end();
+ iterator pos = std::lower_bound(begin_pos, end_pos, row,
+ findMatchingAddress);
+ if (pos == end_pos) {
+ if (address < cu_high_pc)
+ return Rows.size()-1;
+ } else {
+      // Rely on the fact that we are using a std::vector and can do
+      // pointer arithmetic to find the row index; it will be one less
+      // than what lower_bound found, since lower_bound returns the first
+      // position after the current address, and std::vector iterators
+      // are just pointers into the container.
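+      // For example (row addresses assumed): with rows at 0x100, 0x104 and
+      // 0x108, looking up 0x106 leaves lower_bound at 0x108 (index 2);
+      // since 0x108 > 0x106 the index is decremented to 1, the row at
+      // 0x104 that covers the queried address.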
+ index = pos - begin_pos;
+ if (pos->Address > address) {
+ if (index > 0)
+ --index;
+ else
+ index = UINT32_MAX;
+ }
+ }
+ }
+  return index; // UINT32_MAX if the address was not found.
+}
diff --git a/lib/DebugInfo/DWARFDebugLine.h b/lib/DebugInfo/DWARFDebugLine.h
new file mode 100644
index 0000000..bc6a70b
--- /dev/null
+++ b/lib/DebugInfo/DWARFDebugLine.h
@@ -0,0 +1,190 @@
+//===-- DWARFDebugLine.h ----------------------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_DEBUGINFO_DWARFDEBUGLINE_H
+#define LLVM_DEBUGINFO_DWARFDEBUGLINE_H
+
+#include "llvm/Support/DataExtractor.h"
+#include <map>
+#include <string>
+#include <vector>
+
+namespace llvm {
+
+class raw_ostream;
+
+class DWARFDebugLine {
+public:
+ struct FileNameEntry {
+ FileNameEntry() : DirIdx(0), ModTime(0), Length(0) {}
+
+ std::string Name;
+ uint64_t DirIdx;
+ uint64_t ModTime;
+ uint64_t Length;
+ };
+
+ struct Prologue {
+ Prologue()
+ : TotalLength(0), Version(0), PrologueLength(0), MinInstLength(0),
+ DefaultIsStmt(0), LineBase(0), LineRange(0), OpcodeBase(0) {}
+
+ // The size in bytes of the statement information for this compilation unit
+ // (not including the total_length field itself).
+ uint32_t TotalLength;
+ // Version identifier for the statement information format.
+ uint16_t Version;
+ // The number of bytes following the prologue_length field to the beginning
+ // of the first byte of the statement program itself.
+ uint32_t PrologueLength;
+ // The size in bytes of the smallest target machine instruction. Statement
+ // program opcodes that alter the address register first multiply their
+ // operands by this value.
+ uint8_t MinInstLength;
+    // The initial value of the is_stmt register.
+ uint8_t DefaultIsStmt;
+ // This parameter affects the meaning of the special opcodes. See below.
+ int8_t LineBase;
+ // This parameter affects the meaning of the special opcodes. See below.
+ uint8_t LineRange;
+ // The number assigned to the first special opcode.
+ uint8_t OpcodeBase;
+ std::vector<uint8_t> StandardOpcodeLengths;
+ std::vector<std::string> IncludeDirectories;
+ std::vector<FileNameEntry> FileNames;
+
+ // Length of the prologue in bytes.
+ uint32_t getLength() const {
+ return PrologueLength + sizeof(TotalLength) + sizeof(Version) +
+ sizeof(PrologueLength);
+ }
+ // Length of the line table data in bytes (not including the prologue).
+ uint32_t getStatementTableLength() const {
+ return TotalLength + sizeof(TotalLength) - getLength();
+ }
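+    // For example (field values assumed): with TotalLength = 0x40 and
+    // PrologueLength = 0x20, getLength() is 0x20 + 4 + 2 + 4 = 0x2a and
+    // getStatementTableLength() is 0x40 + 4 - 0x2a = 0x1a.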
+ int32_t getMaxLineIncrementForSpecialOpcode() const {
+ return LineBase + (int8_t)LineRange - 1;
+ }
+ void dump(raw_ostream &OS) const;
+ void clear() {
+ TotalLength = Version = PrologueLength = 0;
+ MinInstLength = LineBase = LineRange = OpcodeBase = 0;
+ StandardOpcodeLengths.clear();
+ IncludeDirectories.clear();
+ FileNames.clear();
+ }
+ };
+
+ // Standard .debug_line state machine structure.
+ struct Row {
+ Row(bool default_is_stmt = false) { reset(default_is_stmt); }
+ /// Called after a row is appended to the matrix.
+ void postAppend();
+ void reset(bool default_is_stmt);
+ void dump(raw_ostream &OS) const;
+
+ // The program-counter value corresponding to a machine instruction
+ // generated by the compiler.
+ uint64_t Address;
+ // An unsigned integer indicating a source line number. Lines are numbered
+ // beginning at 1. The compiler may emit the value 0 in cases where an
+ // instruction cannot be attributed to any source line.
+ uint32_t Line;
+ // An unsigned integer indicating a column number within a source line.
+ // Columns are numbered beginning at 1. The value 0 is reserved to indicate
+ // that a statement begins at the 'left edge' of the line.
+ uint16_t Column;
+ // An unsigned integer indicating the identity of the source file
+ // corresponding to a machine instruction.
+ uint16_t File;
+ // An unsigned integer whose value encodes the applicable instruction set
+ // architecture for the current instruction.
+ uint8_t Isa;
+ // A boolean indicating that the current instruction is the beginning of a
+ // statement.
+ uint8_t IsStmt:1,
+ // A boolean indicating that the current instruction is the
+ // beginning of a basic block.
+ BasicBlock:1,
+ // A boolean indicating that the current address is that of the
+ // first byte after the end of a sequence of target machine
+ // instructions.
+ EndSequence:1,
+ // A boolean indicating that the current address is one (of possibly
+ // many) where execution should be suspended for an entry breakpoint
+ // of a function.
+ PrologueEnd:1,
+ // A boolean indicating that the current address is one (of possibly
+ // many) where execution should be suspended for an exit breakpoint
+ // of a function.
+ EpilogueBegin:1;
+ };
+
+ struct LineTable {
+ void appendRow(const DWARFDebugLine::Row &state) { Rows.push_back(state); }
+ void clear() {
+ Prologue.clear();
+ Rows.clear();
+ }
+
+ uint32_t lookupAddress(uint64_t address, uint64_t cu_high_pc) const;
+ void dump(raw_ostream &OS) const;
+
+ struct Prologue Prologue;
+ std::vector<Row> Rows;
+ };
+
+ struct State : public Row, public LineTable {
+ // Special row codes.
+ enum {
+ StartParsingLineTable = 0,
+ DoneParsingLineTable = -1
+ };
+
+ State() : row(StartParsingLineTable) {}
+ virtual ~State();
+
+ virtual void appendRowToMatrix(uint32_t offset);
+ virtual void finalize(uint32_t offset) { row = DoneParsingLineTable; }
+ virtual void reset() { Row::reset(Prologue.DefaultIsStmt); }
+
+ // The row number that starts at zero for the prologue, and increases for
+ // each row added to the matrix.
+ unsigned row;
+ };
+
+ struct DumpingState : public State {
+ DumpingState(raw_ostream &OS) : OS(OS) {}
+ virtual ~DumpingState();
+ virtual void finalize(uint32_t offset);
+ private:
+ raw_ostream &OS;
+ };
+
+ static bool parsePrologue(DataExtractor debug_line_data, uint32_t *offset_ptr,
+ Prologue *prologue);
+ /// Parse a single line table (prologue and all rows).
+ static bool parseStatementTable(DataExtractor debug_line_data,
+ uint32_t *offset_ptr, State &state);
+
+ const LineTable *getLineTable(uint32_t offset) const;
+ const LineTable *getOrParseLineTable(DataExtractor debug_line_data,
+ uint32_t offset);
+
+private:
+ typedef std::map<uint32_t, LineTable> LineTableMapTy;
+ typedef LineTableMapTy::iterator LineTableIter;
+ typedef LineTableMapTy::const_iterator LineTableConstIter;
+
+ LineTableMapTy LineTableMap;
+};
+
+}
+
+#endif
diff --git a/lib/DebugInfo/DWARFFormValue.cpp b/lib/DebugInfo/DWARFFormValue.cpp
new file mode 100644
index 0000000..705efe5
--- /dev/null
+++ b/lib/DebugInfo/DWARFFormValue.cpp
@@ -0,0 +1,427 @@
+//===-- DWARFFormValue.cpp ------------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "DWARFFormValue.h"
+#include "DWARFCompileUnit.h"
+#include "DWARFContext.h"
+#include "llvm/Support/Dwarf.h"
+#include "llvm/Support/Format.h"
+#include "llvm/Support/raw_ostream.h"
+#include <cassert>
+using namespace llvm;
+using namespace dwarf;
+
+static const uint8_t form_sizes_addr4[] = {
+ 0, // 0x00 unused
+ 4, // 0x01 DW_FORM_addr
+ 0, // 0x02 unused
+ 0, // 0x03 DW_FORM_block2
+ 0, // 0x04 DW_FORM_block4
+ 2, // 0x05 DW_FORM_data2
+ 4, // 0x06 DW_FORM_data4
+ 8, // 0x07 DW_FORM_data8
+ 0, // 0x08 DW_FORM_string
+ 0, // 0x09 DW_FORM_block
+ 0, // 0x0a DW_FORM_block1
+ 1, // 0x0b DW_FORM_data1
+ 1, // 0x0c DW_FORM_flag
+ 0, // 0x0d DW_FORM_sdata
+ 4, // 0x0e DW_FORM_strp
+ 0, // 0x0f DW_FORM_udata
+ 4, // 0x10 DW_FORM_ref_addr
+ 1, // 0x11 DW_FORM_ref1
+ 2, // 0x12 DW_FORM_ref2
+ 4, // 0x13 DW_FORM_ref4
+ 8, // 0x14 DW_FORM_ref8
+ 0, // 0x15 DW_FORM_ref_udata
+ 0, // 0x16 DW_FORM_indirect
+};
+
+static const uint8_t form_sizes_addr8[] = {
+ 0, // 0x00 unused
+ 8, // 0x01 DW_FORM_addr
+ 0, // 0x02 unused
+ 0, // 0x03 DW_FORM_block2
+ 0, // 0x04 DW_FORM_block4
+ 2, // 0x05 DW_FORM_data2
+ 4, // 0x06 DW_FORM_data4
+ 8, // 0x07 DW_FORM_data8
+ 0, // 0x08 DW_FORM_string
+ 0, // 0x09 DW_FORM_block
+ 0, // 0x0a DW_FORM_block1
+ 1, // 0x0b DW_FORM_data1
+ 1, // 0x0c DW_FORM_flag
+ 0, // 0x0d DW_FORM_sdata
+ 4, // 0x0e DW_FORM_strp
+ 0, // 0x0f DW_FORM_udata
+ 8, // 0x10 DW_FORM_ref_addr
+ 1, // 0x11 DW_FORM_ref1
+ 2, // 0x12 DW_FORM_ref2
+ 4, // 0x13 DW_FORM_ref4
+ 8, // 0x14 DW_FORM_ref8
+ 0, // 0x15 DW_FORM_ref_udata
+ 0, // 0x16 DW_FORM_indirect
+};
+
+const uint8_t *
+DWARFFormValue::getFixedFormSizesForAddressSize(uint8_t addr_size) {
+ switch (addr_size) {
+ case 4: return form_sizes_addr4;
+ case 8: return form_sizes_addr8;
+ }
+ return NULL;
+}
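+// Usage sketch (the address size and form are illustrative):
+//   const uint8_t *sizes = DWARFFormValue::getFixedFormSizesForAddressSize(8);
+//   uint8_t strp_size = sizes ? sizes[DW_FORM_strp] : 0; // 4 in both tables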
+
+bool
+DWARFFormValue::extractValue(DataExtractor data, uint32_t *offset_ptr,
+ const DWARFCompileUnit *cu) {
+ bool indirect = false;
+ bool is_block = false;
+ Value.data = NULL;
+  // Read the value for the form into value and follow any DW_FORM_indirect
+  // instances we run into.
+ do {
+ indirect = false;
+ switch (Form) {
+ case DW_FORM_addr:
+ case DW_FORM_ref_addr:
+ Value.uval = data.getUnsigned(offset_ptr, cu->getAddressByteSize());
+ break;
+ case DW_FORM_block:
+ Value.uval = data.getULEB128(offset_ptr);
+ is_block = true;
+ break;
+ case DW_FORM_block1:
+ Value.uval = data.getU8(offset_ptr);
+ is_block = true;
+ break;
+ case DW_FORM_block2:
+ Value.uval = data.getU16(offset_ptr);
+ is_block = true;
+ break;
+ case DW_FORM_block4:
+ Value.uval = data.getU32(offset_ptr);
+ is_block = true;
+ break;
+ case DW_FORM_data1:
+ case DW_FORM_ref1:
+ case DW_FORM_flag:
+ Value.uval = data.getU8(offset_ptr);
+ break;
+ case DW_FORM_data2:
+ case DW_FORM_ref2:
+ Value.uval = data.getU16(offset_ptr);
+ break;
+ case DW_FORM_data4:
+ case DW_FORM_ref4:
+ Value.uval = data.getU32(offset_ptr);
+ break;
+ case DW_FORM_data8:
+ case DW_FORM_ref8:
+ Value.uval = data.getU64(offset_ptr);
+ break;
+ case DW_FORM_sdata:
+ Value.sval = data.getSLEB128(offset_ptr);
+ break;
+ case DW_FORM_strp:
+ Value.uval = data.getU32(offset_ptr);
+ break;
+ case DW_FORM_udata:
+ case DW_FORM_ref_udata:
+ Value.uval = data.getULEB128(offset_ptr);
+ break;
+ case DW_FORM_string:
+ Value.cstr = data.getCStr(offset_ptr);
+ // Set the string value to also be the data for inlined cstr form
+      // values only so we can tell the difference between DW_FORM_string
+ // and DW_FORM_strp form values
+ Value.data = (uint8_t*)Value.cstr;
+ break;
+ case DW_FORM_indirect:
+ Form = data.getULEB128(offset_ptr);
+ indirect = true;
+ break;
+ default:
+ return false;
+ }
+ } while (indirect);
+
+ if (is_block) {
+ StringRef str = data.getData().substr(*offset_ptr, Value.uval);
+ Value.data = NULL;
+ if (!str.empty()) {
+ Value.data = reinterpret_cast<const uint8_t *>(str.data());
+ *offset_ptr += Value.uval;
+ }
+ }
+
+ return true;
+}
+
+bool
+DWARFFormValue::skipValue(DataExtractor debug_info_data, uint32_t* offset_ptr,
+ const DWARFCompileUnit *cu) const {
+ return DWARFFormValue::skipValue(Form, debug_info_data, offset_ptr, cu);
+}
+
+bool
+DWARFFormValue::skipValue(uint16_t form, DataExtractor debug_info_data,
+ uint32_t *offset_ptr, const DWARFCompileUnit *cu) {
+ bool indirect = false;
+ do {
+ indirect = false;
+ switch (form) {
+    // Blocks of inlined data that have a length field and the data bytes
+    // inlined in the .debug_info
+ case DW_FORM_block: {
+ uint64_t size = debug_info_data.getULEB128(offset_ptr);
+ *offset_ptr += size;
+ return true;
+ }
+ case DW_FORM_block1: {
+ uint8_t size = debug_info_data.getU8(offset_ptr);
+ *offset_ptr += size;
+ return true;
+ }
+ case DW_FORM_block2: {
+ uint16_t size = debug_info_data.getU16(offset_ptr);
+ *offset_ptr += size;
+ return true;
+ }
+ case DW_FORM_block4: {
+ uint32_t size = debug_info_data.getU32(offset_ptr);
+ *offset_ptr += size;
+ return true;
+ }
+
+ // Inlined NULL terminated C-strings
+ case DW_FORM_string:
+ debug_info_data.getCStr(offset_ptr);
+ return true;
+
+ // Compile unit address sized values
+ case DW_FORM_addr:
+ case DW_FORM_ref_addr:
+ *offset_ptr += cu->getAddressByteSize();
+ return true;
+
+ // 1 byte values
+ case DW_FORM_data1:
+ case DW_FORM_flag:
+ case DW_FORM_ref1:
+ *offset_ptr += 1;
+ return true;
+
+ // 2 byte values
+ case DW_FORM_data2:
+ case DW_FORM_ref2:
+ *offset_ptr += 2;
+ return true;
+
+ // 4 byte values
+ case DW_FORM_strp:
+ case DW_FORM_data4:
+ case DW_FORM_ref4:
+ *offset_ptr += 4;
+ return true;
+
+ // 8 byte values
+ case DW_FORM_data8:
+ case DW_FORM_ref8:
+ *offset_ptr += 8;
+ return true;
+
+ // signed or unsigned LEB 128 values
+ // case DW_FORM_APPLE_db_str:
+ case DW_FORM_sdata:
+ case DW_FORM_udata:
+ case DW_FORM_ref_udata:
+ debug_info_data.getULEB128(offset_ptr);
+ return true;
+
+ case DW_FORM_indirect:
+ indirect = true;
+ form = debug_info_data.getULEB128(offset_ptr);
+ break;
+ default:
+ return false;
+ }
+ } while (indirect);
+ return true;
+}
+
+void
+DWARFFormValue::dump(raw_ostream &OS, const DWARFCompileUnit *cu) const {
+ DataExtractor debug_str_data(cu->getContext().getStringSection(), true, 0);
+ uint64_t uvalue = getUnsigned();
+ bool cu_relative_offset = false;
+
+ switch (Form) {
+  case DW_FORM_addr: OS << format("0x%016llx", (unsigned long long)uvalue); break;
+ case DW_FORM_flag:
+ case DW_FORM_data1: OS << format("0x%02x", uvalue); break;
+ case DW_FORM_data2: OS << format("0x%04x", uvalue); break;
+ case DW_FORM_data4: OS << format("0x%08x", uvalue); break;
+  case DW_FORM_data8: OS << format("0x%016llx", (unsigned long long)uvalue); break;
+ case DW_FORM_string:
+ OS << '"';
+ OS.write_escaped(getAsCString(NULL));
+ OS << '"';
+ break;
+ case DW_FORM_block:
+ case DW_FORM_block1:
+ case DW_FORM_block2:
+ case DW_FORM_block4:
+ if (uvalue > 0) {
+ switch (Form) {
+ case DW_FORM_block: OS << format("<0x%llx> ", uvalue); break;
+ case DW_FORM_block1: OS << format("<0x%2.2x> ", (uint8_t)uvalue); break;
+ case DW_FORM_block2: OS << format("<0x%4.4x> ", (uint16_t)uvalue); break;
+ case DW_FORM_block4: OS << format("<0x%8.8x> ", (uint32_t)uvalue); break;
+ default: break;
+ }
+
+ const uint8_t* data_ptr = Value.data;
+ if (data_ptr) {
+ // uvalue contains size of block
+ const uint8_t* end_data_ptr = data_ptr + uvalue;
+ while (data_ptr < end_data_ptr) {
+ OS << format("%2.2x ", *data_ptr);
+ ++data_ptr;
+ }
+ }
+ else
+ OS << "NULL";
+ }
+ break;
+
+ case DW_FORM_sdata: OS << getSigned(); break;
+ case DW_FORM_udata: OS << getUnsigned(); break;
+ case DW_FORM_strp: {
+ OS << format(" .debug_str[0x%8.8x] = ", (uint32_t)uvalue);
+ const char* dbg_str = getAsCString(&debug_str_data);
+ if (dbg_str) {
+ OS << '"';
+ OS.write_escaped(dbg_str);
+ OS << '"';
+ }
+ break;
+ }
+ case DW_FORM_ref_addr:
+ OS << format("0x%016x", uvalue);
+ break;
+ case DW_FORM_ref1:
+ cu_relative_offset = true;
+ OS << format("cu + 0x%2.2x", (uint8_t)uvalue);
+ break;
+ case DW_FORM_ref2:
+ cu_relative_offset = true;
+ OS << format("cu + 0x%4.4x", (uint16_t)uvalue);
+ break;
+ case DW_FORM_ref4:
+ cu_relative_offset = true;
+ OS << format("cu + 0x%4.4x", (uint32_t)uvalue);
+ break;
+ case DW_FORM_ref8:
+ cu_relative_offset = true;
+ OS << format("cu + 0x%8.8llx", uvalue);
+ break;
+ case DW_FORM_ref_udata:
+ cu_relative_offset = true;
+ OS << format("cu + 0x%llx", uvalue);
+ break;
+
+ // All DW_FORM_indirect attributes should be resolved prior to calling
+ // this function
+ case DW_FORM_indirect:
+ OS << "DW_FORM_indirect";
+ break;
+ default:
+ OS << format("DW_FORM(0x%4.4x)", Form);
+ break;
+ }
+
+ if (cu_relative_offset)
+ OS << format(" => {0x%8.8x}", (uvalue + (cu ? cu->getOffset() : 0)));
+}
+
+const char*
+DWARFFormValue::getAsCString(const DataExtractor *debug_str_data_ptr) const {
+ if (isInlinedCStr()) {
+ return Value.cstr;
+ } else if (debug_str_data_ptr) {
+ uint32_t offset = Value.uval;
+ return debug_str_data_ptr->getCStr(&offset);
+ }
+ return NULL;
+}
+
+uint64_t DWARFFormValue::getReference(const DWARFCompileUnit *cu) const {
+ uint64_t die_offset = Value.uval;
+ switch (Form) {
+ case DW_FORM_ref1:
+ case DW_FORM_ref2:
+ case DW_FORM_ref4:
+ case DW_FORM_ref8:
+ case DW_FORM_ref_udata:
+ die_offset += (cu ? cu->getOffset() : 0);
+ break;
+ default:
+ break;
+ }
+
+ return die_offset;
+}
+
+bool
+DWARFFormValue::resolveCompileUnitReferences(const DWARFCompileUnit *cu) {
+ switch (Form) {
+ case DW_FORM_ref1:
+ case DW_FORM_ref2:
+ case DW_FORM_ref4:
+ case DW_FORM_ref8:
+ case DW_FORM_ref_udata:
+ Value.uval += cu->getOffset();
+ Form = DW_FORM_ref_addr;
+ return true;
+ default:
+ break;
+ }
+ return false;
+}
+
+const uint8_t *DWARFFormValue::BlockData() const {
+ if (!isInlinedCStr())
+ return Value.data;
+ return NULL;
+}
+
+bool DWARFFormValue::isBlockForm(uint16_t form) {
+ switch (form) {
+ case DW_FORM_block:
+ case DW_FORM_block1:
+ case DW_FORM_block2:
+ case DW_FORM_block4:
+ return true;
+ }
+ return false;
+}
+
+bool DWARFFormValue::isDataForm(uint16_t form) {
+ switch (form) {
+ case DW_FORM_sdata:
+ case DW_FORM_udata:
+ case DW_FORM_data1:
+ case DW_FORM_data2:
+ case DW_FORM_data4:
+ case DW_FORM_data8:
+ return true;
+ }
+ return false;
+}
diff --git a/lib/DebugInfo/DWARFFormValue.h b/lib/DebugInfo/DWARFFormValue.h
new file mode 100644
index 0000000..22ac011
--- /dev/null
+++ b/lib/DebugInfo/DWARFFormValue.h
@@ -0,0 +1,78 @@
+//===-- DWARFFormValue.h ----------------------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_DEBUGINFO_DWARFFORMVALUE_H
+#define LLVM_DEBUGINFO_DWARFFORMVALUE_H
+
+#include "llvm/Support/DataExtractor.h"
+
+namespace llvm {
+
+class DWARFCompileUnit;
+class raw_ostream;
+
+class DWARFFormValue {
+public:
+ struct ValueType {
+ ValueType() : data(NULL) {
+ uval = 0;
+ }
+
+ union {
+ uint64_t uval;
+ int64_t sval;
+ const char* cstr;
+ };
+ const uint8_t* data;
+ };
+
+ enum {
+ eValueTypeInvalid = 0,
+ eValueTypeUnsigned,
+ eValueTypeSigned,
+ eValueTypeCStr,
+ eValueTypeBlock
+ };
+
+private:
+ uint16_t Form; // Form for this value.
+ ValueType Value; // Contains all data for the form.
+
+public:
+ DWARFFormValue(uint16_t form = 0) : Form(form) {}
+ uint16_t getForm() const { return Form; }
+ const ValueType& value() const { return Value; }
+ void dump(raw_ostream &OS, const DWARFCompileUnit* cu) const;
+ bool extractValue(DataExtractor data, uint32_t *offset_ptr,
+ const DWARFCompileUnit *cu);
+ bool isInlinedCStr() const {
+ return Value.data != NULL && Value.data == (uint8_t*)Value.cstr;
+ }
+ const uint8_t *BlockData() const;
+ uint64_t getReference(const DWARFCompileUnit* cu) const;
+
+ /// Resolve any compile unit specific references so that we don't need
+ /// the compile unit at a later time in order to work with the form
+ /// value.
+ bool resolveCompileUnitReferences(const DWARFCompileUnit* cu);
+ uint64_t getUnsigned() const { return Value.uval; }
+ int64_t getSigned() const { return Value.sval; }
+ const char *getAsCString(const DataExtractor *debug_str_data_ptr) const;
+ bool skipValue(DataExtractor debug_info_data, uint32_t *offset_ptr,
+ const DWARFCompileUnit *cu) const;
+ static bool skipValue(uint16_t form, DataExtractor debug_info_data,
+ uint32_t *offset_ptr, const DWARFCompileUnit *cu);
+ static bool isBlockForm(uint16_t form);
+ static bool isDataForm(uint16_t form);
+ static const uint8_t *getFixedFormSizesForAddressSize(uint8_t addr_size);
+};
+
+}
+
+#endif
diff --git a/lib/DebugInfo/Makefile b/lib/DebugInfo/Makefile
new file mode 100644
index 0000000..1292b57
--- /dev/null
+++ b/lib/DebugInfo/Makefile
@@ -0,0 +1,14 @@
+##===- lib/DebugInfo/Makefile ------------------------------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+
+LEVEL = ../..
+LIBRARYNAME = LLVMDebugInfo
+BUILD_ARCHIVE := 1
+
+include $(LEVEL)/Makefile.common
diff --git a/lib/ExecutionEngine/CMakeLists.txt b/lib/ExecutionEngine/CMakeLists.txt
index 58caae8..fb14d41 100644
--- a/lib/ExecutionEngine/CMakeLists.txt
+++ b/lib/ExecutionEngine/CMakeLists.txt
@@ -4,6 +4,13 @@ add_llvm_library(LLVMExecutionEngine
TargetSelect.cpp
)
+add_llvm_library_dependencies(LLVMExecutionEngine
+ LLVMCore
+ LLVMMC
+ LLVMSupport
+ LLVMTarget
+ )
+
add_subdirectory(Interpreter)
add_subdirectory(JIT)
add_subdirectory(MCJIT)
diff --git a/lib/ExecutionEngine/ExecutionEngine.cpp b/lib/ExecutionEngine/ExecutionEngine.cpp
index 7652090..525877b 100644
--- a/lib/ExecutionEngine/ExecutionEngine.cpp
+++ b/lib/ExecutionEngine/ExecutionEngine.cpp
@@ -93,7 +93,7 @@ public:
/// \brief Returns the address the GlobalVariable should be written into. The
/// GVMemoryBlock object prefixes that.
static char *Create(const GlobalVariable *GV, const TargetData& TD) {
- const Type *ElTy = GV->getType()->getElementType();
+ Type *ElTy = GV->getType()->getElementType();
size_t GVSize = (size_t)TD.getTypeAllocSize(ElTy);
void *RawMemory = ::operator new(
TargetData::RoundUpAlignment(sizeof(GVMemoryBlock),
@@ -272,7 +272,7 @@ void *ArgvArray::reset(LLVMContext &C, ExecutionEngine *EE,
Array = new char[(InputArgv.size()+1)*PtrSize];
DEBUG(dbgs() << "JIT: ARGV = " << (void*)Array << "\n");
- const Type *SBytePtr = Type::getInt8PtrTy(C);
+ Type *SBytePtr = Type::getInt8PtrTy(C);
for (unsigned i = 0; i != InputArgv.size(); ++i) {
unsigned Size = InputArgv[i].size()+1;
@@ -361,8 +361,8 @@ int ExecutionEngine::runFunctionAsMain(Function *Fn,
// Check main() type
unsigned NumArgs = Fn->getFunctionType()->getNumParams();
- const FunctionType *FTy = Fn->getFunctionType();
- const Type* PPInt8Ty = Type::getInt8PtrTy(Fn->getContext())->getPointerTo();
+ FunctionType *FTy = Fn->getFunctionType();
+ Type* PPInt8Ty = Type::getInt8PtrTy(Fn->getContext())->getPointerTo();
// Check the argument types.
if (NumArgs > 3)
@@ -422,6 +422,7 @@ ExecutionEngine *ExecutionEngine::createJIT(Module *M,
JITMemoryManager *JMM,
CodeGenOpt::Level OptLevel,
bool GVsWithCode,
+ Reloc::Model RM,
CodeModel::Model CMM) {
if (ExecutionEngine::JITCtor == 0) {
if (ErrorStr)
@@ -436,9 +437,8 @@ ExecutionEngine *ExecutionEngine::createJIT(Module *M,
SmallVector<std::string, 1> MAttrs;
TargetMachine *TM =
- EngineBuilder::selectTarget(M, MArch, MCPU, MAttrs, ErrorStr);
+ EngineBuilder::selectTarget(M, MArch, MCPU, MAttrs, RM, CMM, ErrorStr);
if (!TM || (ErrorStr && ErrorStr->length() > 0)) return 0;
- TM->setCodeModel(CMM);
return ExecutionEngine::JITCtor(M, ErrorStr, JMM, OptLevel, GVsWithCode, TM);
}
@@ -465,10 +465,9 @@ ExecutionEngine *EngineBuilder::create() {
// Unless the interpreter was explicitly selected or the JIT is not linked,
// try making a JIT.
if (WhichEngine & EngineKind::JIT) {
- if (TargetMachine *TM =
- EngineBuilder::selectTarget(M, MArch, MCPU, MAttrs, ErrorStr)) {
- TM->setCodeModel(CMModel);
-
+ if (TargetMachine *TM = EngineBuilder::selectTarget(M, MArch, MCPU, MAttrs,
+ RelocModel, CMModel,
+ ErrorStr)) {
if (UseMCJIT && ExecutionEngine::MCJITCtor) {
ExecutionEngine *EE =
ExecutionEngine::MCJITCtor(M, ErrorStr, JMM, OptLevel,
@@ -548,8 +547,7 @@ GenericValue ExecutionEngine::getConstantValue(const Constant *C) {
// Compute the index
GenericValue Result = getConstantValue(Op0);
SmallVector<Value*, 8> Indices(CE->op_begin()+1, CE->op_end());
- uint64_t Offset =
- TD->getIndexedOffset(Op0->getType(), &Indices[0], Indices.size());
+ uint64_t Offset = TD->getIndexedOffset(Op0->getType(), Indices);
char* tmp = (char*) Result.PointerVal;
Result = PTOGV(tmp + Offset);
@@ -651,7 +649,7 @@ GenericValue ExecutionEngine::getConstantValue(const Constant *C) {
}
case Instruction::BitCast: {
GenericValue GV = getConstantValue(Op0);
- const Type* DestTy = CE->getType();
+ Type* DestTy = CE->getType();
switch (Op0->getType()->getTypeID()) {
default: llvm_unreachable("Invalid bitcast operand");
case Type::IntegerTyID:
@@ -847,7 +845,7 @@ static void StoreIntToMemory(const APInt &IntVal, uint8_t *Dst,
}
void ExecutionEngine::StoreValueToMemory(const GenericValue &Val,
- GenericValue *Ptr, const Type *Ty) {
+ GenericValue *Ptr, Type *Ty) {
const unsigned StoreBytes = getTargetData()->getTypeStoreSize(Ty);
switch (Ty->getTypeID()) {
@@ -909,7 +907,7 @@ static void LoadIntFromMemory(APInt &IntVal, uint8_t *Src, unsigned LoadBytes) {
///
void ExecutionEngine::LoadValueFromMemory(GenericValue &Result,
GenericValue *Ptr,
- const Type *Ty) {
+ Type *Ty) {
const unsigned LoadBytes = getTargetData()->getTypeStoreSize(Ty);
switch (Ty->getTypeID()) {
@@ -932,7 +930,7 @@ void ExecutionEngine::LoadValueFromMemory(GenericValue &Result,
// FIXME: Will not trap if loading a signaling NaN.
uint64_t y[2];
memcpy(y, Ptr, 10);
- Result.IntVal = APInt(80, 2, y);
+ Result.IntVal = APInt(80, y);
break;
}
default:
@@ -986,7 +984,7 @@ void ExecutionEngine::emitGlobals() {
// Loop over all of the global variables in the program, allocating the memory
// to hold them. If there is more than one module, do a prepass over globals
// to figure out how the different modules should link together.
- std::map<std::pair<std::string, const Type*>,
+ std::map<std::pair<std::string, Type*>,
const GlobalValue*> LinkedGlobalsMap;
if (Modules.size() != 1) {
@@ -1101,7 +1099,7 @@ void ExecutionEngine::EmitGlobalVariable(const GlobalVariable *GV) {
if (!GV->isThreadLocal())
InitializeMemory(GV->getInitializer(), GA);
- const Type *ElTy = GV->getType()->getElementType();
+ Type *ElTy = GV->getType()->getElementType();
size_t GVSize = (size_t)getTargetData()->getTypeAllocSize(ElTy);
NumInitBytes += (unsigned)GVSize;
++NumGlobals;
diff --git a/lib/ExecutionEngine/Interpreter/CMakeLists.txt b/lib/ExecutionEngine/Interpreter/CMakeLists.txt
index d331f83..4fb58c2 100644
--- a/lib/ExecutionEngine/Interpreter/CMakeLists.txt
+++ b/lib/ExecutionEngine/Interpreter/CMakeLists.txt
@@ -12,6 +12,14 @@ add_llvm_library(LLVMInterpreter
Interpreter.cpp
)
+add_llvm_library_dependencies(LLVMInterpreter
+ LLVMCodeGen
+ LLVMCore
+ LLVMExecutionEngine
+ LLVMSupport
+ LLVMTarget
+ )
+
if( LLVM_ENABLE_FFI )
target_link_libraries( LLVMInterpreter ${FFI_LIBRARY_PATH} )
endif()
diff --git a/lib/ExecutionEngine/Interpreter/Execution.cpp b/lib/ExecutionEngine/Interpreter/Execution.cpp
index 498063b..27917da 100644
--- a/lib/ExecutionEngine/Interpreter/Execution.cpp
+++ b/lib/ExecutionEngine/Interpreter/Execution.cpp
@@ -51,7 +51,7 @@ static void SetValue(Value *V, GenericValue Val, ExecutionContext &SF) {
break
static void executeFAddInst(GenericValue &Dest, GenericValue Src1,
- GenericValue Src2, const Type *Ty) {
+ GenericValue Src2, Type *Ty) {
switch (Ty->getTypeID()) {
IMPLEMENT_BINARY_OPERATOR(+, Float);
IMPLEMENT_BINARY_OPERATOR(+, Double);
@@ -62,7 +62,7 @@ static void executeFAddInst(GenericValue &Dest, GenericValue Src1,
}
static void executeFSubInst(GenericValue &Dest, GenericValue Src1,
- GenericValue Src2, const Type *Ty) {
+ GenericValue Src2, Type *Ty) {
switch (Ty->getTypeID()) {
IMPLEMENT_BINARY_OPERATOR(-, Float);
IMPLEMENT_BINARY_OPERATOR(-, Double);
@@ -73,7 +73,7 @@ static void executeFSubInst(GenericValue &Dest, GenericValue Src1,
}
static void executeFMulInst(GenericValue &Dest, GenericValue Src1,
- GenericValue Src2, const Type *Ty) {
+ GenericValue Src2, Type *Ty) {
switch (Ty->getTypeID()) {
IMPLEMENT_BINARY_OPERATOR(*, Float);
IMPLEMENT_BINARY_OPERATOR(*, Double);
@@ -84,7 +84,7 @@ static void executeFMulInst(GenericValue &Dest, GenericValue Src1,
}
static void executeFDivInst(GenericValue &Dest, GenericValue Src1,
- GenericValue Src2, const Type *Ty) {
+ GenericValue Src2, Type *Ty) {
switch (Ty->getTypeID()) {
IMPLEMENT_BINARY_OPERATOR(/, Float);
IMPLEMENT_BINARY_OPERATOR(/, Double);
@@ -95,7 +95,7 @@ static void executeFDivInst(GenericValue &Dest, GenericValue Src1,
}
static void executeFRemInst(GenericValue &Dest, GenericValue Src1,
- GenericValue Src2, const Type *Ty) {
+ GenericValue Src2, Type *Ty) {
switch (Ty->getTypeID()) {
case Type::FloatTyID:
Dest.FloatVal = fmod(Src1.FloatVal, Src2.FloatVal);
@@ -125,7 +125,7 @@ static void executeFRemInst(GenericValue &Dest, GenericValue Src1,
break;
static GenericValue executeICMP_EQ(GenericValue Src1, GenericValue Src2,
- const Type *Ty) {
+ Type *Ty) {
GenericValue Dest;
switch (Ty->getTypeID()) {
IMPLEMENT_INTEGER_ICMP(eq,Ty);
@@ -138,7 +138,7 @@ static GenericValue executeICMP_EQ(GenericValue Src1, GenericValue Src2,
}
static GenericValue executeICMP_NE(GenericValue Src1, GenericValue Src2,
- const Type *Ty) {
+ Type *Ty) {
GenericValue Dest;
switch (Ty->getTypeID()) {
IMPLEMENT_INTEGER_ICMP(ne,Ty);
@@ -151,7 +151,7 @@ static GenericValue executeICMP_NE(GenericValue Src1, GenericValue Src2,
}
static GenericValue executeICMP_ULT(GenericValue Src1, GenericValue Src2,
- const Type *Ty) {
+ Type *Ty) {
GenericValue Dest;
switch (Ty->getTypeID()) {
IMPLEMENT_INTEGER_ICMP(ult,Ty);
@@ -164,7 +164,7 @@ static GenericValue executeICMP_ULT(GenericValue Src1, GenericValue Src2,
}
static GenericValue executeICMP_SLT(GenericValue Src1, GenericValue Src2,
- const Type *Ty) {
+ Type *Ty) {
GenericValue Dest;
switch (Ty->getTypeID()) {
IMPLEMENT_INTEGER_ICMP(slt,Ty);
@@ -177,7 +177,7 @@ static GenericValue executeICMP_SLT(GenericValue Src1, GenericValue Src2,
}
static GenericValue executeICMP_UGT(GenericValue Src1, GenericValue Src2,
- const Type *Ty) {
+ Type *Ty) {
GenericValue Dest;
switch (Ty->getTypeID()) {
IMPLEMENT_INTEGER_ICMP(ugt,Ty);
@@ -190,7 +190,7 @@ static GenericValue executeICMP_UGT(GenericValue Src1, GenericValue Src2,
}
static GenericValue executeICMP_SGT(GenericValue Src1, GenericValue Src2,
- const Type *Ty) {
+ Type *Ty) {
GenericValue Dest;
switch (Ty->getTypeID()) {
IMPLEMENT_INTEGER_ICMP(sgt,Ty);
@@ -203,7 +203,7 @@ static GenericValue executeICMP_SGT(GenericValue Src1, GenericValue Src2,
}
static GenericValue executeICMP_ULE(GenericValue Src1, GenericValue Src2,
- const Type *Ty) {
+ Type *Ty) {
GenericValue Dest;
switch (Ty->getTypeID()) {
IMPLEMENT_INTEGER_ICMP(ule,Ty);
@@ -216,7 +216,7 @@ static GenericValue executeICMP_ULE(GenericValue Src1, GenericValue Src2,
}
static GenericValue executeICMP_SLE(GenericValue Src1, GenericValue Src2,
- const Type *Ty) {
+ Type *Ty) {
GenericValue Dest;
switch (Ty->getTypeID()) {
IMPLEMENT_INTEGER_ICMP(sle,Ty);
@@ -229,7 +229,7 @@ static GenericValue executeICMP_SLE(GenericValue Src1, GenericValue Src2,
}
static GenericValue executeICMP_UGE(GenericValue Src1, GenericValue Src2,
- const Type *Ty) {
+ Type *Ty) {
GenericValue Dest;
switch (Ty->getTypeID()) {
IMPLEMENT_INTEGER_ICMP(uge,Ty);
@@ -242,7 +242,7 @@ static GenericValue executeICMP_UGE(GenericValue Src1, GenericValue Src2,
}
static GenericValue executeICMP_SGE(GenericValue Src1, GenericValue Src2,
- const Type *Ty) {
+ Type *Ty) {
GenericValue Dest;
switch (Ty->getTypeID()) {
IMPLEMENT_INTEGER_ICMP(sge,Ty);
@@ -256,7 +256,7 @@ static GenericValue executeICMP_SGE(GenericValue Src1, GenericValue Src2,
void Interpreter::visitICmpInst(ICmpInst &I) {
ExecutionContext &SF = ECStack.back();
- const Type *Ty = I.getOperand(0)->getType();
+ Type *Ty = I.getOperand(0)->getType();
GenericValue Src1 = getOperandValue(I.getOperand(0), SF);
GenericValue Src2 = getOperandValue(I.getOperand(1), SF);
GenericValue R; // Result
@@ -286,7 +286,7 @@ void Interpreter::visitICmpInst(ICmpInst &I) {
break
static GenericValue executeFCMP_OEQ(GenericValue Src1, GenericValue Src2,
- const Type *Ty) {
+ Type *Ty) {
GenericValue Dest;
switch (Ty->getTypeID()) {
IMPLEMENT_FCMP(==, Float);
@@ -299,7 +299,7 @@ static GenericValue executeFCMP_OEQ(GenericValue Src1, GenericValue Src2,
}
static GenericValue executeFCMP_ONE(GenericValue Src1, GenericValue Src2,
- const Type *Ty) {
+ Type *Ty) {
GenericValue Dest;
switch (Ty->getTypeID()) {
IMPLEMENT_FCMP(!=, Float);
@@ -313,7 +313,7 @@ static GenericValue executeFCMP_ONE(GenericValue Src1, GenericValue Src2,
}
static GenericValue executeFCMP_OLE(GenericValue Src1, GenericValue Src2,
- const Type *Ty) {
+ Type *Ty) {
GenericValue Dest;
switch (Ty->getTypeID()) {
IMPLEMENT_FCMP(<=, Float);
@@ -326,7 +326,7 @@ static GenericValue executeFCMP_OLE(GenericValue Src1, GenericValue Src2,
}
static GenericValue executeFCMP_OGE(GenericValue Src1, GenericValue Src2,
- const Type *Ty) {
+ Type *Ty) {
GenericValue Dest;
switch (Ty->getTypeID()) {
IMPLEMENT_FCMP(>=, Float);
@@ -339,7 +339,7 @@ static GenericValue executeFCMP_OGE(GenericValue Src1, GenericValue Src2,
}
static GenericValue executeFCMP_OLT(GenericValue Src1, GenericValue Src2,
- const Type *Ty) {
+ Type *Ty) {
GenericValue Dest;
switch (Ty->getTypeID()) {
IMPLEMENT_FCMP(<, Float);
@@ -352,7 +352,7 @@ static GenericValue executeFCMP_OLT(GenericValue Src1, GenericValue Src2,
}
static GenericValue executeFCMP_OGT(GenericValue Src1, GenericValue Src2,
- const Type *Ty) {
+ Type *Ty) {
GenericValue Dest;
switch (Ty->getTypeID()) {
IMPLEMENT_FCMP(>, Float);
@@ -377,49 +377,49 @@ static GenericValue executeFCMP_OGT(GenericValue Src1, GenericValue Src2,
static GenericValue executeFCMP_UEQ(GenericValue Src1, GenericValue Src2,
- const Type *Ty) {
+ Type *Ty) {
GenericValue Dest;
IMPLEMENT_UNORDERED(Ty, Src1, Src2)
return executeFCMP_OEQ(Src1, Src2, Ty);
}
static GenericValue executeFCMP_UNE(GenericValue Src1, GenericValue Src2,
- const Type *Ty) {
+ Type *Ty) {
GenericValue Dest;
IMPLEMENT_UNORDERED(Ty, Src1, Src2)
return executeFCMP_ONE(Src1, Src2, Ty);
}
static GenericValue executeFCMP_ULE(GenericValue Src1, GenericValue Src2,
- const Type *Ty) {
+ Type *Ty) {
GenericValue Dest;
IMPLEMENT_UNORDERED(Ty, Src1, Src2)
return executeFCMP_OLE(Src1, Src2, Ty);
}
static GenericValue executeFCMP_UGE(GenericValue Src1, GenericValue Src2,
- const Type *Ty) {
+ Type *Ty) {
GenericValue Dest;
IMPLEMENT_UNORDERED(Ty, Src1, Src2)
return executeFCMP_OGE(Src1, Src2, Ty);
}
static GenericValue executeFCMP_ULT(GenericValue Src1, GenericValue Src2,
- const Type *Ty) {
+ Type *Ty) {
GenericValue Dest;
IMPLEMENT_UNORDERED(Ty, Src1, Src2)
return executeFCMP_OLT(Src1, Src2, Ty);
}
static GenericValue executeFCMP_UGT(GenericValue Src1, GenericValue Src2,
- const Type *Ty) {
+ Type *Ty) {
GenericValue Dest;
IMPLEMENT_UNORDERED(Ty, Src1, Src2)
return executeFCMP_OGT(Src1, Src2, Ty);
}
static GenericValue executeFCMP_ORD(GenericValue Src1, GenericValue Src2,
- const Type *Ty) {
+ Type *Ty) {
GenericValue Dest;
if (Ty->isFloatTy())
Dest.IntVal = APInt(1,(Src1.FloatVal == Src1.FloatVal &&
@@ -431,7 +431,7 @@ static GenericValue executeFCMP_ORD(GenericValue Src1, GenericValue Src2,
}
static GenericValue executeFCMP_UNO(GenericValue Src1, GenericValue Src2,
- const Type *Ty) {
+ Type *Ty) {
GenericValue Dest;
if (Ty->isFloatTy())
Dest.IntVal = APInt(1,(Src1.FloatVal != Src1.FloatVal ||
@@ -444,7 +444,7 @@ static GenericValue executeFCMP_UNO(GenericValue Src1, GenericValue Src2,
void Interpreter::visitFCmpInst(FCmpInst &I) {
ExecutionContext &SF = ECStack.back();
- const Type *Ty = I.getOperand(0)->getType();
+ Type *Ty = I.getOperand(0)->getType();
GenericValue Src1 = getOperandValue(I.getOperand(0), SF);
GenericValue Src2 = getOperandValue(I.getOperand(1), SF);
GenericValue R; // Result
@@ -475,7 +475,7 @@ void Interpreter::visitFCmpInst(FCmpInst &I) {
}
static GenericValue executeCmpInst(unsigned predicate, GenericValue Src1,
- GenericValue Src2, const Type *Ty) {
+ GenericValue Src2, Type *Ty) {
GenericValue Result;
switch (predicate) {
case ICmpInst::ICMP_EQ: return executeICMP_EQ(Src1, Src2, Ty);
@@ -520,7 +520,7 @@ static GenericValue executeCmpInst(unsigned predicate, GenericValue Src1,
void Interpreter::visitBinaryOperator(BinaryOperator &I) {
ExecutionContext &SF = ECStack.back();
- const Type *Ty = I.getOperand(0)->getType();
+ Type *Ty = I.getOperand(0)->getType();
GenericValue Src1 = getOperandValue(I.getOperand(0), SF);
GenericValue Src2 = getOperandValue(I.getOperand(1), SF);
GenericValue R; // Result
@@ -585,7 +585,7 @@ void Interpreter::exitCalled(GenericValue GV) {
/// care of switching to the normal destination BB, if we are returning
/// from an invoke.
///
-void Interpreter::popStackAndReturnValueToCaller(const Type *RetTy,
+void Interpreter::popStackAndReturnValueToCaller(Type *RetTy,
GenericValue Result) {
// Pop the current stack frame.
ECStack.pop_back();
@@ -613,7 +613,7 @@ void Interpreter::popStackAndReturnValueToCaller(const Type *RetTy,
void Interpreter::visitReturnInst(ReturnInst &I) {
ExecutionContext &SF = ECStack.back();
- const Type *RetTy = Type::getVoidTy(I.getContext());
+ Type *RetTy = Type::getVoidTy(I.getContext());
GenericValue Result;
// Save away the return value... (if we are not 'ret void')
@@ -662,18 +662,21 @@ void Interpreter::visitBranchInst(BranchInst &I) {
void Interpreter::visitSwitchInst(SwitchInst &I) {
ExecutionContext &SF = ECStack.back();
- GenericValue CondVal = getOperandValue(I.getOperand(0), SF);
- const Type *ElTy = I.getOperand(0)->getType();
+ Value* Cond = I.getCondition();
+ Type *ElTy = Cond->getType();
+ GenericValue CondVal = getOperandValue(Cond, SF);
// Check to see if any of the cases match...
BasicBlock *Dest = 0;
- for (unsigned i = 2, e = I.getNumOperands(); i != e; i += 2)
- if (executeICMP_EQ(CondVal, getOperandValue(I.getOperand(i), SF), ElTy)
- .IntVal != 0) {
- Dest = cast<BasicBlock>(I.getOperand(i+1));
+ unsigned NumCases = I.getNumCases();
+ // Skip the first item since that's the default case.
+ for (unsigned i = 1; i < NumCases; ++i) {
+ GenericValue CaseVal = getOperandValue(I.getCaseValue(i), SF);
+ if (executeICMP_EQ(CondVal, CaseVal, ElTy).IntVal != 0) {
+ Dest = cast<BasicBlock>(I.getSuccessor(i));
break;
}
-
+ }
if (!Dest) Dest = I.getDefaultDest(); // No cases matched: use default
SwitchToNewBasicBlock(Dest, SF);
}
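The rewritten visitSwitchInst goes through the SwitchInst accessors instead of raw operand indices; in this release case 0 is the default, so value cases start at index 1. A sketch of that access pattern, assuming the same 3.0 headers:

    // Walk a 3.0-era SwitchInst via its accessors.
    static void dumpSwitchTargets(SwitchInst &SI) {
      for (unsigned i = 1, e = SI.getNumCases(); i != e; ++i) {
        ConstantInt *CaseVal = SI.getCaseValue(i);  // value tested by case i
        BasicBlock *Dest = SI.getSuccessor(i);      // block taken on a match
        errs() << "case " << CaseVal->getValue() << " -> "
               << Dest->getName() << "\n";
      }
      errs() << "default -> " << SI.getDefaultDest()->getName() << "\n";
    }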
@@ -730,7 +733,7 @@ void Interpreter::SwitchToNewBasicBlock(BasicBlock *Dest, ExecutionContext &SF){
void Interpreter::visitAllocaInst(AllocaInst &I) {
ExecutionContext &SF = ECStack.back();
- const Type *Ty = I.getType()->getElementType(); // Type to be allocated
+ Type *Ty = I.getType()->getElementType(); // Type to be allocated
// Get the number of elements being allocated by the array...
unsigned NumElements =
@@ -767,7 +770,7 @@ GenericValue Interpreter::executeGEPOperation(Value *Ptr, gep_type_iterator I,
uint64_t Total = 0;
for (; I != E; ++I) {
- if (const StructType *STy = dyn_cast<StructType>(*I)) {
+ if (StructType *STy = dyn_cast<StructType>(*I)) {
const StructLayout *SLO = TD.getStructLayout(STy);
const ConstantInt *CPU = cast<ConstantInt>(I.getOperand());
@@ -775,7 +778,7 @@ GenericValue Interpreter::executeGEPOperation(Value *Ptr, gep_type_iterator I,
Total += SLO->getElementOffset(Index);
} else {
- const SequentialType *ST = cast<SequentialType>(*I);
+ SequentialType *ST = cast<SequentialType>(*I);
// Get the index number for the array... which must be long type...
GenericValue IdxGV = getOperandValue(I.getOperand(), SF);
@@ -929,34 +932,34 @@ void Interpreter::visitAShr(BinaryOperator &I) {
SetValue(&I, Dest, SF);
}
-GenericValue Interpreter::executeTruncInst(Value *SrcVal, const Type *DstTy,
+GenericValue Interpreter::executeTruncInst(Value *SrcVal, Type *DstTy,
ExecutionContext &SF) {
GenericValue Dest, Src = getOperandValue(SrcVal, SF);
- const IntegerType *DITy = cast<IntegerType>(DstTy);
+ IntegerType *DITy = cast<IntegerType>(DstTy);
unsigned DBitWidth = DITy->getBitWidth();
Dest.IntVal = Src.IntVal.trunc(DBitWidth);
return Dest;
}
-GenericValue Interpreter::executeSExtInst(Value *SrcVal, const Type *DstTy,
+GenericValue Interpreter::executeSExtInst(Value *SrcVal, Type *DstTy,
ExecutionContext &SF) {
GenericValue Dest, Src = getOperandValue(SrcVal, SF);
- const IntegerType *DITy = cast<IntegerType>(DstTy);
+ IntegerType *DITy = cast<IntegerType>(DstTy);
unsigned DBitWidth = DITy->getBitWidth();
Dest.IntVal = Src.IntVal.sext(DBitWidth);
return Dest;
}
-GenericValue Interpreter::executeZExtInst(Value *SrcVal, const Type *DstTy,
+GenericValue Interpreter::executeZExtInst(Value *SrcVal, Type *DstTy,
ExecutionContext &SF) {
GenericValue Dest, Src = getOperandValue(SrcVal, SF);
- const IntegerType *DITy = cast<IntegerType>(DstTy);
+ IntegerType *DITy = cast<IntegerType>(DstTy);
unsigned DBitWidth = DITy->getBitWidth();
Dest.IntVal = Src.IntVal.zext(DBitWidth);
return Dest;
}
-GenericValue Interpreter::executeFPTruncInst(Value *SrcVal, const Type *DstTy,
+GenericValue Interpreter::executeFPTruncInst(Value *SrcVal, Type *DstTy,
ExecutionContext &SF) {
GenericValue Dest, Src = getOperandValue(SrcVal, SF);
assert(SrcVal->getType()->isDoubleTy() && DstTy->isFloatTy() &&
@@ -965,7 +968,7 @@ GenericValue Interpreter::executeFPTruncInst(Value *SrcVal, const Type *DstTy,
return Dest;
}
-GenericValue Interpreter::executeFPExtInst(Value *SrcVal, const Type *DstTy,
+GenericValue Interpreter::executeFPExtInst(Value *SrcVal, Type *DstTy,
ExecutionContext &SF) {
GenericValue Dest, Src = getOperandValue(SrcVal, SF);
assert(SrcVal->getType()->isFloatTy() && DstTy->isDoubleTy() &&
@@ -974,9 +977,9 @@ GenericValue Interpreter::executeFPExtInst(Value *SrcVal, const Type *DstTy,
return Dest;
}
-GenericValue Interpreter::executeFPToUIInst(Value *SrcVal, const Type *DstTy,
+GenericValue Interpreter::executeFPToUIInst(Value *SrcVal, Type *DstTy,
ExecutionContext &SF) {
- const Type *SrcTy = SrcVal->getType();
+ Type *SrcTy = SrcVal->getType();
uint32_t DBitWidth = cast<IntegerType>(DstTy)->getBitWidth();
GenericValue Dest, Src = getOperandValue(SrcVal, SF);
assert(SrcTy->isFloatingPointTy() && "Invalid FPToUI instruction");
@@ -988,9 +991,9 @@ GenericValue Interpreter::executeFPToUIInst(Value *SrcVal, const Type *DstTy,
return Dest;
}
-GenericValue Interpreter::executeFPToSIInst(Value *SrcVal, const Type *DstTy,
+GenericValue Interpreter::executeFPToSIInst(Value *SrcVal, Type *DstTy,
ExecutionContext &SF) {
- const Type *SrcTy = SrcVal->getType();
+ Type *SrcTy = SrcVal->getType();
uint32_t DBitWidth = cast<IntegerType>(DstTy)->getBitWidth();
GenericValue Dest, Src = getOperandValue(SrcVal, SF);
assert(SrcTy->isFloatingPointTy() && "Invalid FPToSI instruction");
@@ -1002,7 +1005,7 @@ GenericValue Interpreter::executeFPToSIInst(Value *SrcVal, const Type *DstTy,
return Dest;
}
-GenericValue Interpreter::executeUIToFPInst(Value *SrcVal, const Type *DstTy,
+GenericValue Interpreter::executeUIToFPInst(Value *SrcVal, Type *DstTy,
ExecutionContext &SF) {
GenericValue Dest, Src = getOperandValue(SrcVal, SF);
assert(DstTy->isFloatingPointTy() && "Invalid UIToFP instruction");
@@ -1014,7 +1017,7 @@ GenericValue Interpreter::executeUIToFPInst(Value *SrcVal, const Type *DstTy,
return Dest;
}
-GenericValue Interpreter::executeSIToFPInst(Value *SrcVal, const Type *DstTy,
+GenericValue Interpreter::executeSIToFPInst(Value *SrcVal, Type *DstTy,
ExecutionContext &SF) {
GenericValue Dest, Src = getOperandValue(SrcVal, SF);
assert(DstTy->isFloatingPointTy() && "Invalid SIToFP instruction");
@@ -1027,7 +1030,7 @@ GenericValue Interpreter::executeSIToFPInst(Value *SrcVal, const Type *DstTy,
}
-GenericValue Interpreter::executePtrToIntInst(Value *SrcVal, const Type *DstTy,
+GenericValue Interpreter::executePtrToIntInst(Value *SrcVal, Type *DstTy,
ExecutionContext &SF) {
uint32_t DBitWidth = cast<IntegerType>(DstTy)->getBitWidth();
GenericValue Dest, Src = getOperandValue(SrcVal, SF);
@@ -1037,7 +1040,7 @@ GenericValue Interpreter::executePtrToIntInst(Value *SrcVal, const Type *DstTy,
return Dest;
}
-GenericValue Interpreter::executeIntToPtrInst(Value *SrcVal, const Type *DstTy,
+GenericValue Interpreter::executeIntToPtrInst(Value *SrcVal, Type *DstTy,
ExecutionContext &SF) {
GenericValue Dest, Src = getOperandValue(SrcVal, SF);
assert(DstTy->isPointerTy() && "Invalid PtrToInt instruction");
@@ -1050,10 +1053,10 @@ GenericValue Interpreter::executeIntToPtrInst(Value *SrcVal, const Type *DstTy,
return Dest;
}
-GenericValue Interpreter::executeBitCastInst(Value *SrcVal, const Type *DstTy,
+GenericValue Interpreter::executeBitCastInst(Value *SrcVal, Type *DstTy,
ExecutionContext &SF) {
- const Type *SrcTy = SrcVal->getType();
+ Type *SrcTy = SrcVal->getType();
GenericValue Dest, Src = getOperandValue(SrcVal, SF);
if (DstTy->isPointerTy()) {
assert(SrcTy->isPointerTy() && "Invalid BitCast");
@@ -1155,7 +1158,7 @@ void Interpreter::visitVAArgInst(VAArgInst &I) {
GenericValue Dest;
GenericValue Src = ECStack[VAList.UIntPairVal.first]
.VarArgs[VAList.UIntPairVal.second];
- const Type *Ty = I.getType();
+ Type *Ty = I.getType();
switch (Ty->getTypeID()) {
case Type::IntegerTyID: Dest.IntVal = Src.IntVal;
IMPLEMENT_VAARG(Pointer);
@@ -1222,7 +1225,7 @@ GenericValue Interpreter::getConstantExprValue (ConstantExpr *CE,
GenericValue Op0 = getOperandValue(CE->getOperand(0), SF);
GenericValue Op1 = getOperandValue(CE->getOperand(1), SF);
GenericValue Dest;
- const Type * Ty = CE->getOperand(0)->getType();
+ Type * Ty = CE->getOperand(0)->getType();
switch (CE->getOpcode()) {
case Instruction::Add: Dest.IntVal = Op0.IntVal + Op1.IntVal; break;
case Instruction::Sub: Dest.IntVal = Op0.IntVal - Op1.IntVal; break;
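Every signature change in this file is the same mechanical edit: the 3.0 type-system rewrite dropped const from Type* because types are uniqued and immutable by construction. The resulting idiom, sketched with the interpreter's GenericValue (an illustrative helper, not part of the commit):

    // Build a zero of the given first-class type.
    static GenericValue makeZero(Type *Ty) {
      GenericValue V;
      switch (Ty->getTypeID()) {
      case Type::IntegerTyID:
        V.IntVal = APInt(cast<IntegerType>(Ty)->getBitWidth(), 0);
        break;
      case Type::FloatTyID:  V.FloatVal  = 0.0f; break;
      case Type::DoubleTyID: V.DoubleVal = 0.0;  break;
      default: llvm_unreachable("makeZero: unhandled type");
      }
      return V;
    }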
diff --git a/lib/ExecutionEngine/Interpreter/ExternalFunctions.cpp b/lib/ExecutionEngine/Interpreter/ExternalFunctions.cpp
index f7e2a4d..055875c 100644
--- a/lib/ExecutionEngine/Interpreter/ExternalFunctions.cpp
+++ b/lib/ExecutionEngine/Interpreter/ExternalFunctions.cpp
@@ -48,7 +48,7 @@ using namespace llvm;
static ManagedStatic<sys::Mutex> FunctionsLock;
-typedef GenericValue (*ExFunc)(const FunctionType *,
+typedef GenericValue (*ExFunc)(FunctionType *,
const std::vector<GenericValue> &);
static ManagedStatic<std::map<const Function *, ExFunc> > ExportedFunctions;
static std::map<std::string, ExFunc> FuncNames;
@@ -60,7 +60,7 @@ static ManagedStatic<std::map<const Function *, RawFunc> > RawFunctions;
static Interpreter *TheInterpreter;
-static char getTypeID(const Type *Ty) {
+static char getTypeID(Type *Ty) {
switch (Ty->getTypeID()) {
case Type::VoidTyID: return 'V';
case Type::IntegerTyID:
@@ -91,7 +91,7 @@ static ExFunc lookupFunction(const Function *F) {
// Function not found, look it up... start by figuring out what the
// composite function name should be.
std::string ExtName = "lle_";
- const FunctionType *FT = F->getFunctionType();
+ FunctionType *FT = F->getFunctionType();
for (unsigned i = 0, e = FT->getNumContainedTypes(); i != e; ++i)
ExtName += getTypeID(FT->getContainedType(i));
ExtName + "_" + F->getNameStr();
@@ -109,7 +109,7 @@ static ExFunc lookupFunction(const Function *F) {
}
#ifdef USE_LIBFFI
-static ffi_type *ffiTypeFor(const Type *Ty) {
+static ffi_type *ffiTypeFor(Type *Ty) {
switch (Ty->getTypeID()) {
case Type::VoidTyID: return &ffi_type_void;
case Type::IntegerTyID:
@@ -129,7 +129,7 @@ static ffi_type *ffiTypeFor(const Type *Ty) {
return NULL;
}
-static void *ffiValueFor(const Type *Ty, const GenericValue &AV,
+static void *ffiValueFor(Type *Ty, const GenericValue &AV,
void *ArgDataPtr) {
switch (Ty->getTypeID()) {
case Type::IntegerTyID:
@@ -181,7 +181,7 @@ static bool ffiInvoke(RawFunc Fn, Function *F,
const std::vector<GenericValue> &ArgVals,
const TargetData *TD, GenericValue &Result) {
ffi_cif cif;
- const FunctionType *FTy = F->getFunctionType();
+ FunctionType *FTy = F->getFunctionType();
const unsigned NumArgs = F->arg_size();
// TODO: We don't have type information about the remaining arguments, because
@@ -197,7 +197,7 @@ static bool ffiInvoke(RawFunc Fn, Function *F,
for (Function::const_arg_iterator A = F->arg_begin(), E = F->arg_end();
A != E; ++A) {
const unsigned ArgNo = A->getArgNo();
- const Type *ArgTy = FTy->getParamType(ArgNo);
+ Type *ArgTy = FTy->getParamType(ArgNo);
args[ArgNo] = ffiTypeFor(ArgTy);
ArgBytes += TD->getTypeStoreSize(ArgTy);
}
@@ -209,12 +209,12 @@ static bool ffiInvoke(RawFunc Fn, Function *F,
for (Function::const_arg_iterator A = F->arg_begin(), E = F->arg_end();
A != E; ++A) {
const unsigned ArgNo = A->getArgNo();
- const Type *ArgTy = FTy->getParamType(ArgNo);
+ Type *ArgTy = FTy->getParamType(ArgNo);
values[ArgNo] = ffiValueFor(ArgTy, ArgVals[ArgNo], ArgDataPtr);
ArgDataPtr += TD->getTypeStoreSize(ArgTy);
}
- const Type *RetTy = FTy->getReturnType();
+ Type *RetTy = FTy->getReturnType();
ffi_type *rtype = ffiTypeFor(RetTy);
if (ffi_prep_cif(&cif, FFI_DEFAULT_ABI, NumArgs, rtype, &args[0]) == FFI_OK) {
@@ -304,7 +304,7 @@ GenericValue Interpreter::callExternalFunction(Function *F,
extern "C" { // Don't add C++ manglings to llvm mangling :)
// void atexit(Function*)
-GenericValue lle_X_atexit(const FunctionType *FT,
+GenericValue lle_X_atexit(FunctionType *FT,
const std::vector<GenericValue> &Args) {
assert(Args.size() == 1);
TheInterpreter->addAtExitHandler((Function*)GVTOP(Args[0]));
@@ -314,14 +314,14 @@ GenericValue lle_X_atexit(const FunctionType *FT,
}
// void exit(int)
-GenericValue lle_X_exit(const FunctionType *FT,
+GenericValue lle_X_exit(FunctionType *FT,
const std::vector<GenericValue> &Args) {
TheInterpreter->exitCalled(Args[0]);
return GenericValue();
}
// void abort(void)
-GenericValue lle_X_abort(const FunctionType *FT,
+GenericValue lle_X_abort(FunctionType *FT,
const std::vector<GenericValue> &Args) {
//FIXME: should we report or raise here?
//report_fatal_error("Interpreted program raised SIGABRT");
@@ -331,7 +331,7 @@ GenericValue lle_X_abort(const FunctionType *FT,
// int sprintf(char *, const char *, ...) - a very rough implementation to make
// output useful.
-GenericValue lle_X_sprintf(const FunctionType *FT,
+GenericValue lle_X_sprintf(FunctionType *FT,
const std::vector<GenericValue> &Args) {
char *OutputBuffer = (char *)GVTOP(Args[0]);
const char *FmtStr = (const char *)GVTOP(Args[1]);
@@ -413,7 +413,7 @@ GenericValue lle_X_sprintf(const FunctionType *FT,
// int printf(const char *, ...) - a very rough implementation to make output
// useful.
-GenericValue lle_X_printf(const FunctionType *FT,
+GenericValue lle_X_printf(FunctionType *FT,
const std::vector<GenericValue> &Args) {
char Buffer[10000];
std::vector<GenericValue> NewArgs;
@@ -425,7 +425,7 @@ GenericValue lle_X_printf(const FunctionType *FT,
}
// int sscanf(const char *format, ...);
-GenericValue lle_X_sscanf(const FunctionType *FT,
+GenericValue lle_X_sscanf(FunctionType *FT,
const std::vector<GenericValue> &args) {
assert(args.size() < 10 && "Only handle up to 10 args to sscanf right now!");
@@ -440,7 +440,7 @@ GenericValue lle_X_sscanf(const FunctionType *FT,
}
// int scanf(const char *format, ...);
-GenericValue lle_X_scanf(const FunctionType *FT,
+GenericValue lle_X_scanf(FunctionType *FT,
const std::vector<GenericValue> &args) {
assert(args.size() < 10 && "Only handle up to 10 args to scanf right now!");
@@ -456,7 +456,7 @@ GenericValue lle_X_scanf(const FunctionType *FT,
// int fprintf(FILE *, const char *, ...) - a very rough implementation to make
// output useful.
-GenericValue lle_X_fprintf(const FunctionType *FT,
+GenericValue lle_X_fprintf(FunctionType *FT,
const std::vector<GenericValue> &Args) {
assert(Args.size() >= 2);
char Buffer[10000];
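All of the lle_X_ handlers in this file share the ExFunc shape declared near the top, and live inside the extern "C" block so the by-name lookup finds them. A hypothetical handler in the same mold (puts is illustrative; <cstdio> is assumed to be available):

    // int puts(const char *)
    GenericValue lle_X_puts(FunctionType *FT,
                            const std::vector<GenericValue> &Args) {
      assert(Args.size() == 1);
      const char *Str = (const char *)GVTOP(Args[0]); // GenericValue -> pointer
      GenericValue GV;
      GV.IntVal = APInt(32, puts(Str));
      return GV;
    }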
diff --git a/lib/ExecutionEngine/Interpreter/Interpreter.h b/lib/ExecutionEngine/Interpreter/Interpreter.h
index bfebe3d..ee2b459 100644
--- a/lib/ExecutionEngine/Interpreter/Interpreter.h
+++ b/lib/ExecutionEngine/Interpreter/Interpreter.h
@@ -174,7 +174,7 @@ public:
void visitVAArgInst(VAArgInst &I);
void visitInstruction(Instruction &I) {
- errs() << I;
+ errs() << I << "\n";
llvm_unreachable("Instruction not interpretable yet!");
}
@@ -207,33 +207,33 @@ private: // Helper functions
void initializeExternalFunctions();
GenericValue getConstantExprValue(ConstantExpr *CE, ExecutionContext &SF);
GenericValue getOperandValue(Value *V, ExecutionContext &SF);
- GenericValue executeTruncInst(Value *SrcVal, const Type *DstTy,
+ GenericValue executeTruncInst(Value *SrcVal, Type *DstTy,
ExecutionContext &SF);
- GenericValue executeSExtInst(Value *SrcVal, const Type *DstTy,
+ GenericValue executeSExtInst(Value *SrcVal, Type *DstTy,
ExecutionContext &SF);
- GenericValue executeZExtInst(Value *SrcVal, const Type *DstTy,
+ GenericValue executeZExtInst(Value *SrcVal, Type *DstTy,
ExecutionContext &SF);
- GenericValue executeFPTruncInst(Value *SrcVal, const Type *DstTy,
+ GenericValue executeFPTruncInst(Value *SrcVal, Type *DstTy,
ExecutionContext &SF);
- GenericValue executeFPExtInst(Value *SrcVal, const Type *DstTy,
+ GenericValue executeFPExtInst(Value *SrcVal, Type *DstTy,
ExecutionContext &SF);
- GenericValue executeFPToUIInst(Value *SrcVal, const Type *DstTy,
+ GenericValue executeFPToUIInst(Value *SrcVal, Type *DstTy,
ExecutionContext &SF);
- GenericValue executeFPToSIInst(Value *SrcVal, const Type *DstTy,
+ GenericValue executeFPToSIInst(Value *SrcVal, Type *DstTy,
ExecutionContext &SF);
- GenericValue executeUIToFPInst(Value *SrcVal, const Type *DstTy,
+ GenericValue executeUIToFPInst(Value *SrcVal, Type *DstTy,
ExecutionContext &SF);
- GenericValue executeSIToFPInst(Value *SrcVal, const Type *DstTy,
+ GenericValue executeSIToFPInst(Value *SrcVal, Type *DstTy,
ExecutionContext &SF);
- GenericValue executePtrToIntInst(Value *SrcVal, const Type *DstTy,
+ GenericValue executePtrToIntInst(Value *SrcVal, Type *DstTy,
ExecutionContext &SF);
- GenericValue executeIntToPtrInst(Value *SrcVal, const Type *DstTy,
+ GenericValue executeIntToPtrInst(Value *SrcVal, Type *DstTy,
ExecutionContext &SF);
- GenericValue executeBitCastInst(Value *SrcVal, const Type *DstTy,
+ GenericValue executeBitCastInst(Value *SrcVal, Type *DstTy,
ExecutionContext &SF);
GenericValue executeCastOperation(Instruction::CastOps opcode, Value *SrcVal,
- const Type *Ty, ExecutionContext &SF);
- void popStackAndReturnValueToCaller(const Type *RetTy, GenericValue Result);
+ Type *Ty, ExecutionContext &SF);
+ void popStackAndReturnValueToCaller(Type *RetTy, GenericValue Result);
};
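A side effect worth noting in this header: llvm::cast<> mirrors the qualifiers of its argument, so once the Type* parameters lose const, the derived-type results do too, and the .cpp bodies need no const_cast. A minimal sketch:

    // An unqualified Type* yields an unqualified IntegerType*.
    static unsigned destBitWidth(Type *DstTy) {
      return cast<IntegerType>(DstTy)->getBitWidth();
    }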
diff --git a/lib/ExecutionEngine/JIT/CMakeLists.txt b/lib/ExecutionEngine/JIT/CMakeLists.txt
index cefb0ae..598e50e 100644
--- a/lib/ExecutionEngine/JIT/CMakeLists.txt
+++ b/lib/ExecutionEngine/JIT/CMakeLists.txt
@@ -10,3 +10,11 @@ add_llvm_library(LLVMJIT
JITMemoryManager.cpp
OProfileJITEventListener.cpp
)
+
+add_llvm_library_dependencies(LLVMJIT
+ LLVMCore
+ LLVMExecutionEngine
+ LLVMRuntimeDyld
+ LLVMSupport
+ LLVMTarget
+ )
diff --git a/lib/ExecutionEngine/JIT/Intercept.cpp b/lib/ExecutionEngine/JIT/Intercept.cpp
index fa8bee4..2251a8e 100644
--- a/lib/ExecutionEngine/JIT/Intercept.cpp
+++ b/lib/ExecutionEngine/JIT/Intercept.cpp
@@ -52,6 +52,7 @@ static void runAtExitHandlers() {
#include <sys/stat.h>
#endif
#include <fcntl.h>
+#include <unistd.h>
/* stat functions are redirecting to __xstat with a version number. On x86-64
* linking with libc_nonshared.a and -Wl,--export-dynamic doesn't make 'stat'
* available as an exported symbol, so we have to add it explicitly.
diff --git a/lib/ExecutionEngine/JIT/JIT.cpp b/lib/ExecutionEngine/JIT/JIT.cpp
index 445d2d0..d773009 100644
--- a/lib/ExecutionEngine/JIT/JIT.cpp
+++ b/lib/ExecutionEngine/JIT/JIT.cpp
@@ -390,8 +390,8 @@ GenericValue JIT::runFunction(Function *F,
void *FPtr = getPointerToFunction(F);
assert(FPtr && "Pointer to fn's code was null after getPointerToFunction");
- const FunctionType *FTy = F->getFunctionType();
- const Type *RetTy = FTy->getReturnType();
+ FunctionType *FTy = F->getFunctionType();
+ Type *RetTy = FTy->getReturnType();
assert((FTy->getNumParams() == ArgValues.size() ||
(FTy->isVarArg() && FTy->getNumParams() <= ArgValues.size())) &&
@@ -500,7 +500,7 @@ GenericValue JIT::runFunction(Function *F,
SmallVector<Value*, 8> Args;
for (unsigned i = 0, e = ArgValues.size(); i != e; ++i) {
Constant *C = 0;
- const Type *ArgTy = FTy->getParamType(i);
+ Type *ArgTy = FTy->getParamType(i);
const GenericValue &AV = ArgValues[i];
switch (ArgTy->getTypeID()) {
default: llvm_unreachable("Unknown argument type for function call!");
@@ -788,7 +788,7 @@ char* JIT::getMemoryForGV(const GlobalVariable* GV) {
// be allocated into the same buffer, but in general globals are allocated
// through the memory manager which puts them near the code but not in the
// same buffer.
- const Type *GlobalType = GV->getType()->getElementType();
+ Type *GlobalType = GV->getType()->getElementType();
size_t S = getTargetData()->getTypeAllocSize(GlobalType);
size_t A = getTargetData()->getPreferredAlignment(GV);
if (GV->isThreadLocal()) {
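getMemoryForGV sizes the buffer from the pointee type: a GlobalVariable's own type is always a pointer to its contents. A sketch of the size/alignment computation, with the rounding step added for illustration:

    // Footprint of one global in the GV buffer (3.0-era TargetData).
    static size_t globalFootprint(const GlobalVariable *GV, const TargetData *TD) {
      Type *ElTy = GV->getType()->getElementType(); // pointee, not the pointer
      size_t Size  = (size_t)TD->getTypeAllocSize(ElTy);
      size_t Align = TD->getPreferredAlignment(GV);
      return (Size + Align - 1) & ~(Align - 1);     // keep the next global aligned
    }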
diff --git a/lib/ExecutionEngine/JIT/JIT.h b/lib/ExecutionEngine/JIT/JIT.h
index b879fc3..92dcb0e 100644
--- a/lib/ExecutionEngine/JIT/JIT.h
+++ b/lib/ExecutionEngine/JIT/JIT.h
@@ -100,9 +100,10 @@ public:
CodeGenOpt::Level OptLevel =
CodeGenOpt::Default,
bool GVsWithCode = true,
- CodeModel::Model CMM = CodeModel::Default) {
+ Reloc::Model RM = Reloc::Default,
+ CodeModel::Model CMM = CodeModel::JITDefault) {
return ExecutionEngine::createJIT(M, Err, JMM, OptLevel, GVsWithCode,
- CMM);
+ RM, CMM);
}
virtual void addModule(Module *M);
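createJIT gains a Reloc::Model parameter ahead of the code model, and the JIT's default code model becomes CodeModel::JITDefault. A call-site sketch under those assumptions (most clients reach this through EngineBuilder instead):

    std::string Err;
    ExecutionEngine *EE =
        JIT::createJIT(M, &Err, /*JMM=*/0, CodeGenOpt::Default,
                       /*GVsWithCode=*/true, Reloc::Default,
                       CodeModel::JITDefault);
    if (!EE)
      errs() << "JIT creation failed: " << Err << "\n";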
diff --git a/lib/ExecutionEngine/JIT/JITDwarfEmitter.cpp b/lib/ExecutionEngine/JIT/JITDwarfEmitter.cpp
index ddb0d54..8f84ac7 100644
--- a/lib/ExecutionEngine/JIT/JITDwarfEmitter.cpp
+++ b/lib/ExecutionEngine/JIT/JITDwarfEmitter.cpp
@@ -18,12 +18,12 @@
#include "llvm/ADT/DenseMap.h"
#include "llvm/CodeGen/JITCodeEmitter.h"
#include "llvm/CodeGen/MachineFunction.h"
-#include "llvm/CodeGen/MachineLocation.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/ExecutionEngine/JITMemoryManager.h"
-#include "llvm/Support/ErrorHandling.h"
+#include "llvm/MC/MachineLocation.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCSymbol.h"
+#include "llvm/Support/ErrorHandling.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetFrameLowering.h"
@@ -45,7 +45,7 @@ unsigned char* JITDwarfEmitter::EmitDwarfTable(MachineFunction& F,
TD = TM.getTargetData();
stackGrowthDirection = TM.getFrameLowering()->getStackGrowthDirection();
RI = TM.getRegisterInfo();
- TFI = TM.getFrameLowering();
+ MAI = TM.getMCAsmInfo();
JCE = &jce;
unsigned char* ExceptionTable = EmitExceptionTable(&F, StartFunction,
@@ -523,9 +523,7 @@ JITDwarfEmitter::EmitCommonEHFrame(const Function* Personality) const {
JCE->emitULEB128Bytes(dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4);
}
- std::vector<MachineMove> Moves;
- TFI->getInitialFrameState(Moves);
- EmitFrameMoves(0, Moves);
+ EmitFrameMoves(0, MAI->getInitialFrameState());
JCE->emitAlignmentWithFill(PointerSize, dwarf::DW_CFA_nop);
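With MachineLocation moved into MC, the initial frame moves come from MCAsmInfo rather than TargetFrameLowering, which also removes the temporary vector. A sketch, assuming the 3.0 accessor returns the move list by const reference:

    const MCAsmInfo *MAI = TM.getMCAsmInfo();
    const std::vector<MachineMove> &Moves = MAI->getInitialFrameState();
    EmitFrameMoves(0, Moves);   // same call as above, no local copy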
diff --git a/lib/ExecutionEngine/JIT/JITDwarfEmitter.h b/lib/ExecutionEngine/JIT/JITDwarfEmitter.h
index e1d0045..8dc99ab 100644
--- a/lib/ExecutionEngine/JIT/JITDwarfEmitter.h
+++ b/lib/ExecutionEngine/JIT/JITDwarfEmitter.h
@@ -22,8 +22,8 @@ class JITCodeEmitter;
class MachineFunction;
class MachineModuleInfo;
class MachineMove;
+class MCAsmInfo;
class TargetData;
-class TargetFrameLowering;
class TargetMachine;
class TargetRegisterInfo;
@@ -31,7 +31,7 @@ class JITDwarfEmitter {
const TargetData* TD;
JITCodeEmitter* JCE;
const TargetRegisterInfo* RI;
- const TargetFrameLowering *TFI;
+ const MCAsmInfo *MAI;
MachineModuleInfo* MMI;
JIT& Jit;
bool stackGrowthDirection;
diff --git a/lib/ExecutionEngine/JIT/JITEmitter.cpp b/lib/ExecutionEngine/JIT/JITEmitter.cpp
index d046b8a..24020ee 100644
--- a/lib/ExecutionEngine/JIT/JITEmitter.cpp
+++ b/lib/ExecutionEngine/JIT/JITEmitter.cpp
@@ -668,6 +668,7 @@ void *JITResolver::JITCompilerFn(void *Stub) {
DEBUG(dbgs() << "JIT: Lazily resolving function '" << F->getName()
<< "' In stub ptr = " << Stub << " actual ptr = "
<< ActualPtr << "\n");
+ (void)ActualPtr;
Result = JR->TheJIT->getPointerToFunction(F);
}
@@ -770,7 +771,7 @@ static unsigned GetConstantPoolSizeInBytes(MachineConstantPool *MCP,
MachineConstantPoolEntry CPE = Constants[i];
unsigned AlignMask = CPE.getAlignment() - 1;
Size = (Size + AlignMask) & ~AlignMask;
- const Type *Ty = CPE.getType();
+ Type *Ty = CPE.getType();
Size += TD->getTypeAllocSize(Ty);
}
return Size;
@@ -1098,7 +1099,7 @@ void JITEmitter::emitConstantPool(MachineConstantPool *MCP) {
DEBUG(dbgs() << "JIT: CP" << i << " at [0x";
dbgs().write_hex(CAddr) << "]\n");
- const Type *Ty = CPE.Val.ConstVal->getType();
+ Type *Ty = CPE.Val.ConstVal->getType();
Offset += TheJIT->getTargetData()->getTypeAllocSize(Ty);
}
}
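The added (void)ActualPtr silences -Wunused-variable in NDEBUG builds, where DEBUG(...) expands to nothing and no other code reads the variable. The general shape (computeStubTarget is a hypothetical stand-in):

    void *ActualPtr = computeStubTarget();
    DEBUG(dbgs() << "actual ptr = " << ActualPtr << "\n");
    (void)ActualPtr; // references the value even when DEBUG compiles away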
diff --git a/lib/ExecutionEngine/MCJIT/CMakeLists.txt b/lib/ExecutionEngine/MCJIT/CMakeLists.txt
index 38fdffa..aae8a1b 100644
--- a/lib/ExecutionEngine/MCJIT/CMakeLists.txt
+++ b/lib/ExecutionEngine/MCJIT/CMakeLists.txt
@@ -2,3 +2,11 @@ add_llvm_library(LLVMMCJIT
MCJIT.cpp
Intercept.cpp
)
+
+add_llvm_library_dependencies(LLVMMCJIT
+ LLVMCore
+ LLVMExecutionEngine
+ LLVMRuntimeDyld
+ LLVMSupport
+ LLVMTarget
+ )
diff --git a/lib/ExecutionEngine/MCJIT/Intercept.cpp b/lib/ExecutionEngine/MCJIT/Intercept.cpp
index e431c84..f83f428 100644
--- a/lib/ExecutionEngine/MCJIT/Intercept.cpp
+++ b/lib/ExecutionEngine/MCJIT/Intercept.cpp
@@ -52,6 +52,7 @@ static void runAtExitHandlers() {
#include <sys/stat.h>
#endif
#include <fcntl.h>
+#include <unistd.h>
/* stat functions are redirecting to __xstat with a version number. On x86-64
* linking with libc_nonshared.a and -Wl,--export-dynamic doesn't make 'stat'
* available as an exported symbol, so we have to add it explicitly.
diff --git a/lib/ExecutionEngine/MCJIT/MCJIT.cpp b/lib/ExecutionEngine/MCJIT/MCJIT.cpp
index 4475f4d..7c8a740 100644
--- a/lib/ExecutionEngine/MCJIT/MCJIT.cpp
+++ b/lib/ExecutionEngine/MCJIT/MCJIT.cpp
@@ -59,6 +59,7 @@ MCJIT::MCJIT(Module *m, TargetMachine *tm, TargetJITInfo &tji,
bool AllocateGVsWithCode)
: ExecutionEngine(m), TM(tm), MemMgr(MM), M(m), OS(Buffer), Dyld(MM) {
+ setTargetData(TM->getTargetData());
PM.add(new TargetData(*TM->getTargetData()));
// Turn the machine code intermediate representation into bytes in memory
@@ -124,8 +125,8 @@ GenericValue MCJIT::runFunction(Function *F,
void *FPtr = getPointerToFunction(F);
assert(FPtr && "Pointer to fn's code was null after getPointerToFunction");
- const FunctionType *FTy = F->getFunctionType();
- const Type *RetTy = FTy->getReturnType();
+ FunctionType *FTy = F->getFunctionType();
+ Type *RetTy = FTy->getReturnType();
assert((FTy->getNumParams() == ArgValues.size() ||
(FTy->isVarArg() && FTy->getNumParams() <= ArgValues.size())) &&
@@ -216,6 +217,6 @@ GenericValue MCJIT::runFunction(Function *F,
}
}
- assert("Full-featured argument passing not supported yet!");
+ assert(0 && "Full-featured argument passing not supported yet!");
return GenericValue();
}
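The one-character MCJIT fix corrects a classic pitfall: a string literal decays to a non-null pointer, so assert("msg") is always true and never fires. The 0 && form keeps the message while making the condition false:

    #include <cassert>

    void unsupportedPath() {
      // assert("not supported");   // BUG: literal is non-null, always passes
      assert(0 && "not supported"); // false condition; message shows on abort
    }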
diff --git a/lib/ExecutionEngine/RuntimeDyld/CMakeLists.txt b/lib/ExecutionEngine/RuntimeDyld/CMakeLists.txt
index 59bdfee..c236d1d 100644
--- a/lib/ExecutionEngine/RuntimeDyld/CMakeLists.txt
+++ b/lib/ExecutionEngine/RuntimeDyld/CMakeLists.txt
@@ -2,3 +2,8 @@ add_llvm_library(LLVMRuntimeDyld
RuntimeDyld.cpp
RuntimeDyldMachO.cpp
)
+
+add_llvm_library_dependencies(LLVMRuntimeDyld
+ LLVMObject
+ LLVMSupport
+ )
diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldImpl.h b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldImpl.h
index bcdfb04..7190a3c 100644
--- a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldImpl.h
+++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldImpl.h
@@ -143,7 +143,7 @@ public:
bool isCompatibleFormat(const MemoryBuffer *InputBuffer) const {
return isKnownFormat(InputBuffer);
- };
+ }
};
} // end namespace llvm
diff --git a/lib/ExecutionEngine/TargetSelect.cpp b/lib/ExecutionEngine/TargetSelect.cpp
index f51aff3..004b865 100644
--- a/lib/ExecutionEngine/TargetSelect.cpp
+++ b/lib/ExecutionEngine/TargetSelect.cpp
@@ -17,11 +17,11 @@
#include "llvm/Module.h"
#include "llvm/ADT/Triple.h"
#include "llvm/MC/SubtargetFeature.h"
+#include "llvm/Target/TargetMachine.h"
#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/raw_ostream.h"
#include "llvm/Support/Host.h"
-#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetRegistry.h"
+#include "llvm/Support/TargetRegistry.h"
+#include "llvm/Support/raw_ostream.h"
using namespace llvm;
/// selectTarget - Pick a target either via -march or by guessing the native
@@ -30,6 +30,8 @@ TargetMachine *EngineBuilder::selectTarget(Module *Mod,
StringRef MArch,
StringRef MCPU,
const SmallVectorImpl<std::string>& MAttrs,
+ Reloc::Model RM,
+ CodeModel::Model CM,
std::string *ErrorStr) {
Triple TheTriple(Mod->getTargetTriple());
if (TheTriple.getTriple().empty())
@@ -83,8 +85,9 @@ TargetMachine *EngineBuilder::selectTarget(Module *Mod,
}
// Allocate a target...
- TargetMachine *Target =
- TheTarget->createTargetMachine(TheTriple.getTriple(), MCPU, FeaturesStr);
+ TargetMachine *Target = TheTarget->createTargetMachine(TheTriple.getTriple(),
+ MCPU, FeaturesStr,
+ RM, CM);
assert(Target && "Could not allocate target machine!");
return Target;
}
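selectTarget now threads Reloc::Model and CodeModel::Model through to createTargetMachine, whose 3.0 signature takes them after the feature string. The lookup-then-create flow, trimmed to a sketch:

    std::string Error;
    const Target *TheTarget =
        TargetRegistry::lookupTarget(TheTriple.getTriple(), Error);
    if (!TheTarget)
      report_fatal_error(Error);
    TargetMachine *Target =
        TheTarget->createTargetMachine(TheTriple.getTriple(), MCPU, FeaturesStr,
                                       RM, CM);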
diff --git a/lib/Linker/CMakeLists.txt b/lib/Linker/CMakeLists.txt
index 0b6d2f4..4d8824b 100644
--- a/lib/Linker/CMakeLists.txt
+++ b/lib/Linker/CMakeLists.txt
@@ -4,3 +4,11 @@ add_llvm_library(LLVMLinker
LinkModules.cpp
Linker.cpp
)
+
+add_llvm_library_dependencies(LLVMLinker
+ LLVMArchive
+ LLVMBitReader
+ LLVMCore
+ LLVMSupport
+ LLVMTransformUtils
+ )
diff --git a/lib/Linker/LinkModules.cpp b/lib/Linker/LinkModules.cpp
index 55aa9bf..03a962e 100644
--- a/lib/Linker/LinkModules.cpp
+++ b/lib/Linker/LinkModules.cpp
@@ -14,9 +14,12 @@
#include "llvm/Linker.h"
#include "llvm/Constants.h"
#include "llvm/DerivedTypes.h"
+#include "llvm/Instructions.h"
#include "llvm/Module.h"
+#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Support/Path.h"
+#include "llvm/Transforms/Utils/Cloning.h"
#include "llvm/Transforms/Utils/ValueMapper.h"
using namespace llvm;
@@ -139,7 +142,7 @@ bool TypeMapTy::areTypesIsomorphic(Type *DstTy, Type *SrcTy) {
return false;
} else if (StructType *DSTy = dyn_cast<StructType>(DstTy)) {
StructType *SSTy = cast<StructType>(SrcTy);
- if (DSTy->isAnonymous() != SSTy->isAnonymous() ||
+ if (DSTy->isLiteral() != SSTy->isLiteral() ||
DSTy->isPacked() != SSTy->isPacked())
return false;
} else if (ArrayType *DATy = dyn_cast<ArrayType>(DstTy)) {
@@ -223,7 +226,7 @@ Type *TypeMapTy::getImpl(Type *Ty) {
// If this is not a named struct type, then just map all of the elements and
// then rebuild the type from inside out.
- if (!isa<StructType>(Ty) || cast<StructType>(Ty)->isAnonymous()) {
+ if (!isa<StructType>(Ty) || cast<StructType>(Ty)->isLiteral()) {
// If there are no element types to map, then the type is itself. This is
// true for the anonymous {} struct, things like 'float', integers, etc.
if (Ty->getNumContainedTypes() == 0)
@@ -261,7 +264,7 @@ Type *TypeMapTy::getImpl(Type *Ty) {
cast<PointerType>(Ty)->getAddressSpace());
case Type::FunctionTyID:
return *Entry = FunctionType::get(ElementTypes[0],
- ArrayRef<Type*>(ElementTypes).slice(1),
+ makeArrayRef(ElementTypes).slice(1),
cast<FunctionType>(Ty)->isVarArg());
case Type::StructTyID:
// Note that this is only reached for anonymous structs.
@@ -302,7 +305,7 @@ Type *TypeMapTy::getImpl(Type *Ty) {
// Otherwise we create a new type and resolve its body later. This will be
// resolved by the top level of get().
DefinitionsToResolve.push_back(STy);
- return *Entry = StructType::createNamed(STy->getContext(), "");
+ return *Entry = StructType::create(STy->getContext());
}
@@ -333,10 +336,16 @@ namespace {
std::vector<AppendingVarInfo> AppendingVars;
+ unsigned Mode; // Mode to treat source module.
+
+ // Set of items not to link in from source.
+ SmallPtrSet<const Value*, 16> DoNotLinkFromSource;
+
public:
std::string ErrorMsg;
- ModuleLinker(Module *dstM, Module *srcM) : DstM(dstM), SrcM(srcM) { }
+ ModuleLinker(Module *dstM, Module *srcM, unsigned mode)
+ : DstM(dstM), SrcM(srcM), Mode(mode) { }
bool run();
@@ -596,9 +605,9 @@ bool ModuleLinker::linkAppendingVarProto(GlobalVariable *DstGV,
DstGV->replaceAllUsesWith(ConstantExpr::getBitCast(NG, DstGV->getType()));
DstGV->eraseFromParent();
- // Zap the initializer in the source variable so we don't try to link it.
- SrcGV->setInitializer(0);
- SrcGV->setLinkage(GlobalValue::ExternalLinkage);
+ // Track the source variable so we don't try to link it.
+ DoNotLinkFromSource.insert(SrcGV);
+
return false;
}
@@ -633,11 +642,10 @@ bool ModuleLinker::linkGlobalProto(GlobalVariable *SGV) {
// Make sure to remember this mapping.
ValueMap[SGV] = ConstantExpr::getBitCast(DGV,TypeMap.get(SGV->getType()));
- // Destroy the source global's initializer (and convert it to a prototype)
- // so that we don't attempt to copy it over when processing global
- // initializers.
- SGV->setInitializer(0);
- SGV->setLinkage(GlobalValue::ExternalLinkage);
+ // Track the source global so that we don't attempt to copy it over when
+ // processing global initializers.
+ DoNotLinkFromSource.insert(SGV);
+
return false;
}
}
@@ -682,8 +690,10 @@ bool ModuleLinker::linkFunctionProto(Function *SF) {
// Make sure to remember this mapping.
ValueMap[SF] = ConstantExpr::getBitCast(DGV, TypeMap.get(SF->getType()));
- // Remove the body from the source module so we don't attempt to remap it.
- SF->deleteBody();
+ // Track the function from the source module so we don't attempt to remap
+ // it.
+ DoNotLinkFromSource.insert(SF);
+
return false;
}
}
@@ -722,8 +732,9 @@ bool ModuleLinker::linkAliasProto(GlobalAlias *SGA) {
// Make sure to remember this mapping.
ValueMap[SGA] = ConstantExpr::getBitCast(DGV,TypeMap.get(SGA->getType()));
- // Remove the body from the source module so we don't attempt to remap it.
- SGA->setAliasee(0);
+ // Track the alias from the source module so we don't attempt to remap it.
+ DoNotLinkFromSource.insert(SGA);
+
return false;
}
}
@@ -779,7 +790,9 @@ void ModuleLinker::linkGlobalInits() {
// Loop over all of the globals in the src module, mapping them over as we go
for (Module::const_global_iterator I = SrcM->global_begin(),
E = SrcM->global_end(); I != E; ++I) {
- if (!I->hasInitializer()) continue; // Only process initialized GV's.
+
+ // Only process initialized GV's or ones not already in dest.
+ if (!I->hasInitializer() || DoNotLinkFromSource.count(I)) continue;
// Grab destination global variable.
GlobalVariable *DGV = cast<GlobalVariable>(ValueMap[I]);
@@ -805,31 +818,42 @@ void ModuleLinker::linkFunctionBody(Function *Dst, Function *Src) {
ValueMap[I] = DI;
}
- // Splice the body of the source function into the dest function.
- Dst->getBasicBlockList().splice(Dst->end(), Src->getBasicBlockList());
-
- // At this point, all of the instructions and values of the function are now
- // copied over. The only problem is that they are still referencing values in
- // the Source function as operands. Loop through all of the operands of the
- // functions and patch them up to point to the local versions.
- for (Function::iterator BB = Dst->begin(), BE = Dst->end(); BB != BE; ++BB)
- for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I)
- RemapInstruction(I, ValueMap, RF_IgnoreMissingEntries, &TypeMap);
-
+ if (Mode == Linker::DestroySource) {
+ // Splice the body of the source function into the dest function.
+ Dst->getBasicBlockList().splice(Dst->end(), Src->getBasicBlockList());
+
+ // At this point, all of the instructions and values of the function are now
+ // copied over. The only problem is that they are still referencing values in
+ // the Source function as operands. Loop through all of the operands of the
+ // functions and patch them up to point to the local versions.
+ for (Function::iterator BB = Dst->begin(), BE = Dst->end(); BB != BE; ++BB)
+ for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I)
+ RemapInstruction(I, ValueMap, RF_IgnoreMissingEntries, &TypeMap);
+
+ } else {
+ // Clone the body of the function into the dest function.
+ SmallVector<ReturnInst*, 8> Returns; // Ignore returns.
+ CloneFunctionInto(Dst, Src, ValueMap, false, Returns);
+ }
+
// There is no need to map the arguments anymore.
for (Function::arg_iterator I = Src->arg_begin(), E = Src->arg_end();
I != E; ++I)
ValueMap.erase(I);
+
}
void ModuleLinker::linkAliasBodies() {
for (Module::alias_iterator I = SrcM->alias_begin(), E = SrcM->alias_end();
- I != E; ++I)
+ I != E; ++I) {
+ if (DoNotLinkFromSource.count(I))
+ continue;
if (Constant *Aliasee = I->getAliasee()) {
GlobalAlias *DA = cast<GlobalAlias>(ValueMap[I]);
DA->setAliasee(MapValue(Aliasee, ValueMap, RF_None, &TypeMap));
}
+ }
}
/// linkNamedMDNodes - Insert all of the named mdnodes in Src into the Dest
@@ -891,16 +915,10 @@ bool ModuleLinker::run() {
StringRef ModuleId = SrcM->getModuleIdentifier();
if (!ModuleId.empty())
DstM->removeLibrary(sys::path::stem(ModuleId));
-
// Loop over all of the linked values to compute type mappings.
computeTypeMapping();
- // Remap all of the named mdnoes in Src into the DstM module. We do this
- // after linking GlobalValues so that MDNodes that reference GlobalValues
- // are properly remapped.
- linkNamedMDNodes();
-
// Insert all of the globals in src into the DstM module... without linking
// initializers (which could refer to functions not yet mapped over).
for (Module::global_iterator I = SrcM->global_begin(),
@@ -933,7 +951,17 @@ bool ModuleLinker::run() {
// Link in the function bodies that are defined in the source module into
// DstM.
for (Module::iterator SF = SrcM->begin(), E = SrcM->end(); SF != E; ++SF) {
- if (SF->isDeclaration()) continue; // No body if function is external.
+
+ // Skip if not linking from source.
+ if (DoNotLinkFromSource.count(SF)) continue;
+
+ // Skip if no body (function is external) or materialize.
+ if (SF->isDeclaration()) {
+ if (!SF->isMaterializable())
+ continue;
+ if (SF->Materialize(&ErrorMsg))
+ return true;
+ }
linkFunctionBody(cast<Function>(ValueMap[SF]), SF);
}
@@ -941,6 +969,11 @@ bool ModuleLinker::run() {
// Resolve all uses of aliases with aliasees.
linkAliasBodies();
+ // Remap all of the named mdnodes in Src into the DstM module. We do this
+ // after linking GlobalValues so that MDNodes that reference GlobalValues
+ // are properly remapped.
+ linkNamedMDNodes();
+
// Now that all of the types from the source are used, resolve any structs
// copied over to the dest that didn't exist there.
TypeMap.linkDefinedTypeBodies();
@@ -957,8 +990,9 @@ bool ModuleLinker::run() {
// error occurs, true is returned and ErrorMsg (if not null) is set to indicate
// the problem. Upon failure, the Dest module could be in a modified state, and
// shouldn't be relied on to be consistent.
-bool Linker::LinkModules(Module *Dest, Module *Src, std::string *ErrorMsg) {
- ModuleLinker TheLinker(Dest, Src);
+bool Linker::LinkModules(Module *Dest, Module *Src, unsigned Mode,
+ std::string *ErrorMsg) {
+ ModuleLinker TheLinker(Dest, Src, Mode);
if (TheLinker.run()) {
if (ErrorMsg) *ErrorMsg = TheLinker.ErrorMsg;
return true;
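LinkModules now takes a mode choosing between the old destructive splice and the new CloneFunctionInto copy that leaves Src intact. A caller-side sketch; only Linker::DestroySource appears in this hunk, and PreserveSource is the assumed counterpart:

    std::string ErrorMsg;
    if (Linker::LinkModules(Dest, Src, Linker::DestroySource, &ErrorMsg)) {
      errs() << "link failed: " << ErrorMsg << "\n";
      // Dest may be left partially modified on failure, per the comment above.
    }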
diff --git a/lib/Linker/Linker.cpp b/lib/Linker/Linker.cpp
index fba91da..59fbceb 100644
--- a/lib/Linker/Linker.cpp
+++ b/lib/Linker/Linker.cpp
@@ -141,6 +141,14 @@ static inline sys::Path IsLibrary(StringRef Name,
if (FullPath.isBitcodeFile()) // .so file containing bitcode?
return FullPath;
+ // Try libX form, to make it possible to add dependency on the
+ // specific version of .so, like liblzma.so.1.0.0
+ FullPath.eraseSuffix();
+ if (FullPath.isDynamicLibrary()) // Native shared library?
+ return FullPath;
+ if (FullPath.isBitcodeFile()) // .so file containing bitcode?
+ return FullPath;
+
// Not found .. fall through
// Indicate that the library was not found in the directory.
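The added block retries both checks after sys::Path::eraseSuffix(), so a dependency spelled with an explicit version resolves once the extension appended by the first probe is stripped again. The probe order, sketched for a hypothetical -llzma.so.1.0.0:

    sys::Path FullPath(Directory);
    FullPath.appendComponent("lib" + Name.str());
    FullPath.appendSuffix("so");               // -> liblzma.so.1.0.0.so
    if (FullPath.isDynamicLibrary() || FullPath.isBitcodeFile())
      return FullPath;
    FullPath.eraseSuffix();                    // -> liblzma.so.1.0.0
    if (FullPath.isDynamicLibrary() || FullPath.isBitcodeFile())
      return FullPath;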
diff --git a/lib/MC/CMakeLists.txt b/lib/MC/CMakeLists.txt
index 22afa7e..a4ac1bf 100644
--- a/lib/MC/CMakeLists.txt
+++ b/lib/MC/CMakeLists.txt
@@ -1,25 +1,31 @@
add_llvm_library(LLVMMC
ELFObjectWriter.cpp
+ MCAsmBackend.cpp
MCAsmInfo.cpp
MCAsmInfoCOFF.cpp
MCAsmInfoDarwin.cpp
MCAsmStreamer.cpp
MCAssembler.cpp
+ MCAtom.cpp
MCCodeEmitter.cpp
+ MCCodeGenInfo.cpp
MCContext.cpp
MCDisassembler.cpp
+ MCDwarf.cpp
MCELF.cpp
MCELFObjectTargetWriter.cpp
MCELFStreamer.cpp
MCExpr.cpp
MCInst.cpp
MCInstPrinter.cpp
+ MCInstrAnalysis.cpp
MCLabel.cpp
- MCDwarf.cpp
MCLoggingStreamer.cpp
MCMachOStreamer.cpp
MCMachObjectTargetWriter.cpp
+ MCModule.cpp
MCNullStreamer.cpp
+ MCObjectFileInfo.cpp
MCObjectStreamer.cpp
MCObjectWriter.cpp
MCPureStreamer.cpp
@@ -30,13 +36,18 @@ add_llvm_library(LLVMMC
MCStreamer.cpp
MCSubtargetInfo.cpp
MCSymbol.cpp
+ MCTargetAsmLexer.cpp
MCValue.cpp
MCWin64EH.cpp
MachObjectWriter.cpp
- WinCOFFStreamer.cpp
- WinCOFFObjectWriter.cpp
SubtargetFeature.cpp
- TargetAsmBackend.cpp
+ WinCOFFObjectWriter.cpp
+ WinCOFFStreamer.cpp
+ )
+
+add_llvm_library_dependencies(LLVMMC
+ LLVMObject
+ LLVMSupport
)
add_subdirectory(MCParser)
diff --git a/lib/MC/ELFObjectWriter.cpp b/lib/MC/ELFObjectWriter.cpp
index 59e1b8e..3d16de5 100644
--- a/lib/MC/ELFObjectWriter.cpp
+++ b/lib/MC/ELFObjectWriter.cpp
@@ -15,6 +15,7 @@
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/StringMap.h"
#include "llvm/ADT/Twine.h"
+#include "llvm/MC/MCAsmBackend.h"
#include "llvm/MC/MCAsmLayout.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
@@ -23,13 +24,13 @@
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/ELF.h"
-#include "llvm/Target/TargetAsmBackend.h"
-#include "llvm/ADT/StringSwitch.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/StringSwitch.h"
-#include "../Target/X86/X86FixupKinds.h"
-#include "../Target/ARM/ARMFixupKinds.h"
+#include "../Target/X86/MCTargetDesc/X86FixupKinds.h"
+#include "../Target/ARM/MCTargetDesc/ARMFixupKinds.h"
+#include "../Target/PowerPC/MCTargetDesc/PPCFixupKinds.h"
#include <vector>
using namespace llvm;
@@ -124,12 +125,12 @@ void ELFObjectWriter::WriteHeader(uint64_t SectionDataSize,
// e_shnum = # of section header ents
if (NumberOfSections >= ELF::SHN_LORESERVE)
- Write16(0);
+ Write16(ELF::SHN_UNDEF);
else
Write16(NumberOfSections);
// e_shstrndx = Section # of '.shstrtab'
- if (NumberOfSections >= ELF::SHN_LORESERVE)
+ if (ShstrtabIndex >= ELF::SHN_LORESERVE)
Write16(ELF::SHN_XINDEX);
else
Write16(ShstrtabIndex);
@@ -301,7 +302,8 @@ void ELFObjectWriter::WriteSymbolTable(MCDataFragment *SymtabF,
if (Section.getType() == ELF::SHT_RELA ||
Section.getType() == ELF::SHT_REL ||
Section.getType() == ELF::SHT_STRTAB ||
- Section.getType() == ELF::SHT_SYMTAB)
+ Section.getType() == ELF::SHT_SYMTAB ||
+ Section.getType() == ELF::SHT_SYMTAB_SHNDX)
continue;
WriteSymbolEntry(SymtabF, ShndxF, 0, ELF::STT_SECTION, 0, 0,
ELF::STV_DEFAULT, SectionIndexMap.lookup(&Section), false);
@@ -447,8 +449,16 @@ void ELFObjectWriter::RecordRelocation(const MCAssembler &Asm,
uint64_t RelocOffset = Layout.getFragmentOffset(Fragment) +
Fixup.getOffset();
+ adjustFixupOffset(Fixup, RelocOffset);
+
if (!hasRelocationAddend())
Addend = 0;
+
+ if (is64Bit())
+ assert(isInt<64>(Addend));
+ else
+ assert(isInt<32>(Addend));
+
ELFRelocationEntry ERE(RelocOffset, Index, Type, RelocSymbol, Addend);
Relocations[Fragment->getParent()].push_back(ERE);
}
@@ -656,6 +666,9 @@ void ELFObjectWriter::ComputeSymbolTable(MCAssembler &Asm,
ExternalSymbolData[i].SymbolData->setIndex(Index++);
for (unsigned i = 0, e = UndefinedSymbolData.size(); i != e; ++i)
UndefinedSymbolData[i].SymbolData->setIndex(Index++);
+
+ if (NumRegularSections > ELF::SHN_LORESERVE)
+ NeedsSymtabShndx = true;
}
void ELFObjectWriter::CreateRelocationSections(MCAssembler &Asm,
@@ -992,11 +1005,10 @@ void ELFObjectWriter::WriteSection(MCAssembler &Asm,
// Nothing to do.
break;
- case ELF::SHT_GROUP: {
+ case ELF::SHT_GROUP:
sh_link = SymbolTableIndex;
sh_info = GroupSymbolIndex;
break;
- }
default:
assert(0 && "FIXME: sh_type value not supported!");
@@ -1224,7 +1236,7 @@ void ELFObjectWriter::WriteObject(MCAssembler &Asm,
FileOff = OS.tell();
- // ... and then the remainting sections ...
+ // ... and then the remaining sections ...
for (unsigned i = NumRegularSections + 1; i < NumSections; ++i)
WriteDataSectionData(Asm, Layout, *Sections[i]);
}
@@ -1252,6 +1264,11 @@ MCObjectWriter *llvm::createELFObjectWriter(MCELFObjectTargetWriter *MOTW,
return new ARMELFObjectWriter(MOTW, OS, IsLittleEndian); break;
case ELF::EM_MBLAZE:
return new MBlazeELFObjectWriter(MOTW, OS, IsLittleEndian); break;
+ case ELF::EM_PPC:
+ case ELF::EM_PPC64:
+ return new PPCELFObjectWriter(MOTW, OS, IsLittleEndian); break;
+ case ELF::EM_MIPS:
+ return new MipsELFObjectWriter(MOTW, OS, IsLittleEndian); break;
default: llvm_unreachable("Unsupported architecture"); break;
}
}
@@ -1503,6 +1520,76 @@ unsigned ARMELFObjectWriter::GetRelocTypeInner(const MCValue &Target,
return Type;
}
+//===- PPCELFObjectWriter -------------------------------------------===//
+
+PPCELFObjectWriter::PPCELFObjectWriter(MCELFObjectTargetWriter *MOTW,
+ raw_ostream &_OS,
+ bool IsLittleEndian)
+ : ELFObjectWriter(MOTW, _OS, IsLittleEndian) {
+}
+
+PPCELFObjectWriter::~PPCELFObjectWriter() {
+}
+
+unsigned PPCELFObjectWriter::GetRelocType(const MCValue &Target,
+ const MCFixup &Fixup,
+ bool IsPCRel,
+ bool IsRelocWithSymbol,
+ int64_t Addend) {
+ // determine the type of the relocation
+ unsigned Type;
+ if (IsPCRel) {
+ switch ((unsigned)Fixup.getKind()) {
+ default:
+ llvm_unreachable("Unimplemented");
+ case PPC::fixup_ppc_br24:
+ Type = ELF::R_PPC_REL24;
+ break;
+ case FK_PCRel_4:
+ Type = ELF::R_PPC_REL32;
+ break;
+ }
+ } else {
+ switch ((unsigned)Fixup.getKind()) {
+ default: llvm_unreachable("invalid fixup kind!");
+ case PPC::fixup_ppc_br24:
+ Type = ELF::R_PPC_ADDR24;
+ break;
+ case PPC::fixup_ppc_brcond14:
+ Type = ELF::R_PPC_ADDR14_BRTAKEN; // XXX: or BRNTAKEN?_
+ break;
+ case PPC::fixup_ppc_ha16:
+ Type = ELF::R_PPC_ADDR16_HA;
+ break;
+ case PPC::fixup_ppc_lo16:
+ Type = ELF::R_PPC_ADDR16_LO;
+ break;
+ case PPC::fixup_ppc_lo14:
+ Type = ELF::R_PPC_ADDR14;
+ break;
+ case FK_Data_4:
+ Type = ELF::R_PPC_ADDR32;
+ break;
+ case FK_Data_2:
+ Type = ELF::R_PPC_ADDR16;
+ break;
+ }
+ }
+ return Type;
+}
+
+void
+PPCELFObjectWriter::adjustFixupOffset(const MCFixup &Fixup, uint64_t &RelocOffset) {
+ switch ((unsigned)Fixup.getKind()) {
+ case PPC::fixup_ppc_ha16:
+ case PPC::fixup_ppc_lo16:
+ RelocOffset += 2;
+ break;
+ default:
+ break;
+ }
+}
+
//===- MBlazeELFObjectWriter -------------------------------------------===//
MBlazeELFObjectWriter::MBlazeELFObjectWriter(MCELFObjectTargetWriter *MOTW,
@@ -1624,7 +1711,6 @@ unsigned X86ELFObjectWriter::GetRelocType(const MCValue &Target,
default: llvm_unreachable("invalid fixup kind!");
case FK_Data_8: Type = ELF::R_X86_64_64; break;
case X86::reloc_signed_4byte:
- assert(isInt<32>(Target.getConstant()));
switch (Modifier) {
default:
llvm_unreachable("Unimplemented");
@@ -1728,3 +1814,19 @@ unsigned X86ELFObjectWriter::GetRelocType(const MCValue &Target,
return Type;
}
+
+MipsELFObjectWriter::MipsELFObjectWriter(MCELFObjectTargetWriter *MOTW,
+ raw_ostream &_OS,
+ bool IsLittleEndian)
+ : ELFObjectWriter(MOTW, _OS, IsLittleEndian) {}
+
+MipsELFObjectWriter::~MipsELFObjectWriter() {}
+
+unsigned MipsELFObjectWriter::GetRelocType(const MCValue &Target,
+ const MCFixup &Fixup,
+ bool IsPCRel,
+ bool IsRelocWithSymbol,
+ int64_t Addend) {
+ // tbd
+ return 1;
+}
diff --git a/lib/MC/ELFObjectWriter.h b/lib/MC/ELFObjectWriter.h
index 7593099..862b085 100644
--- a/lib/MC/ELFObjectWriter.h
+++ b/lib/MC/ELFObjectWriter.h
@@ -347,6 +347,7 @@ class ELFObjectWriter : public MCObjectWriter {
virtual unsigned GetRelocType(const MCValue &Target, const MCFixup &Fixup,
bool IsPCRel, bool IsRelocWithSymbol,
int64_t Addend) = 0;
+ virtual void adjustFixupOffset(const MCFixup &Fixup, uint64_t &RelocOffset) { }
};
//===- X86ELFObjectWriter -------------------------------------------===//
@@ -395,6 +396,22 @@ class ELFObjectWriter : public MCObjectWriter {
};
+ //===- PPCELFObjectWriter -------------------------------------------===//
+
+ class PPCELFObjectWriter : public ELFObjectWriter {
+ public:
+ PPCELFObjectWriter(MCELFObjectTargetWriter *MOTW,
+ raw_ostream &_OS,
+ bool IsLittleEndian);
+
+ virtual ~PPCELFObjectWriter();
+ protected:
+ virtual unsigned GetRelocType(const MCValue &Target, const MCFixup &Fixup,
+ bool IsPCRel, bool IsRelocWithSymbol,
+ int64_t Addend);
+ virtual void adjustFixupOffset(const MCFixup &Fixup, uint64_t &RelocOffset);
+ };
+
//===- MBlazeELFObjectWriter -------------------------------------------===//
class MBlazeELFObjectWriter : public ELFObjectWriter {
@@ -409,6 +426,21 @@ class ELFObjectWriter : public MCObjectWriter {
bool IsPCRel, bool IsRelocWithSymbol,
int64_t Addend);
};
+
+ //===- MipsELFObjectWriter -------------------------------------------===//
+
+ class MipsELFObjectWriter : public ELFObjectWriter {
+ public:
+ MipsELFObjectWriter(MCELFObjectTargetWriter *MOTW,
+ raw_ostream &_OS,
+ bool IsLittleEndian);
+
+ virtual ~MipsELFObjectWriter();
+ protected:
+ virtual unsigned GetRelocType(const MCValue &Target, const MCFixup &Fixup,
+ bool IsPCRel, bool IsRelocWithSymbol,
+ int64_t Addend);
+ };
}
#endif
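The header change follows the default-no-op virtual hook pattern: ELFObjectWriter gives every subclass a do-nothing adjustFixupOffset, and only targets whose immediates sit off the instruction start (PPC, here) override it. A generic sketch of the pattern with illustrative class names:

#include <cassert>
#include <cstdint>

struct WriterBase {
  virtual ~WriterBase() {}
  virtual void adjustFixupOffset(uint64_t &Off) {} // default: leave as-is
};

struct PPCLikeWriter : WriterBase {
  virtual void adjustFixupOffset(uint64_t &Off) { Off += 2; }
};

int main() {
  uint64_t Off = 0x10;
  PPCLikeWriter W;
  WriterBase &B = W;
  B.adjustFixupOffset(Off); // dispatches to the overriding target
  assert(Off == 0x12);
  return 0;
}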
diff --git a/lib/MC/TargetAsmBackend.cpp b/lib/MC/MCAsmBackend.cpp
index 1927557..2c150f4 100644
--- a/lib/MC/TargetAsmBackend.cpp
+++ b/lib/MC/MCAsmBackend.cpp
@@ -1,4 +1,4 @@
-//===-- TargetAsmBackend.cpp - Target Assembly Backend ---------------------==//
+//===-- MCAsmBackend.cpp - Target MC Assembly Backend ----------------------==//
//
// The LLVM Compiler Infrastructure
//
@@ -7,19 +7,19 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/Target/TargetAsmBackend.h"
+#include "llvm/MC/MCAsmBackend.h"
using namespace llvm;
-TargetAsmBackend::TargetAsmBackend()
+MCAsmBackend::MCAsmBackend()
: HasReliableSymbolDifference(false)
{
}
-TargetAsmBackend::~TargetAsmBackend() {
+MCAsmBackend::~MCAsmBackend() {
}
const MCFixupKindInfo &
-TargetAsmBackend::getFixupKindInfo(MCFixupKind Kind) const {
+MCAsmBackend::getFixupKindInfo(MCFixupKind Kind) const {
static const MCFixupKindInfo Builtins[] = {
{ "FK_Data_1", 0, 8, 0 },
{ "FK_Data_2", 0, 16, 0 },
diff --git a/lib/MC/MCAsmInfo.cpp b/lib/MC/MCAsmInfo.cpp
index 502b60b..95861bc 100644
--- a/lib/MC/MCAsmInfo.cpp
+++ b/lib/MC/MCAsmInfo.cpp
@@ -29,6 +29,7 @@ MCAsmInfo::MCAsmInfo() {
HasSubsectionsViaSymbols = false;
HasMachoZeroFillDirective = false;
HasMachoTBSSDirective = false;
+ StructorOutputOrder = Structors::ReversePriorityOrder;
HasStaticCtorDtorReferenceInStaticMode = false;
LinkerRequiresNonEmptyDwarfLines = false;
MaxInstLength = 4;
@@ -42,6 +43,9 @@ MCAsmInfo::MCAsmInfo() {
LinkerPrivateGlobalPrefix = "";
InlineAsmStart = "APP";
InlineAsmEnd = "NO_APP";
+ Code16Directive = ".code16";
+ Code32Directive = ".code32";
+ Code64Directive = ".code64";
AssemblerDialect = 0;
AllowQuotesInName = false;
AllowNameToStartWithDigit = false;
@@ -53,6 +57,12 @@ MCAsmInfo::MCAsmInfo() {
Data16bitsDirective = "\t.short\t";
Data32bitsDirective = "\t.long\t";
Data64bitsDirective = "\t.quad\t";
+ DataBegin = "$d.";
+ CodeBegin = "$a.";
+ JT8Begin = "$d.";
+ JT16Begin = "$d.";
+ JT32Begin = "$d.";
+ SupportsDataRegions = false;
SunStyleELFSectionSwitchSyntax = false;
UsesELFSectionDirectiveForBSS = false;
AlignDirective = "\t.align\t";
@@ -62,7 +72,7 @@ MCAsmInfo::MCAsmInfo() {
GlobalDirective = "\t.globl\t";
HasSetDirective = true;
HasAggressiveSymbolFolding = true;
- HasLCOMMDirective = false;
+ LCOMMDirectiveType = LCOMM::None;
COMMDirectiveAlignmentIsInBytes = true;
HasDotTypeDotSizeDirective = true;
HasSingleParameterDotFile = true;
diff --git a/lib/MC/MCAsmInfoCOFF.cpp b/lib/MC/MCAsmInfoCOFF.cpp
index 7fc7d7a..434d910 100644
--- a/lib/MC/MCAsmInfoCOFF.cpp
+++ b/lib/MC/MCAsmInfoCOFF.cpp
@@ -19,7 +19,7 @@ using namespace llvm;
MCAsmInfoCOFF::MCAsmInfoCOFF() {
GlobalPrefix = "_";
COMMDirectiveAlignmentIsInBytes = false;
- HasLCOMMDirective = true;
+ LCOMMDirectiveType = LCOMM::ByteAlignment;
HasDotTypeDotSizeDirective = false;
HasSingleParameterDotFile = false;
PrivateGlobalPrefix = "L"; // Prefix for private global symbols
@@ -27,11 +27,14 @@ MCAsmInfoCOFF::MCAsmInfoCOFF() {
LinkOnceDirective = "\t.linkonce discard\n";
// Doesn't support visibility:
- HiddenVisibilityAttr = ProtectedVisibilityAttr = MCSA_Invalid;
+ HiddenVisibilityAttr = HiddenDeclarationVisibilityAttr = MCSA_Invalid;
+ ProtectedVisibilityAttr = MCSA_Invalid;
// Set up DWARF directives
HasLEB128 = true; // Target asm supports leb128 directives (little-endian)
SupportsDebugInformation = true;
DwarfSectionOffsetDirective = "\t.secrel32\t";
HasMicrosoftFastStdCallMangling = true;
+
+ SupportsDataRegions = false;
}
diff --git a/lib/MC/MCAsmInfoDarwin.cpp b/lib/MC/MCAsmInfoDarwin.cpp
index 5851cb0..b20e338 100644
--- a/lib/MC/MCAsmInfoDarwin.cpp
+++ b/lib/MC/MCAsmInfoDarwin.cpp
@@ -39,8 +39,16 @@ MCAsmInfoDarwin::MCAsmInfoDarwin() {
ZeroDirective = "\t.space\t"; // ".space N" emits N zeros.
HasMachoZeroFillDirective = true; // Uses .zerofill
HasMachoTBSSDirective = true; // Uses .tbss
+ StructorOutputOrder = Structors::PriorityOrder;
HasStaticCtorDtorReferenceInStaticMode = true;
+ CodeBegin = "L$start$code$";
+ DataBegin = "L$start$data$";
+ JT8Begin = "L$start$jt8$";
+ JT16Begin = "L$start$jt16$";
+ JT32Begin = "L$start$jt32$";
+ SupportsDataRegions = true;
+
// FIXME: Darwin 10 and newer don't need this.
LinkerRequiresNonEmptyDwarfLines = true;
diff --git a/lib/MC/MCAsmStreamer.cpp b/lib/MC/MCAsmStreamer.cpp
index d5d08e8..3fcbb05 100644
--- a/lib/MC/MCAsmStreamer.cpp
+++ b/lib/MC/MCAsmStreamer.cpp
@@ -15,8 +15,12 @@
#include "llvm/MC/MCFixupKindInfo.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstPrinter.h"
+#include "llvm/MC/MCObjectFileInfo.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/MC/MCSectionCOFF.h"
#include "llvm/MC/MCSectionMachO.h"
#include "llvm/MC/MCSymbol.h"
+#include "llvm/MC/MCAsmBackend.h"
#include "llvm/ADT/OwningPtr.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/StringExtras.h"
@@ -25,9 +29,6 @@
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/FormattedStream.h"
-#include "llvm/Target/TargetAsmBackend.h"
-#include "llvm/Target/TargetAsmInfo.h"
-#include "llvm/Target/TargetLoweringObjectFile.h"
#include <cctype>
using namespace llvm;
@@ -40,7 +41,7 @@ protected:
private:
OwningPtr<MCInstPrinter> InstPrinter;
OwningPtr<MCCodeEmitter> Emitter;
- OwningPtr<TargetAsmBackend> AsmBackend;
+ OwningPtr<MCAsmBackend> AsmBackend;
SmallString<128> CommentToEmit;
raw_svector_ostream CommentStream;
@@ -63,7 +64,7 @@ public:
MCAsmStreamer(MCContext &Context, formatted_raw_ostream &os,
bool isVerboseAsm, bool useLoc, bool useCFI,
MCInstPrinter *printer, MCCodeEmitter *emitter,
- TargetAsmBackend *asmbackend,
+ MCAsmBackend *asmbackend,
bool showInst)
: MCStreamer(Context), OS(os), MAI(Context.getAsmInfo()),
InstPrinter(printer), Emitter(emitter), AsmBackend(asmbackend),
@@ -157,7 +158,9 @@ public:
///
/// @param Symbol - The common symbol to emit.
/// @param Size - The size of the common symbol.
- virtual void EmitLocalCommonSymbol(MCSymbol *Symbol, uint64_t Size);
+ /// @param ByteAlignment - The alignment of the common symbol in bytes.
+ virtual void EmitLocalCommonSymbol(MCSymbol *Symbol, uint64_t Size,
+ unsigned ByteAlignment);
virtual void EmitZerofill(const MCSection *Section, MCSymbol *Symbol = 0,
unsigned Size = 0, unsigned ByteAlignment = 0);
@@ -334,8 +337,9 @@ void MCAsmStreamer::EmitAssemblerFlag(MCAssemblerFlag Flag) {
default: assert(0 && "Invalid flag!");
case MCAF_SyntaxUnified: OS << "\t.syntax unified"; break;
case MCAF_SubsectionsViaSymbols: OS << ".subsections_via_symbols"; break;
- case MCAF_Code16: OS << "\t.code\t16"; break;
- case MCAF_Code32: OS << "\t.code\t32"; break;
+ case MCAF_Code16: OS << '\t'<< MAI.getCode16Directive(); break;
+ case MCAF_Code32: OS << '\t'<< MAI.getCode32Directive(); break;
+ case MCAF_Code64: OS << '\t'<< MAI.getCode64Directive(); break;
}
EmitEOL();
}
@@ -482,9 +486,16 @@ void MCAsmStreamer::EmitCommonSymbol(MCSymbol *Symbol, uint64_t Size,
///
/// @param Symbol - The common symbol to emit.
/// @param Size - The size of the common symbol.
-void MCAsmStreamer::EmitLocalCommonSymbol(MCSymbol *Symbol, uint64_t Size) {
- assert(MAI.hasLCOMMDirective() && "Doesn't have .lcomm, can't emit it!");
+void MCAsmStreamer::EmitLocalCommonSymbol(MCSymbol *Symbol, uint64_t Size,
+ unsigned ByteAlign) {
+ assert(MAI.getLCOMMDirectiveType() != LCOMM::None &&
+ "Doesn't have .lcomm, can't emit it!");
OS << "\t.lcomm\t" << *Symbol << ',' << Size;
+ if (ByteAlign > 1) {
+ assert(MAI.getLCOMMDirectiveType() == LCOMM::ByteAlignment &&
+ "Alignment not supported on .lcomm!");
+ OS << ',' << ByteAlign;
+ }
EmitEOL();
}
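A minimal self-checking sketch of the .lcomm emission logic just shown, modeling only the two LCOMM states that appear in this patch (the real enum may carry more; the helper name is illustrative):

#include <cassert>
#include <cstdint>
#include <sstream>
#include <string>

namespace LCOMM { enum LCOMMType { None, ByteAlignment }; }

static std::string emitLComm(LCOMM::LCOMMType T, const std::string &Sym,
                             uint64_t Size, unsigned ByteAlign) {
  assert(T != LCOMM::None && "Doesn't have .lcomm, can't emit it!");
  std::ostringstream OS;
  OS << "\t.lcomm\t" << Sym << ',' << Size;
  if (ByteAlign > 1) {
    assert(T == LCOMM::ByteAlignment && "Alignment not supported on .lcomm!");
    OS << ',' << ByteAlign;
  }
  return OS.str();
}

int main() {
  // COFF sets LCOMM::ByteAlignment, so an alignment may follow the size.
  assert(emitLComm(LCOMM::ByteAlignment, "sym", 8, 4) == "\t.lcomm\tsym,8,4");
  return 0;
}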
@@ -827,8 +838,8 @@ void MCAsmStreamer::EmitCFIEndProc() {
void MCAsmStreamer::EmitRegisterName(int64_t Register) {
if (InstPrinter && !MAI.useDwarfRegNumForCFI()) {
- const TargetAsmInfo &TAI = getContext().getTargetAsmInfo();
- unsigned LLVMRegister = TAI.getLLVMRegNum(Register, true);
+ const MCRegisterInfo &MRI = getContext().getRegisterInfo();
+ unsigned LLVMRegister = MRI.getLLVMRegNum(Register, true);
InstPrinter->printRegName(OS, LLVMRegister);
} else {
OS << Register;
@@ -994,6 +1005,19 @@ void MCAsmStreamer::EmitWin64EHHandler(const MCSymbol *Sym, bool Unwind,
EmitEOL();
}
+static const MCSection *getWin64EHTableSection(StringRef suffix,
+ MCContext &context) {
+ // FIXME: This doesn't belong in MCObjectFileInfo. However, it
+ // duplicates code in MCWin64EH.cpp.
+ if (suffix.empty())
+ return context.getObjectFileInfo()->getXDataSection();
+ return context.getCOFFSection((".xdata"+suffix).str(),
+ COFF::IMAGE_SCN_CNT_INITIALIZED_DATA |
+ COFF::IMAGE_SCN_MEM_READ |
+ COFF::IMAGE_SCN_MEM_WRITE,
+ SectionKind::getDataRel());
+}
+
void MCAsmStreamer::EmitWin64EHHandlerData() {
MCStreamer::EmitWin64EHHandlerData();
@@ -1003,8 +1027,7 @@ void MCAsmStreamer::EmitWin64EHHandlerData() {
// data block is visible.
MCWin64EHUnwindInfo *CurFrame = getCurrentW64UnwindInfo();
StringRef suffix=MCWin64EHUnwindEmitter::GetSectionSuffix(CurFrame->Function);
- const MCSection *xdataSect =
- getContext().getTargetAsmInfo().getWin64EHTableSection(suffix);
+ const MCSection *xdataSect = getWin64EHTableSection(suffix, getContext());
if (xdataSect)
SwitchSectionNoChange(xdataSect);
@@ -1221,7 +1244,7 @@ void MCAsmStreamer::EmitInstruction(const MCInst &Inst) {
// If we have an AsmPrinter, use that to print, otherwise print the MCInst.
if (InstPrinter)
- InstPrinter->printInst(&Inst, OS);
+ InstPrinter->printInst(&Inst, OS, "");
else
Inst.print(OS, &MAI);
EmitEOL();
@@ -1249,8 +1272,8 @@ MCStreamer *llvm::createAsmStreamer(MCContext &Context,
formatted_raw_ostream &OS,
bool isVerboseAsm, bool useLoc,
bool useCFI, MCInstPrinter *IP,
- MCCodeEmitter *CE, TargetAsmBackend *TAB,
+ MCCodeEmitter *CE, MCAsmBackend *MAB,
bool ShowInst) {
return new MCAsmStreamer(Context, OS, isVerboseAsm, useLoc, useCFI,
- IP, CE, TAB, ShowInst);
+ IP, CE, MAB, ShowInst);
}
diff --git a/lib/MC/MCAssembler.cpp b/lib/MC/MCAssembler.cpp
index 527a63c..06c8aec 100644
--- a/lib/MC/MCAssembler.cpp
+++ b/lib/MC/MCAssembler.cpp
@@ -18,6 +18,7 @@
#include "llvm/MC/MCSymbol.h"
#include "llvm/MC/MCValue.h"
#include "llvm/MC/MCDwarf.h"
+#include "llvm/MC/MCAsmBackend.h"
#include "llvm/ADT/OwningPtr.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringExtras.h"
@@ -25,8 +26,7 @@
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetRegistry.h"
-#include "llvm/Target/TargetAsmBackend.h"
+#include "llvm/Support/TargetRegistry.h"
using namespace llvm;
@@ -194,7 +194,7 @@ MCSymbolData::MCSymbolData(const MCSymbol &_Symbol, MCFragment *_Fragment,
/* *** */
-MCAssembler::MCAssembler(MCContext &Context_, TargetAsmBackend &Backend_,
+MCAssembler::MCAssembler(MCContext &Context_, MCAsmBackend &Backend_,
MCCodeEmitter &Emitter_, MCObjectWriter &Writer_,
raw_ostream &OS_)
: Context(Context_), Backend(Backend_), Emitter(Emitter_), Writer(Writer_),
diff --git a/lib/MC/MCAtom.cpp b/lib/MC/MCAtom.cpp
new file mode 100644
index 0000000..d714443
--- /dev/null
+++ b/lib/MC/MCAtom.cpp
@@ -0,0 +1,97 @@
+//===- lib/MC/MCAtom.cpp - MCAtom implementation --------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/MC/MCAtom.h"
+#include "llvm/MC/MCModule.h"
+#include "llvm/Support/ErrorHandling.h"
+
+using namespace llvm;
+
+void MCAtom::addInst(const MCInst &I, uint64_t Address, unsigned Size) {
+ assert(Type == TextAtom && "Trying to add MCInst to a non-text atom!");
+
+ assert(Address < End+Size &&
+ "Instruction not contiguous with end of atom!");
+ if (Address > End)
+ Parent->remap(this, Begin, End+Size);
+
+ Text.push_back(std::make_pair(Address, I));
+}
+
+void MCAtom::addData(const MCData &D) {
+ assert(Type == DataAtom && "Trying to add MCData to a non-data atom!");
+ Parent->remap(this, Begin, End+1);
+
+ Data.push_back(D);
+}
+
+MCAtom *MCAtom::split(uint64_t SplitPt) {
+ assert((SplitPt > Begin && SplitPt <= End) &&
+ "Splitting at point not contained in atom!");
+
+ // Compute the new begin/end points.
+ uint64_t LeftBegin = Begin;
+ uint64_t LeftEnd = SplitPt - 1;
+ uint64_t RightBegin = SplitPt;
+ uint64_t RightEnd = End;
+
+ // Remap this atom to become the lower of the two new ones.
+ Parent->remap(this, LeftBegin, LeftEnd);
+
+ // Create a new atom for the higher atom.
+ MCAtom *RightAtom = Parent->createAtom(Type, RightBegin, RightEnd);
+
+ // Split the contents of the original atom between it and the new one. The
+ // precise method depends on whether this is a data or a text atom.
+ if (isDataAtom()) {
+ std::vector<MCData>::iterator I = Data.begin() + (RightBegin - LeftBegin);
+
+ assert(I != Data.end() && "Split point not found in range!");
+
+ RightAtom->Data.insert(RightAtom->Data.end(), I, Data.end());
+ Data.erase(I, Data.end());
+ } else if (isTextAtom()) {
+ std::vector<std::pair<uint64_t, MCInst> >::iterator I = Text.begin();
+
+ while (I != Text.end() && I->first < SplitPt) ++I;
+
+ assert(I != Text.end() && "Split point not found in disassembly!");
+ assert(I->first == SplitPt &&
+ "Split point does not fall on instruction boundary!");
+
+ RightAtom->Text.insert(RightAtom->Text.end(), I, Text.end());
+ Text.erase(I, Text.end());
+ } else
+ llvm_unreachable("Unknown atom type!");
+
+ return RightAtom;
+}
+
+void MCAtom::truncate(uint64_t TruncPt) {
+ assert((TruncPt >= Begin && TruncPt < End) &&
+ "Truncation point not contained in atom!");
+
+ Parent->remap(this, Begin, TruncPt);
+
+ if (isDataAtom()) {
+ Data.resize(TruncPt - Begin + 1);
+ } else if (isTextAtom()) {
+ std::vector<std::pair<uint64_t, MCInst> >::iterator I = Text.begin();
+
+ while (I != Text.end() && I->first <= TruncPt) ++I;
+
+ assert(I != Text.end() && "Truncation point not found in disassembly!");
+ assert(I->first == TruncPt+1 &&
+ "Truncation point does not fall on instruction boundary");
+
+ Text.erase(I, Text.end());
+ } else
+ llvm_unreachable("Unknown atom type!");
+}
+
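The endpoint math in MCAtom::split is worth pinning down: both ranges are inclusive, so splitting [Begin,End] at SplitPt yields [Begin,SplitPt-1] and [SplitPt,End]. A toy self-check of just that arithmetic (names are illustrative, not the LLVM classes):

#include <cassert>
#include <cstdint>
#include <utility>

static std::pair<std::pair<uint64_t,uint64_t>, std::pair<uint64_t,uint64_t> >
splitRange(uint64_t Begin, uint64_t End, uint64_t SplitPt) {
  assert(SplitPt > Begin && SplitPt <= End);
  return std::make_pair(std::make_pair(Begin, SplitPt - 1),
                        std::make_pair(SplitPt, End));
}

int main() {
  // Splitting a 16-byte atom [0x100,0x10f] at 0x108:
  std::pair<std::pair<uint64_t,uint64_t>, std::pair<uint64_t,uint64_t> > R =
      splitRange(0x100, 0x10f, 0x108);
  assert(R.first.second == 0x107 && R.second.first == 0x108);
  return 0;
}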
diff --git a/lib/MC/MCCodeGenInfo.cpp b/lib/MC/MCCodeGenInfo.cpp
new file mode 100644
index 0000000..236e7de
--- /dev/null
+++ b/lib/MC/MCCodeGenInfo.cpp
@@ -0,0 +1,21 @@
+//===-- MCCodeGenInfo.cpp - Target CodeGen Info -----------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file tracks information about the target which can affect codegen,
+// asm parsing, and asm printing. For example, relocation model.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/MC/MCCodeGenInfo.h"
+using namespace llvm;
+
+void MCCodeGenInfo::InitMCCodeGenInfo(Reloc::Model RM, CodeModel::Model CM) {
+ RelocationModel = RM;
+ CMModel = CM;
+}
diff --git a/lib/MC/MCContext.cpp b/lib/MC/MCContext.cpp
index 8faa72e..82690ee 100644
--- a/lib/MC/MCContext.cpp
+++ b/lib/MC/MCContext.cpp
@@ -9,13 +9,14 @@
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCObjectFileInfo.h"
+#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSectionMachO.h"
#include "llvm/MC/MCSectionELF.h"
#include "llvm/MC/MCSectionCOFF.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/MC/MCLabel.h"
#include "llvm/MC/MCDwarf.h"
-#include "llvm/Target/TargetAsmInfo.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/Twine.h"
#include "llvm/Support/ELF.h"
@@ -26,8 +27,9 @@ typedef StringMap<const MCSectionELF*> ELFUniqueMapTy;
typedef StringMap<const MCSectionCOFF*> COFFUniqueMapTy;
-MCContext::MCContext(const MCAsmInfo &mai, const TargetAsmInfo *tai) :
- MAI(mai), TAI(tai),
+MCContext::MCContext(const MCAsmInfo &mai, const MCRegisterInfo &mri,
+ const MCObjectFileInfo *mofi) :
+ MAI(mai), MRI(mri), MOFI(mofi),
Allocator(), Symbols(Allocator), UsedNames(Allocator),
NextUniqueID(0),
CurrentDwarfLoc(0,0,0,DWARF2_FLAG_IS_STMT,0,0),
@@ -54,8 +56,6 @@ MCContext::~MCContext() {
// If the stream for the .secure_log_unique directive was created free it.
delete (raw_ostream*)SecureLog;
-
- delete TAI;
}
//===----------------------------------------------------------------------===//
@@ -279,7 +279,8 @@ unsigned MCContext::GetDwarfFile(StringRef FileName, unsigned FileNumber) {
} else {
StringRef Directory = Slash.first;
Name = Slash.second;
- for (DirIndex = 0; DirIndex < MCDwarfDirs.size(); DirIndex++) {
+ DirIndex = 0;
+ for (unsigned End = MCDwarfDirs.size(); DirIndex < End; DirIndex++) {
if (Directory == MCDwarfDirs[DirIndex])
break;
}
diff --git a/lib/MC/MCDisassembler/CMakeLists.txt b/lib/MC/MCDisassembler/CMakeLists.txt
index 0ce359d..4debb28 100644
--- a/lib/MC/MCDisassembler/CMakeLists.txt
+++ b/lib/MC/MCDisassembler/CMakeLists.txt
@@ -1,4 +1,3 @@
-
add_llvm_library(LLVMMCDisassembler
Disassembler.cpp
EDDisassembler.cpp
@@ -6,3 +5,26 @@ add_llvm_library(LLVMMCDisassembler
EDOperand.cpp
EDToken.cpp
)
+
+add_llvm_library_dependencies(LLVMMCDisassembler
+ LLVMMC
+ LLVMMCParser
+ LLVMSupport
+ LLVMTarget
+ )
+
+foreach(t ${LLVM_TARGETS_TO_BUILD})
+ set(td ${LLVM_MAIN_SRC_DIR}/lib/Target/${t})
+ if(EXISTS ${td}/TargetInfo/CMakeLists.txt)
+ add_llvm_library_dependencies(LLVMMCDisassembler "LLVM${t}Info")
+ endif()
+ if(EXISTS ${td}/MCTargetDesc/CMakeLists.txt)
+ add_llvm_library_dependencies(LLVMMCDisassembler "LLVM${t}Desc")
+ endif()
+ if(EXISTS ${td}/AsmParser/CMakeLists.txt)
+ add_llvm_library_dependencies(LLVMMCDisassembler "LLVM${t}AsmParser")
+ endif()
+ if(EXISTS ${td}/Disassembler/CMakeLists.txt)
+ add_llvm_library_dependencies(LLVMMCDisassembler "LLVM${t}Disassembler")
+ endif()
+endforeach(t)
diff --git a/lib/MC/MCDisassembler/Disassembler.cpp b/lib/MC/MCDisassembler/Disassembler.cpp
index 5480b4b..16e66dc 100644
--- a/lib/MC/MCDisassembler/Disassembler.cpp
+++ b/lib/MC/MCDisassembler/Disassembler.cpp
@@ -1,4 +1,4 @@
-//===-- lib/MC/Disassembler.cpp - Disassembler Public C Interface -*- C -*-===//
+//===-- lib/MC/Disassembler.cpp - Disassembler Public C Interface ---------===//
//
// The LLVM Compiler Infrastructure
//
@@ -11,15 +11,14 @@
#include "llvm-c/Disassembler.h"
#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCDisassembler.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstPrinter.h"
-#include "llvm/MC/MCContext.h"
-#include "llvm/Target/TargetRegistry.h"
-#include "llvm/Target/TargetAsmInfo.h" // FIXME.
-#include "llvm/Target/TargetMachine.h" // FIXME.
-#include "llvm/Target/TargetSelect.h"
+#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/Support/MemoryObject.h"
+#include "llvm/Support/TargetRegistry.h"
+#include "llvm/Support/TargetSelect.h"
namespace llvm {
class Target;
@@ -38,10 +37,7 @@ LLVMDisasmContextRef LLVMCreateDisasm(const char *TripleName, void *DisInfo,
LLVMSymbolLookupCallback SymbolLookUp) {
// Initialize targets and assembly printers/parsers.
llvm::InitializeAllTargetInfos();
- // FIXME: We shouldn't need to initialize the Target(Machine)s.
- llvm::InitializeAllTargets();
- llvm::InitializeAllMCAsmInfos();
- llvm::InitializeAllAsmPrinters();
+ llvm::InitializeAllTargetMCs();
llvm::InitializeAllAsmParsers();
llvm::InitializeAllDisassemblers();
@@ -54,41 +50,38 @@ LLVMDisasmContextRef LLVMCreateDisasm(const char *TripleName, void *DisInfo,
const MCAsmInfo *MAI = TheTarget->createMCAsmInfo(TripleName);
assert(MAI && "Unable to create target asm info!");
+ const MCRegisterInfo *MRI = TheTarget->createMCRegInfo(TripleName);
+ assert(MRI && "Unable to create target register info!");
+
// Package up features to be passed to target/subtarget
std::string FeaturesStr;
std::string CPU;
- // FIXME: We shouldn't need to do this (and link in codegen).
- // When we split this out, we should do it in a way that makes
- // it straightforward to switch subtargets on the fly.
- TargetMachine *TM = TheTarget->createTargetMachine(TripleName, CPU,
- FeaturesStr);
- assert(TM && "Unable to create target machine!");
-
- // Get the target assembler info needed to setup the context.
- const TargetAsmInfo *tai = new TargetAsmInfo(*TM);
- assert(tai && "Unable to create target assembler!");
+ const MCSubtargetInfo *STI = TheTarget->createMCSubtargetInfo(TripleName, CPU,
+ FeaturesStr);
+ assert(STI && "Unable to create subtarget info!");
// Set up the MCContext for creating symbols and MCExpr's.
- MCContext *Ctx = new MCContext(*MAI, tai);
+ MCContext *Ctx = new MCContext(*MAI, *MRI, 0);
assert(Ctx && "Unable to create MCContext!");
// Set up disassembler.
- MCDisassembler *DisAsm = TheTarget->createMCDisassembler();
+ MCDisassembler *DisAsm = TheTarget->createMCDisassembler(*STI);
assert(DisAsm && "Unable to create disassembler!");
- DisAsm->setupForSymbolicDisassembly(GetOpInfo, DisInfo, Ctx);
+ DisAsm->setupForSymbolicDisassembly(GetOpInfo, SymbolLookUp, DisInfo, Ctx);
// Set up the instruction printer.
int AsmPrinterVariant = MAI->getAssemblerDialect();
MCInstPrinter *IP = TheTarget->createMCInstPrinter(AsmPrinterVariant,
- *MAI);
+ *MAI, *STI);
assert(IP && "Unable to create instruction printer!");
LLVMDisasmContext *DC = new LLVMDisasmContext(TripleName, DisInfo, TagType,
GetOpInfo, SymbolLookUp,
- TheTarget, MAI, TM, tai, Ctx,
- DisAsm, IP);
+ TheTarget, MAI, MRI,
+ Ctx, DisAsm, IP);
assert(DC && "Allocation failure!");
+
return DC;
}
@@ -147,18 +140,35 @@ size_t LLVMDisasmInstruction(LLVMDisasmContextRef DCR, uint8_t *Bytes,
MCInst Inst;
const MCDisassembler *DisAsm = DC->getDisAsm();
MCInstPrinter *IP = DC->getIP();
- if (!DisAsm->getInstruction(Inst, Size, MemoryObject, PC, /*REMOVE*/ nulls()))
+ MCDisassembler::DecodeStatus S;
+ S = DisAsm->getInstruction(Inst, Size, MemoryObject, PC,
+ /*REMOVE*/ nulls(), DC->CommentStream);
+ switch (S) {
+ case MCDisassembler::Fail:
+ case MCDisassembler::SoftFail:
+ // FIXME: Do something different for soft failure modes?
return 0;
- SmallVector<char, 64> InsnStr;
- raw_svector_ostream OS(InsnStr);
- IP->printInst(&Inst, OS);
- OS.flush();
+ case MCDisassembler::Success: {
+ DC->CommentStream.flush();
+ StringRef Comments = DC->CommentsToEmit.str();
- assert(OutStringSize != 0 && "Output buffer cannot be zero size");
- size_t OutputSize = std::min(OutStringSize-1, InsnStr.size());
- std::memcpy(OutString, InsnStr.data(), OutputSize);
- OutString[OutputSize] = '\0'; // Terminate string.
+ SmallVector<char, 64> InsnStr;
+ raw_svector_ostream OS(InsnStr);
+ IP->printInst(&Inst, OS, Comments);
+ OS.flush();
- return Size;
+ // Tell the comment stream that the vector changed underneath it.
+ DC->CommentsToEmit.clear();
+ DC->CommentStream.resync();
+
+ assert(OutStringSize != 0 && "Output buffer cannot be zero size");
+ size_t OutputSize = std::min(OutStringSize-1, InsnStr.size());
+ std::memcpy(OutString, InsnStr.data(), OutputSize);
+ OutString[OutputSize] = '\0'; // Terminate string.
+
+ return Size;
+ }
+ }
+ return 0;
}
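A usage sketch of the C API this file implements, assuming an LLVM build with the X86 target registered (LLVMCreateDisasm initializes the targets itself, as shown above); the two bytes decode as nop and ret in x86-64:

#include "llvm-c/Disassembler.h"
#include <cstdint>
#include <cstdio>

int main() {
  LLVMDisasmContextRef DC =
      LLVMCreateDisasm("x86_64-unknown-linux-gnu", 0, 0, 0, 0);
  if (!DC) return 1;
  uint8_t Bytes[] = { 0x90, 0xc3 }; // nop; ret
  char Text[64];
  size_t Pos = 0;
  while (Pos < sizeof(Bytes)) {
    size_t N = LLVMDisasmInstruction(DC, Bytes + Pos, sizeof(Bytes) - Pos,
                                     Pos, Text, sizeof(Text));
    if (!N) break; // decode failure
    std::printf("0x%llx:%s\n", (unsigned long long)Pos, Text);
    Pos += N;
  }
  LLVMDisasmDispose(DC);
  return 0;
}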
diff --git a/lib/MC/MCDisassembler/Disassembler.h b/lib/MC/MCDisassembler/Disassembler.h
index f0ec42a..238ff7d 100644
--- a/lib/MC/MCDisassembler/Disassembler.h
+++ b/lib/MC/MCDisassembler/Disassembler.h
@@ -20,15 +20,16 @@
#include "llvm-c/Disassembler.h"
#include <string>
#include "llvm/ADT/OwningPtr.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/Support/raw_ostream.h"
namespace llvm {
-class TargetAsmInfo;
class MCContext;
class MCAsmInfo;
class MCDisassembler;
class MCInstPrinter;
+class MCRegisterInfo;
class Target;
-class TargetMachine;
//
// This is the disassembler context returned by LLVMCreateDisasm().
@@ -58,12 +59,8 @@ private:
const Target *TheTarget;
// The assembly information for the target architecture.
llvm::OwningPtr<const llvm::MCAsmInfo> MAI;
- // The target machine instance.
- llvm::OwningPtr<llvm::TargetMachine> TM;
- // The disassembler for the target architecture.
- // FIXME: using llvm::OwningPtr<const llvm::TargetAsmInfo> causes a malloc
- // error when this LLVMDisasmContext is deleted.
- const TargetAsmInfo *Tai;
+ // The register information for the target architecture.
+ llvm::OwningPtr<const llvm::MCRegisterInfo> MRI;
// The assembly context for creating symbols and MCExprs.
llvm::OwningPtr<const llvm::MCContext> Ctx;
// The disassembler for the target architecture.
@@ -72,22 +69,28 @@ private:
llvm::OwningPtr<llvm::MCInstPrinter> IP;
public:
+ // Comment stream and backing vector.
+ SmallString<128> CommentsToEmit;
+ raw_svector_ostream CommentStream;
+
LLVMDisasmContext(std::string tripleName, void *disInfo, int tagType,
LLVMOpInfoCallback getOpInfo,
LLVMSymbolLookupCallback symbolLookUp,
const Target *theTarget, const MCAsmInfo *mAI,
- llvm::TargetMachine *tM, const TargetAsmInfo *tai,
+ const MCRegisterInfo *mRI,
llvm::MCContext *ctx, const MCDisassembler *disAsm,
MCInstPrinter *iP) : TripleName(tripleName),
DisInfo(disInfo), TagType(tagType), GetOpInfo(getOpInfo),
- SymbolLookUp(symbolLookUp), TheTarget(theTarget), Tai(tai) {
- TM.reset(tM);
+ SymbolLookUp(symbolLookUp), TheTarget(theTarget),
+ CommentStream(CommentsToEmit) {
MAI.reset(mAI);
+ MRI.reset(mRI);
Ctx.reset(ctx);
DisAsm.reset(disAsm);
IP.reset(iP);
}
const MCDisassembler *getDisAsm() const { return DisAsm.get(); }
+ const MCAsmInfo *getAsmInfo() const { return MAI.get(); }
MCInstPrinter *getIP() { return IP.get(); }
};
diff --git a/lib/MC/MCDisassembler/EDDisassembler.cpp b/lib/MC/MCDisassembler/EDDisassembler.cpp
index bdd99af..83362a2 100644
--- a/lib/MC/MCDisassembler/EDDisassembler.cpp
+++ b/lib/MC/MCDisassembler/EDDisassembler.cpp
@@ -22,20 +22,19 @@
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstPrinter.h"
+#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/MCParser/AsmLexer.h"
#include "llvm/MC/MCParser/MCAsmParser.h"
#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
+#include "llvm/MC/MCTargetAsmLexer.h"
+#include "llvm/MC/MCTargetAsmParser.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/MemoryObject.h"
#include "llvm/Support/SourceMgr.h"
-#include "llvm/Target/TargetAsmLexer.h"
-#include "llvm/Target/TargetAsmParser.h"
-#include "llvm/Target/TargetRegistry.h"
-#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/Target/TargetSelect.h"
+#include "llvm/Support/TargetRegistry.h"
+#include "llvm/Support/TargetSelect.h"
using namespace llvm;
bool EDDisassembler::sInitialized = false;
@@ -106,9 +105,7 @@ void EDDisassembler::initialize() {
sInitialized = true;
InitializeAllTargetInfos();
- InitializeAllTargets();
- InitializeAllMCAsmInfos();
- InitializeAllAsmPrinters();
+ InitializeAllTargetMCs();
InitializeAllAsmParsers();
InitializeAllDisassemblers();
}
@@ -169,24 +166,24 @@ EDDisassembler::EDDisassembler(CPUKey &key) :
if (!Tgt)
return;
- std::string CPU;
- std::string featureString;
- TargetMachine.reset(Tgt->createTargetMachine(tripleString, CPU,
- featureString));
+ MRI.reset(Tgt->createMCRegInfo(tripleString));
- const TargetRegisterInfo *registerInfo = TargetMachine->getRegisterInfo();
-
- if (!registerInfo)
+ if (!MRI)
return;
-
- initMaps(*registerInfo);
+
+ initMaps(*MRI);
AsmInfo.reset(Tgt->createMCAsmInfo(tripleString));
if (!AsmInfo)
return;
- Disassembler.reset(Tgt->createMCDisassembler());
+ STI.reset(Tgt->createMCSubtargetInfo(tripleString, "", ""));
+
+ if (!STI)
+ return;
+
+ Disassembler.reset(Tgt->createMCDisassembler(*STI));
if (!Disassembler)
return;
@@ -195,16 +192,16 @@ EDDisassembler::EDDisassembler(CPUKey &key) :
InstString.reset(new std::string);
InstStream.reset(new raw_string_ostream(*InstString));
- InstPrinter.reset(Tgt->createMCInstPrinter(LLVMSyntaxVariant, *AsmInfo));
+ InstPrinter.reset(Tgt->createMCInstPrinter(LLVMSyntaxVariant, *AsmInfo, *STI));
if (!InstPrinter)
return;
GenericAsmLexer.reset(new AsmLexer(*AsmInfo));
- SpecificAsmLexer.reset(Tgt->createAsmLexer(*AsmInfo));
+ SpecificAsmLexer.reset(Tgt->createMCAsmLexer(*MRI, *AsmInfo));
SpecificAsmLexer->InstallLexer(*GenericAsmLexer);
- initMaps(*TargetMachine->getRegisterInfo());
+ initMaps(*MRI);
Valid = true;
}
@@ -247,14 +244,17 @@ EDInst *EDDisassembler::createInst(EDByteReaderCallback byteReader,
MCInst* inst = new MCInst;
uint64_t byteSize;
- if (!Disassembler->getInstruction(*inst,
- byteSize,
- memoryObject,
- address,
- ErrorStream)) {
+ MCDisassembler::DecodeStatus S;
+ S = Disassembler->getInstruction(*inst, byteSize, memoryObject, address,
+ ErrorStream, nulls());
+ switch (S) {
+ case MCDisassembler::Fail:
+ case MCDisassembler::SoftFail:
+ // FIXME: Do something different on soft failure mode?
delete inst;
return NULL;
- } else {
+
+ case MCDisassembler::Success: {
const llvm::EDInstInfo *thisInstInfo = NULL;
if (InstInfos) {
@@ -264,9 +264,11 @@ EDInst *EDDisassembler::createInst(EDByteReaderCallback byteReader,
EDInst* sdInst = new EDInst(inst, byteSize, *this, thisInstInfo);
return sdInst;
}
+ }
+ return NULL;
}
-void EDDisassembler::initMaps(const TargetRegisterInfo &registerInfo) {
+void EDDisassembler::initMaps(const MCRegisterInfo &registerInfo) {
unsigned numRegisters = registerInfo.getNumRegs();
unsigned registerIndex;
@@ -325,7 +327,7 @@ bool EDDisassembler::registerIsProgramCounter(unsigned registerID) {
int EDDisassembler::printInst(std::string &str, MCInst &inst) {
PrinterMutex.acquire();
- InstPrinter->printInst(&inst, *InstStream);
+ InstPrinter->printInst(&inst, *InstStream, "");
InstStream->flush();
str = *InstString;
InstString->clear();
@@ -368,16 +370,16 @@ int EDDisassembler::parseInst(SmallVectorImpl<MCParsedAsmOperand*> &operands,
SourceMgr sourceMgr;
sourceMgr.setDiagHandler(diag_handler, static_cast<void*>(this));
sourceMgr.AddNewSourceBuffer(buf, SMLoc()); // ownership of buf handed over
- MCContext context(*AsmInfo, NULL);
+ MCContext context(*AsmInfo, *MRI, NULL);
OwningPtr<MCStreamer> streamer(createNullStreamer(context));
- OwningPtr<MCAsmParser> genericParser(createMCAsmParser(*Tgt, sourceMgr,
+ OwningPtr<MCAsmParser> genericParser(createMCAsmParser(sourceMgr,
context, *streamer,
*AsmInfo));
StringRef triple = tripleFromArch(Key.Arch);
OwningPtr<MCSubtargetInfo> STI(Tgt->createMCSubtargetInfo(triple, "", ""));
- OwningPtr<TargetAsmParser> TargetParser(Tgt->createAsmParser(*STI,
- *genericParser));
+ OwningPtr<MCTargetAsmParser>
+ TargetParser(Tgt->createMCAsmParser(*STI, *genericParser));
AsmToken OpcodeToken = genericParser->Lex();
AsmToken NextToken = genericParser->Lex(); // consume next token, because specificParser expects us to
diff --git a/lib/MC/MCDisassembler/EDDisassembler.h b/lib/MC/MCDisassembler/EDDisassembler.h
index 11d69c1..38c2203 100644
--- a/lib/MC/MCDisassembler/EDDisassembler.h
+++ b/lib/MC/MCDisassembler/EDDisassembler.h
@@ -29,24 +29,23 @@
namespace llvm {
class AsmLexer;
+class AsmParser;
class AsmToken;
class MCContext;
class MCAsmInfo;
class MCAsmLexer;
-class AsmParser;
-class TargetAsmLexer;
-class TargetAsmParser;
class MCDisassembler;
class MCInstPrinter;
class MCInst;
class MCParsedAsmOperand;
+class MCRegisterInfo;
class MCStreamer;
class MCSubtargetInfo;
+class MCTargetAsmLexer;
+class MCTargetAsmParser;
template <typename T> class SmallVectorImpl;
class SourceMgr;
class Target;
-class TargetMachine;
-class TargetRegisterInfo;
struct EDInstInfo;
struct EDInst;
@@ -136,10 +135,12 @@ struct EDDisassembler {
CPUKey Key;
/// The LLVM target corresponding to the disassembler
const llvm::Target *Tgt;
- /// The target machine instance.
- llvm::OwningPtr<llvm::TargetMachine> TargetMachine;
/// The assembly information for the target architecture
llvm::OwningPtr<const llvm::MCAsmInfo> AsmInfo;
+ /// The subtarget information for the target architecture
+ llvm::OwningPtr<const llvm::MCSubtargetInfo> STI;
+ // The register information for the target architecture.
+ llvm::OwningPtr<const llvm::MCRegisterInfo> MRI;
/// The disassembler for the target architecture
llvm::OwningPtr<const llvm::MCDisassembler> Disassembler;
/// The output string for the instruction printer; must be guarded with
@@ -160,7 +161,7 @@ struct EDDisassembler {
/// The target-specific lexer for use in tokenizing strings, in
/// target-independent and target-specific portions
llvm::OwningPtr<llvm::AsmLexer> GenericAsmLexer;
- llvm::OwningPtr<llvm::TargetAsmLexer> SpecificAsmLexer;
+ llvm::OwningPtr<llvm::MCTargetAsmLexer> SpecificAsmLexer;
/// The guard for the above
llvm::sys::Mutex ParserMutex;
/// The LLVM number used for the target disassembly syntax variant
@@ -216,7 +217,7 @@ struct EDDisassembler {
/// info
///
/// @arg registerInfo - the register information to use as a source
- void initMaps(const llvm::TargetRegisterInfo &registerInfo);
+ void initMaps(const llvm::MCRegisterInfo &registerInfo);
/// nameWithRegisterID - Returns the name (owned by the EDDisassembler) of a
/// register for a given register ID, or NULL on failure
///
diff --git a/lib/MC/MCDisassembler/EDInst.h b/lib/MC/MCDisassembler/EDInst.h
index ceb9505..6b78dc8 100644
--- a/lib/MC/MCDisassembler/EDInst.h
+++ b/lib/MC/MCDisassembler/EDInst.h
@@ -73,7 +73,7 @@ struct EDInst {
std::string String;
/// The order in which operands from the InstInfo's operand information appear
/// in String
- const char* OperandOrder;
+ const signed char* OperandOrder;
/// The result of the parseOperands() function
CachedResult ParseResult;
diff --git a/lib/MC/MCDisassembler/EDToken.cpp b/lib/MC/MCDisassembler/EDToken.cpp
index de770b4..5f6c9df 100644
--- a/lib/MC/MCDisassembler/EDToken.cpp
+++ b/lib/MC/MCDisassembler/EDToken.cpp
@@ -87,14 +87,18 @@ int EDToken::registerID(unsigned &registerID) const {
int EDToken::tokenize(std::vector<EDToken*> &tokens,
std::string &str,
- const char *operandOrder,
+ const signed char *operandOrder,
EDDisassembler &disassembler) {
SmallVector<MCParsedAsmOperand*, 5> parsedOperands;
SmallVector<AsmToken, 10> asmTokens;
if (disassembler.parseInst(parsedOperands, asmTokens, str))
+ {
+ for (unsigned i = 0, e = parsedOperands.size(); i != e; ++i)
+ delete parsedOperands[i];
return -1;
-
+ }
+
SmallVectorImpl<MCParsedAsmOperand*>::iterator operandIterator;
unsigned int operandIndex;
SmallVectorImpl<AsmToken>::iterator tokenIterator;
diff --git a/lib/MC/MCDisassembler/EDToken.h b/lib/MC/MCDisassembler/EDToken.h
index ba46707..384079b 100644
--- a/lib/MC/MCDisassembler/EDToken.h
+++ b/lib/MC/MCDisassembler/EDToken.h
@@ -125,7 +125,7 @@ struct EDToken {
// assembly syntax
static int tokenize(std::vector<EDToken*> &tokens,
std::string &str,
- const char *operandOrder,
+ const signed char *operandOrder,
EDDisassembler &disassembler);
/// getString - Directs a character pointer to the string, returning 0 on
diff --git a/lib/MC/MCDwarf.cpp b/lib/MC/MCDwarf.cpp
index ad86db1..4658a30 100644
--- a/lib/MC/MCDwarf.cpp
+++ b/lib/MC/MCDwarf.cpp
@@ -7,17 +7,18 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCDwarf.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCObjectFileInfo.h"
+#include "llvm/MC/MCObjectWriter.h"
+#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/MC/MCExpr.h"
-#include "llvm/MC/MCContext.h"
-#include "llvm/MC/MCObjectWriter.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetAsmInfo.h"
#include "llvm/ADT/FoldingSet.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/StringExtras.h"
@@ -196,7 +197,7 @@ static inline void EmitDwarfLineTable(MCStreamer *MCOS,
MCOS->EmitLabel(SectionEnd);
// Switch back to the dwarf line section.
- MCOS->SwitchSection(context.getTargetAsmInfo().getDwarfLineSection());
+ MCOS->SwitchSection(context.getObjectFileInfo()->getDwarfLineSection());
const MCAsmInfo &asmInfo = MCOS->getContext().getAsmInfo();
MCOS->EmitDwarfAdvanceLineAddr(INT64_MAX, LastLabel, SectionEnd,
@@ -209,7 +210,7 @@ static inline void EmitDwarfLineTable(MCStreamer *MCOS,
void MCDwarfFileTable::Emit(MCStreamer *MCOS) {
MCContext &context = MCOS->getContext();
// Switch to the section where the table will be emitted into.
- MCOS->SwitchSection(context.getTargetAsmInfo().getDwarfLineSection());
+ MCOS->SwitchSection(context.getObjectFileInfo()->getDwarfLineSection());
// Create a symbol at the beginning of this section.
MCSymbol *LineStartSym = context.CreateTempSymbol();
@@ -485,11 +486,11 @@ static void EmitPersonality(MCStreamer &streamer, const MCSymbol &symbol,
}
static const MachineLocation TranslateMachineLocation(
- const TargetAsmInfo &TAI,
+ const MCRegisterInfo &MRI,
const MachineLocation &Loc) {
unsigned Reg = Loc.getReg() == MachineLocation::VirtualFP ?
MachineLocation::VirtualFP :
- unsigned(TAI.getDwarfRegNum(Loc.getReg(), true));
+ unsigned(MRI.getDwarfRegNum(Loc.getReg(), true));
const MachineLocation &NewLoc = Loc.isReg() ?
MachineLocation(Reg) : MachineLocation(Reg, Loc.getOffset());
return NewLoc;
@@ -503,10 +504,11 @@ namespace {
bool IsEH;
const MCSymbol *SectionStart;
public:
- FrameEmitterImpl(bool usingCFI, bool isEH, const MCSymbol *sectionStart) :
- CFAOffset(0), CIENum(0), UsingCFI(usingCFI), IsEH(isEH),
- SectionStart(sectionStart) {
- }
+ FrameEmitterImpl(bool usingCFI, bool isEH)
+ : CFAOffset(0), CIENum(0), UsingCFI(usingCFI), IsEH(isEH),
+ SectionStart(0) {}
+
+ void setSectionStart(const MCSymbol *Label) { SectionStart = Label; }
/// EmitCompactUnwind - Emit the unwind information in a compact way. If
/// we're successful, return 'true'. Otherwise, return 'false' and it will
@@ -687,11 +689,8 @@ void FrameEmitterImpl::EmitCFIInstructions(MCStreamer &streamer,
/// normal CIE and FDE.
bool FrameEmitterImpl::EmitCompactUnwind(MCStreamer &Streamer,
const MCDwarfFrameInfo &Frame) {
-#if 1
- return false;
-#else
MCContext &Context = Streamer.getContext();
- const TargetAsmInfo &TAI = Context.getTargetAsmInfo();
+ const MCObjectFileInfo *MOFI = Context.getObjectFileInfo();
bool VerboseAsm = Streamer.isVerboseAsm();
// range-start range-length compact-unwind-enc personality-func lsda
@@ -716,19 +715,17 @@ bool FrameEmitterImpl::EmitCompactUnwind(MCStreamer &Streamer,
// .quad __gxx_personality
// .quad except_tab1
- uint32_t Encoding =
- TAI.getCompactUnwindEncoding(Frame.Instructions,
- getDataAlignmentFactor(Streamer), IsEH);
+ uint32_t Encoding = Frame.CompactUnwindEncoding;
if (!Encoding) return false;
// The encoding needs to know we have an LSDA.
if (Frame.Lsda)
Encoding |= 0x40000000;
- Streamer.SwitchSection(TAI.getCompactUnwindSection());
+ Streamer.SwitchSection(MOFI->getCompactUnwindSection());
// Range Start
- unsigned FDEEncoding = TAI.getFDEEncoding(UsingCFI);
+ unsigned FDEEncoding = MOFI->getFDEEncoding(UsingCFI);
unsigned Size = getSizeForEncoding(Streamer, FDEEncoding);
if (VerboseAsm) Streamer.AddComment("Range Start");
Streamer.EmitSymbolValue(Frame.Function, Size);
@@ -745,6 +742,7 @@ bool FrameEmitterImpl::EmitCompactUnwind(MCStreamer &Streamer,
Twine(llvm::utohexstr(Encoding)));
Streamer.EmitIntValue(Encoding, Size);
+
// Personality Function
Size = getSizeForEncoding(Streamer, dwarf::DW_EH_PE_absptr);
if (VerboseAsm) Streamer.AddComment("Personality Function");
@@ -762,7 +760,6 @@ bool FrameEmitterImpl::EmitCompactUnwind(MCStreamer &Streamer,
Streamer.EmitIntValue(0, Size); // No LSDA
return true;
-#endif
}
const MCSymbol &FrameEmitterImpl::EmitCIE(MCStreamer &streamer,
@@ -771,11 +768,12 @@ const MCSymbol &FrameEmitterImpl::EmitCIE(MCStreamer &streamer,
const MCSymbol *lsda,
unsigned lsdaEncoding) {
MCContext &context = streamer.getContext();
- const TargetAsmInfo &TAI = context.getTargetAsmInfo();
+ const MCRegisterInfo &MRI = context.getRegisterInfo();
+ const MCObjectFileInfo *MOFI = context.getObjectFileInfo();
bool verboseAsm = streamer.isVerboseAsm();
MCSymbol *sectionStart;
- if (TAI.isFunctionEHFrameSymbolPrivate() || !IsEH)
+ if (MOFI->isFunctionEHFrameSymbolPrivate() || !IsEH)
sectionStart = context.CreateTempSymbol();
else
sectionStart = context.GetOrCreateSymbol(Twine("EH_frame") + Twine(CIENum));
@@ -824,7 +822,7 @@ const MCSymbol &FrameEmitterImpl::EmitCIE(MCStreamer &streamer,
// Return Address Register
if (verboseAsm) streamer.AddComment("CIE Return Address Column");
- streamer.EmitULEB128IntValue(TAI.getDwarfRARegNum(true));
+ streamer.EmitULEB128IntValue(MRI.getDwarfRegNum(MRI.getRARegister(), true));
// Augmentation Data Length (optional)
@@ -858,21 +856,22 @@ const MCSymbol &FrameEmitterImpl::EmitCIE(MCStreamer &streamer,
EmitEncodingByte(streamer, lsdaEncoding, "LSDA Encoding");
// Encoding of the FDE pointers
- EmitEncodingByte(streamer, TAI.getFDEEncoding(UsingCFI),
+ EmitEncodingByte(streamer, MOFI->getFDEEncoding(UsingCFI),
"FDE Encoding");
}
// Initial Instructions
- const std::vector<MachineMove> &Moves = TAI.getInitialFrameState();
+ const MCAsmInfo &MAI = context.getAsmInfo();
+ const std::vector<MachineMove> &Moves = MAI.getInitialFrameState();
std::vector<MCCFIInstruction> Instructions;
for (int i = 0, n = Moves.size(); i != n; ++i) {
MCSymbol *Label = Moves[i].getLabel();
const MachineLocation &Dst =
- TranslateMachineLocation(TAI, Moves[i].getDestination());
+ TranslateMachineLocation(MRI, Moves[i].getDestination());
const MachineLocation &Src =
- TranslateMachineLocation(TAI, Moves[i].getSource());
+ TranslateMachineLocation(MRI, Moves[i].getSource());
MCCFIInstruction Inst(Label, Dst, Src);
Instructions.push_back(Inst);
}
@@ -893,10 +892,10 @@ MCSymbol *FrameEmitterImpl::EmitFDE(MCStreamer &streamer,
MCContext &context = streamer.getContext();
MCSymbol *fdeStart = context.CreateTempSymbol();
MCSymbol *fdeEnd = context.CreateTempSymbol();
- const TargetAsmInfo &TAI = context.getTargetAsmInfo();
+ const MCObjectFileInfo *MOFI = context.getObjectFileInfo();
bool verboseAsm = streamer.isVerboseAsm();
- if (!TAI.isFunctionEHFrameSymbolPrivate() && IsEH) {
+ if (IsEH && frame.Function && !MOFI->isFunctionEHFrameSymbolPrivate()) {
MCSymbol *EHSym =
context.GetOrCreateSymbol(frame.Function->getName() + Twine(".eh"));
streamer.EmitEHSymAttributes(frame.Function, EHSym);
@@ -925,7 +924,7 @@ MCSymbol *FrameEmitterImpl::EmitFDE(MCStreamer &streamer,
streamer.EmitSymbolValue(&cieStart, 4);
}
- unsigned fdeEncoding = TAI.getFDEEncoding(UsingCFI);
+ unsigned fdeEncoding = MOFI->getFDEEncoding(UsingCFI);
unsigned size = getSizeForEncoding(streamer, fdeEncoding);
// PC Begin
@@ -1011,26 +1010,34 @@ void MCDwarfFrameEmitter::Emit(MCStreamer &Streamer,
bool UsingCFI,
bool IsEH) {
MCContext &Context = Streamer.getContext();
- const TargetAsmInfo &TAI = Context.getTargetAsmInfo();
- const MCSection &Section = IsEH ? *TAI.getEHFrameSection() :
- *TAI.getDwarfFrameSection();
+ MCObjectFileInfo *MOFI =
+ const_cast<MCObjectFileInfo*>(Context.getObjectFileInfo());
+ FrameEmitterImpl Emitter(UsingCFI, IsEH);
+ ArrayRef<MCDwarfFrameInfo> FrameArray = Streamer.getFrameInfos();
+
+ // Emit the compact unwind info if available.
+ // FIXME: This emits both the compact unwind and the old CIE/FDE
+ // information. Only one of those is needed.
+ if (IsEH && MOFI->getCompactUnwindSection())
+ for (unsigned i = 0, n = Streamer.getNumFrameInfos(); i < n; ++i) {
+ const MCDwarfFrameInfo &Frame = Streamer.getFrameInfo(i);
+ if (!Frame.CompactUnwindEncoding)
+ Emitter.EmitCompactUnwind(Streamer, Frame);
+ }
+
+ const MCSection &Section = IsEH ? *MOFI->getEHFrameSection() :
+ *MOFI->getDwarfFrameSection();
Streamer.SwitchSection(&Section);
MCSymbol *SectionStart = Context.CreateTempSymbol();
Streamer.EmitLabel(SectionStart);
+ Emitter.setSectionStart(SectionStart);
MCSymbol *FDEEnd = NULL;
DenseMap<CIEKey, const MCSymbol*> CIEStarts;
- FrameEmitterImpl Emitter(UsingCFI, IsEH, SectionStart);
const MCSymbol *DummyDebugKey = NULL;
- for (unsigned i = 0, n = Streamer.getNumFrameInfos(); i < n; ++i) {
- const MCDwarfFrameInfo &Frame = Streamer.getFrameInfo(i);
- if (IsEH && TAI.getCompactUnwindSection() &&
- Emitter.EmitCompactUnwind(Streamer, Frame)) {
- FDEEnd = NULL;
- continue;
- }
-
+ for (unsigned i = 0, n = FrameArray.size(); i < n; ++i) {
+ const MCDwarfFrameInfo &Frame = FrameArray[i];
CIEKey Key(Frame.Personality, Frame.PersonalityEncoding,
Frame.LsdaEncoding);
const MCSymbol *&CIEStart = IsEH ? CIEStarts[Key] : DummyDebugKey;
diff --git a/lib/MC/MCELF.cpp b/lib/MC/MCELF.cpp
index 2c3f8e8..dad2e7b 100644
--- a/lib/MC/MCELF.cpp
+++ b/lib/MC/MCELF.cpp
@@ -16,7 +16,6 @@
#include "llvm/MC/MCELFSymbolFlags.h"
#include "llvm/MC/MCFixupKindInfo.h"
#include "llvm/Support/ELF.h"
-#include "llvm/Target/TargetAsmBackend.h"
namespace llvm {
diff --git a/lib/MC/MCELFStreamer.cpp b/lib/MC/MCELFStreamer.cpp
index 49340ed..9ada08e 100644
--- a/lib/MC/MCELFStreamer.cpp
+++ b/lib/MC/MCELFStreamer.cpp
@@ -21,11 +21,11 @@
#include "llvm/MC/MCSection.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/MC/MCValue.h"
+#include "llvm/MC/MCAsmBackend.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ELF.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetAsmBackend.h"
using namespace llvm;
@@ -53,8 +53,9 @@ void MCELFStreamer::EmitLabel(MCSymbol *Symbol) {
void MCELFStreamer::EmitAssemblerFlag(MCAssemblerFlag Flag) {
switch (Flag) {
case MCAF_SyntaxUnified: return; // no-op here.
- case MCAF_Code16: return; // no-op here.
- case MCAF_Code32: return; // no-op here.
+ case MCAF_Code16: return; // Change parsing mode; no-op here.
+ case MCAF_Code32: return; // Change parsing mode; no-op here.
+ case MCAF_Code64: return; // Change parsing mode; no-op here.
case MCAF_SubsectionsViaSymbols:
getAssembler().setSubsectionsViaSymbols(true);
return;
@@ -219,14 +220,14 @@ void MCELFStreamer::EmitCommonSymbol(MCSymbol *Symbol, uint64_t Size,
SD.setSize(MCConstantExpr::Create(Size, getContext()));
}
-void MCELFStreamer::EmitLocalCommonSymbol(MCSymbol *Symbol, uint64_t Size) {
+void MCELFStreamer::EmitLocalCommonSymbol(MCSymbol *Symbol, uint64_t Size,
+ unsigned ByteAlignment) {
// FIXME: Should this be caught and done earlier?
MCSymbolData &SD = getAssembler().getOrCreateSymbolData(*Symbol);
MCELF::SetBinding(SD, ELF::STB_LOCAL);
SD.setExternal(false);
BindingExplicitlySet.insert(Symbol);
- // FIXME: ByteAlignment is not needed here, but is required.
- EmitCommonSymbol(Symbol, Size, 1);
+ EmitCommonSymbol(Symbol, Size, ByteAlignment);
}
void MCELFStreamer::EmitBytes(StringRef Data, unsigned AddrSpace) {
@@ -374,10 +375,10 @@ void MCELFStreamer::Finish() {
this->MCObjectStreamer::Finish();
}
-MCStreamer *llvm::createELFStreamer(MCContext &Context, TargetAsmBackend &TAB,
+MCStreamer *llvm::createELFStreamer(MCContext &Context, MCAsmBackend &MAB,
raw_ostream &OS, MCCodeEmitter *CE,
bool RelaxAll, bool NoExecStack) {
- MCELFStreamer *S = new MCELFStreamer(Context, TAB, OS, CE);
+ MCELFStreamer *S = new MCELFStreamer(Context, MAB, OS, CE);
if (RelaxAll)
S->getAssembler().setRelaxAll(true);
if (NoExecStack)
diff --git a/lib/MC/MCELFStreamer.h b/lib/MC/MCELFStreamer.h
index 855e7e9..10bf775 100644
--- a/lib/MC/MCELFStreamer.h
+++ b/lib/MC/MCELFStreamer.h
@@ -25,11 +25,11 @@ namespace llvm {
class MCELFStreamer : public MCObjectStreamer {
public:
- MCELFStreamer(MCContext &Context, TargetAsmBackend &TAB,
+ MCELFStreamer(MCContext &Context, MCAsmBackend &TAB,
raw_ostream &OS, MCCodeEmitter *Emitter)
: MCObjectStreamer(Context, TAB, OS, Emitter) {}
- MCELFStreamer(MCContext &Context, TargetAsmBackend &TAB,
+ MCELFStreamer(MCContext &Context, MCAsmBackend &TAB,
raw_ostream &OS, MCCodeEmitter *Emitter,
MCAssembler *Assembler)
: MCObjectStreamer(Context, TAB, OS, Emitter, Assembler) {}
@@ -74,7 +74,8 @@ public:
SD.setSize(Value);
}
- virtual void EmitLocalCommonSymbol(MCSymbol *Symbol, uint64_t Size);
+ virtual void EmitLocalCommonSymbol(MCSymbol *Symbol, uint64_t Size,
+ unsigned ByteAlignment);
virtual void EmitZerofill(const MCSection *Section, MCSymbol *Symbol = 0,
unsigned Size = 0, unsigned ByteAlignment = 0) {
diff --git a/lib/MC/MCExpr.cpp b/lib/MC/MCExpr.cpp
index fcf1aab..da297fb 100644
--- a/lib/MC/MCExpr.cpp
+++ b/lib/MC/MCExpr.cpp
@@ -18,7 +18,6 @@
#include "llvm/MC/MCValue.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetAsmBackend.h"
using namespace llvm;
namespace {
diff --git a/lib/MC/MCInstPrinter.cpp b/lib/MC/MCInstPrinter.cpp
index 81a939f..2317a28 100644
--- a/lib/MC/MCInstPrinter.cpp
+++ b/lib/MC/MCInstPrinter.cpp
@@ -8,7 +8,9 @@
//===----------------------------------------------------------------------===//
#include "llvm/MC/MCInstPrinter.h"
+#include "llvm/MC/MCAsmInfo.h"
#include "llvm/ADT/StringRef.h"
+#include "llvm/Support/raw_ostream.h"
using namespace llvm;
MCInstPrinter::~MCInstPrinter() {
@@ -23,3 +25,12 @@ StringRef MCInstPrinter::getOpcodeName(unsigned Opcode) const {
void MCInstPrinter::printRegName(raw_ostream &OS, unsigned RegNo) const {
assert(0 && "Target should implement this");
}
+
+void MCInstPrinter::printAnnotation(raw_ostream &OS, StringRef Annot) {
+ if (!Annot.empty()) {
+ if (CommentStream)
+ (*CommentStream) << Annot;
+ else
+ OS << " " << MAI.getCommentString() << " " << Annot;
+ }
+}
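printAnnotation routes comments two ways: into a side comment stream when one is attached (as the disassembler's comment plumbing above does), otherwise inline after the target's comment marker. A toy model, with '#' standing in for MAI.getCommentString():

#include <iostream>
#include <sstream>
#include <string>

static void printAnnotationModel(std::ostream &OS, std::ostream *CommentStream,
                                 const std::string &Annot) {
  if (Annot.empty()) return;
  if (CommentStream)
    *CommentStream << Annot; // collected, emitted later as asm comments
  else
    OS << " # " << Annot;    // inline, after the comment marker
}

int main() {
  std::ostringstream Asm, Comments;
  printAnnotationModel(Asm, 0, "sym+0x10");         // inline form
  printAnnotationModel(Asm, &Comments, "sym+0x10"); // diverted form
  std::cout << Asm.str() << "|" << Comments.str() << "\n";
  return 0;
}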
diff --git a/lib/MC/MCInstrAnalysis.cpp b/lib/MC/MCInstrAnalysis.cpp
new file mode 100644
index 0000000..7736702
--- /dev/null
+++ b/lib/MC/MCInstrAnalysis.cpp
@@ -0,0 +1,21 @@
+//===-- MCInstrAnalysis.cpp - InstrDesc target hooks ------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/MC/MCInstrAnalysis.h"
+using namespace llvm;
+
+uint64_t MCInstrAnalysis::evaluateBranch(const MCInst &Inst, uint64_t Addr,
+ uint64_t Size) const {
+ if (Inst.getNumOperands() == 0 ||
+ Info->get(Inst.getOpcode()).OpInfo[0].OperandType != MCOI::OPERAND_PCREL)
+ return -1ULL;
+
+ int64_t Imm = Inst.getOperand(0).getImm();
+ return Addr+Size+Imm;
+}
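evaluateBranch's arithmetic: a PC-relative immediate is relative to the end of the instruction, so the target is Addr + Size + Imm. A self-checking example with x86-style displacements (purely illustrative numbers):

#include <cassert>
#include <cstdint>

static uint64_t branchTarget(uint64_t Addr, uint64_t Size, int64_t Imm) {
  return Addr + Size + Imm; // immediate is relative to the next instruction
}

int main() {
  // A 5-byte 'jmp rel32' at 0x1000 with displacement 0x20 lands at 0x1025.
  assert(branchTarget(0x1000, 5, 0x20) == 0x1025);
  // A backward branch with displacement -0x10 lands at 0xff5.
  assert(branchTarget(0x1000, 5, -0x10) == 0xff5);
  return 0;
}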
diff --git a/lib/MC/MCLoggingStreamer.cpp b/lib/MC/MCLoggingStreamer.cpp
index 309752e..3fe8ac7 100644
--- a/lib/MC/MCLoggingStreamer.cpp
+++ b/lib/MC/MCLoggingStreamer.cpp
@@ -133,9 +133,10 @@ public:
return Child->EmitCommonSymbol(Symbol, Size, ByteAlignment);
}
- virtual void EmitLocalCommonSymbol(MCSymbol *Symbol, uint64_t Size) {
+ virtual void EmitLocalCommonSymbol(MCSymbol *Symbol, uint64_t Size,
+ unsigned ByteAlignment) {
LogCall("EmitLocalCommonSymbol");
- return Child->EmitLocalCommonSymbol(Symbol, Size);
+ return Child->EmitLocalCommonSymbol(Symbol, Size, ByteAlignment);
}
virtual void EmitZerofill(const MCSection *Section, MCSymbol *Symbol = 0,
diff --git a/lib/MC/MCMachOStreamer.cpp b/lib/MC/MCMachOStreamer.cpp
index 1b21249..aa35815 100644
--- a/lib/MC/MCMachOStreamer.cpp
+++ b/lib/MC/MCMachOStreamer.cpp
@@ -20,10 +20,10 @@
#include "llvm/MC/MCMachOSymbolFlags.h"
#include "llvm/MC/MCSectionMachO.h"
#include "llvm/MC/MCDwarf.h"
+#include "llvm/MC/MCAsmBackend.h"
#include "llvm/Support/Dwarf.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetAsmBackend.h"
using namespace llvm;
@@ -34,9 +34,9 @@ private:
virtual void EmitInstToData(const MCInst &Inst);
public:
- MCMachOStreamer(MCContext &Context, TargetAsmBackend &TAB,
+ MCMachOStreamer(MCContext &Context, MCAsmBackend &MAB,
raw_ostream &OS, MCCodeEmitter *Emitter)
- : MCObjectStreamer(Context, TAB, OS, Emitter) {}
+ : MCObjectStreamer(Context, MAB, OS, Emitter) {}
/// @name MCStreamer Interface
/// @{
@@ -67,7 +67,8 @@ public:
virtual void EmitELFSize(MCSymbol *Symbol, const MCExpr *Value) {
assert(0 && "macho doesn't support this directive");
}
- virtual void EmitLocalCommonSymbol(MCSymbol *Symbol, uint64_t Size) {
+ virtual void EmitLocalCommonSymbol(MCSymbol *Symbol, uint64_t Size,
+ unsigned ByteAlignment) {
assert(0 && "macho doesn't support this directive");
}
virtual void EmitZerofill(const MCSection *Section, MCSymbol *Symbol = 0,
@@ -143,8 +144,9 @@ void MCMachOStreamer::EmitAssemblerFlag(MCAssemblerFlag Flag) {
// Do any generic stuff we need to do.
switch (Flag) {
case MCAF_SyntaxUnified: return; // no-op here.
- case MCAF_Code16: return; // no-op here.
- case MCAF_Code32: return; // no-op here.
+ case MCAF_Code16: return; // Change parsing mode; no-op here.
+ case MCAF_Code32: return; // Change parsing mode; no-op here.
+ case MCAF_Code64: return; // Change parsing mode; no-op here.
case MCAF_SubsectionsViaSymbols:
getAssembler().setSubsectionsViaSymbols(true);
return;
@@ -207,8 +209,8 @@ void MCMachOStreamer::EmitSymbolAttribute(MCSymbol *Symbol,
case MCSA_ELF_TypeCommon:
case MCSA_ELF_TypeNoType:
case MCSA_ELF_TypeGnuUniqueObject:
- case MCSA_IndirectSymbol:
case MCSA_Hidden:
+ case MCSA_IndirectSymbol:
case MCSA_Internal:
case MCSA_Protected:
case MCSA_Weak:
@@ -410,10 +412,10 @@ void MCMachOStreamer::Finish() {
this->MCObjectStreamer::Finish();
}
-MCStreamer *llvm::createMachOStreamer(MCContext &Context, TargetAsmBackend &TAB,
+MCStreamer *llvm::createMachOStreamer(MCContext &Context, MCAsmBackend &MAB,
raw_ostream &OS, MCCodeEmitter *CE,
bool RelaxAll) {
- MCMachOStreamer *S = new MCMachOStreamer(Context, TAB, OS, CE);
+ MCMachOStreamer *S = new MCMachOStreamer(Context, MAB, OS, CE);
if (RelaxAll)
S->getAssembler().setRelaxAll(true);
return S;
diff --git a/lib/MC/MCModule.cpp b/lib/MC/MCModule.cpp
new file mode 100644
index 0000000..b1d09d9
--- /dev/null
+++ b/lib/MC/MCModule.cpp
@@ -0,0 +1,45 @@
+//===- lib/MC/MCModule.cpp - MCModule implementation ----------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/MC/MCAtom.h"
+#include "llvm/MC/MCModule.h"
+
+using namespace llvm;
+
+MCAtom *MCModule::createAtom(MCAtom::AtomType Type,
+ uint64_t Begin, uint64_t End) {
+ assert(Begin < End && "Creating MCAtom with endpoints reversed?");
+
+ // Check for atoms already covering this range.
+ IntervalMap<uint64_t, MCAtom*>::iterator I = OffsetMap.find(Begin);
+ assert((!I.valid() || I.start() < End) && "Offset range already occupied!");
+
+ // Create the new atom and add it to our maps.
+ MCAtom *NewAtom = new MCAtom(Type, this, Begin, End);
+ AtomAllocationTracker.insert(NewAtom);
+ OffsetMap.insert(Begin, End, NewAtom);
+ return NewAtom;
+}
+
+// remap - Update the interval mapping for an atom.
+void MCModule::remap(MCAtom *Atom, uint64_t NewBegin, uint64_t NewEnd) {
+ // Find and erase the old mapping.
+ IntervalMap<uint64_t, MCAtom*>::iterator I = OffsetMap.find(Atom->Begin);
+ assert(I.valid() && "Atom offset not found in module!");
+ assert(*I == Atom && "Previous atom mapping was invalid!");
+ I.erase();
+
+ // Insert the new mapping.
+ OffsetMap.insert(NewBegin, NewEnd, Atom);
+
+ // Update the atom internal bounds.
+ Atom->Begin = NewBegin;
+ Atom->End = NewEnd;
+}
+
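MCModule::remap keeps the interval map and the atom's own bounds in sync: erase the old interval, insert the new one, then update Begin/End. A toy model of that bookkeeping, using std::map keyed by Begin as a stand-in for llvm::IntervalMap:

#include <cassert>
#include <cstdint>
#include <map>

struct AtomModel { uint64_t Begin, End; };

static void remapModel(std::map<uint64_t, AtomModel*> &OffsetMap, AtomModel *A,
                       uint64_t NewBegin, uint64_t NewEnd) {
  OffsetMap.erase(A->Begin); // drop the stale interval first
  OffsetMap[NewBegin] = A;   // record the new one
  A->Begin = NewBegin;       // keep the atom's own bounds in sync
  A->End = NewEnd;
}

int main() {
  std::map<uint64_t, AtomModel*> OffsetMap;
  AtomModel A = { 0x100, 0x10f };
  OffsetMap[A.Begin] = &A;
  remapModel(OffsetMap, &A, 0x100, 0x11f); // grow the atom by 16 bytes
  assert(OffsetMap.count(0x100) && A.End == 0x11f);
  return 0;
}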
diff --git a/lib/MC/MCNullStreamer.cpp b/lib/MC/MCNullStreamer.cpp
index 9577af0..a6c0adb 100644
--- a/lib/MC/MCNullStreamer.cpp
+++ b/lib/MC/MCNullStreamer.cpp
@@ -59,8 +59,8 @@ namespace {
virtual void EmitELFSize(MCSymbol *Symbol, const MCExpr *Value) {}
virtual void EmitCommonSymbol(MCSymbol *Symbol, uint64_t Size,
unsigned ByteAlignment) {}
- virtual void EmitLocalCommonSymbol(MCSymbol *Symbol, uint64_t Size) {}
-
+ virtual void EmitLocalCommonSymbol(MCSymbol *Symbol, uint64_t Size,
+ unsigned ByteAlignment) {}
virtual void EmitZerofill(const MCSection *Section, MCSymbol *Symbol = 0,
unsigned Size = 0, unsigned ByteAlignment = 0) {}
virtual void EmitTBSSSymbol(const MCSection *Section, MCSymbol *Symbol,
diff --git a/lib/MC/MCObjectFileInfo.cpp b/lib/MC/MCObjectFileInfo.cpp
new file mode 100644
index 0000000..df8b99d
--- /dev/null
+++ b/lib/MC/MCObjectFileInfo.cpp
@@ -0,0 +1,554 @@
+//===-- MCObjectFileInfo.cpp - Object File Information --------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/MC/MCObjectFileInfo.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCSection.h"
+#include "llvm/MC/MCSectionCOFF.h"
+#include "llvm/MC/MCSectionELF.h"
+#include "llvm/MC/MCSectionMachO.h"
+#include "llvm/ADT/Triple.h"
+using namespace llvm;
+
+void MCObjectFileInfo::InitMachOMCObjectFileInfo(Triple T) {
+ // MachO
+ IsFunctionEHFrameSymbolPrivate = false;
+ SupportsWeakOmittedEHFrame = false;
+
+ PersonalityEncoding = dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel
+ | dwarf::DW_EH_PE_sdata4;
+ LSDAEncoding = FDEEncoding = FDECFIEncoding = dwarf::DW_EH_PE_pcrel;
+ TTypeEncoding = dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel |
+ dwarf::DW_EH_PE_sdata4;
+
+ // .comm doesn't support alignment before Leopard.
+ if (T.isMacOSX() && T.isMacOSXVersionLT(10, 5))
+ CommDirectiveSupportsAlignment = false;
+
+ TextSection // .text
+ = Ctx->getMachOSection("__TEXT", "__text",
+ MCSectionMachO::S_ATTR_PURE_INSTRUCTIONS,
+ SectionKind::getText());
+ DataSection // .data
+ = Ctx->getMachOSection("__DATA", "__data", 0,
+ SectionKind::getDataRel());
+
+ TLSDataSection // .tdata
+ = Ctx->getMachOSection("__DATA", "__thread_data",
+ MCSectionMachO::S_THREAD_LOCAL_REGULAR,
+ SectionKind::getDataRel());
+ TLSBSSSection // .tbss
+ = Ctx->getMachOSection("__DATA", "__thread_bss",
+ MCSectionMachO::S_THREAD_LOCAL_ZEROFILL,
+ SectionKind::getThreadBSS());
+
+ // TODO: Verify datarel below.
+ TLSTLVSection // .tlv
+ = Ctx->getMachOSection("__DATA", "__thread_vars",
+ MCSectionMachO::S_THREAD_LOCAL_VARIABLES,
+ SectionKind::getDataRel());
+
+ TLSThreadInitSection
+ = Ctx->getMachOSection("__DATA", "__thread_init",
+ MCSectionMachO::S_THREAD_LOCAL_INIT_FUNCTION_POINTERS,
+ SectionKind::getDataRel());
+
+ CStringSection // .cstring
+ = Ctx->getMachOSection("__TEXT", "__cstring",
+ MCSectionMachO::S_CSTRING_LITERALS,
+ SectionKind::getMergeable1ByteCString());
+ UStringSection
+ = Ctx->getMachOSection("__TEXT","__ustring", 0,
+ SectionKind::getMergeable2ByteCString());
+ FourByteConstantSection // .literal4
+ = Ctx->getMachOSection("__TEXT", "__literal4",
+ MCSectionMachO::S_4BYTE_LITERALS,
+ SectionKind::getMergeableConst4());
+ EightByteConstantSection // .literal8
+ = Ctx->getMachOSection("__TEXT", "__literal8",
+ MCSectionMachO::S_8BYTE_LITERALS,
+ SectionKind::getMergeableConst8());
+
+ // ld_classic doesn't support .literal16 in 32-bit mode, and ld64 falls back
+ // to using it in -static mode.
+ SixteenByteConstantSection = 0;
+ if (RelocM != Reloc::Static &&
+ T.getArch() != Triple::x86_64 && T.getArch() != Triple::ppc64)
+ SixteenByteConstantSection = // .literal16
+ Ctx->getMachOSection("__TEXT", "__literal16",
+ MCSectionMachO::S_16BYTE_LITERALS,
+ SectionKind::getMergeableConst16());
+
+ ReadOnlySection // .const
+ = Ctx->getMachOSection("__TEXT", "__const", 0,
+ SectionKind::getReadOnly());
+
+ TextCoalSection
+ = Ctx->getMachOSection("__TEXT", "__textcoal_nt",
+ MCSectionMachO::S_COALESCED |
+ MCSectionMachO::S_ATTR_PURE_INSTRUCTIONS,
+ SectionKind::getText());
+ ConstTextCoalSection
+ = Ctx->getMachOSection("__TEXT", "__const_coal",
+ MCSectionMachO::S_COALESCED,
+ SectionKind::getReadOnly());
+ ConstDataSection // .const_data
+ = Ctx->getMachOSection("__DATA", "__const", 0,
+ SectionKind::getReadOnlyWithRel());
+ DataCoalSection
+ = Ctx->getMachOSection("__DATA","__datacoal_nt",
+ MCSectionMachO::S_COALESCED,
+ SectionKind::getDataRel());
+ DataCommonSection
+ = Ctx->getMachOSection("__DATA","__common",
+ MCSectionMachO::S_ZEROFILL,
+ SectionKind::getBSS());
+ DataBSSSection
+ = Ctx->getMachOSection("__DATA","__bss", MCSectionMachO::S_ZEROFILL,
+ SectionKind::getBSS());
+
+
+ LazySymbolPointerSection
+ = Ctx->getMachOSection("__DATA", "__la_symbol_ptr",
+ MCSectionMachO::S_LAZY_SYMBOL_POINTERS,
+ SectionKind::getMetadata());
+ NonLazySymbolPointerSection
+ = Ctx->getMachOSection("__DATA", "__nl_symbol_ptr",
+ MCSectionMachO::S_NON_LAZY_SYMBOL_POINTERS,
+ SectionKind::getMetadata());
+
+ if (RelocM == Reloc::Static) {
+ StaticCtorSection
+ = Ctx->getMachOSection("__TEXT", "__constructor", 0,
+ SectionKind::getDataRel());
+ StaticDtorSection
+ = Ctx->getMachOSection("__TEXT", "__destructor", 0,
+ SectionKind::getDataRel());
+ } else {
+ StaticCtorSection
+ = Ctx->getMachOSection("__DATA", "__mod_init_func",
+ MCSectionMachO::S_MOD_INIT_FUNC_POINTERS,
+ SectionKind::getDataRel());
+ StaticDtorSection
+ = Ctx->getMachOSection("__DATA", "__mod_term_func",
+ MCSectionMachO::S_MOD_TERM_FUNC_POINTERS,
+ SectionKind::getDataRel());
+ }
+
+ // Exception Handling.
+ LSDASection = Ctx->getMachOSection("__TEXT", "__gcc_except_tab", 0,
+ SectionKind::getReadOnlyWithRel());
+
+ if (T.isMacOSX() && !T.isMacOSXVersionLT(10, 6))
+ CompactUnwindSection =
+ Ctx->getMachOSection("__LD", "__compact_unwind",
+ MCSectionMachO::S_ATTR_DEBUG,
+ SectionKind::getReadOnly());
+
+ // Debug Information.
+ DwarfAbbrevSection =
+ Ctx->getMachOSection("__DWARF", "__debug_abbrev",
+ MCSectionMachO::S_ATTR_DEBUG,
+ SectionKind::getMetadata());
+ DwarfInfoSection =
+ Ctx->getMachOSection("__DWARF", "__debug_info",
+ MCSectionMachO::S_ATTR_DEBUG,
+ SectionKind::getMetadata());
+ DwarfLineSection =
+ Ctx->getMachOSection("__DWARF", "__debug_line",
+ MCSectionMachO::S_ATTR_DEBUG,
+ SectionKind::getMetadata());
+ DwarfFrameSection =
+ Ctx->getMachOSection("__DWARF", "__debug_frame",
+ MCSectionMachO::S_ATTR_DEBUG,
+ SectionKind::getMetadata());
+ DwarfPubNamesSection =
+ Ctx->getMachOSection("__DWARF", "__debug_pubnames",
+ MCSectionMachO::S_ATTR_DEBUG,
+ SectionKind::getMetadata());
+ DwarfPubTypesSection =
+ Ctx->getMachOSection("__DWARF", "__debug_pubtypes",
+ MCSectionMachO::S_ATTR_DEBUG,
+ SectionKind::getMetadata());
+ DwarfStrSection =
+ Ctx->getMachOSection("__DWARF", "__debug_str",
+ MCSectionMachO::S_ATTR_DEBUG,
+ SectionKind::getMetadata());
+ DwarfLocSection =
+ Ctx->getMachOSection("__DWARF", "__debug_loc",
+ MCSectionMachO::S_ATTR_DEBUG,
+ SectionKind::getMetadata());
+ DwarfARangesSection =
+ Ctx->getMachOSection("__DWARF", "__debug_aranges",
+ MCSectionMachO::S_ATTR_DEBUG,
+ SectionKind::getMetadata());
+ DwarfRangesSection =
+ Ctx->getMachOSection("__DWARF", "__debug_ranges",
+ MCSectionMachO::S_ATTR_DEBUG,
+ SectionKind::getMetadata());
+ DwarfMacroInfoSection =
+ Ctx->getMachOSection("__DWARF", "__debug_macinfo",
+ MCSectionMachO::S_ATTR_DEBUG,
+ SectionKind::getMetadata());
+ DwarfDebugInlineSection =
+ Ctx->getMachOSection("__DWARF", "__debug_inlined",
+ MCSectionMachO::S_ATTR_DEBUG,
+ SectionKind::getMetadata());
+
+ TLSExtraDataSection = TLSTLVSection;
+}
+
+void MCObjectFileInfo::InitELFMCObjectFileInfo(Triple T) {
+ if (T.getArch() == Triple::x86) {
+ PersonalityEncoding = (RelocM == Reloc::PIC_)
+ ? dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4
+ : dwarf::DW_EH_PE_absptr;
+ LSDAEncoding = (RelocM == Reloc::PIC_)
+ ? dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4
+ : dwarf::DW_EH_PE_absptr;
+ FDEEncoding = FDECFIEncoding = (RelocM == Reloc::PIC_)
+ ? dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4
+ : dwarf::DW_EH_PE_absptr;
+ TTypeEncoding = (RelocM == Reloc::PIC_)
+ ? dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4
+ : dwarf::DW_EH_PE_absptr;
+ } else if (T.getArch() == Triple::x86_64) {
+ FDECFIEncoding = dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4;
+
+ if (RelocM == Reloc::PIC_) {
+ PersonalityEncoding = dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel |
+ ((CMModel == CodeModel::Small || CMModel == CodeModel::Medium)
+ ? dwarf::DW_EH_PE_sdata4 : dwarf::DW_EH_PE_sdata8);
+ LSDAEncoding = dwarf::DW_EH_PE_pcrel |
+ (CMModel == CodeModel::Small
+ ? dwarf::DW_EH_PE_sdata4 : dwarf::DW_EH_PE_sdata8);
+ FDEEncoding = dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4;
+ TTypeEncoding = dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel |
+ ((CMModel == CodeModel::Small || CMModel == CodeModel::Medium)
+ ? dwarf::DW_EH_PE_sdata4 : dwarf::DW_EH_PE_sdata8);
+ } else {
+ PersonalityEncoding =
+ (CMModel == CodeModel::Small || CMModel == CodeModel::Medium)
+ ? dwarf::DW_EH_PE_udata4 : dwarf::DW_EH_PE_absptr;
+ LSDAEncoding = (CMModel == CodeModel::Small)
+ ? dwarf::DW_EH_PE_udata4 : dwarf::DW_EH_PE_absptr;
+ FDEEncoding = dwarf::DW_EH_PE_udata4;
+ TTypeEncoding = (CMModel == CodeModel::Small)
+ ? dwarf::DW_EH_PE_udata4 : dwarf::DW_EH_PE_absptr;
+ }
+ }
+
+ // ELF
+ BSSSection =
+ Ctx->getELFSection(".bss", ELF::SHT_NOBITS,
+ ELF::SHF_WRITE |ELF::SHF_ALLOC,
+ SectionKind::getBSS());
+
+ TextSection =
+ Ctx->getELFSection(".text", ELF::SHT_PROGBITS,
+ ELF::SHF_EXECINSTR |
+ ELF::SHF_ALLOC,
+ SectionKind::getText());
+
+ DataSection =
+ Ctx->getELFSection(".data", ELF::SHT_PROGBITS,
+ ELF::SHF_WRITE |ELF::SHF_ALLOC,
+ SectionKind::getDataRel());
+
+ ReadOnlySection =
+ Ctx->getELFSection(".rodata", ELF::SHT_PROGBITS,
+ ELF::SHF_ALLOC,
+ SectionKind::getReadOnly());
+
+ TLSDataSection =
+ Ctx->getELFSection(".tdata", ELF::SHT_PROGBITS,
+ ELF::SHF_ALLOC | ELF::SHF_TLS |
+ ELF::SHF_WRITE,
+ SectionKind::getThreadData());
+
+ TLSBSSSection =
+ Ctx->getELFSection(".tbss", ELF::SHT_NOBITS,
+ ELF::SHF_ALLOC | ELF::SHF_TLS |
+ ELF::SHF_WRITE,
+ SectionKind::getThreadBSS());
+
+ DataRelSection =
+ Ctx->getELFSection(".data.rel", ELF::SHT_PROGBITS,
+ ELF::SHF_ALLOC |ELF::SHF_WRITE,
+ SectionKind::getDataRel());
+
+ DataRelLocalSection =
+ Ctx->getELFSection(".data.rel.local", ELF::SHT_PROGBITS,
+ ELF::SHF_ALLOC |ELF::SHF_WRITE,
+ SectionKind::getDataRelLocal());
+
+ DataRelROSection =
+ Ctx->getELFSection(".data.rel.ro", ELF::SHT_PROGBITS,
+ ELF::SHF_ALLOC |ELF::SHF_WRITE,
+ SectionKind::getReadOnlyWithRel());
+
+ DataRelROLocalSection =
+ Ctx->getELFSection(".data.rel.ro.local", ELF::SHT_PROGBITS,
+ ELF::SHF_ALLOC |ELF::SHF_WRITE,
+ SectionKind::getReadOnlyWithRelLocal());
+
+ MergeableConst4Section =
+ Ctx->getELFSection(".rodata.cst4", ELF::SHT_PROGBITS,
+ ELF::SHF_ALLOC |ELF::SHF_MERGE,
+ SectionKind::getMergeableConst4());
+
+ MergeableConst8Section =
+ Ctx->getELFSection(".rodata.cst8", ELF::SHT_PROGBITS,
+ ELF::SHF_ALLOC |ELF::SHF_MERGE,
+ SectionKind::getMergeableConst8());
+
+ MergeableConst16Section =
+ Ctx->getELFSection(".rodata.cst16", ELF::SHT_PROGBITS,
+ ELF::SHF_ALLOC |ELF::SHF_MERGE,
+ SectionKind::getMergeableConst16());
+
+ StaticCtorSection =
+ Ctx->getELFSection(".ctors", ELF::SHT_PROGBITS,
+ ELF::SHF_ALLOC |ELF::SHF_WRITE,
+ SectionKind::getDataRel());
+
+ StaticDtorSection =
+ Ctx->getELFSection(".dtors", ELF::SHT_PROGBITS,
+ ELF::SHF_ALLOC |ELF::SHF_WRITE,
+ SectionKind::getDataRel());
+
+ // Exception Handling Sections.
+
+ // FIXME: We're emitting LSDA info into a readonly section on ELF, even though
+ // it contains relocatable pointers. In PIC mode, this is probably a big
+ // runtime hit for C++ apps. Either the contents of the LSDA need to be
+ // adjusted or this should be a data section.
+ LSDASection =
+ Ctx->getELFSection(".gcc_except_table", ELF::SHT_PROGBITS,
+ ELF::SHF_ALLOC,
+ SectionKind::getReadOnly());
+
+ // Debug Info Sections.
+ DwarfAbbrevSection =
+ Ctx->getELFSection(".debug_abbrev", ELF::SHT_PROGBITS, 0,
+ SectionKind::getMetadata());
+ DwarfInfoSection =
+ Ctx->getELFSection(".debug_info", ELF::SHT_PROGBITS, 0,
+ SectionKind::getMetadata());
+ DwarfLineSection =
+ Ctx->getELFSection(".debug_line", ELF::SHT_PROGBITS, 0,
+ SectionKind::getMetadata());
+ DwarfFrameSection =
+ Ctx->getELFSection(".debug_frame", ELF::SHT_PROGBITS, 0,
+ SectionKind::getMetadata());
+ DwarfPubNamesSection =
+ Ctx->getELFSection(".debug_pubnames", ELF::SHT_PROGBITS, 0,
+ SectionKind::getMetadata());
+ DwarfPubTypesSection =
+ Ctx->getELFSection(".debug_pubtypes", ELF::SHT_PROGBITS, 0,
+ SectionKind::getMetadata());
+ DwarfStrSection =
+ Ctx->getELFSection(".debug_str", ELF::SHT_PROGBITS, 0,
+ SectionKind::getMetadata());
+ DwarfLocSection =
+ Ctx->getELFSection(".debug_loc", ELF::SHT_PROGBITS, 0,
+ SectionKind::getMetadata());
+ DwarfARangesSection =
+ Ctx->getELFSection(".debug_aranges", ELF::SHT_PROGBITS, 0,
+ SectionKind::getMetadata());
+ DwarfRangesSection =
+ Ctx->getELFSection(".debug_ranges", ELF::SHT_PROGBITS, 0,
+ SectionKind::getMetadata());
+ DwarfMacroInfoSection =
+ Ctx->getELFSection(".debug_macinfo", ELF::SHT_PROGBITS, 0,
+ SectionKind::getMetadata());
+}
+
+
+void MCObjectFileInfo::InitCOFFMCObjectFileInfo(Triple T) {
+ // COFF
+ TextSection =
+ Ctx->getCOFFSection(".text",
+ COFF::IMAGE_SCN_CNT_CODE |
+ COFF::IMAGE_SCN_MEM_EXECUTE |
+ COFF::IMAGE_SCN_MEM_READ,
+ SectionKind::getText());
+ DataSection =
+ Ctx->getCOFFSection(".data",
+ COFF::IMAGE_SCN_CNT_INITIALIZED_DATA |
+ COFF::IMAGE_SCN_MEM_READ |
+ COFF::IMAGE_SCN_MEM_WRITE,
+ SectionKind::getDataRel());
+ ReadOnlySection =
+ Ctx->getCOFFSection(".rdata",
+ COFF::IMAGE_SCN_CNT_INITIALIZED_DATA |
+ COFF::IMAGE_SCN_MEM_READ,
+ SectionKind::getReadOnly());
+ StaticCtorSection =
+ Ctx->getCOFFSection(".ctors",
+ COFF::IMAGE_SCN_CNT_INITIALIZED_DATA |
+ COFF::IMAGE_SCN_MEM_READ |
+ COFF::IMAGE_SCN_MEM_WRITE,
+ SectionKind::getDataRel());
+ StaticDtorSection =
+ Ctx->getCOFFSection(".dtors",
+ COFF::IMAGE_SCN_CNT_INITIALIZED_DATA |
+ COFF::IMAGE_SCN_MEM_READ |
+ COFF::IMAGE_SCN_MEM_WRITE,
+ SectionKind::getDataRel());
+
+ // FIXME: We're emitting LSDA info into a readonly section on COFF, even
+ // though it contains relocatable pointers. In PIC mode, this is probably a
+ // big runtime hit for C++ apps. Either the contents of the LSDA need to be
+ // adjusted or this should be a data section.
+ LSDASection =
+ Ctx->getCOFFSection(".gcc_except_table",
+ COFF::IMAGE_SCN_CNT_INITIALIZED_DATA |
+ COFF::IMAGE_SCN_MEM_READ,
+ SectionKind::getReadOnly());
+
+ // Debug info.
+ DwarfAbbrevSection =
+ Ctx->getCOFFSection(".debug_abbrev",
+ COFF::IMAGE_SCN_MEM_DISCARDABLE |
+ COFF::IMAGE_SCN_MEM_READ,
+ SectionKind::getMetadata());
+ DwarfInfoSection =
+ Ctx->getCOFFSection(".debug_info",
+ COFF::IMAGE_SCN_MEM_DISCARDABLE |
+ COFF::IMAGE_SCN_MEM_READ,
+ SectionKind::getMetadata());
+ DwarfLineSection =
+ Ctx->getCOFFSection(".debug_line",
+ COFF::IMAGE_SCN_MEM_DISCARDABLE |
+ COFF::IMAGE_SCN_MEM_READ,
+ SectionKind::getMetadata());
+ DwarfFrameSection =
+ Ctx->getCOFFSection(".debug_frame",
+ COFF::IMAGE_SCN_MEM_DISCARDABLE |
+ COFF::IMAGE_SCN_MEM_READ,
+ SectionKind::getMetadata());
+ DwarfPubNamesSection =
+ Ctx->getCOFFSection(".debug_pubnames",
+ COFF::IMAGE_SCN_MEM_DISCARDABLE |
+ COFF::IMAGE_SCN_MEM_READ,
+ SectionKind::getMetadata());
+ DwarfPubTypesSection =
+ Ctx->getCOFFSection(".debug_pubtypes",
+ COFF::IMAGE_SCN_MEM_DISCARDABLE |
+ COFF::IMAGE_SCN_MEM_READ,
+ SectionKind::getMetadata());
+ DwarfStrSection =
+ Ctx->getCOFFSection(".debug_str",
+ COFF::IMAGE_SCN_MEM_DISCARDABLE |
+ COFF::IMAGE_SCN_MEM_READ,
+ SectionKind::getMetadata());
+ DwarfLocSection =
+ Ctx->getCOFFSection(".debug_loc",
+ COFF::IMAGE_SCN_MEM_DISCARDABLE |
+ COFF::IMAGE_SCN_MEM_READ,
+ SectionKind::getMetadata());
+ DwarfARangesSection =
+ Ctx->getCOFFSection(".debug_aranges",
+ COFF::IMAGE_SCN_MEM_DISCARDABLE |
+ COFF::IMAGE_SCN_MEM_READ,
+ SectionKind::getMetadata());
+ DwarfRangesSection =
+ Ctx->getCOFFSection(".debug_ranges",
+ COFF::IMAGE_SCN_MEM_DISCARDABLE |
+ COFF::IMAGE_SCN_MEM_READ,
+ SectionKind::getMetadata());
+ DwarfMacroInfoSection =
+ Ctx->getCOFFSection(".debug_macinfo",
+ COFF::IMAGE_SCN_MEM_DISCARDABLE |
+ COFF::IMAGE_SCN_MEM_READ,
+ SectionKind::getMetadata());
+
+ DrectveSection =
+ Ctx->getCOFFSection(".drectve",
+ COFF::IMAGE_SCN_LNK_INFO,
+ SectionKind::getMetadata());
+
+ PDataSection =
+ Ctx->getCOFFSection(".pdata",
+ COFF::IMAGE_SCN_CNT_INITIALIZED_DATA |
+ COFF::IMAGE_SCN_MEM_READ |
+ COFF::IMAGE_SCN_MEM_WRITE,
+ SectionKind::getDataRel());
+
+ XDataSection =
+ Ctx->getCOFFSection(".xdata",
+ COFF::IMAGE_SCN_CNT_INITIALIZED_DATA |
+ COFF::IMAGE_SCN_MEM_READ |
+ COFF::IMAGE_SCN_MEM_WRITE,
+ SectionKind::getDataRel());
+}
+
+void MCObjectFileInfo::InitMCObjectFileInfo(StringRef TT, Reloc::Model relocm,
+ CodeModel::Model cm,
+ MCContext &ctx) {
+ RelocM = relocm;
+ CMModel = cm;
+ Ctx = &ctx;
+
+ // Common.
+ CommDirectiveSupportsAlignment = true;
+ SupportsWeakOmittedEHFrame = true;
+ IsFunctionEHFrameSymbolPrivate = true;
+
+ PersonalityEncoding = LSDAEncoding = FDEEncoding = FDECFIEncoding =
+ TTypeEncoding = dwarf::DW_EH_PE_absptr;
+
+ EHFrameSection = 0; // Created on demand.
+ CompactUnwindSection = 0; // Used only by selected targets.
+
+ Triple T(TT);
+ Triple::ArchType Arch = T.getArch();
+ // FIXME: Checking for Arch here to filter out bogus triples such as
+  // cellspu-apple-darwin. Perhaps we should fix this in Triple?
+ if ((Arch == Triple::x86 || Arch == Triple::x86_64 ||
+ Arch == Triple::arm || Arch == Triple::thumb ||
+ Arch == Triple::ppc || Arch == Triple::ppc64 ||
+ Arch == Triple::UnknownArch) &&
+ (T.isOSDarwin() || T.getEnvironment() == Triple::MachO)) {
+ Env = IsMachO;
+ InitMachOMCObjectFileInfo(T);
+ } else if ((Arch == Triple::x86 || Arch == Triple::x86_64) &&
+ (T.getOS() == Triple::MinGW32 || T.getOS() == Triple::Cygwin ||
+ T.getOS() == Triple::Win32)) {
+ Env = IsCOFF;
+ InitCOFFMCObjectFileInfo(T);
+ } else {
+ Env = IsELF;
+ InitELFMCObjectFileInfo(T);
+ }
+}
+
+void MCObjectFileInfo::InitEHFrameSection() {
+ if (Env == IsMachO)
+ EHFrameSection =
+ Ctx->getMachOSection("__TEXT", "__eh_frame",
+ MCSectionMachO::S_COALESCED |
+ MCSectionMachO::S_ATTR_NO_TOC |
+ MCSectionMachO::S_ATTR_STRIP_STATIC_SYMS |
+ MCSectionMachO::S_ATTR_LIVE_SUPPORT,
+ SectionKind::getReadOnly());
+ else if (Env == IsELF)
+ EHFrameSection =
+ Ctx->getELFSection(".eh_frame", ELF::SHT_PROGBITS,
+ ELF::SHF_ALLOC,
+ SectionKind::getDataRel());
+ else
+ EHFrameSection =
+ Ctx->getCOFFSection(".eh_frame",
+ COFF::IMAGE_SCN_CNT_INITIALIZED_DATA |
+ COFF::IMAGE_SCN_MEM_READ |
+ COFF::IMAGE_SCN_MEM_WRITE,
+ SectionKind::getDataRel());
+}
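
A minimal sketch of how a client seeds the new class (caller-owned context assumed; header locations as in the release_30 tree):

#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCObjectFileInfo.h"
#include "llvm/Support/CodeGen.h" // Reloc::Model, CodeModel::Model

void initFileInfo(llvm::MCContext &Ctx, llvm::MCObjectFileInfo &MOFI) {
  // The triple string drives the Mach-O / COFF / ELF dispatch shown above;
  // a Darwin triple routes to InitMachOMCObjectFileInfo().
  MOFI.InitMCObjectFileInfo("x86_64-apple-darwin11",
                            llvm::Reloc::PIC_, llvm::CodeModel::Default, Ctx);
  const llvm::MCSection *Text = MOFI.getTextSection();
  (void)Text; // e.g. hand to a streamer via SwitchSection()
}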
diff --git a/lib/MC/MCObjectStreamer.cpp b/lib/MC/MCObjectStreamer.cpp
index 8635aac..a04ae08 100644
--- a/lib/MC/MCObjectStreamer.cpp
+++ b/lib/MC/MCObjectStreamer.cpp
@@ -17,10 +17,10 @@
#include "llvm/MC/MCDwarf.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCSymbol.h"
-#include "llvm/Target/TargetAsmBackend.h"
+#include "llvm/MC/MCAsmBackend.h"
using namespace llvm;
-MCObjectStreamer::MCObjectStreamer(MCContext &Context, TargetAsmBackend &TAB,
+MCObjectStreamer::MCObjectStreamer(MCContext &Context, MCAsmBackend &TAB,
raw_ostream &OS, MCCodeEmitter *Emitter_)
: MCStreamer(Context),
Assembler(new MCAssembler(Context, TAB,
@@ -30,7 +30,7 @@ MCObjectStreamer::MCObjectStreamer(MCContext &Context, TargetAsmBackend &TAB,
{
}
-MCObjectStreamer::MCObjectStreamer(MCContext &Context, TargetAsmBackend &TAB,
+MCObjectStreamer::MCObjectStreamer(MCContext &Context, MCAsmBackend &TAB,
raw_ostream &OS, MCCodeEmitter *Emitter_,
MCAssembler *_Assembler)
: MCStreamer(Context), Assembler(_Assembler), CurSectionData(0)
diff --git a/lib/MC/MCParser/AsmLexer.cpp b/lib/MC/MCParser/AsmLexer.cpp
index 0c1f8f0..c76052d 100644
--- a/lib/MC/MCParser/AsmLexer.cpp
+++ b/lib/MC/MCParser/AsmLexer.cpp
@@ -24,6 +24,7 @@ using namespace llvm;
AsmLexer::AsmLexer(const MCAsmInfo &_MAI) : MAI(_MAI) {
CurBuf = NULL;
CurPtr = NULL;
+ isAtStartOfLine = true;
}
AsmLexer::~AsmLexer() {
@@ -146,7 +147,7 @@ AsmToken AsmLexer::LexLineComment() {
// FIXME: This is broken if we happen to hit a comment at the end of a file, which
// was .included, and which doesn't end with a newline.
int CurChar = getNextChar();
- while (CurChar != '\n' && CurChar != '\n' && CurChar != EOF)
+ while (CurChar != '\n' && CurChar != '\r' && CurChar != EOF)
CurChar = getNextChar();
if (CurChar == EOF)
@@ -334,6 +335,17 @@ StringRef AsmLexer::LexUntilEndOfStatement() {
return StringRef(TokStart, CurPtr-TokStart);
}
+StringRef AsmLexer::LexUntilEndOfLine() {
+ TokStart = CurPtr;
+
+ while (*CurPtr != '\n' &&
+ *CurPtr != '\r' &&
+ (*CurPtr != 0 || CurPtr != CurBuf->getBufferEnd())) {
+ ++CurPtr;
+ }
+ return StringRef(TokStart, CurPtr-TokStart);
+}
+
bool AsmLexer::isAtStartOfComment(char Char) {
// FIXME: This won't work for multi-character comment indicators like "//".
return Char == *MAI.getCommentString();
@@ -349,14 +361,29 @@ AsmToken AsmLexer::LexToken() {
// This always consumes at least one character.
int CurChar = getNextChar();
- if (isAtStartOfComment(CurChar))
+ if (isAtStartOfComment(CurChar)) {
+ // If this comment starts with a '#', then return the Hash token and let
+ // the assembler parser see if it can be parsed as a cpp line filename
+ // comment. We do this only if we are at the start of a line.
+ if (CurChar == '#' && isAtStartOfLine)
+ return AsmToken(AsmToken::Hash, StringRef(TokStart, 1));
+ isAtStartOfLine = true;
return LexLineComment();
+ }
if (isAtStatementSeparator(TokStart)) {
CurPtr += strlen(MAI.getSeparatorString()) - 1;
return AsmToken(AsmToken::EndOfStatement,
StringRef(TokStart, strlen(MAI.getSeparatorString())));
}
+ // If we're missing a newline at EOF, make sure we still get an
+ // EndOfStatement token before the Eof token.
+ if (CurChar == EOF && !isAtStartOfLine) {
+ isAtStartOfLine = true;
+ return AsmToken(AsmToken::EndOfStatement, StringRef(TokStart, 1));
+ }
+
+ isAtStartOfLine = false;
switch (CurChar) {
default:
// Handle identifier: [a-zA-Z_.][a-zA-Z0-9_$.@]*
@@ -373,6 +400,7 @@ AsmToken AsmLexer::LexToken() {
return LexToken();
case '\n': // FALL THROUGH.
case '\r':
+ isAtStartOfLine = true;
return AsmToken(AsmToken::EndOfStatement, StringRef(TokStart, 1));
case ':': return AsmToken(AsmToken::Colon, StringRef(TokStart, 1));
case '+': return AsmToken(AsmToken::Plus, StringRef(TokStart, 1));
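
The new isAtStartOfLine flag is what lets '#' be two different things: a Hash token at the start of a line (a candidate cpp line marker) and an ordinary comment anywhere else. A standalone model of that handshake (not the LLVM lexer; token names are illustrative):

enum Tok { HashTok, CommentTok, EndOfStatementTok, OtherTok, EofTok };

struct MiniLexer {
  const char *Cur;            // NUL-terminated input
  bool AtStartOfLine = true;

  Tok lex() {
    char C = *Cur;
    if (C == '\0')
      return EofTok;
    ++Cur;
    if (C == '\n' || C == '\r') {
      AtStartOfLine = true;   // the next token begins a fresh line
      return EndOfStatementTok;
    }
    if (C == '#' && AtStartOfLine)
      return HashTok;         // parser may read: # <number> "<filename>"
    if (C == '#') {           // mid-line '#': plain comment, eat the body
      while (*Cur && *Cur != '\n' && *Cur != '\r')
        ++Cur;
      return CommentTok;
    }
    AtStartOfLine = false;
    return OtherTok;
  }
};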
diff --git a/lib/MC/MCParser/AsmParser.cpp b/lib/MC/MCParser/AsmParser.cpp
index 0c181f3..1648757 100644
--- a/lib/MC/MCParser/AsmParser.cpp
+++ b/lib/MC/MCParser/AsmParser.cpp
@@ -18,22 +18,22 @@
#include "llvm/ADT/Twine.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCDwarf.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCParser/AsmCond.h"
#include "llvm/MC/MCParser/AsmLexer.h"
#include "llvm/MC/MCParser/MCAsmParser.h"
#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
+#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSectionMachO.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSymbol.h"
-#include "llvm/MC/MCDwarf.h"
+#include "llvm/MC/MCTargetAsmParser.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/SourceMgr.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetAsmInfo.h"
-#include "llvm/Target/TargetAsmParser.h"
#include <cctype>
#include <vector>
using namespace llvm;
@@ -87,6 +87,8 @@ private:
MCStreamer &Out;
const MCAsmInfo &MAI;
SourceMgr &SrcMgr;
+ SourceMgr::DiagHandlerTy SavedDiagHandler;
+ void *SavedDiagContext;
MCAsmParserExtension *GenericParser;
MCAsmParserExtension *PlatformParser;
@@ -115,8 +117,13 @@ private:
/// Flag tracking whether any errors have been encountered.
unsigned HadError : 1;
+ /// The values from the last parsed cpp hash file line comment if any.
+ StringRef CppHashFilename;
+ int64_t CppHashLineNumber;
+ SMLoc CppHashLoc;
+
public:
- AsmParser(const Target &T, SourceMgr &SM, MCContext &Ctx, MCStreamer &Out,
+ AsmParser(SourceMgr &SM, MCContext &Ctx, MCStreamer &Out,
const MCAsmInfo &MAI);
~AsmParser();
@@ -153,6 +160,8 @@ private:
void CheckForValidSection();
bool ParseStatement();
+ void EatToEndOfLine();
+ bool ParseCppHashLineFilenameComment(const SMLoc &L);
bool HandleMacroEntry(StringRef Name, SMLoc NameLoc, const Macro *M);
bool expandMacro(SmallString<256> &Buf, StringRef Body,
@@ -166,6 +175,7 @@ private:
bool ShowLine = true) const {
SrcMgr.PrintMessage(Loc, Msg, Type, ShowLine);
}
+ static void DiagHandler(const SMDiagnostic &Diag, void *Context);
/// EnterIncludeFile - Enter the specified file. This returns true on failure.
bool EnterIncludeFile(const std::string &Filename);
@@ -338,11 +348,16 @@ extern MCAsmParserExtension *createCOFFAsmParser();
enum { DEFAULT_ADDRSPACE = 0 };
-AsmParser::AsmParser(const Target &T, SourceMgr &_SM, MCContext &_Ctx,
+AsmParser::AsmParser(SourceMgr &_SM, MCContext &_Ctx,
MCStreamer &_Out, const MCAsmInfo &_MAI)
: Lexer(_MAI), Ctx(_Ctx), Out(_Out), MAI(_MAI), SrcMgr(_SM),
GenericParser(new GenericAsmParser), PlatformParser(0),
- CurBuffer(0), MacrosEnabled(true) {
+ CurBuffer(0), MacrosEnabled(true), CppHashLineNumber(0) {
+ // Save the old handler.
+ SavedDiagHandler = SrcMgr.getDiagHandler();
+ SavedDiagContext = SrcMgr.getDiagContext();
+ // Set our own handler which calls the saved handler.
+ SrcMgr.setDiagHandler(DiagHandler, this);
Lexer.setBuffer(SrcMgr.getMemoryBuffer(CurBuffer));
// Initialize the generic parser.
@@ -740,9 +755,12 @@ AsmParser::ApplyModifierToExpr(const MCExpr *E,
/// ParseExpression - Parse an expression and return it.
///
-/// expr ::= expr +,- expr -> lowest.
-/// expr ::= expr |,^,&,! expr -> middle.
-/// expr ::= expr *,/,%,<<,>> expr -> highest.
+/// expr ::= expr &&,|| expr -> lowest.
+/// expr ::= expr |,^,&,! expr
+/// expr ::= expr ==,!=,<>,<,<=,>,>= expr
+/// expr ::= expr <<,>> expr
+/// expr ::= expr +,- expr
+/// expr ::= expr *,/,% expr -> highest.
/// expr ::= primaryexpr
///
bool AsmParser::ParseExpression(const MCExpr *&Res, SMLoc &EndLoc) {
@@ -809,7 +827,7 @@ static unsigned getBinOpPrecedence(AsmToken::TokenKind K,
default:
return 0; // not a binop.
- // Lowest Precedence: &&, ||, @
+ // Lowest Precedence: &&, ||
case AsmToken::AmpAmp:
Kind = MCBinaryExpr::LAnd;
return 1;
@@ -852,30 +870,32 @@ static unsigned getBinOpPrecedence(AsmToken::TokenKind K,
Kind = MCBinaryExpr::GTE;
return 3;
+ // Intermediate Precedence: <<, >>
+ case AsmToken::LessLess:
+ Kind = MCBinaryExpr::Shl;
+ return 4;
+ case AsmToken::GreaterGreater:
+ Kind = MCBinaryExpr::Shr;
+ return 4;
+
// High Intermediate Precedence: +, -
case AsmToken::Plus:
Kind = MCBinaryExpr::Add;
- return 4;
+ return 5;
case AsmToken::Minus:
Kind = MCBinaryExpr::Sub;
- return 4;
+ return 5;
- // Highest Precedence: *, /, %, <<, >>
+ // Highest Precedence: *, /, %
case AsmToken::Star:
Kind = MCBinaryExpr::Mul;
- return 5;
+ return 6;
case AsmToken::Slash:
Kind = MCBinaryExpr::Div;
- return 5;
+ return 6;
case AsmToken::Percent:
Kind = MCBinaryExpr::Mod;
- return 5;
- case AsmToken::LessLess:
- Kind = MCBinaryExpr::Shl;
- return 5;
- case AsmToken::GreaterGreater:
- Kind = MCBinaryExpr::Shr;
- return 5;
+ return 6;
}
}
@@ -932,10 +952,8 @@ bool AsmParser::ParseStatement() {
StringRef IDVal;
int64_t LocalLabelVal = -1;
// A full line comment is a '#' as the first token.
- if (Lexer.is(AsmToken::Hash)) {
- EatToEndOfStatement();
- return false;
- }
+ if (Lexer.is(AsmToken::Hash))
+ return ParseCppHashLineFilenameComment(IDLoc);
// Allow an integer followed by a ':' as a directional local label.
if (Lexer.is(AsmToken::Integer)) {
@@ -1117,15 +1135,8 @@ bool AsmParser::ParseStatement() {
if (IDVal == ".globl" || IDVal == ".global")
return ParseDirectiveSymbolAttribute(MCSA_Global);
- // ELF only? Should it be here?
- if (IDVal == ".local")
- return ParseDirectiveSymbolAttribute(MCSA_Local);
- if (IDVal == ".hidden")
- return ParseDirectiveSymbolAttribute(MCSA_Hidden);
if (IDVal == ".indirect_symbol")
return ParseDirectiveSymbolAttribute(MCSA_IndirectSymbol);
- if (IDVal == ".internal")
- return ParseDirectiveSymbolAttribute(MCSA_Internal);
if (IDVal == ".lazy_reference")
return ParseDirectiveSymbolAttribute(MCSA_LazyReference);
if (IDVal == ".no_dead_strip")
@@ -1134,12 +1145,8 @@ bool AsmParser::ParseStatement() {
return ParseDirectiveSymbolAttribute(MCSA_SymbolResolver);
if (IDVal == ".private_extern")
return ParseDirectiveSymbolAttribute(MCSA_PrivateExtern);
- if (IDVal == ".protected")
- return ParseDirectiveSymbolAttribute(MCSA_Protected);
if (IDVal == ".reference")
return ParseDirectiveSymbolAttribute(MCSA_Reference);
- if (IDVal == ".weak")
- return ParseDirectiveSymbolAttribute(MCSA_Weak);
if (IDVal == ".weak_definition")
return ParseDirectiveSymbolAttribute(MCSA_WeakDefinition);
if (IDVal == ".weak_reference")
@@ -1157,7 +1164,7 @@ bool AsmParser::ParseStatement() {
if (IDVal == ".include")
return ParseDirectiveInclude();
- if (IDVal == ".code16" || IDVal == ".code32" || IDVal == ".code64")
+ if (IDVal == ".code16")
return TokError(Twine(IDVal) + " not supported yet");
// Look up the handler in the handler table.
@@ -1215,6 +1222,108 @@ bool AsmParser::ParseStatement() {
return false;
}
+/// EatToEndOfLine uses the Lexer to consume the characters up to the end of
+/// the line, since they may not be tokenizable into an end-of-line token.
+void AsmParser::EatToEndOfLine() {
+ if (!Lexer.is(AsmToken::EndOfStatement))
+ Lexer.LexUntilEndOfLine();
+ // Eat EOL.
+ Lex();
+}
+
+/// ParseCppHashLineFilenameComment parses a comment of the form:
+/// ::= # number "filename"
+/// or treats it as a plain full line comment if it lacks a number and string.
+bool AsmParser::ParseCppHashLineFilenameComment(const SMLoc &L) {
+ Lex(); // Eat the hash token.
+
+ if (getLexer().isNot(AsmToken::Integer)) {
+ // Consume the line since in cases it is not a well-formed line directive,
+ // as if were simply a full line comment.
+ EatToEndOfLine();
+ return false;
+ }
+
+ int64_t LineNumber = getTok().getIntVal();
+ Lex();
+
+ if (getLexer().isNot(AsmToken::String)) {
+ EatToEndOfLine();
+ return false;
+ }
+
+ StringRef Filename = getTok().getString();
+ // Get rid of the enclosing quotes.
+ Filename = Filename.substr(1, Filename.size()-2);
+
+ // Save the SMLoc, Filename and LineNumber for later use by diagnostics.
+ CppHashLoc = L;
+ CppHashFilename = Filename;
+ CppHashLineNumber = LineNumber;
+
+  // Ignore any trailing characters; they're just comment text.
+ EatToEndOfLine();
+ return false;
+}
+
+/// DiagHandler - uses the last parsed cpp hash line filename comment
+/// for the Filename and LineNo, if any, in the diagnostic.
+void AsmParser::DiagHandler(const SMDiagnostic &Diag, void *Context) {
+ const AsmParser *Parser = static_cast<const AsmParser*>(Context);
+ raw_ostream &OS = errs();
+
+ const SourceMgr &DiagSrcMgr = *Diag.getSourceMgr();
+ const SMLoc &DiagLoc = Diag.getLoc();
+ int DiagBuf = DiagSrcMgr.FindBufferContainingLoc(DiagLoc);
+ int CppHashBuf = Parser->SrcMgr.FindBufferContainingLoc(Parser->CppHashLoc);
+
+  // Like SourceMgr::PrintMessage(), we need to print the include stack, if
+  // any, before printing the message.
+ int DiagCurBuffer = DiagSrcMgr.FindBufferContainingLoc(DiagLoc);
+ if (!Parser->SavedDiagHandler && DiagCurBuffer > 0) {
+ SMLoc ParentIncludeLoc = DiagSrcMgr.getParentIncludeLoc(DiagCurBuffer);
+ DiagSrcMgr.PrintIncludeStack(ParentIncludeLoc, OS);
+ }
+
+  // If we have not parsed a cpp hash line filename comment, or the source
+  // manager or buffer changed (as in a nested include), just print the
+  // normal diagnostic using its Filename and LineNo.
+ if (!Parser->CppHashLineNumber ||
+ &DiagSrcMgr != &Parser->SrcMgr ||
+ DiagBuf != CppHashBuf) {
+ if (Parser->SavedDiagHandler)
+ Parser->SavedDiagHandler(Diag, Parser->SavedDiagContext);
+ else
+ Diag.Print(0, OS);
+ return;
+ }
+
+ // Use the CppHashFilename and calculate a line number based on the
+ // CppHashLoc and CppHashLineNumber relative to this Diag's SMLoc for
+ // the diagnostic.
+ const std::string Filename = Parser->CppHashFilename;
+
+ int DiagLocLineNo = DiagSrcMgr.FindLineNumber(DiagLoc, DiagBuf);
+ int CppHashLocLineNo =
+ Parser->SrcMgr.FindLineNumber(Parser->CppHashLoc, CppHashBuf);
+ int LineNo = Parser->CppHashLineNumber - 1 +
+ (DiagLocLineNo - CppHashLocLineNo);
+
+ SMDiagnostic NewDiag(*Diag.getSourceMgr(),
+ Diag.getLoc(),
+ Filename,
+ LineNo,
+ Diag.getColumnNo(),
+ Diag.getMessage(),
+ Diag.getLineContents(),
+ Diag.getShowLine());
+
+ if (Parser->SavedDiagHandler)
+ Parser->SavedDiagHandler(NewDiag, Parser->SavedDiagContext);
+ else
+ NewDiag.Print(0, OS);
+}
+
bool AsmParser::expandMacro(SmallString<256> &Buf, StringRef Body,
const std::vector<StringRef> &Parameters,
const std::vector<std::vector<AsmToken> > &A,
@@ -1923,12 +2032,17 @@ bool AsmParser::ParseDirectiveSymbolAttribute(MCSymbolAttr Attr) {
if (getLexer().isNot(AsmToken::EndOfStatement)) {
for (;;) {
StringRef Name;
+ SMLoc Loc = getTok().getLoc();
if (ParseIdentifier(Name))
- return TokError("expected identifier in directive");
+ return Error(Loc, "expected identifier in directive");
MCSymbol *Sym = getContext().GetOrCreateSymbol(Name);
+ // Assembler local symbols don't make any sense here. Complain loudly.
+ if (Sym->isTemporary())
+ return Error(Loc, "non-local symbol required in directive");
+
getStreamer().EmitSymbolAttribute(Sym, Attr);
if (getLexer().is(AsmToken::EndOfStatement))
@@ -2416,7 +2530,7 @@ bool GenericAsmParser::ParseRegisterOrRegisterNumber(int64_t &Register,
if (getParser().getTargetParser().ParseRegister(RegNo, DirectiveLoc,
DirectiveLoc))
return true;
- Register = getContext().getTargetAsmInfo().getDwarfRegNum(RegNo, true);
+ Register = getContext().getRegisterInfo().getDwarfRegNum(RegNo, true);
} else
return getParser().ParseAbsoluteExpression(Register);
@@ -2724,8 +2838,8 @@ bool GenericAsmParser::ParseDirectiveLEB128(StringRef DirName, SMLoc) {
/// \brief Create an MCAsmParser instance.
-MCAsmParser *llvm::createMCAsmParser(const Target &T, SourceMgr &SM,
+MCAsmParser *llvm::createMCAsmParser(SourceMgr &SM,
MCContext &C, MCStreamer &Out,
const MCAsmInfo &MAI) {
- return new AsmParser(T, SM, C, Out, MAI);
+ return new AsmParser(SM, C, Out, MAI);
}
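
The renumbered table gives six binding levels: 1 for &&/||, 2 for |/^/&/!, 3 for comparisons, 4 for <</>>, 5 for +/-, 6 for */ /%. A compact precedence-climbing sketch over a slice of that table (integer-only toy; the real parser builds MCBinaryExpr nodes and covers every operator):

#include <cstdio>

static const char *P; // cursor into the expression string

static long parsePrimary() {
  long V = 0;
  while (*P >= '0' && *P <= '9')
    V = V * 10 + (*P++ - '0');
  return V;
}

static int precOf(char C) {
  switch (C) {
  case '+': case '-':           return 5;
  case '*': case '/': case '%': return 6;
  default:                      return 0; // not a binop in this toy
  }
}

static long parseBinOpRHS(int MinPrec, long LHS) {
  for (;;) {
    int Prec = precOf(*P);
    if (Prec < MinPrec || Prec == 0)
      return LHS;
    char Op = *P++;
    long RHS = parsePrimary();
    while (precOf(*P) > Prec)          // a tighter operator takes RHS first
      RHS = parseBinOpRHS(Prec + 1, RHS);
    switch (Op) {
    case '+': LHS += RHS; break;
    case '-': LHS -= RHS; break;
    case '*': LHS *= RHS; break;
    case '/': LHS /= RHS; break;
    case '%': LHS %= RHS; break;
    }
  }
}

int main() {
  P = "2+3*4-10%4"; // 2 + (3*4) - (10%4) = 12
  std::printf("%ld\n", parseBinOpRHS(1, parsePrimary()));
}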
diff --git a/lib/MC/MCParser/CMakeLists.txt b/lib/MC/MCParser/CMakeLists.txt
index eaea9f6..299d281 100644
--- a/lib/MC/MCParser/CMakeLists.txt
+++ b/lib/MC/MCParser/CMakeLists.txt
@@ -7,5 +7,10 @@ add_llvm_library(LLVMMCParser
MCAsmLexer.cpp
MCAsmParser.cpp
MCAsmParserExtension.cpp
- TargetAsmParser.cpp
+ MCTargetAsmParser.cpp
+ )
+
+add_llvm_library_dependencies(LLVMMCParser
+ LLVMMC
+ LLVMSupport
)
diff --git a/lib/MC/MCParser/COFFAsmParser.cpp b/lib/MC/MCParser/COFFAsmParser.cpp
index 66ad384..185b516 100644
--- a/lib/MC/MCParser/COFFAsmParser.cpp
+++ b/lib/MC/MCParser/COFFAsmParser.cpp
@@ -8,15 +8,16 @@
//===----------------------------------------------------------------------===//
#include "llvm/MC/MCParser/MCAsmParserExtension.h"
+#include "llvm/ADT/StringSwitch.h"
#include "llvm/ADT/Twine.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCParser/MCAsmLexer.h"
+#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSectionCOFF.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCExpr.h"
-#include "llvm/Target/TargetAsmInfo.h"
-#include "llvm/Target/TargetAsmParser.h"
+#include "llvm/MC/MCTargetAsmParser.h"
#include "llvm/Support/COFF.h"
using namespace llvm;
@@ -72,6 +73,7 @@ class COFFAsmParser : public MCAsmParserExtension {
".seh_pushframe");
AddDirectiveHandler<&COFFAsmParser::ParseSEHDirectiveEndProlog>(
".seh_endprologue");
+ AddDirectiveHandler<&COFFAsmParser::ParseDirectiveSymbolAttribute>(".weak");
}
bool ParseSectionDirectiveText(StringRef, SMLoc) {
@@ -118,12 +120,44 @@ class COFFAsmParser : public MCAsmParserExtension {
bool ParseAtUnwindOrAtExcept(bool &unwind, bool &except);
bool ParseSEHRegisterNumber(unsigned &RegNo);
+ bool ParseDirectiveSymbolAttribute(StringRef Directive, SMLoc);
public:
COFFAsmParser() {}
};
} // end anonymous namespace.
+/// ParseDirectiveSymbolAttribute
+/// ::= { ".weak", ... } [ identifier ( , identifier )* ]
+bool COFFAsmParser::ParseDirectiveSymbolAttribute(StringRef Directive, SMLoc) {
+ MCSymbolAttr Attr = StringSwitch<MCSymbolAttr>(Directive)
+ .Case(".weak", MCSA_Weak)
+ .Default(MCSA_Invalid);
+ assert(Attr != MCSA_Invalid && "unexpected symbol attribute directive!");
+ if (getLexer().isNot(AsmToken::EndOfStatement)) {
+ for (;;) {
+ StringRef Name;
+
+ if (getParser().ParseIdentifier(Name))
+ return TokError("expected identifier in directive");
+
+ MCSymbol *Sym = getContext().GetOrCreateSymbol(Name);
+
+ getStreamer().EmitSymbolAttribute(Sym, Attr);
+
+ if (getLexer().is(AsmToken::EndOfStatement))
+ break;
+
+ if (getLexer().isNot(AsmToken::Comma))
+ return TokError("unexpected token in directive");
+ Lex();
+ }
+ }
+
+ Lex();
+ return false;
+}
+
bool COFFAsmParser::ParseSectionSwitch(StringRef Section,
unsigned Characteristics,
SectionKind Kind) {
@@ -401,12 +435,16 @@ bool COFFAsmParser::ParseAtUnwindOrAtExcept(bool &unwind, bool &except) {
bool COFFAsmParser::ParseSEHRegisterNumber(unsigned &RegNo) {
SMLoc startLoc = getLexer().getLoc();
if (getLexer().is(AsmToken::Percent)) {
- const TargetAsmInfo &TAI = getContext().getTargetAsmInfo();
+ const MCRegisterInfo &MRI = getContext().getRegisterInfo();
SMLoc endLoc;
unsigned LLVMRegNo;
if (getParser().getTargetParser().ParseRegister(LLVMRegNo,startLoc,endLoc))
return true;
+#if 0
+ // FIXME: TargetAsmInfo::getCalleeSavedRegs() commits a serious layering
+ // violation so this validation code is disabled.
+
// Check that this is a non-volatile register.
const unsigned *NVRegs = TAI.getCalleeSavedRegs();
unsigned i;
@@ -415,8 +453,9 @@ bool COFFAsmParser::ParseSEHRegisterNumber(unsigned &RegNo) {
break;
if (NVRegs[i] == 0)
return Error(startLoc, "expected non-volatile register");
+#endif
- int SEHRegNo = TAI.getSEHRegNum(LLVMRegNo);
+ int SEHRegNo = MRI.getSEHRegNum(LLVMRegNo);
if (SEHRegNo < 0)
return Error(startLoc,"register can't be represented in SEH unwind info");
RegNo = SEHRegNo;
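
The new .weak handler keys the directive text through llvm::StringSwitch. A minimal standalone use of the same idiom (int values stand in for MCSymbolAttr):

#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/StringSwitch.h"

int classifyDirective(llvm::StringRef Directive) {
  return llvm::StringSwitch<int>(Directive)
      .Case(".weak", 1)
      .Case(".local", 2)
      .Default(0); // unknown directive
}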
diff --git a/lib/MC/MCParser/ELFAsmParser.cpp b/lib/MC/MCParser/ELFAsmParser.cpp
index dcf689a..d891126 100644
--- a/lib/MC/MCParser/ELFAsmParser.cpp
+++ b/lib/MC/MCParser/ELFAsmParser.cpp
@@ -47,12 +47,17 @@ public:
AddDirectiveHandler<&ELFAsmParser::ParseSectionDirectiveRoData>(".rodata");
AddDirectiveHandler<&ELFAsmParser::ParseSectionDirectiveTData>(".tdata");
AddDirectiveHandler<&ELFAsmParser::ParseSectionDirectiveTBSS>(".tbss");
- AddDirectiveHandler<&ELFAsmParser::ParseSectionDirectiveDataRel>(".data.rel");
- AddDirectiveHandler<&ELFAsmParser::ParseSectionDirectiveDataRelRo>(".data.rel.ro");
- AddDirectiveHandler<&ELFAsmParser::ParseSectionDirectiveDataRelRoLocal>(".data.rel.ro.local");
- AddDirectiveHandler<&ELFAsmParser::ParseSectionDirectiveEhFrame>(".eh_frame");
+ AddDirectiveHandler<
+ &ELFAsmParser::ParseSectionDirectiveDataRel>(".data.rel");
+ AddDirectiveHandler<
+ &ELFAsmParser::ParseSectionDirectiveDataRelRo>(".data.rel.ro");
+ AddDirectiveHandler<
+ &ELFAsmParser::ParseSectionDirectiveDataRelRoLocal>(".data.rel.ro.local");
+ AddDirectiveHandler<
+ &ELFAsmParser::ParseSectionDirectiveEhFrame>(".eh_frame");
AddDirectiveHandler<&ELFAsmParser::ParseDirectiveSection>(".section");
- AddDirectiveHandler<&ELFAsmParser::ParseDirectivePushSection>(".pushsection");
+ AddDirectiveHandler<
+ &ELFAsmParser::ParseDirectivePushSection>(".pushsection");
AddDirectiveHandler<&ELFAsmParser::ParseDirectivePopSection>(".popsection");
AddDirectiveHandler<&ELFAsmParser::ParseDirectiveSize>(".size");
AddDirectiveHandler<&ELFAsmParser::ParseDirectivePrevious>(".previous");
@@ -60,6 +65,14 @@ public:
AddDirectiveHandler<&ELFAsmParser::ParseDirectiveIdent>(".ident");
AddDirectiveHandler<&ELFAsmParser::ParseDirectiveSymver>(".symver");
AddDirectiveHandler<&ELFAsmParser::ParseDirectiveWeakref>(".weakref");
+ AddDirectiveHandler<&ELFAsmParser::ParseDirectiveSymbolAttribute>(".weak");
+ AddDirectiveHandler<&ELFAsmParser::ParseDirectiveSymbolAttribute>(".local");
+ AddDirectiveHandler<
+ &ELFAsmParser::ParseDirectiveSymbolAttribute>(".protected");
+ AddDirectiveHandler<
+ &ELFAsmParser::ParseDirectiveSymbolAttribute>(".internal");
+ AddDirectiveHandler<
+ &ELFAsmParser::ParseDirectiveSymbolAttribute>(".hidden");
}
// FIXME: Part of this logic is duplicated in the MCELFStreamer. What is
@@ -129,6 +142,7 @@ public:
bool ParseDirectiveIdent(StringRef, SMLoc);
bool ParseDirectiveSymver(StringRef, SMLoc);
bool ParseDirectiveWeakref(StringRef, SMLoc);
+ bool ParseDirectiveSymbolAttribute(StringRef, SMLoc);
private:
bool ParseSectionName(StringRef &SectionName);
@@ -136,6 +150,41 @@ private:
}
+/// ParseDirectiveSymbolAttribute
+/// ::= { ".local", ".weak", ... } [ identifier ( , identifier )* ]
+bool ELFAsmParser::ParseDirectiveSymbolAttribute(StringRef Directive, SMLoc) {
+ MCSymbolAttr Attr = StringSwitch<MCSymbolAttr>(Directive)
+ .Case(".weak", MCSA_Weak)
+ .Case(".local", MCSA_Local)
+ .Case(".hidden", MCSA_Hidden)
+ .Case(".internal", MCSA_Internal)
+ .Case(".protected", MCSA_Protected)
+ .Default(MCSA_Invalid);
+ assert(Attr != MCSA_Invalid && "unexpected symbol attribute directive!");
+ if (getLexer().isNot(AsmToken::EndOfStatement)) {
+ for (;;) {
+ StringRef Name;
+
+ if (getParser().ParseIdentifier(Name))
+ return TokError("expected identifier in directive");
+
+ MCSymbol *Sym = getContext().GetOrCreateSymbol(Name);
+
+ getStreamer().EmitSymbolAttribute(Sym, Attr);
+
+ if (getLexer().is(AsmToken::EndOfStatement))
+ break;
+
+ if (getLexer().isNot(AsmToken::Comma))
+ return TokError("unexpected token in directive");
+ Lex();
+ }
+ }
+
+ Lex();
+ return false;
+}
+
bool ELFAsmParser::ParseSectionSwitch(StringRef Section, unsigned Type,
unsigned Flags, SectionKind Kind) {
if (getLexer().isNot(AsmToken::EndOfStatement))
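
ELFAsmParser now registers one member function for five directives and re-reads the directive string on dispatch. A toy model of that table (hypothetical Parser class; the real table lives behind AddDirectiveHandler in MCAsmParserExtension):

#include <initializer_list>
#include <map>
#include <string>

class Parser {
  typedef bool (Parser::*Handler)(const std::string &);
  std::map<std::string, Handler> Table;

  bool parseSymbolAttribute(const std::string &Directive) {
    // One body serves ".weak", ".local", ".hidden", ... -- mirroring the
    // StringSwitch in ParseDirectiveSymbolAttribute above.
    return !Directive.empty();
  }

public:
  Parser() {
    for (const char *D : {".weak", ".local", ".hidden", ".internal",
                          ".protected"})
      Table[D] = &Parser::parseSymbolAttribute;
  }
  bool dispatch(const std::string &Directive) {
    std::map<std::string, Handler>::iterator It = Table.find(Directive);
    return It != Table.end() && (this->*(It->second))(Directive);
  }
};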
diff --git a/lib/MC/MCParser/MCAsmParser.cpp b/lib/MC/MCParser/MCAsmParser.cpp
index 4030e41..5239ec7 100644
--- a/lib/MC/MCParser/MCAsmParser.cpp
+++ b/lib/MC/MCParser/MCAsmParser.cpp
@@ -8,13 +8,13 @@
//===----------------------------------------------------------------------===//
#include "llvm/MC/MCParser/MCAsmParser.h"
-#include "llvm/ADT/Twine.h"
#include "llvm/MC/MCParser/MCAsmLexer.h"
#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
+#include "llvm/MC/MCTargetAsmParser.h"
#include "llvm/Support/SourceMgr.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Support/Debug.h"
-#include "llvm/Target/TargetAsmParser.h"
+#include "llvm/ADT/Twine.h"
using namespace llvm;
MCAsmParser::MCAsmParser() : TargetParser(0), ShowParsedOperands(0) {
@@ -23,7 +23,7 @@ MCAsmParser::MCAsmParser() : TargetParser(0), ShowParsedOperands(0) {
MCAsmParser::~MCAsmParser() {
}
-void MCAsmParser::setTargetParser(TargetAsmParser &P) {
+void MCAsmParser::setTargetParser(MCTargetAsmParser &P) {
assert(!TargetParser && "Target parser is already initialized!");
TargetParser = &P;
TargetParser->Initialize(*this);
diff --git a/lib/MC/MCParser/TargetAsmParser.cpp b/lib/MC/MCParser/MCTargetAsmParser.cpp
index 512f6b0..6fb1ba4 100644
--- a/lib/MC/MCParser/TargetAsmParser.cpp
+++ b/lib/MC/MCParser/MCTargetAsmParser.cpp
@@ -1,4 +1,4 @@
-//===-- TargetAsmParser.cpp - Target Assembly Parser -----------------------==//
+//===-- MCTargetAsmParser.cpp - Target Assembly Parser ---------------------==//
//
// The LLVM Compiler Infrastructure
//
@@ -7,13 +7,13 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/Target/TargetAsmParser.h"
+#include "llvm/MC/MCTargetAsmParser.h"
using namespace llvm;
-TargetAsmParser::TargetAsmParser()
+MCTargetAsmParser::MCTargetAsmParser()
: AvailableFeatures(0)
{
}
-TargetAsmParser::~TargetAsmParser() {
+MCTargetAsmParser::~MCTargetAsmParser() {
}
diff --git a/lib/MC/MCPureStreamer.cpp b/lib/MC/MCPureStreamer.cpp
index 6098e6b..086c922 100644
--- a/lib/MC/MCPureStreamer.cpp
+++ b/lib/MC/MCPureStreamer.cpp
@@ -28,7 +28,7 @@ private:
virtual void EmitInstToData(const MCInst &Inst);
public:
- MCPureStreamer(MCContext &Context, TargetAsmBackend &TAB,
+ MCPureStreamer(MCContext &Context, MCAsmBackend &TAB,
raw_ostream &OS, MCCodeEmitter *Emitter)
: MCObjectStreamer(Context, TAB, OS, Emitter) {}
@@ -86,7 +86,8 @@ public:
virtual void EmitELFSize(MCSymbol *Symbol, const MCExpr *Value) {
report_fatal_error("unsupported directive in pure streamer");
}
- virtual void EmitLocalCommonSymbol(MCSymbol *Symbol, uint64_t Size) {
+ virtual void EmitLocalCommonSymbol(MCSymbol *Symbol, uint64_t Size,
+ unsigned ByteAlignment) {
report_fatal_error("unsupported directive in pure streamer");
}
virtual void EmitFileDirective(StringRef Filename) {
@@ -228,7 +229,7 @@ void MCPureStreamer::Finish() {
this->MCObjectStreamer::Finish();
}
-MCStreamer *llvm::createPureStreamer(MCContext &Context, TargetAsmBackend &TAB,
+MCStreamer *llvm::createPureStreamer(MCContext &Context, MCAsmBackend &MAB,
raw_ostream &OS, MCCodeEmitter *CE) {
- return new MCPureStreamer(Context, TAB, OS, CE);
+ return new MCPureStreamer(Context, MAB, OS, CE);
}
diff --git a/lib/MC/MCStreamer.cpp b/lib/MC/MCStreamer.cpp
index 6e96b78..3afa22b 100644
--- a/lib/MC/MCStreamer.cpp
+++ b/lib/MC/MCStreamer.cpp
@@ -16,13 +16,17 @@
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/Twine.h"
#include <cstdlib>
using namespace llvm;
MCStreamer::MCStreamer(MCContext &Ctx) : Context(Ctx), EmitEHFrame(true),
EmitDebugFrame(false),
- CurrentW64UnwindInfo(0) {
+ CurrentW64UnwindInfo(0),
+ LastSymbol(0),
+ UniqueCodeBeginSuffix(0),
+ UniqueDataBeginSuffix(0) {
const MCSection *section = NULL;
SectionStack.push_back(std::make_pair(section, section));
}
@@ -171,10 +175,94 @@ void MCStreamer::EmitLabel(MCSymbol *Symbol) {
assert(!Symbol->isVariable() && "Cannot emit a variable symbol!");
assert(getCurrentSection() && "Cannot emit before setting section!");
Symbol->setSection(*getCurrentSection());
+ LastSymbol = Symbol;
+}
- StringRef Prefix = getContext().getAsmInfo().getPrivateGlobalPrefix();
- if (!Symbol->getName().startswith(Prefix))
- LastNonPrivate = Symbol;
+void MCStreamer::EmitDataRegion() {
+ if (RegionIndicator == Data) return;
+
+ MCContext &Context = getContext();
+ const MCAsmInfo &MAI = Context.getAsmInfo();
+ if (!MAI.getSupportsDataRegions()) return;
+
+ // Generate a unique symbol name.
+ MCSymbol *NewSym = Context.GetOrCreateSymbol(
+ Twine(MAI.getDataBeginLabelName()) +
+ utostr(UniqueDataBeginSuffix++));
+ EmitLabel(NewSym);
+
+ RegionIndicator = Data;
+}
+
+void MCStreamer::EmitCodeRegion() {
+ if (RegionIndicator == Code) return;
+
+ MCContext &Context = getContext();
+ const MCAsmInfo &MAI = Context.getAsmInfo();
+ if (!MAI.getSupportsDataRegions()) return;
+
+ // Generate a unique symbol name.
+ MCSymbol *NewSym = Context.GetOrCreateSymbol(
+ Twine(MAI.getCodeBeginLabelName()) +
+ utostr(UniqueCodeBeginSuffix++));
+ EmitLabel(NewSym);
+
+ RegionIndicator = Code;
+}
+
+void MCStreamer::EmitJumpTable8Region() {
+ if (RegionIndicator == JumpTable8) return;
+
+ MCContext &Context = getContext();
+ const MCAsmInfo &MAI = Context.getAsmInfo();
+ if (!MAI.getSupportsDataRegions()) return;
+
+ // Generate a unique symbol name.
+ MCSymbol *NewSym = Context.GetOrCreateSymbol(
+ Twine(MAI.getJumpTable8BeginLabelName()) +
+ utostr(UniqueDataBeginSuffix++));
+ EmitLabel(NewSym);
+
+ RegionIndicator = JumpTable8;
+}
+
+void MCStreamer::EmitJumpTable16Region() {
+ if (RegionIndicator == JumpTable16) return;
+
+ MCContext &Context = getContext();
+ const MCAsmInfo &MAI = Context.getAsmInfo();
+ if (!MAI.getSupportsDataRegions()) return;
+
+ // Generate a unique symbol name.
+ MCSymbol *NewSym = Context.GetOrCreateSymbol(
+ Twine(MAI.getJumpTable16BeginLabelName()) +
+ utostr(UniqueDataBeginSuffix++));
+ EmitLabel(NewSym);
+
+ RegionIndicator = JumpTable16;
+}
+
+
+void MCStreamer::EmitJumpTable32Region() {
+ if (RegionIndicator == JumpTable32) return;
+
+ MCContext &Context = getContext();
+ const MCAsmInfo &MAI = Context.getAsmInfo();
+ if (!MAI.getSupportsDataRegions()) return;
+
+ // Generate a unique symbol name.
+ MCSymbol *NewSym = Context.GetOrCreateSymbol(
+ Twine(MAI.getJumpTable32BeginLabelName()) +
+ utostr(UniqueDataBeginSuffix++));
+ EmitLabel(NewSym);
+
+ RegionIndicator = JumpTable32;
+}
+
+void MCStreamer::EmitCompactUnwindEncoding(uint32_t CompactUnwindEncoding) {
+ EnsureValidFrame();
+ MCDwarfFrameInfo *CurFrame = getCurrentFrameInfo();
+ CurFrame->CompactUnwindEncoding = CompactUnwindEncoding;
}
void MCStreamer::EmitCFISections(bool EH, bool Debug) {
@@ -187,11 +275,22 @@ void MCStreamer::EmitCFIStartProc() {
MCDwarfFrameInfo *CurFrame = getCurrentFrameInfo();
if (CurFrame && !CurFrame->End)
report_fatal_error("Starting a frame before finishing the previous one!");
+
MCDwarfFrameInfo Frame;
- Frame.Begin = getContext().CreateTempSymbol();
- Frame.Function = LastNonPrivate;
- EmitLabel(Frame.Begin);
+ Frame.Function = LastSymbol;
+
+ // If the function is externally visible, we need to create a local
+ // symbol to avoid relocations.
+ StringRef Prefix = getContext().getAsmInfo().getPrivateGlobalPrefix();
+ if (LastSymbol && LastSymbol->getName().startswith(Prefix)) {
+ Frame.Begin = LastSymbol;
+ } else {
+ Frame.Begin = getContext().CreateTempSymbol();
+ EmitLabel(Frame.Begin);
+ }
+
FrameInfos.push_back(Frame);
+ RegionIndicator = Code;
}
void MCStreamer::EmitCFIEndProc() {
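
The four jump-table/data/code emitters above share one shape: bail if already in the region, mint a unique begin label, record the new region. A hedged refactor sketch (hypothetical helper, not the LLVM 3.0 API):

#include <string>

enum Region { Code, Data, JumpTable8, JumpTable16, JumpTable32 };

struct RegionEmitter {
  Region Current = Code;
  unsigned UniqueSuffix = 0;

  // LabelStem stands in for the MAI.get*BeginLabelName() calls above.
  void emitRegion(Region R, const std::string &LabelStem) {
    if (Current == R)
      return;                             // already in this kind of region
    emitLabel(LabelStem + std::to_string(UniqueSuffix++));
    Current = R;
  }
  void emitLabel(const std::string & /*Name*/) { /* stream the begin label */ }
};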
diff --git a/lib/Target/TargetAsmLexer.cpp b/lib/MC/MCTargetAsmLexer.cpp
index d4893ff..c01c914 100644
--- a/lib/Target/TargetAsmLexer.cpp
+++ b/lib/MC/MCTargetAsmLexer.cpp
@@ -1,4 +1,4 @@
-//===-- llvm/Target/TargetAsmLexer.cpp - Target Assembly Lexer ------------===//
+//===-- llvm/MC/MCTargetAsmLexer.cpp - Target Assembly Lexer --------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -7,8 +7,10 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/Target/TargetAsmLexer.h"
+#include "llvm/MC/MCTargetAsmLexer.h"
using namespace llvm;
-TargetAsmLexer::TargetAsmLexer(const Target &T) : TheTarget(T), Lexer(NULL) {}
-TargetAsmLexer::~TargetAsmLexer() {}
+MCTargetAsmLexer::MCTargetAsmLexer(const Target &T)
+ : TheTarget(T), Lexer(NULL) {
+}
+MCTargetAsmLexer::~MCTargetAsmLexer() {}
diff --git a/lib/MC/MCWin64EH.cpp b/lib/MC/MCWin64EH.cpp
index e698384..79e66fc 100644
--- a/lib/MC/MCWin64EH.cpp
+++ b/lib/MC/MCWin64EH.cpp
@@ -10,10 +10,11 @@
#include "llvm/MC/MCWin64EH.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCObjectFileInfo.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/MC/MCSectionCOFF.h"
#include "llvm/MC/MCExpr.h"
-#include "llvm/Target/TargetAsmInfo.h"
+#include "llvm/ADT/Twine.h"
namespace llvm {
@@ -220,14 +221,36 @@ StringRef MCWin64EHUnwindEmitter::GetSectionSuffix(const MCSymbol *func) {
return "";
}
+static const MCSection *getWin64EHTableSection(StringRef suffix,
+ MCContext &context) {
+ if (suffix == "")
+ return context.getObjectFileInfo()->getXDataSection();
+
+ return context.getCOFFSection((".xdata"+suffix).str(),
+ COFF::IMAGE_SCN_CNT_INITIALIZED_DATA |
+ COFF::IMAGE_SCN_MEM_READ |
+ COFF::IMAGE_SCN_MEM_WRITE,
+ SectionKind::getDataRel());
+}
+
+static const MCSection *getWin64EHFuncTableSection(StringRef suffix,
+ MCContext &context) {
+ if (suffix == "")
+ return context.getObjectFileInfo()->getPDataSection();
+ return context.getCOFFSection((".pdata"+suffix).str(),
+ COFF::IMAGE_SCN_CNT_INITIALIZED_DATA |
+ COFF::IMAGE_SCN_MEM_READ |
+ COFF::IMAGE_SCN_MEM_WRITE,
+ SectionKind::getDataRel());
+}
+
void MCWin64EHUnwindEmitter::EmitUnwindInfo(MCStreamer &streamer,
MCWin64EHUnwindInfo *info) {
// Switch sections (the static function above is meant to be called from
// here and from Emit()).
MCContext &context = streamer.getContext();
- const TargetAsmInfo &TAI = context.getTargetAsmInfo();
const MCSection *xdataSect =
- TAI.getWin64EHTableSection(GetSectionSuffix(info->Function));
+ getWin64EHTableSection(GetSectionSuffix(info->Function), context);
streamer.SwitchSection(xdataSect);
llvm::EmitUnwindInfo(streamer, info);
@@ -236,11 +259,10 @@ void MCWin64EHUnwindEmitter::EmitUnwindInfo(MCStreamer &streamer,
void MCWin64EHUnwindEmitter::Emit(MCStreamer &streamer) {
MCContext &context = streamer.getContext();
// Emit the unwind info structs first.
- const TargetAsmInfo &TAI = context.getTargetAsmInfo();
for (unsigned i = 0; i < streamer.getNumW64UnwindInfos(); ++i) {
MCWin64EHUnwindInfo &info = streamer.getW64UnwindInfo(i);
const MCSection *xdataSect =
- TAI.getWin64EHTableSection(GetSectionSuffix(info.Function));
+ getWin64EHTableSection(GetSectionSuffix(info.Function), context);
streamer.SwitchSection(xdataSect);
llvm::EmitUnwindInfo(streamer, &info);
}
@@ -248,7 +270,7 @@ void MCWin64EHUnwindEmitter::Emit(MCStreamer &streamer) {
for (unsigned i = 0; i < streamer.getNumW64UnwindInfos(); ++i) {
MCWin64EHUnwindInfo &info = streamer.getW64UnwindInfo(i);
const MCSection *pdataSect =
- TAI.getWin64EHFuncTableSection(GetSectionSuffix(info.Function));
+ getWin64EHFuncTableSection(GetSectionSuffix(info.Function), context);
streamer.SwitchSection(pdataSect);
EmitRuntimeFunction(streamer, &info);
}
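
Both helpers follow the same fallback: an empty suffix selects the default section from MCObjectFileInfo, anything else creates a suffixed COFF section. The naming rule in isolation (illustrative only; the "$foo" suffix is a hypothetical example):

#include <string>

std::string unwindTableSectionName(const std::string &Suffix) {
  return Suffix.empty() ? std::string(".xdata") : ".xdata" + Suffix;
}
// unwindTableSectionName("")     -> ".xdata"     (shared default section)
// unwindTableSectionName("$foo") -> ".xdata$foo" (per-function section)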
diff --git a/lib/MC/MachObjectWriter.cpp b/lib/MC/MachObjectWriter.cpp
index 69efe23..a9219ad 100644
--- a/lib/MC/MachObjectWriter.cpp
+++ b/lib/MC/MachObjectWriter.cpp
@@ -12,6 +12,7 @@
#include "llvm/ADT/StringMap.h"
#include "llvm/ADT/Twine.h"
#include "llvm/MC/MCAssembler.h"
+#include "llvm/MC/MCAsmBackend.h"
#include "llvm/MC/MCAsmLayout.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCObjectWriter.h"
@@ -21,7 +22,6 @@
#include "llvm/MC/MCValue.h"
#include "llvm/Object/MachOFormat.h"
#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Target/TargetAsmBackend.h"
#include <vector>
using namespace llvm;
@@ -291,7 +291,7 @@ void MachObjectWriter::WriteNlist(MachSymbolData &MSD,
const MCSymbol &Symbol = Data.getSymbol();
uint8_t Type = 0;
uint16_t Flags = Data.getFlags();
- uint32_t Address = 0;
+ uint64_t Address = 0;
// Set the N_TYPE bits. See <mach-o/nlist.h>.
//
@@ -590,14 +590,28 @@ IsSymbolRefDifferenceFullyResolvedImpl(const MCAssembler &Asm,
return false;
return true;
}
+  // For Darwin x86_64, there is one special case when the reference IsPCRel:
+  // if the fragment with the reference does not have a base symbol, but the
+  // referenced symbol is a temporary in the same atom, the reference is
+  // assumed to be fully resolved. This is needed so a relocation entry is
+  // not created and the static linker does not mess up the reference later.
+  else if (!FB.getAtom() &&
+           SA.isTemporary() && SA.isInSection() && &SecA == &SecB) {
+    return true;
+  }
} else {
if (!TargetObjectWriter->useAggressiveSymbolFolding())
return false;
}
- const MCFragment &FA = *Asm.getSymbolData(SA).getFragment();
+ const MCFragment *FA = Asm.getSymbolData(SA).getFragment();
+
+ // Bail if the symbol has no fragment.
+ if (!FA)
+ return false;
- A_Base = FA.getAtom();
+ A_Base = FA->getAtom();
if (!A_Base)
return false;
@@ -613,7 +627,8 @@ IsSymbolRefDifferenceFullyResolvedImpl(const MCAssembler &Asm,
return false;
}
-void MachObjectWriter::WriteObject(MCAssembler &Asm, const MCAsmLayout &Layout) {
+void MachObjectWriter::WriteObject(MCAssembler &Asm,
+ const MCAsmLayout &Layout) {
unsigned NumSections = Asm.size();
// The section data starts after the header, the segment load command (and
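
The uint32_t -> uint64_t widening of Address in WriteNlist matters on x86_64: Mach-O symbol addresses no longer fit in 32 bits. A two-line demonstration of the truncation the old type caused:

#include <cstdint>
#include <cstdio>

int main() {
  uint64_t SymAddr = 0x100001000ULL;                // typical 64-bit text address
  uint32_t Narrow = static_cast<uint32_t>(SymAddr); // the old field type
  std::printf("%llx -> %x\n", (unsigned long long)SymAddr, Narrow);
  // prints: 100001000 -> 1000
}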
diff --git a/lib/MC/WinCOFFObjectWriter.cpp b/lib/MC/WinCOFFObjectWriter.cpp
index 101237a..b15e225 100644
--- a/lib/MC/WinCOFFObjectWriter.cpp
+++ b/lib/MC/WinCOFFObjectWriter.cpp
@@ -33,7 +33,7 @@
#include "llvm/Support/TimeValue.h"
-#include "../Target/X86/X86FixupKinds.h"
+#include "../Target/X86/MCTargetDesc/X86FixupKinds.h"
#include <cstdio>
diff --git a/lib/MC/WinCOFFStreamer.cpp b/lib/MC/WinCOFFStreamer.cpp
index 6c36c12..7409daf 100644
--- a/lib/MC/WinCOFFStreamer.cpp
+++ b/lib/MC/WinCOFFStreamer.cpp
@@ -24,13 +24,13 @@
#include "llvm/MC/MCCodeEmitter.h"
#include "llvm/MC/MCSectionCOFF.h"
#include "llvm/MC/MCWin64EH.h"
-#include "llvm/Target/TargetRegistry.h"
-#include "llvm/Target/TargetAsmBackend.h"
+#include "llvm/MC/MCAsmBackend.h"
#include "llvm/ADT/StringMap.h"
#include "llvm/Support/COFF.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
@@ -40,7 +40,7 @@ public:
MCSymbol const *CurSymbol;
WinCOFFStreamer(MCContext &Context,
- TargetAsmBackend &TAB,
+ MCAsmBackend &MAB,
MCCodeEmitter &CE,
raw_ostream &OS);
@@ -63,7 +63,8 @@ public:
virtual void EmitELFSize(MCSymbol *Symbol, const MCExpr *Value);
virtual void EmitCommonSymbol(MCSymbol *Symbol, uint64_t Size,
unsigned ByteAlignment);
- virtual void EmitLocalCommonSymbol(MCSymbol *Symbol, uint64_t Size);
+ virtual void EmitLocalCommonSymbol(MCSymbol *Symbol, uint64_t Size,
+ unsigned ByteAlignment);
virtual void EmitZerofill(const MCSection *Section, MCSymbol *Symbol,
unsigned Size,unsigned ByteAlignment);
virtual void EmitTBSSSymbol(const MCSection *Section, MCSymbol *Symbol,
@@ -123,10 +124,10 @@ private:
} // end anonymous namespace.
WinCOFFStreamer::WinCOFFStreamer(MCContext &Context,
- TargetAsmBackend &TAB,
+ MCAsmBackend &MAB,
MCCodeEmitter &CE,
raw_ostream &OS)
- : MCObjectStreamer(Context, TAB, OS, &CE)
+ : MCObjectStreamer(Context, MAB, OS, &CE)
, CurSymbol(NULL) {
}
@@ -304,11 +305,12 @@ void WinCOFFStreamer::EmitCommonSymbol(MCSymbol *Symbol, uint64_t Size,
AddCommonSymbol(Symbol, Size, ByteAlignment, true);
}
-void WinCOFFStreamer::EmitLocalCommonSymbol(MCSymbol *Symbol, uint64_t Size) {
+void WinCOFFStreamer::EmitLocalCommonSymbol(MCSymbol *Symbol, uint64_t Size,
+ unsigned ByteAlignment) {
assert((Symbol->isInSection()
? Symbol->getSection().getVariant() == MCSection::SV_COFF
: true) && "Got non COFF section in the COFF backend!");
- AddCommonSymbol(Symbol, Size, 1, false);
+ AddCommonSymbol(Symbol, Size, ByteAlignment, false);
}
void WinCOFFStreamer::EmitZerofill(const MCSection *Section, MCSymbol *Symbol,
@@ -395,11 +397,11 @@ void WinCOFFStreamer::Finish() {
namespace llvm
{
MCStreamer *createWinCOFFStreamer(MCContext &Context,
- TargetAsmBackend &TAB,
+ MCAsmBackend &MAB,
MCCodeEmitter &CE,
raw_ostream &OS,
bool RelaxAll) {
- WinCOFFStreamer *S = new WinCOFFStreamer(Context, TAB, CE, OS);
+ WinCOFFStreamer *S = new WinCOFFStreamer(Context, MAB, CE, OS);
S->getAssembler().setRelaxAll(RelaxAll);
return S;
}
diff --git a/lib/Makefile b/lib/Makefile
index ed27854..fd575cd 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -11,7 +11,7 @@ LEVEL = ..
include $(LEVEL)/Makefile.config
PARALLEL_DIRS := VMCore AsmParser Bitcode Archive Analysis Transforms CodeGen \
- Target ExecutionEngine Linker MC CompilerDriver Object
+ Target ExecutionEngine Linker MC Object DebugInfo
include $(LEVEL)/Makefile.common
diff --git a/lib/Object/Archive.cpp b/lib/Object/Archive.cpp
new file mode 100644
index 0000000..e2eaff5
--- /dev/null
+++ b/lib/Object/Archive.cpp
@@ -0,0 +1,172 @@
+//===- Archive.cpp - ar File Format implementation --------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the Archive class, which implements the ar archive
+// file format.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Object/Archive.h"
+#include "llvm/ADT/APInt.h"
+#include "llvm/Support/MemoryBuffer.h"
+
+using namespace llvm;
+using namespace object;
+
+namespace {
+const StringRef Magic = "!<arch>\n";
+
+struct ArchiveMemberHeader {
+ char Name[16];
+ char LastModified[12];
+ char UID[6];
+ char GID[6];
+ char AccessMode[8];
+ char Size[10]; ///< Size of data, not including header or padding.
+ char Terminator[2];
+
+ /// Get the name without looking up long names.
+ StringRef getName() const {
+ char EndCond = Name[0] == '/' ? ' ' : '/';
+ StringRef::size_type end = StringRef(Name, sizeof(Name)).find(EndCond);
+ if (end == StringRef::npos)
+ end = sizeof(Name);
+ assert(end <= sizeof(Name) && end > 0);
+ // Don't include the EndCond if there is one.
+ return StringRef(Name, end);
+ }
+
+ uint64_t getSize() const {
+ APInt ret;
+ StringRef(Size, sizeof(Size)).getAsInteger(10, ret);
+ return ret.getZExtValue();
+ }
+};
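+
+ // A sketch of the 60-byte ar member header this struct overlays, with
+ // offsets derived from the field widths above (not quoted from a spec):
+ //   0..15 Name, 16..27 LastModified, 28..33 UID, 34..39 GID,
+ //   40..47 AccessMode, 48..57 Size, 58..59 Terminator (conventionally "`\n").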
+
+const ArchiveMemberHeader *ToHeader(const char *base) {
+ return reinterpret_cast<const ArchiveMemberHeader *>(base);
+}
+}
+
+Archive::Child Archive::Child::getNext() const {
+ size_t SpaceToSkip = sizeof(ArchiveMemberHeader) +
+ ToHeader(Data.data())->getSize();
+ // If it's odd, add 1 to make it even.
+ if (SpaceToSkip & 1)
+ ++SpaceToSkip;
+
+ const char *NextLoc = Data.data() + SpaceToSkip;
+
+ // Check to see if this is past the end of the archive.
+ if (NextLoc >= Parent->Data->getBufferEnd())
+ return Child(Parent, StringRef(0, 0));
+
+ size_t NextSize = sizeof(ArchiveMemberHeader) +
+ ToHeader(NextLoc)->getSize();
+
+ return Child(Parent, StringRef(NextLoc, NextSize));
+}
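+
+ // Worked example of the padding rule above (hypothetical member): a member
+ // whose Size field reads "7" spans 60 (header) + 7 = 67 bytes, so
+ // SpaceToSkip rounds up to 68 and the next header begins there.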
+
+error_code Archive::Child::getName(StringRef &Result) const {
+ StringRef name = ToHeader(Data.data())->getName();
+ // Check if it's a special name.
+ if (name[0] == '/') {
+ if (name.size() == 1) { // Linker member.
+ Result = name;
+ return object_error::success;
+ }
+ if (name.size() == 2 && name[1] == '/') { // String table.
+ Result = name;
+ return object_error::success;
+ }
+ // It's a long name.
+ // Get the offset.
+ APInt offset;
+ name.substr(1).getAsInteger(10, offset);
+ // Verify the string table exists before computing an address inside it.
+ if (Parent->StringTable == Parent->end_children())
+ return object_error::parse_failed;
+ const char *addr = Parent->StringTable->Data.begin()
+ + sizeof(ArchiveMemberHeader)
+ + offset.getZExtValue();
+ // Verify the offset stays within the string table.
+ if (addr < (Parent->StringTable->Data.begin()
+ + sizeof(ArchiveMemberHeader))
+ || addr > (Parent->StringTable->Data.begin()
+ + sizeof(ArchiveMemberHeader)
+ + Parent->StringTable->getSize()))
+ return object_error::parse_failed;
+ Result = addr;
+ return object_error::success;
+ }
+ // It's a simple name.
+ if (name[name.size() - 1] == '/')
+ Result = name.substr(0, name.size() - 1);
+ else
+ Result = name;
+ return object_error::success;
+}
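+
+ // The name forms handled above, illustrated with hypothetical values:
+ //   "/   ..."    -> "/"     linker (symbol table) member
+ //   "//  ..."    -> "//"    long-name string table
+ //   "/18 ..."    -> name stored at offset 18 of the string table
+ //   "foo.o/ ..." -> "foo.o" (terminating '/' already stripped by getName())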
+
+uint64_t Archive::Child::getSize() const {
+ return ToHeader(Data.data())->getSize();
+}
+
+MemoryBuffer *Archive::Child::getBuffer() const {
+ StringRef name;
+ if (getName(name)) return NULL;
+ return MemoryBuffer::getMemBuffer(Data.substr(sizeof(ArchiveMemberHeader),
+ getSize()),
+ name,
+ false);
+}
+
+error_code Archive::Child::getAsBinary(OwningPtr<Binary> &Result) const {
+ OwningPtr<Binary> ret;
+ if (error_code ec =
+ createBinary(getBuffer(), ret))
+ return ec;
+ Result.swap(ret);
+ return object_error::success;
+}
+
+Archive::Archive(MemoryBuffer *source, error_code &ec)
+ : Binary(Binary::isArchive, source)
+ , StringTable(Child(this, StringRef(0, 0))) {
+ // Check for sufficient magic.
+ if (!source || source->getBufferSize()
+ < (8 + sizeof(ArchiveMemberHeader) + 2) // Smallest archive.
+ || StringRef(source->getBufferStart(), 8) != Magic) {
+ ec = object_error::invalid_file_type;
+ return;
+ }
+
+ // Get the string table. It's the 3rd member.
+ child_iterator StrTable = begin_children();
+ child_iterator e = end_children();
+ for (int i = 0; StrTable != e && i < 2; ++StrTable, ++i) {}
+
+ // If the third member exists and is named "//", it is the string table.
+ StringRef name;
+ if (StrTable != e && !StrTable->getName(name) && name == "//")
+ StringTable = StrTable;
+
+ ec = object_error::success;
+}
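+
+ // The size check above spells out the smallest archive accepted here:
+ // 8 bytes of "!<arch>\n" magic plus one 60-byte member header plus 2
+ // further bytes, 70 bytes in all.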
+
+Archive::child_iterator Archive::begin_children() const {
+ const char *Loc = Data->getBufferStart() + Magic.size();
+ size_t Size = sizeof(ArchiveMemberHeader) +
+ ToHeader(Loc)->getSize();
+ return Child(this, StringRef(Loc, Size));
+}
+
+Archive::child_iterator Archive::end_children() const {
+ return Child(this, StringRef(0, 0));
+}
+
+namespace llvm {
+
+} // end namespace llvm
diff --git a/lib/Object/Binary.cpp b/lib/Object/Binary.cpp
index 4b31c75..4e528d8 100644
--- a/lib/Object/Binary.cpp
+++ b/lib/Object/Binary.cpp
@@ -17,8 +17,9 @@
#include "llvm/Support/Path.h"
// Include headers for createBinary.
-#include "llvm/Object/ObjectFile.h"
+#include "llvm/Object/Archive.h"
#include "llvm/Object/COFF.h"
+#include "llvm/Object/ObjectFile.h"
using namespace llvm;
using namespace object;
@@ -50,6 +51,12 @@ error_code object::createBinary(MemoryBuffer *Source,
static_cast<unsigned>(Source->getBufferSize()));
error_code ec;
switch (type) {
+ case sys::Archive_FileType: {
+ OwningPtr<Binary> ret(new Archive(scopedSource.take(), ec));
+ if (ec) return ec;
+ Result.swap(ret);
+ return object_error::success;
+ }
case sys::ELF_Relocatable_FileType:
case sys::ELF_Executable_FileType:
case sys::ELF_SharedObject_FileType:
@@ -90,7 +97,7 @@ error_code object::createBinary(MemoryBuffer *Source,
error_code object::createBinary(StringRef Path, OwningPtr<Binary> &Result) {
OwningPtr<MemoryBuffer> File;
- if (error_code ec = MemoryBuffer::getFile(Path, File))
+ if (error_code ec = MemoryBuffer::getFileOrSTDIN(Path, File))
return ec;
return createBinary(File.take(), Result);
}
diff --git a/lib/Object/CMakeLists.txt b/lib/Object/CMakeLists.txt
index 68e5e94..86eb51a 100644
--- a/lib/Object/CMakeLists.txt
+++ b/lib/Object/CMakeLists.txt
@@ -1,4 +1,5 @@
add_llvm_library(LLVMObject
+ Archive.cpp
Binary.cpp
COFFObjectFile.cpp
ELFObjectFile.cpp
@@ -8,3 +9,8 @@ add_llvm_library(LLVMObject
Object.cpp
ObjectFile.cpp
)
+
+add_llvm_library_dependencies(LLVMObject
+ LLVMCore
+ LLVMSupport
+ )
diff --git a/lib/Object/COFFObjectFile.cpp b/lib/Object/COFFObjectFile.cpp
index 07de6bc..750c34d 100644
--- a/lib/Object/COFFObjectFile.cpp
+++ b/lib/Object/COFFObjectFile.cpp
@@ -12,6 +12,7 @@
//===----------------------------------------------------------------------===//
#include "llvm/Object/COFF.h"
+#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/ADT/Triple.h"
@@ -114,7 +115,7 @@ error_code COFFObjectFile::getSymbolNext(DataRefImpl Symb,
return object_error::success;
}
-error_code COFFObjectFile::getSymbolAddress(DataRefImpl Symb,
+error_code COFFObjectFile::getSymbolOffset(DataRefImpl Symb,
uint64_t &Result) const {
const coff_symbol *symb = toSymb(Symb);
const coff_section *Section = NULL;
@@ -132,6 +133,55 @@ error_code COFFObjectFile::getSymbolAddress(DataRefImpl Symb,
return object_error::success;
}
+error_code COFFObjectFile::getSymbolAddress(DataRefImpl Symb,
+ uint64_t &Result) const {
+ const coff_symbol *symb = toSymb(Symb);
+ const coff_section *Section = NULL;
+ if (error_code ec = getSection(symb->SectionNumber, Section))
+ return ec;
+ char Type;
+ if (error_code ec = getSymbolNMTypeChar(Symb, Type))
+ return ec;
+ if (Type == 'U' || Type == 'w')
+ Result = UnknownAddressOrSize;
+ else if (Section)
+ Result = reinterpret_cast<uintptr_t>(base() +
+ Section->PointerToRawData +
+ symb->Value);
+ else
+ Result = reinterpret_cast<uintptr_t>(base() + symb->Value);
+ return object_error::success;
+}
+
+error_code COFFObjectFile::getSymbolType(DataRefImpl Symb,
+ SymbolRef::SymbolType &Result) const {
+ const coff_symbol *symb = toSymb(Symb);
+ Result = SymbolRef::ST_Other;
+ if (symb->StorageClass == COFF::IMAGE_SYM_CLASS_EXTERNAL &&
+ symb->SectionNumber == COFF::IMAGE_SYM_UNDEFINED) {
+ Result = SymbolRef::ST_External;
+ } else {
+ if (symb->Type.ComplexType == COFF::IMAGE_SYM_DTYPE_FUNCTION) {
+ Result = SymbolRef::ST_Function;
+ } else {
+ char Type;
+ if (error_code ec = getSymbolNMTypeChar(Symb, Type))
+ return ec;
+ if (Type == 'r' || Type == 'R') {
+ Result = SymbolRef::ST_Data;
+ }
+ }
+ }
+ return object_error::success;
+}
+
+error_code COFFObjectFile::isSymbolGlobal(DataRefImpl Symb,
+ bool &Result) const {
+ const coff_symbol *symb = toSymb(Symb);
+ Result = (symb->StorageClass == COFF::IMAGE_SYM_CLASS_EXTERNAL);
+ return object_error::success;
+}
+
error_code COFFObjectFile::getSymbolSize(DataRefImpl Symb,
uint64_t &Result) const {
// FIXME: Return the correct size. This requires looking at all the symbols
@@ -286,6 +336,15 @@ error_code COFFObjectFile::getSectionContents(DataRefImpl Sec,
return object_error::success;
}
+error_code COFFObjectFile::getSectionAlignment(DataRefImpl Sec,
+ uint64_t &Res) const {
+ const coff_section *sec = toSec(Sec);
+ if (!sec)
+ return object_error::parse_failed;
+ Res = uint64_t(1) << (((sec->Characteristics & 0x00F00000) >> 20) - 1);
+ return object_error::success;
+}
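+
+ // Example of the decoding above: IMAGE_SCN_ALIGN_4BYTES is 0x00300000, so
+ // the nibble is 3 and the alignment is 1 << (3 - 1) = 4 bytes. This assumes
+ // a nonzero nibble; a zero nibble (no alignment specified) would produce an
+ // out-of-range shift.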
+
error_code COFFObjectFile::isSectionText(DataRefImpl Sec,
bool &Result) const {
const coff_section *sec = toSec(Sec);
@@ -293,14 +352,61 @@ error_code COFFObjectFile::isSectionText(DataRefImpl Sec,
return object_error::success;
}
+error_code COFFObjectFile::isSectionData(DataRefImpl Sec,
+ bool &Result) const {
+ const coff_section *sec = toSec(Sec);
+ Result = sec->Characteristics & COFF::IMAGE_SCN_CNT_INITIALIZED_DATA;
+ return object_error::success;
+}
+
+error_code COFFObjectFile::isSectionBSS(DataRefImpl Sec,
+ bool &Result) const {
+ const coff_section *sec = toSec(Sec);
+ Result = sec->Characteristics & COFF::IMAGE_SCN_CNT_UNINITIALIZED_DATA;
+ return object_error::success;
+}
+
error_code COFFObjectFile::sectionContainsSymbol(DataRefImpl Sec,
DataRefImpl Symb,
bool &Result) const {
- // FIXME: Unimplemented.
- Result = false;
+ const coff_section *sec = toSec(Sec);
+ const coff_symbol *symb = toSymb(Symb);
+ const coff_section *symb_sec;
+ if (error_code ec = getSection(symb->SectionNumber, symb_sec)) return ec;
+ if (symb_sec == sec)
+ Result = true;
+ else
+ Result = false;
return object_error::success;
}
+relocation_iterator COFFObjectFile::getSectionRelBegin(DataRefImpl Sec) const {
+ const coff_section *sec = toSec(Sec);
+ DataRefImpl ret;
+ std::memset(&ret, 0, sizeof(ret));
+ if (sec->NumberOfRelocations == 0)
+ ret.p = 0;
+ else
+ ret.p = reinterpret_cast<uintptr_t>(base() + sec->PointerToRelocations);
+
+ return relocation_iterator(RelocationRef(ret, this));
+}
+
+relocation_iterator COFFObjectFile::getSectionRelEnd(DataRefImpl Sec) const {
+ const coff_section *sec = toSec(Sec);
+ DataRefImpl ret;
+ std::memset(&ret, 0, sizeof(ret));
+ if (sec->NumberOfRelocations == 0)
+ ret.p = 0;
+ else
+ ret.p = reinterpret_cast<uintptr_t>(
+ reinterpret_cast<const coff_relocation*>(
+ base() + sec->PointerToRelocations)
+ + sec->NumberOfRelocations);
+
+ return relocation_iterator(RelocationRef(ret, this));
+}
+
COFFObjectFile::COFFObjectFile(MemoryBuffer *Object, error_code &ec)
: ObjectFile(Binary::isCOFF, Object, ec) {
// Check that we at least have enough room for a header.
@@ -327,7 +433,7 @@ COFFObjectFile::COFFObjectFile(MemoryBuffer *Object, error_code &ec)
Header = reinterpret_cast<const coff_file_header *>(base() + HeaderStart);
if (!checkAddr(Data, ec, uintptr_t(Header), sizeof(coff_file_header)))
return;
-
+
SectionTable =
reinterpret_cast<const coff_section *>( base()
+ HeaderStart
@@ -360,18 +466,18 @@ COFFObjectFile::COFFObjectFile(MemoryBuffer *Object, error_code &ec)
ec = object_error::parse_failed;
return;
}
-
+
ec = object_error::success;
}
-ObjectFile::symbol_iterator COFFObjectFile::begin_symbols() const {
+symbol_iterator COFFObjectFile::begin_symbols() const {
DataRefImpl ret;
std::memset(&ret, 0, sizeof(DataRefImpl));
ret.p = reinterpret_cast<intptr_t>(SymbolTable);
return symbol_iterator(SymbolRef(ret, this));
}
-ObjectFile::symbol_iterator COFFObjectFile::end_symbols() const {
+symbol_iterator COFFObjectFile::end_symbols() const {
// The symbol table ends where the string table begins.
DataRefImpl ret;
std::memset(&ret, 0, sizeof(DataRefImpl));
@@ -379,14 +485,14 @@ ObjectFile::symbol_iterator COFFObjectFile::end_symbols() const {
return symbol_iterator(SymbolRef(ret, this));
}
-ObjectFile::section_iterator COFFObjectFile::begin_sections() const {
+section_iterator COFFObjectFile::begin_sections() const {
DataRefImpl ret;
std::memset(&ret, 0, sizeof(DataRefImpl));
ret.p = reinterpret_cast<intptr_t>(SectionTable);
return section_iterator(SectionRef(ret, this));
}
-ObjectFile::section_iterator COFFObjectFile::end_sections() const {
+section_iterator COFFObjectFile::end_sections() const {
DataRefImpl ret;
std::memset(&ret, 0, sizeof(DataRefImpl));
ret.p = reinterpret_cast<intptr_t>(SectionTable + Header->NumberOfSections);
@@ -445,6 +551,121 @@ error_code COFFObjectFile::getString(uint32_t offset,
return object_error::success;
}
+error_code COFFObjectFile::getSymbol(uint32_t index,
+ const coff_symbol *&Result) const {
+ if (index < Header->NumberOfSymbols)
+ Result = SymbolTable + index;
+ else
+ return object_error::parse_failed;
+ return object_error::success;
+}
+
+const coff_relocation *COFFObjectFile::toRel(DataRefImpl Rel) const {
+ return reinterpret_cast<const coff_relocation*>(Rel.p);
+}
+error_code COFFObjectFile::getRelocationNext(DataRefImpl Rel,
+ RelocationRef &Res) const {
+ Rel.p = reinterpret_cast<uintptr_t>(
+ reinterpret_cast<const coff_relocation*>(Rel.p) + 1);
+ Res = RelocationRef(Rel, this);
+ return object_error::success;
+}
+error_code COFFObjectFile::getRelocationAddress(DataRefImpl Rel,
+ uint64_t &Res) const {
+ Res = toRel(Rel)->VirtualAddress;
+ return object_error::success;
+}
+error_code COFFObjectFile::getRelocationSymbol(DataRefImpl Rel,
+ SymbolRef &Res) const {
+ const coff_relocation* R = toRel(Rel);
+ DataRefImpl Symb;
+ Symb.p = reinterpret_cast<uintptr_t>(SymbolTable + R->SymbolTableIndex);
+ Res = SymbolRef(Symb, this);
+ return object_error::success;
+}
+error_code COFFObjectFile::getRelocationType(DataRefImpl Rel,
+ uint32_t &Res) const {
+ const coff_relocation* R = toRel(Rel);
+ Res = R->Type;
+ return object_error::success;
+}
+
+#define LLVM_COFF_SWITCH_RELOC_TYPE_NAME(enum) \
+ case COFF::enum: res = #enum; break;
+
+error_code COFFObjectFile::getRelocationTypeName(DataRefImpl Rel,
+ SmallVectorImpl<char> &Result) const {
+ const coff_relocation *reloc = toRel(Rel);
+ StringRef res;
+ switch (Header->Machine) {
+ case COFF::IMAGE_FILE_MACHINE_AMD64:
+ switch (reloc->Type) {
+ LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_AMD64_ABSOLUTE);
+ LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_AMD64_ADDR64);
+ LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_AMD64_ADDR32);
+ LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_AMD64_ADDR32NB);
+ LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_AMD64_REL32);
+ LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_AMD64_REL32_1);
+ LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_AMD64_REL32_2);
+ LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_AMD64_REL32_3);
+ LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_AMD64_REL32_4);
+ LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_AMD64_REL32_5);
+ LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_AMD64_SECTION);
+ LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_AMD64_SECREL);
+ LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_AMD64_SECREL7);
+ LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_AMD64_TOKEN);
+ LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_AMD64_SREL32);
+ LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_AMD64_PAIR);
+ LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_AMD64_SSPAN32);
+ default:
+ res = "Unknown";
+ }
+ break;
+ case COFF::IMAGE_FILE_MACHINE_I386:
+ switch (reloc->Type) {
+ LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_I386_ABSOLUTE);
+ LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_I386_DIR16);
+ LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_I386_REL16);
+ LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_I386_DIR32);
+ LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_I386_DIR32NB);
+ LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_I386_SEG12);
+ LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_I386_SECTION);
+ LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_I386_SECREL);
+ LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_I386_TOKEN);
+ LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_I386_SECREL7);
+ LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_I386_REL32);
+ default:
+ res = "Unknown";
+ }
+ break;
+ default:
+ res = "Unknown";
+ }
+ Result.append(res.begin(), res.end());
+ return object_error::success;
+}
+
+#undef LLVM_COFF_SWITCH_RELOC_TYPE_NAME
+
+error_code COFFObjectFile::getRelocationAdditionalInfo(DataRefImpl Rel,
+ int64_t &Res) const {
+ Res = 0;
+ return object_error::success;
+}
+error_code COFFObjectFile::getRelocationValueString(DataRefImpl Rel,
+ SmallVectorImpl<char> &Result) const {
+ const coff_relocation *reloc = toRel(Rel);
+ const coff_symbol *symb = 0;
+ if (error_code ec = getSymbol(reloc->SymbolTableIndex, symb)) return ec;
+ DataRefImpl sym;
+ ::memset(&sym, 0, sizeof(sym));
+ sym.p = reinterpret_cast<uintptr_t>(symb);
+ StringRef symname;
+ if (error_code ec = getSymbolName(sym, symname)) return ec;
+ Result.append(symname.begin(), symname.end());
+ return object_error::success;
+}
+
namespace llvm {
ObjectFile *ObjectFile::createCOFFObjectFile(MemoryBuffer *Object) {
diff --git a/lib/Object/ELFObjectFile.cpp b/lib/Object/ELFObjectFile.cpp
index e2ff4df..257d08c 100644
--- a/lib/Object/ELFObjectFile.cpp
+++ b/lib/Object/ELFObjectFile.cpp
@@ -14,11 +14,14 @@
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/ADT/Triple.h"
+#include "llvm/ADT/DenseMap.h"
#include "llvm/Object/ObjectFile.h"
#include "llvm/Support/ELF.h"
#include "llvm/Support/Endian.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/raw_ostream.h"
+#include <algorithm>
#include <limits>
#include <utility>
@@ -176,12 +179,89 @@ struct Elf_Sym_Impl : Elf_Sym_Base<target_endianness, is64Bits> {
}
namespace {
+template<support::endianness target_endianness, bool is64Bits, bool isRela>
+struct Elf_Rel_Base;
+
+template<support::endianness target_endianness>
+struct Elf_Rel_Base<target_endianness, false, false> {
+ LLVM_ELF_IMPORT_TYPES(target_endianness, false)
+ Elf_Addr r_offset; // Location (file byte offset, or program virtual addr)
+ Elf_Word r_info; // Symbol table index and type of relocation to apply
+};
+
+template<support::endianness target_endianness>
+struct Elf_Rel_Base<target_endianness, true, false> {
+ LLVM_ELF_IMPORT_TYPES(target_endianness, true)
+ Elf_Addr r_offset; // Location (file byte offset, or program virtual addr)
+ Elf_Xword r_info; // Symbol table index and type of relocation to apply
+};
+
+template<support::endianness target_endianness>
+struct Elf_Rel_Base<target_endianness, false, true> {
+ LLVM_ELF_IMPORT_TYPES(target_endianness, false)
+ Elf_Addr r_offset; // Location (file byte offset, or program virtual addr)
+ Elf_Word r_info; // Symbol table index and type of relocation to apply
+ Elf_Sword r_addend; // Compute value for relocatable field by adding this
+};
+
+template<support::endianness target_endianness>
+struct Elf_Rel_Base<target_endianness, true, true> {
+ LLVM_ELF_IMPORT_TYPES(target_endianness, true)
+ Elf_Addr r_offset; // Location (file byte offset, or program virtual addr)
+ Elf_Xword r_info; // Symbol table index and type of relocation to apply
+ Elf_Sxword r_addend; // Compute value for relocatable field by adding this.
+};
+
+template<support::endianness target_endianness, bool is64Bits, bool isRela>
+struct Elf_Rel_Impl;
+
+template<support::endianness target_endianness, bool isRela>
+struct Elf_Rel_Impl<target_endianness, true, isRela>
+ : Elf_Rel_Base<target_endianness, true, isRela> {
+ using Elf_Rel_Base<target_endianness, true, isRela>::r_info;
+ LLVM_ELF_IMPORT_TYPES(target_endianness, true)
+
+ // These accessors and mutators correspond to the ELF64_R_SYM, ELF64_R_TYPE,
+ // and ELF64_R_INFO macros defined in the ELF specification:
+ uint64_t getSymbol() const { return (r_info >> 32); }
+ unsigned char getType() const {
+ return (unsigned char) (r_info & 0xffffffffL);
+ }
+ void setSymbol(uint64_t s) { setSymbolAndType(s, getType()); }
+ void setType(unsigned char t) { setSymbolAndType(getSymbol(), t); }
+ void setSymbolAndType(uint64_t s, unsigned char t) {
+ r_info = (s << 32) + (t&0xffffffffL);
+ }
+};
+
+template<support::endianness target_endianness, bool isRela>
+struct Elf_Rel_Impl<target_endianness, false, isRela>
+ : Elf_Rel_Base<target_endianness, false, isRela> {
+ using Elf_Rel_Base<target_endianness, false, isRela>::r_info;
+ LLVM_ELF_IMPORT_TYPES(target_endianness, false)
+
+ // These accessors and mutators correspond to the ELF32_R_SYM, ELF32_R_TYPE,
+ // and ELF32_R_INFO macros defined in the ELF specification:
+ uint32_t getSymbol() const { return (r_info >> 8); }
+ unsigned char getType() const { return (unsigned char) (r_info & 0x0ff); }
+ void setSymbol(uint32_t s) { setSymbolAndType(s, getType()); }
+ void setType(unsigned char t) { setSymbolAndType(getSymbol(), t); }
+ void setSymbolAndType(uint32_t s, unsigned char t) {
+ r_info = (s << 8) + t;
+ }
+};
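+
+ // Packing example for the 32-bit form, with hypothetical values: a
+ // relocation against symbol index 5 with type 2 (R_386_PC32) stores
+ // r_info = (5 << 8) + 2 = 0x502, so getSymbol() yields 5 and getType()
+ // yields 2. The 64-bit form above splits r_info at bit 32 instead.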
+
+}
+
+namespace {
template<support::endianness target_endianness, bool is64Bits>
class ELFObjectFile : public ObjectFile {
LLVM_ELF_IMPORT_TYPES(target_endianness, is64Bits)
typedef Elf_Shdr_Impl<target_endianness, is64Bits> Elf_Shdr;
typedef Elf_Sym_Impl<target_endianness, is64Bits> Elf_Sym;
+ typedef Elf_Rel_Impl<target_endianness, is64Bits, false> Elf_Rel;
+ typedef Elf_Rel_Impl<target_endianness, is64Bits, true> Elf_Rela;
struct Elf_Ehdr {
unsigned char e_ident[ELF::EI_NIDENT]; // ELF Identification bytes
@@ -206,37 +286,81 @@ class ELFObjectFile : public ObjectFile {
unsigned char getDataEncoding() const { return e_ident[ELF::EI_DATA]; }
};
- typedef SmallVector<const Elf_Shdr*, 1> SymbolTableSections_t;
+ typedef SmallVector<const Elf_Shdr*, 1> Sections_t;
+ typedef DenseMap<unsigned, unsigned> IndexMap_t;
+ typedef DenseMap<const Elf_Shdr*, SmallVector<uint32_t, 1> > RelocMap_t;
const Elf_Ehdr *Header;
const Elf_Shdr *SectionHeaderTable;
const Elf_Shdr *dot_shstrtab_sec; // Section header string table.
const Elf_Shdr *dot_strtab_sec; // Symbol header string table.
- SymbolTableSections_t SymbolTableSections;
+ Sections_t SymbolTableSections;
+ IndexMap_t SymbolTableSectionsIndexMap;
+ DenseMap<const Elf_Sym*, ELF::Elf64_Word> ExtendedSymbolTable;
+
+ /// @brief Map sections to an array of relocation sections that reference
+ /// them sorted by section index.
+ RelocMap_t SectionRelocMap;
+
+ /// @brief Get the relocation section that contains \a Rel.
+ const Elf_Shdr *getRelSection(DataRefImpl Rel) const {
+ return getSection(Rel.w.b);
+ }
void validateSymbol(DataRefImpl Symb) const;
+ bool isRelocationHasAddend(DataRefImpl Rel) const;
+ template<typename T>
+ const T *getEntry(uint16_t Section, uint32_t Entry) const;
+ template<typename T>
+ const T *getEntry(const Elf_Shdr *Section, uint32_t Entry) const;
const Elf_Sym *getSymbol(DataRefImpl Symb) const;
const Elf_Shdr *getSection(DataRefImpl index) const;
- const Elf_Shdr *getSection(uint16_t index) const;
- const char *getString(uint16_t section, uint32_t offset) const;
+ const Elf_Shdr *getSection(uint32_t index) const;
+ const Elf_Rel *getRel(DataRefImpl Rel) const;
+ const Elf_Rela *getRela(DataRefImpl Rela) const;
+ const char *getString(uint32_t section, uint32_t offset) const;
const char *getString(const Elf_Shdr *section, uint32_t offset) const;
+ error_code getSymbolName(const Elf_Sym *Symb, StringRef &Res) const;
protected:
virtual error_code getSymbolNext(DataRefImpl Symb, SymbolRef &Res) const;
virtual error_code getSymbolName(DataRefImpl Symb, StringRef &Res) const;
+ virtual error_code getSymbolOffset(DataRefImpl Symb, uint64_t &Res) const;
virtual error_code getSymbolAddress(DataRefImpl Symb, uint64_t &Res) const;
virtual error_code getSymbolSize(DataRefImpl Symb, uint64_t &Res) const;
virtual error_code getSymbolNMTypeChar(DataRefImpl Symb, char &Res) const;
virtual error_code isSymbolInternal(DataRefImpl Symb, bool &Res) const;
+ virtual error_code isSymbolGlobal(DataRefImpl Symb, bool &Res) const;
+ virtual error_code getSymbolType(DataRefImpl Symb, SymbolRef::SymbolType &Res) const;
virtual error_code getSectionNext(DataRefImpl Sec, SectionRef &Res) const;
virtual error_code getSectionName(DataRefImpl Sec, StringRef &Res) const;
virtual error_code getSectionAddress(DataRefImpl Sec, uint64_t &Res) const;
virtual error_code getSectionSize(DataRefImpl Sec, uint64_t &Res) const;
virtual error_code getSectionContents(DataRefImpl Sec, StringRef &Res) const;
+ virtual error_code getSectionAlignment(DataRefImpl Sec, uint64_t &Res) const;
virtual error_code isSectionText(DataRefImpl Sec, bool &Res) const;
+ virtual error_code isSectionData(DataRefImpl Sec, bool &Res) const;
+ virtual error_code isSectionBSS(DataRefImpl Sec, bool &Res) const;
virtual error_code sectionContainsSymbol(DataRefImpl Sec, DataRefImpl Symb,
bool &Result) const;
+ virtual relocation_iterator getSectionRelBegin(DataRefImpl Sec) const;
+ virtual relocation_iterator getSectionRelEnd(DataRefImpl Sec) const;
+
+ virtual error_code getRelocationNext(DataRefImpl Rel,
+ RelocationRef &Res) const;
+ virtual error_code getRelocationAddress(DataRefImpl Rel,
+ uint64_t &Res) const;
+ virtual error_code getRelocationSymbol(DataRefImpl Rel,
+ SymbolRef &Res) const;
+ virtual error_code getRelocationType(DataRefImpl Rel,
+ uint32_t &Res) const;
+ virtual error_code getRelocationTypeName(DataRefImpl Rel,
+ SmallVectorImpl<char> &Result) const;
+ virtual error_code getRelocationAdditionalInfo(DataRefImpl Rel,
+ int64_t &Res) const;
+ virtual error_code getRelocationValueString(DataRefImpl Rel,
+ SmallVectorImpl<char> &Result) const;
public:
ELFObjectFile(MemoryBuffer *Object, error_code &ec);
@@ -248,6 +372,11 @@ public:
virtual uint8_t getBytesInAddress() const;
virtual StringRef getFileFormatName() const;
virtual unsigned getArch() const;
+
+ uint64_t getNumSections() const;
+ uint64_t getStringTableIndex() const;
+ ELF::Elf64_Word getSymbolTableIndex(const Elf_Sym *symb) const;
+ const Elf_Shdr *getSection(const Elf_Sym *symb) const;
};
} // end namespace
@@ -299,29 +428,37 @@ error_code ELFObjectFile<target_endianness, is64Bits>
::getSymbolName(DataRefImpl Symb,
StringRef &Result) const {
validateSymbol(Symb);
- const Elf_Sym *symb = getSymbol(Symb);
- if (symb->st_name == 0) {
- const Elf_Shdr *section = getSection(symb->st_shndx);
- if (!section)
- Result = "";
- else
- Result = getString(dot_shstrtab_sec, section->sh_name);
- return object_error::success;
- }
+ const Elf_Sym *symb = getSymbol(Symb);
+ return getSymbolName(symb, Result);
+}
- // Use the default symbol table name section.
- Result = getString(dot_strtab_sec, symb->st_name);
- return object_error::success;
+template<support::endianness target_endianness, bool is64Bits>
+ELF::Elf64_Word ELFObjectFile<target_endianness, is64Bits>
+ ::getSymbolTableIndex(const Elf_Sym *symb) const {
+ if (symb->st_shndx == ELF::SHN_XINDEX)
+ return ExtendedSymbolTable.lookup(symb);
+ return symb->st_shndx;
+}
+
+template<support::endianness target_endianness, bool is64Bits>
+const typename ELFObjectFile<target_endianness, is64Bits>::Elf_Shdr *
+ELFObjectFile<target_endianness, is64Bits>
+ ::getSection(const Elf_Sym *symb) const {
+ if (symb->st_shndx == ELF::SHN_XINDEX)
+ return getSection(ExtendedSymbolTable.lookup(symb));
+ if (symb->st_shndx >= ELF::SHN_LORESERVE)
+ return 0;
+ return getSection(symb->st_shndx);
}
template<support::endianness target_endianness, bool is64Bits>
error_code ELFObjectFile<target_endianness, is64Bits>
- ::getSymbolAddress(DataRefImpl Symb,
- uint64_t &Result) const {
+ ::getSymbolOffset(DataRefImpl Symb,
+ uint64_t &Result) const {
validateSymbol(Symb);
const Elf_Sym *symb = getSymbol(Symb);
const Elf_Shdr *Section;
- switch (symb->st_shndx) {
+ switch (getSymbolTableIndex(symb)) {
case ELF::SHN_COMMON:
// Undefined symbols have no address yet.
case ELF::SHN_UNDEF:
@@ -330,7 +467,7 @@ error_code ELFObjectFile<target_endianness, is64Bits>
case ELF::SHN_ABS:
Result = symb->st_value;
return object_error::success;
- default: Section = getSection(symb->st_shndx);
+ default: Section = getSection(symb);
}
switch (symb->getType()) {
@@ -350,6 +487,43 @@ error_code ELFObjectFile<target_endianness, is64Bits>
template<support::endianness target_endianness, bool is64Bits>
error_code ELFObjectFile<target_endianness, is64Bits>
+ ::getSymbolAddress(DataRefImpl Symb,
+ uint64_t &Result) const {
+ validateSymbol(Symb);
+ const Elf_Sym *symb = getSymbol(Symb);
+ const Elf_Shdr *Section;
+ switch (getSymbolTableIndex(symb)) {
+ case ELF::SHN_COMMON: // Fall through.
+ // Undefined symbols have no address yet.
+ case ELF::SHN_UNDEF:
+ Result = UnknownAddressOrSize;
+ return object_error::success;
+ case ELF::SHN_ABS:
+ Result = reinterpret_cast<uintptr_t>(base()+symb->st_value);
+ return object_error::success;
+ default: Section = getSection(symb);
+ }
+ const uint8_t* addr = base();
+ if (Section)
+ addr += Section->sh_offset;
+ switch (symb->getType()) {
+ case ELF::STT_SECTION:
+ Result = reinterpret_cast<uintptr_t>(addr);
+ return object_error::success;
+ case ELF::STT_FUNC: // Fall through.
+ case ELF::STT_OBJECT: // Fall through.
+ case ELF::STT_NOTYPE:
+ addr += symb->st_value;
+ Result = reinterpret_cast<uintptr_t>(addr);
+ return object_error::success;
+ default:
+ Result = UnknownAddressOrSize;
+ return object_error::success;
+ }
+}
+
+template<support::endianness target_endianness, bool is64Bits>
+error_code ELFObjectFile<target_endianness, is64Bits>
::getSymbolSize(DataRefImpl Symb,
uint64_t &Result) const {
validateSymbol(Symb);
@@ -366,7 +540,7 @@ error_code ELFObjectFile<target_endianness, is64Bits>
char &Result) const {
validateSymbol(Symb);
const Elf_Sym *symb = getSymbol(Symb);
- const Elf_Shdr *Section = getSection(symb->st_shndx);
+ const Elf_Shdr *Section = getSection(symb);
char ret = '?';
@@ -389,7 +563,7 @@ error_code ELFObjectFile<target_endianness, is64Bits>
}
}
- switch (symb->st_shndx) {
+ switch (getSymbolTableIndex(symb)) {
case ELF::SHN_UNDEF:
if (ret == '?')
ret = 'U';
@@ -401,7 +575,7 @@ error_code ELFObjectFile<target_endianness, is64Bits>
switch (symb->getBinding()) {
case ELF::STB_GLOBAL: ret = ::toupper(ret); break;
case ELF::STB_WEAK:
- if (symb->st_shndx == ELF::SHN_UNDEF)
+ if (getSymbolTableIndex(symb) == ELF::SHN_UNDEF)
ret = 'w';
else
if (symb->getType() == ELF::STT_OBJECT)
@@ -416,7 +590,8 @@ error_code ELFObjectFile<target_endianness, is64Bits>
return ec;
Result = StringSwitch<char>(name)
.StartsWith(".debug", 'N')
- .StartsWith(".note", 'n');
+ .StartsWith(".note", 'n')
+ .Default('?');
return object_error::success;
}
@@ -426,6 +601,43 @@ error_code ELFObjectFile<target_endianness, is64Bits>
template<support::endianness target_endianness, bool is64Bits>
error_code ELFObjectFile<target_endianness, is64Bits>
+ ::getSymbolType(DataRefImpl Symb,
+ SymbolRef::SymbolType &Result) const {
+ validateSymbol(Symb);
+ const Elf_Sym *symb = getSymbol(Symb);
+
+ if (getSymbolTableIndex(symb) == ELF::SHN_UNDEF) {
+ Result = SymbolRef::ST_External;
+ return object_error::success;
+ }
+
+ switch (symb->getType()) {
+ case ELF::STT_FUNC:
+ Result = SymbolRef::ST_Function;
+ break;
+ case ELF::STT_OBJECT:
+ Result = SymbolRef::ST_Data;
+ break;
+ default:
+ Result = SymbolRef::ST_Other;
+ break;
+ }
+ return object_error::success;
+}
+
+template<support::endianness target_endianness, bool is64Bits>
+error_code ELFObjectFile<target_endianness, is64Bits>
+ ::isSymbolGlobal(DataRefImpl Symb,
+ bool &Result) const {
+ validateSymbol(Symb);
+ const Elf_Sym *symb = getSymbol(Symb);
+
+ Result = symb->getBinding() == ELF::STB_GLOBAL;
+ return object_error::success;
+}
+
+template<support::endianness target_endianness, bool is64Bits>
+error_code ELFObjectFile<target_endianness, is64Bits>
::isSymbolInternal(DataRefImpl Symb,
bool &Result) const {
validateSymbol(Symb);
@@ -487,6 +699,15 @@ error_code ELFObjectFile<target_endianness, is64Bits>
template<support::endianness target_endianness, bool is64Bits>
error_code ELFObjectFile<target_endianness, is64Bits>
+ ::getSectionAlignment(DataRefImpl Sec,
+ uint64_t &Result) const {
+ const Elf_Shdr *sec = reinterpret_cast<const Elf_Shdr *>(Sec.p);
+ Result = sec->sh_addralign;
+ return object_error::success;
+}
+
+template<support::endianness target_endianness, bool is64Bits>
+error_code ELFObjectFile<target_endianness, is64Bits>
::isSectionText(DataRefImpl Sec,
bool &Result) const {
const Elf_Shdr *sec = reinterpret_cast<const Elf_Shdr *>(Sec.p);
@@ -499,6 +720,32 @@ error_code ELFObjectFile<target_endianness, is64Bits>
template<support::endianness target_endianness, bool is64Bits>
error_code ELFObjectFile<target_endianness, is64Bits>
+ ::isSectionData(DataRefImpl Sec,
+ bool &Result) const {
+ const Elf_Shdr *sec = reinterpret_cast<const Elf_Shdr *>(Sec.p);
+ if (sec->sh_flags & (ELF::SHF_ALLOC | ELF::SHF_WRITE)
+ && sec->sh_type == ELF::SHT_PROGBITS)
+ Result = true;
+ else
+ Result = false;
+ return object_error::success;
+}
+
+template<support::endianness target_endianness, bool is64Bits>
+error_code ELFObjectFile<target_endianness, is64Bits>
+ ::isSectionBSS(DataRefImpl Sec,
+ bool &Result) const {
+ const Elf_Shdr *sec = reinterpret_cast<const Elf_Shdr *>(Sec.p);
+ if (sec->sh_flags & (ELF::SHF_ALLOC | ELF::SHF_WRITE)
+ && sec->sh_type == ELF::SHT_NOBITS)
+ Result = true;
+ else
+ Result = false;
+ return object_error::success;
+}
+
+template<support::endianness target_endianness, bool is64Bits>
+error_code ELFObjectFile<target_endianness, is64Bits>
::sectionContainsSymbol(DataRefImpl Sec,
DataRefImpl Symb,
bool &Result) const {
@@ -508,6 +755,330 @@ error_code ELFObjectFile<target_endianness, is64Bits>
}
template<support::endianness target_endianness, bool is64Bits>
+relocation_iterator ELFObjectFile<target_endianness, is64Bits>
+ ::getSectionRelBegin(DataRefImpl Sec) const {
+ DataRefImpl RelData;
+ memset(&RelData, 0, sizeof(RelData));
+ const Elf_Shdr *sec = reinterpret_cast<const Elf_Shdr *>(Sec.p);
+ typename RelocMap_t::const_iterator ittr = SectionRelocMap.find(sec);
+ if (sec != 0 && ittr != SectionRelocMap.end()) {
+ RelData.w.a = getSection(ittr->second[0])->sh_info;
+ RelData.w.b = ittr->second[0];
+ RelData.w.c = 0;
+ }
+ return relocation_iterator(RelocationRef(RelData, this));
+}
+
+template<support::endianness target_endianness, bool is64Bits>
+relocation_iterator ELFObjectFile<target_endianness, is64Bits>
+ ::getSectionRelEnd(DataRefImpl Sec) const {
+ DataRefImpl RelData;
+ memset(&RelData, 0, sizeof(RelData));
+ const Elf_Shdr *sec = reinterpret_cast<const Elf_Shdr *>(Sec.p);
+ typename RelocMap_t::const_iterator ittr = SectionRelocMap.find(sec);
+ if (sec != 0 && ittr != SectionRelocMap.end()) {
+ // Get the index of the last relocation section for this section.
+ std::size_t relocsecindex = ittr->second[ittr->second.size() - 1];
+ const Elf_Shdr *relocsec = getSection(relocsecindex);
+ RelData.w.a = relocsec->sh_info;
+ RelData.w.b = relocsecindex;
+ RelData.w.c = relocsec->sh_size / relocsec->sh_entsize;
+ }
+ return relocation_iterator(RelocationRef(RelData, this));
+}
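+
+ // A reading of the iterator encoding used above (not a documented
+ // contract): w.a holds the index of the section the relocations apply to,
+ // w.b the index of the relocation section itself, and w.c the entry index
+ // within that relocation section.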
+
+// Relocations
+template<support::endianness target_endianness, bool is64Bits>
+error_code ELFObjectFile<target_endianness, is64Bits>
+ ::getRelocationNext(DataRefImpl Rel,
+ RelocationRef &Result) const {
+ ++Rel.w.c;
+ const Elf_Shdr *relocsec = getSection(Rel.w.b);
+ if (Rel.w.c >= (relocsec->sh_size / relocsec->sh_entsize)) {
+ // We have reached the end of the relocations for this section. See if there
+ // is another relocation section.
+ typename RelocMap_t::mapped_type relocseclist =
+ SectionRelocMap.lookup(getSection(Rel.w.a));
+
+ // Do a binary search for the current reloc section index (which must be
+ // present). Then get the next one.
+ typename RelocMap_t::mapped_type::const_iterator loc =
+ std::lower_bound(relocseclist.begin(), relocseclist.end(), Rel.w.b);
+ ++loc;
+
+ // If there is no next one, don't do anything. The ++Rel.w.c above sets Rel
+ // to the end iterator.
+ if (loc != relocseclist.end()) {
+ Rel.w.b = *loc;
+ Rel.w.a = 0;
+ }
+ }
+ Result = RelocationRef(Rel, this);
+ return object_error::success;
+}
+
+template<support::endianness target_endianness, bool is64Bits>
+error_code ELFObjectFile<target_endianness, is64Bits>
+ ::getRelocationSymbol(DataRefImpl Rel,
+ SymbolRef &Result) const {
+ uint32_t symbolIdx;
+ const Elf_Shdr *sec = getSection(Rel.w.b);
+ switch (sec->sh_type) {
+ default :
+ report_fatal_error("Invalid section type in Rel!");
+ case ELF::SHT_REL : {
+ symbolIdx = getRel(Rel)->getSymbol();
+ break;
+ }
+ case ELF::SHT_RELA : {
+ symbolIdx = getRela(Rel)->getSymbol();
+ break;
+ }
+ }
+ DataRefImpl SymbolData;
+ IndexMap_t::const_iterator it = SymbolTableSectionsIndexMap.find(sec->sh_link);
+ if (it == SymbolTableSectionsIndexMap.end())
+ report_fatal_error("Relocation symbol table not found!");
+ SymbolData.d.a = symbolIdx;
+ SymbolData.d.b = it->second;
+ Result = SymbolRef(SymbolData, this);
+ return object_error::success;
+}
+
+template<support::endianness target_endianness, bool is64Bits>
+error_code ELFObjectFile<target_endianness, is64Bits>
+ ::getRelocationAddress(DataRefImpl Rel,
+ uint64_t &Result) const {
+ uint64_t offset;
+ const Elf_Shdr *sec = getSection(Rel.w.b);
+ switch (sec->sh_type) {
+ default :
+ report_fatal_error("Invalid section type in Rel!");
+ case ELF::SHT_REL : {
+ offset = getRel(Rel)->r_offset;
+ break;
+ }
+ case ELF::SHT_RELA : {
+ offset = getRela(Rel)->r_offset;
+ break;
+ }
+ }
+
+ Result = offset;
+ return object_error::success;
+}
+
+template<support::endianness target_endianness, bool is64Bits>
+error_code ELFObjectFile<target_endianness, is64Bits>
+ ::getRelocationType(DataRefImpl Rel,
+ uint32_t &Result) const {
+ const Elf_Shdr *sec = getSection(Rel.w.b);
+ switch (sec->sh_type) {
+ default :
+ report_fatal_error("Invalid section type in Rel!");
+ case ELF::SHT_REL : {
+ Result = getRel(Rel)->getType();
+ break;
+ }
+ case ELF::SHT_RELA : {
+ Result = getRela(Rel)->getType();
+ break;
+ }
+ }
+ return object_error::success;
+}
+
+#define LLVM_ELF_SWITCH_RELOC_TYPE_NAME(enum) \
+ case ELF::enum: res = #enum; break;
+
+template<support::endianness target_endianness, bool is64Bits>
+error_code ELFObjectFile<target_endianness, is64Bits>
+ ::getRelocationTypeName(DataRefImpl Rel,
+ SmallVectorImpl<char> &Result) const {
+ const Elf_Shdr *sec = getSection(Rel.w.b);
+ uint8_t type;
+ StringRef res;
+ switch (sec->sh_type) {
+ default :
+ return object_error::parse_failed;
+ case ELF::SHT_REL : {
+ type = getRel(Rel)->getType();
+ break;
+ }
+ case ELF::SHT_RELA : {
+ type = getRela(Rel)->getType();
+ break;
+ }
+ }
+ switch (Header->e_machine) {
+ case ELF::EM_X86_64:
+ switch (type) {
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_X86_64_NONE);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_X86_64_64);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_X86_64_PC32);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_X86_64_GOT32);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_X86_64_PLT32);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_X86_64_COPY);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_X86_64_GLOB_DAT);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_X86_64_JUMP_SLOT);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_X86_64_RELATIVE);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_X86_64_GOTPCREL);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_X86_64_32);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_X86_64_32S);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_X86_64_16);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_X86_64_PC16);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_X86_64_8);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_X86_64_PC8);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_X86_64_DTPMOD64);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_X86_64_DTPOFF64);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_X86_64_TPOFF64);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_X86_64_TLSGD);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_X86_64_TLSLD);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_X86_64_DTPOFF32);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_X86_64_GOTTPOFF);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_X86_64_TPOFF32);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_X86_64_PC64);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_X86_64_GOTOFF64);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_X86_64_GOTPC32);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_X86_64_SIZE32);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_X86_64_SIZE64);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_X86_64_GOTPC32_TLSDESC);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_X86_64_TLSDESC_CALL);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_X86_64_TLSDESC);
+ default:
+ res = "Unknown";
+ }
+ break;
+ case ELF::EM_386:
+ switch (type) {
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_386_NONE);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_386_32);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_386_PC32);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_386_GOT32);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_386_PLT32);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_386_COPY);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_386_GLOB_DAT);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_386_JUMP_SLOT);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_386_RELATIVE);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_386_GOTOFF);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_386_GOTPC);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_386_32PLT);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_386_TLS_TPOFF);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_386_TLS_IE);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_386_TLS_GOTIE);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_386_TLS_LE);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_386_TLS_GD);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_386_TLS_LDM);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_386_16);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_386_PC16);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_386_8);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_386_PC8);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_386_TLS_GD_32);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_386_TLS_GD_PUSH);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_386_TLS_GD_CALL);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_386_TLS_GD_POP);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_386_TLS_LDM_32);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_386_TLS_LDM_PUSH);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_386_TLS_LDM_CALL);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_386_TLS_LDM_POP);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_386_TLS_LDO_32);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_386_TLS_IE_32);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_386_TLS_LE_32);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_386_TLS_DTPMOD32);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_386_TLS_DTPOFF32);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_386_TLS_TPOFF32);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_386_TLS_GOTDESC);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_386_TLS_DESC_CALL);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_386_TLS_DESC);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_386_IRELATIVE);
+ default:
+ res = "Unknown";
+ }
+ break;
+ default:
+ res = "Unknown";
+ }
+ Result.append(res.begin(), res.end());
+ return object_error::success;
+}
+
+#undef LLVM_ELF_SWITCH_RELOC_TYPE_NAME
+
+template<support::endianness target_endianness, bool is64Bits>
+error_code ELFObjectFile<target_endianness, is64Bits>
+ ::getRelocationAdditionalInfo(DataRefImpl Rel,
+ int64_t &Result) const {
+ const Elf_Shdr *sec = getSection(Rel.w.b);
+ switch (sec->sh_type) {
+ default :
+ report_fatal_error("Invalid section type in Rel!");
+ case ELF::SHT_REL : {
+ Result = 0;
+ return object_error::success;
+ }
+ case ELF::SHT_RELA : {
+ Result = getRela(Rel)->r_addend;
+ return object_error::success;
+ }
+ }
+}
+
+template<support::endianness target_endianness, bool is64Bits>
+error_code ELFObjectFile<target_endianness, is64Bits>
+ ::getRelocationValueString(DataRefImpl Rel,
+ SmallVectorImpl<char> &Result) const {
+ const Elf_Shdr *sec = getSection(Rel.w.b);
+ uint8_t type;
+ StringRef res;
+ int64_t addend = 0;
+ uint32_t symbol_index = 0;
+ switch (sec->sh_type) {
+ default :
+ return object_error::parse_failed;
+ case ELF::SHT_REL : {
+ type = getRel(Rel)->getType();
+ symbol_index = getRel(Rel)->getSymbol();
+ // TODO: Read implicit addend from section data.
+ break;
+ }
+ case ELF::SHT_RELA : {
+ type = getRela(Rel)->getType();
+ symbol_index = getRela(Rel)->getSymbol();
+ addend = getRela(Rel)->r_addend;
+ break;
+ }
+ }
+ const Elf_Sym *symb = getEntry<Elf_Sym>(sec->sh_link, symbol_index);
+ StringRef symname;
+ if (error_code ec = getSymbolName(symb, symname))
+ return ec;
+ switch (Header->e_machine) {
+ case ELF::EM_X86_64:
+ switch (type) {
+ case ELF::R_X86_64_32S:
+ res = symname;
+ break;
+ case ELF::R_X86_64_PC32: {
+ std::string fmtbuf;
+ raw_string_ostream fmt(fmtbuf);
+ fmt << symname << (addend < 0 ? "" : "+") << addend << "-P";
+ fmt.flush();
+ Result.append(fmtbuf.begin(), fmtbuf.end());
+ }
+ break;
+ default:
+ res = "Unknown";
+ }
+ break;
+ default:
+ res = "Unknown";
+ }
+ if (Result.empty())
+ Result.append(res.begin(), res.end());
+ return object_error::success;
+}
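+
+ // Example output, assuming a hypothetical symbol "printf": an
+ // R_X86_64_PC32 relocation with addend -4 formats as "printf-4-P", i.e.
+ // symbol plus signed addend minus the place P of the relocation.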
+
+template<support::endianness target_endianness, bool is64Bits>
ELFObjectFile<target_endianness, is64Bits>::ELFObjectFile(MemoryBuffer *Object
, error_code &ec)
: ObjectFile(Binary::isELF, Object, ec)
@@ -521,25 +1092,41 @@ ELFObjectFile<target_endianness, is64Bits>::ELFObjectFile(MemoryBuffer *Object
SectionHeaderTable =
reinterpret_cast<const Elf_Shdr *>(base() + Header->e_shoff);
- uint32_t SectionTableSize = Header->e_shnum * Header->e_shentsize;
+ uint64_t SectionTableSize = getNumSections() * Header->e_shentsize;
if (!( (const uint8_t *)SectionHeaderTable + SectionTableSize
<= base() + Data->getBufferSize()))
// FIXME: Proper error handling.
report_fatal_error("Section table goes past end of file!");
- // To find the symbol tables we walk the section table to find SHT_STMTAB.
- for (const char *i = reinterpret_cast<const char *>(SectionHeaderTable),
- *e = i + Header->e_shnum * Header->e_shentsize;
- i != e; i += Header->e_shentsize) {
- const Elf_Shdr *sh = reinterpret_cast<const Elf_Shdr*>(i);
+ // To find the symbol tables we walk the section table to find SHT_SYMTAB.
+ const Elf_Shdr* SymbolTableSectionHeaderIndex = 0;
+ const Elf_Shdr* sh = reinterpret_cast<const Elf_Shdr*>(SectionHeaderTable);
+ for (uint64_t i = 0, e = getNumSections(); i != e; ++i) {
+ if (sh->sh_type == ELF::SHT_SYMTAB_SHNDX) {
+ if (SymbolTableSectionHeaderIndex)
+ // FIXME: Proper error handling.
+ report_fatal_error("More than one .symtab_shndx!");
+ SymbolTableSectionHeaderIndex = sh;
+ }
if (sh->sh_type == ELF::SHT_SYMTAB) {
+ SymbolTableSectionsIndexMap[i] = SymbolTableSections.size();
SymbolTableSections.push_back(sh);
}
+ if (sh->sh_type == ELF::SHT_REL || sh->sh_type == ELF::SHT_RELA) {
+ SectionRelocMap[getSection(sh->sh_info)].push_back(i);
+ }
+ ++sh;
+ }
+
+ // Sort section relocation lists by index.
+ for (typename RelocMap_t::iterator i = SectionRelocMap.begin(),
+ e = SectionRelocMap.end(); i != e; ++i) {
+ std::sort(i->second.begin(), i->second.end());
}
// Get string table sections.
- dot_shstrtab_sec = getSection(Header->e_shstrndx);
+ dot_shstrtab_sec = getSection(getStringTableIndex());
if (dot_shstrtab_sec) {
// Verify that the last byte in the string table is a null.
if (((const char*)base() + dot_shstrtab_sec->sh_offset)
@@ -550,7 +1137,7 @@ ELFObjectFile<target_endianness, is64Bits>::ELFObjectFile(MemoryBuffer *Object
// Merge this into the above loop.
for (const char *i = reinterpret_cast<const char *>(SectionHeaderTable),
- *e = i + Header->e_shnum * Header->e_shentsize;
+ *e = i + getNumSections() * Header->e_shentsize;
i != e; i += Header->e_shentsize) {
const Elf_Shdr *sh = reinterpret_cast<const Elf_Shdr*>(i);
if (sh->sh_type == ELF::SHT_STRTAB) {
@@ -567,11 +1154,26 @@ ELFObjectFile<target_endianness, is64Bits>::ELFObjectFile(MemoryBuffer *Object
}
}
}
+
+ // Build the symbol-to-extended-section-index side table, if there is one.
+ if (SymbolTableSectionHeaderIndex) {
+ const Elf_Word *ShndxTable = reinterpret_cast<const Elf_Word*>(base() +
+ SymbolTableSectionHeaderIndex->sh_offset);
+ error_code ec;
+ for (symbol_iterator si = begin_symbols(),
+ se = end_symbols(); si != se; si.increment(ec)) {
+ if (ec)
+ report_fatal_error("Fewer extended symbol table entries than symbols!");
+ if (*ShndxTable != ELF::SHN_UNDEF)
+ ExtendedSymbolTable[getSymbol(si->getRawDataRefImpl())] = *ShndxTable;
+ ++ShndxTable;
+ }
+ }
}
template<support::endianness target_endianness, bool is64Bits>
-ObjectFile::symbol_iterator ELFObjectFile<target_endianness, is64Bits>
- ::begin_symbols() const {
+symbol_iterator ELFObjectFile<target_endianness, is64Bits>
+ ::begin_symbols() const {
DataRefImpl SymbolData;
memset(&SymbolData, 0, sizeof(SymbolData));
if (SymbolTableSections.size() == 0) {
@@ -585,8 +1187,8 @@ ObjectFile::symbol_iterator ELFObjectFile<target_endianness, is64Bits>
}
template<support::endianness target_endianness, bool is64Bits>
-ObjectFile::symbol_iterator ELFObjectFile<target_endianness, is64Bits>
- ::end_symbols() const {
+symbol_iterator ELFObjectFile<target_endianness, is64Bits>
+ ::end_symbols() const {
DataRefImpl SymbolData;
memset(&SymbolData, 0, sizeof(SymbolData));
SymbolData.d.a = std::numeric_limits<uint32_t>::max();
@@ -595,8 +1197,8 @@ ObjectFile::symbol_iterator ELFObjectFile<target_endianness, is64Bits>
}
template<support::endianness target_endianness, bool is64Bits>
-ObjectFile::section_iterator ELFObjectFile<target_endianness, is64Bits>
- ::begin_sections() const {
+section_iterator ELFObjectFile<target_endianness, is64Bits>
+ ::begin_sections() const {
DataRefImpl ret;
memset(&ret, 0, sizeof(DataRefImpl));
ret.p = reinterpret_cast<intptr_t>(base() + Header->e_shoff);
@@ -604,13 +1206,13 @@ ObjectFile::section_iterator ELFObjectFile<target_endianness, is64Bits>
}
template<support::endianness target_endianness, bool is64Bits>
-ObjectFile::section_iterator ELFObjectFile<target_endianness, is64Bits>
- ::end_sections() const {
+section_iterator ELFObjectFile<target_endianness, is64Bits>
+ ::end_sections() const {
DataRefImpl ret;
memset(&ret, 0, sizeof(DataRefImpl));
ret.p = reinterpret_cast<intptr_t>(base()
+ Header->e_shoff
- + (Header->e_shentsize * Header->e_shnum));
+ + (Header->e_shentsize*getNumSections()));
return section_iterator(SectionRef(ret, this));
}
@@ -629,6 +1231,8 @@ StringRef ELFObjectFile<target_endianness, is64Bits>
return "ELF32-i386";
case ELF::EM_X86_64:
return "ELF32-x86-64";
+ case ELF::EM_ARM:
+ return "ELF32-arm";
default:
return "ELF32-unknown";
}
@@ -654,26 +1258,75 @@ unsigned ELFObjectFile<target_endianness, is64Bits>::getArch() const {
return Triple::x86;
case ELF::EM_X86_64:
return Triple::x86_64;
+ case ELF::EM_ARM:
+ return Triple::arm;
default:
return Triple::UnknownArch;
}
}
template<support::endianness target_endianness, bool is64Bits>
+uint64_t ELFObjectFile<target_endianness, is64Bits>::getNumSections() const {
+ if (Header->e_shnum == ELF::SHN_UNDEF)
+ return SectionHeaderTable->sh_size;
+ return Header->e_shnum;
+}
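+
+ // This is the ELF escape for files with SHN_LORESERVE or more sections:
+ // when e_shnum is 0, the real section count is held in sh_size of section
+ // header 0.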
+
+template<support::endianness target_endianness, bool is64Bits>
+uint64_t
+ELFObjectFile<target_endianness, is64Bits>::getStringTableIndex() const {
+ if (Header->e_shnum == ELF::SHN_UNDEF) {
+ if (Header->e_shstrndx == ELF::SHN_HIRESERVE)
+ return SectionHeaderTable->sh_link;
+ if (Header->e_shstrndx >= getNumSections())
+ return 0;
+ }
+ return Header->e_shstrndx;
+}
+
+template<support::endianness target_endianness, bool is64Bits>
+template<typename T>
+inline const T *
+ELFObjectFile<target_endianness, is64Bits>::getEntry(uint16_t Section,
+ uint32_t Entry) const {
+ return getEntry<T>(getSection(Section), Entry);
+}
+
+template<support::endianness target_endianness, bool is64Bits>
+template<typename T>
+inline const T *
+ELFObjectFile<target_endianness, is64Bits>::getEntry(const Elf_Shdr * Section,
+ uint32_t Entry) const {
+ return reinterpret_cast<const T *>(
+ base()
+ + Section->sh_offset
+ + (Entry * Section->sh_entsize));
+}
+
+template<support::endianness target_endianness, bool is64Bits>
const typename ELFObjectFile<target_endianness, is64Bits>::Elf_Sym *
ELFObjectFile<target_endianness, is64Bits>::getSymbol(DataRefImpl Symb) const {
- const Elf_Shdr *sec = SymbolTableSections[Symb.d.b];
- return reinterpret_cast<const Elf_Sym *>(
- base()
- + sec->sh_offset
- + (Symb.d.a * sec->sh_entsize));
+ return getEntry<Elf_Sym>(SymbolTableSections[Symb.d.b], Symb.d.a);
+}
+
+template<support::endianness target_endianness, bool is64Bits>
+const typename ELFObjectFile<target_endianness, is64Bits>::Elf_Rel *
+ELFObjectFile<target_endianness, is64Bits>::getRel(DataRefImpl Rel) const {
+ return getEntry<Elf_Rel>(Rel.w.b, Rel.w.c);
+}
+
+template<support::endianness target_endianness, bool is64Bits>
+const typename ELFObjectFile<target_endianness, is64Bits>::Elf_Rela *
+ELFObjectFile<target_endianness, is64Bits>::getRela(DataRefImpl Rela) const {
+ return getEntry<Elf_Rela>(Rela.w.b, Rela.w.c);
}
template<support::endianness target_endianness, bool is64Bits>
const typename ELFObjectFile<target_endianness, is64Bits>::Elf_Shdr *
ELFObjectFile<target_endianness, is64Bits>::getSection(DataRefImpl Symb) const {
const Elf_Shdr *sec = getSection(Symb.d.b);
- if (sec->sh_type != ELF::SHT_SYMTAB)
+ if (sec->sh_type != ELF::SHT_SYMTAB && sec->sh_type != ELF::SHT_DYNSYM)
// FIXME: Proper error handling.
report_fatal_error("Invalid symbol table section!");
return sec;
@@ -681,10 +1334,10 @@ ELFObjectFile<target_endianness, is64Bits>::getSection(DataRefImpl Symb) const {
template<support::endianness target_endianness, bool is64Bits>
const typename ELFObjectFile<target_endianness, is64Bits>::Elf_Shdr *
-ELFObjectFile<target_endianness, is64Bits>::getSection(uint16_t index) const {
- if (index == 0 || index >= ELF::SHN_LORESERVE)
+ELFObjectFile<target_endianness, is64Bits>::getSection(uint32_t index) const {
+ if (index == 0)
return 0;
- if (!SectionHeaderTable || index >= Header->e_shnum)
+ if (!SectionHeaderTable || index >= getNumSections())
// FIXME: Proper error handling.
report_fatal_error("Invalid section index!");
@@ -695,7 +1348,7 @@ ELFObjectFile<target_endianness, is64Bits>::getSection(uint16_t index) const {
template<support::endianness target_endianness, bool is64Bits>
const char *ELFObjectFile<target_endianness, is64Bits>
- ::getString(uint16_t section,
+ ::getString(uint32_t section,
ELF::Elf32_Word offset) const {
return getString(getSection(section), offset);
}
@@ -711,6 +1364,24 @@ const char *ELFObjectFile<target_endianness, is64Bits>
return (const char *)base() + section->sh_offset + offset;
}
+template<support::endianness target_endianness, bool is64Bits>
+error_code ELFObjectFile<target_endianness, is64Bits>
+ ::getSymbolName(const Elf_Sym *symb,
+ StringRef &Result) const {
+ if (symb->st_name == 0) {
+ const Elf_Shdr *section = getSection(symb);
+ if (!section)
+ Result = "";
+ else
+ Result = getString(dot_shstrtab_sec, section->sh_name);
+ return object_error::success;
+ }
+
+  // Otherwise look the name up in the symbol table's string table (.strtab).
+ Result = getString(dot_strtab_sec, symb->st_name);
+ return object_error::success;
+}
+
// EI_CLASS, EI_DATA.
static std::pair<unsigned char, unsigned char>
getElfArchType(MemoryBuffer *Object) {
diff --git a/lib/Object/MachOObject.cpp b/lib/Object/MachOObject.cpp
index 9890feb..9cdac86 100644
--- a/lib/Object/MachOObject.cpp
+++ b/lib/Object/MachOObject.cpp
@@ -9,6 +9,7 @@
#include "llvm/Object/MachOObject.h"
#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/SmallVector.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/Host.h"
#include "llvm/Support/SwapByteOrder.h"
@@ -244,6 +245,18 @@ void MachOObject::ReadDysymtabLoadCommand(const LoadCommandInfo &LCI,
}
template<>
+void SwapStruct(macho::LinkeditDataLoadCommand &Value) {
+ SwapValue(Value.Type);
+ SwapValue(Value.Size);
+ SwapValue(Value.DataOffset);
+ SwapValue(Value.DataSize);
+}
+void MachOObject::ReadLinkeditDataLoadCommand(const LoadCommandInfo &LCI,
+ InMemoryStruct<macho::LinkeditDataLoadCommand> &Res) const {
+ ReadInMemoryStruct(*this, Buffer->getBuffer(), LCI.Offset, Res);
+}
+
+template<>
void SwapStruct(macho::IndirectSymbolTableEntry &Value) {
SwapValue(Value.Index);
}
@@ -343,6 +356,31 @@ void MachOObject::ReadSymbol64TableEntry(uint64_t SymbolTableOffset,
ReadInMemoryStruct(*this, Buffer->getBuffer(), Offset, Res);
}
+
+void MachOObject::ReadULEB128s(uint64_t Index,
+ SmallVectorImpl<uint64_t> &Out) const {
+ const char *ptr = Buffer->getBufferStart() + Index;
+ uint64_t data = 0;
+ uint64_t delta = 0;
+ uint32_t shift = 0;
+ while (true) {
+ assert(ptr < Buffer->getBufferEnd() && "index out of bounds");
+ assert(shift < 64 && "too big for uint64_t");
+
+ uint8_t byte = *ptr++;
+    delta |= (uint64_t(byte & 0x7F) << shift);
+ shift += 7;
+ if (byte < 0x80) {
+ if (delta == 0)
+ break;
+ data += delta;
+ Out.push_back(data);
+ delta = 0;
+ shift = 0;
+ }
+ }
+}
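The zero-terminated, delta-accumulating loop matches the encoding used by Mach-O
ULEB128 address lists such as the LC_FUNCTION_STARTS payload (an assumption from the
shape of the data, not stated in the patch). A worked example: the byte stream
{ 0x90, 0x01, 0x10, 0x00 } decodes as ULEB128 deltas 0x90 and 0x10; the running sum
appends 0x90 and 0xA0 to Out, and the final zero delta terminates the list.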
+
/* ** */
// Object Dumping Facilities
void MachOObject::dump() const { print(dbgs()); dbgs() << '\n'; }
diff --git a/lib/Object/MachOObjectFile.cpp b/lib/Object/MachOObjectFile.cpp
index 26a6e13..507df58 100644
--- a/lib/Object/MachOObjectFile.cpp
+++ b/lib/Object/MachOObjectFile.cpp
@@ -13,11 +13,9 @@
//===----------------------------------------------------------------------===//
#include "llvm/ADT/Triple.h"
+#include "llvm/Object/MachO.h"
#include "llvm/Object/MachOFormat.h"
-#include "llvm/Object/MachOObject.h"
-#include "llvm/Object/ObjectFile.h"
#include "llvm/Support/MemoryBuffer.h"
-#include "llvm/Support/MachO.h"
#include <cctype>
#include <cstring>
@@ -27,56 +25,24 @@ using namespace llvm;
using namespace object;
namespace llvm {
+namespace object {
-typedef MachOObject::LoadCommandInfo LoadCommandInfo;
-
-class MachOObjectFile : public ObjectFile {
-public:
- MachOObjectFile(MemoryBuffer *Object, MachOObject *MOO, error_code &ec)
+MachOObjectFile::MachOObjectFile(MemoryBuffer *Object, MachOObject *MOO,
+ error_code &ec)
: ObjectFile(Binary::isMachO, Object, ec),
MachOObj(MOO),
- RegisteredStringTable(std::numeric_limits<uint32_t>::max()) {}
-
- virtual symbol_iterator begin_symbols() const;
- virtual symbol_iterator end_symbols() const;
- virtual section_iterator begin_sections() const;
- virtual section_iterator end_sections() const;
-
- virtual uint8_t getBytesInAddress() const;
- virtual StringRef getFileFormatName() const;
- virtual unsigned getArch() const;
-
-protected:
- virtual error_code getSymbolNext(DataRefImpl Symb, SymbolRef &Res) const;
- virtual error_code getSymbolName(DataRefImpl Symb, StringRef &Res) const;
- virtual error_code getSymbolAddress(DataRefImpl Symb, uint64_t &Res) const;
- virtual error_code getSymbolSize(DataRefImpl Symb, uint64_t &Res) const;
- virtual error_code getSymbolNMTypeChar(DataRefImpl Symb, char &Res) const;
- virtual error_code isSymbolInternal(DataRefImpl Symb, bool &Res) const;
-
- virtual error_code getSectionNext(DataRefImpl Sec, SectionRef &Res) const;
- virtual error_code getSectionName(DataRefImpl Sec, StringRef &Res) const;
- virtual error_code getSectionAddress(DataRefImpl Sec, uint64_t &Res) const;
- virtual error_code getSectionSize(DataRefImpl Sec, uint64_t &Res) const;
- virtual error_code getSectionContents(DataRefImpl Sec, StringRef &Res) const;
- virtual error_code isSectionText(DataRefImpl Sec, bool &Res) const;
- virtual error_code sectionContainsSymbol(DataRefImpl DRI, DataRefImpl S,
- bool &Result) const;
-
-private:
- MachOObject *MachOObj;
- mutable uint32_t RegisteredStringTable;
-
- void moveToNextSection(DataRefImpl &DRI) const;
- void getSymbolTableEntry(DataRefImpl DRI,
- InMemoryStruct<macho::SymbolTableEntry> &Res) const;
- void getSymbol64TableEntry(DataRefImpl DRI,
- InMemoryStruct<macho::Symbol64TableEntry> &Res) const;
- void moveToNextSymbol(DataRefImpl &DRI) const;
- void getSection(DataRefImpl DRI, InMemoryStruct<macho::Section> &Res) const;
- void getSection64(DataRefImpl DRI,
- InMemoryStruct<macho::Section64> &Res) const;
-};
+ RegisteredStringTable(std::numeric_limits<uint32_t>::max()) {
+ DataRefImpl DRI;
+ DRI.d.a = DRI.d.b = 0;
+ moveToNextSection(DRI);
+ uint32_t LoadCommandCount = MachOObj->getHeader().NumLoadCommands;
+ while (DRI.d.a < LoadCommandCount) {
+ Sections.push_back(DRI);
+ DRI.d.b++;
+ moveToNextSection(DRI);
+ }
+}
+
ObjectFile *ObjectFile::createMachOObjectFile(MemoryBuffer *Buffer) {
error_code ec;
@@ -158,6 +124,27 @@ error_code MachOObjectFile::getSymbolName(DataRefImpl DRI,
return object_error::success;
}
+error_code MachOObjectFile::getSymbolOffset(DataRefImpl DRI,
+ uint64_t &Result) const {
+ uint64_t SectionOffset;
+ uint8_t SectionIndex;
+ if (MachOObj->is64Bit()) {
+ InMemoryStruct<macho::Symbol64TableEntry> Entry;
+ getSymbol64TableEntry(DRI, Entry);
+ Result = Entry->Value;
+ SectionIndex = Entry->SectionIndex;
+ } else {
+ InMemoryStruct<macho::SymbolTableEntry> Entry;
+ getSymbolTableEntry(DRI, Entry);
+ Result = Entry->Value;
+ SectionIndex = Entry->SectionIndex;
+ }
+ getSectionAddress(Sections[SectionIndex-1], SectionOffset);
+ Result -= SectionOffset;
+
+ return object_error::success;
+}
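A worked example (illustrative values only, assuming a section-defined symbol): for
a symbol value of 0x1F80 with SectionIndex == 2, and a second section whose address
is 0x1000, the offset comes out as 0x1F80 - 0x1000 == 0xF80. SectionIndex is 1-based
in Mach-O, hence the Sections[SectionIndex-1] lookup; an absolute or undefined
symbol (SectionIndex == 0, NO_SECT) would index out of range, so callers are assumed
to pass section-defined symbols.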
+
error_code MachOObjectFile::getSymbolAddress(DataRefImpl DRI,
uint64_t &Result) const {
if (MachOObj->is64Bit()) {
@@ -227,7 +214,51 @@ error_code MachOObjectFile::isSymbolInternal(DataRefImpl DRI,
return object_error::success;
}
-ObjectFile::symbol_iterator MachOObjectFile::begin_symbols() const {
+error_code MachOObjectFile::isSymbolGlobal(DataRefImpl Symb, bool &Res) const {
+
+ if (MachOObj->is64Bit()) {
+ InMemoryStruct<macho::Symbol64TableEntry> Entry;
+ getSymbol64TableEntry(Symb, Entry);
+ Res = Entry->Type & MachO::NlistMaskExternal;
+ } else {
+ InMemoryStruct<macho::SymbolTableEntry> Entry;
+ getSymbolTableEntry(Symb, Entry);
+ Res = Entry->Type & MachO::NlistMaskExternal;
+ }
+ return object_error::success;
+}
+
+error_code MachOObjectFile::getSymbolType(DataRefImpl Symb,
+ SymbolRef::SymbolType &Res) const {
+ uint8_t n_type;
+ if (MachOObj->is64Bit()) {
+ InMemoryStruct<macho::Symbol64TableEntry> Entry;
+ getSymbol64TableEntry(Symb, Entry);
+ n_type = Entry->Type;
+ } else {
+ InMemoryStruct<macho::SymbolTableEntry> Entry;
+ getSymbolTableEntry(Symb, Entry);
+ n_type = Entry->Type;
+ }
+ Res = SymbolRef::ST_Other;
+
+ // If this is a STAB debugging symbol, we can do nothing more.
+ if (n_type & MachO::NlistMaskStab)
+ return object_error::success;
+
+ switch (n_type & MachO::NlistMaskType) {
+ case MachO::NListTypeUndefined :
+ Res = SymbolRef::ST_External;
+ break;
+ case MachO::NListTypeSection :
+ Res = SymbolRef::ST_Function;
+ break;
+ }
+ return object_error::success;
+}
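A sketch (not part of the patch) of the n_type bit layout this switch decodes,
using the standard Mach-O nlist masks:

    #include <cstdint>

    // Assumed values, matching <mach-o/nlist.h>: N_STAB 0xe0, N_TYPE 0x0e,
    // N_EXT 0x01; within N_TYPE, N_UNDF is 0x0 and N_SECT is 0xe.
    bool isStab(uint8_t n_type)      { return (n_type & 0xe0) != 0; }
    bool isExternal(uint8_t n_type)  { return (n_type & 0x01) != 0; }
    uint8_t typeBits(uint8_t n_type) { return n_type & 0x0e; }
    // e.g. n_type == 0x0f: external symbol defined in a section;
    //      n_type == 0x01: external undefined reference.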
+
+symbol_iterator MachOObjectFile::begin_symbols() const {
// DRI.d.a = segment number; DRI.d.b = symbol index.
DataRefImpl DRI;
DRI.d.a = DRI.d.b = 0;
@@ -235,7 +266,7 @@ ObjectFile::symbol_iterator MachOObjectFile::begin_symbols() const {
return symbol_iterator(SymbolRef(DRI, this));
}
-ObjectFile::symbol_iterator MachOObjectFile::end_symbols() const {
+symbol_iterator MachOObjectFile::end_symbols() const {
DataRefImpl DRI;
DRI.d.a = MachOObj->getHeader().NumLoadCommands;
DRI.d.b = 0;
@@ -283,6 +314,13 @@ MachOObjectFile::getSection(DataRefImpl DRI,
MachOObj->ReadSection(LCI, DRI.d.b, Res);
}
+std::size_t MachOObjectFile::getSectionIndex(DataRefImpl Sec) const {
+ SectionList::const_iterator loc =
+ std::find(Sections.begin(), Sections.end(), Sec);
+ assert(loc != Sections.end() && "Sec is not a valid section!");
+ return std::distance(Sections.begin(), loc);
+}
+
void
MachOObjectFile::getSection64(DataRefImpl DRI,
InMemoryStruct<macho::Section64> &Res) const {
@@ -371,6 +409,20 @@ error_code MachOObjectFile::getSectionContents(DataRefImpl DRI,
return object_error::success;
}
+error_code MachOObjectFile::getSectionAlignment(DataRefImpl DRI,
+ uint64_t &Result) const {
+ if (is64BitLoadCommand(MachOObj, DRI)) {
+ InMemoryStruct<macho::Section64> Sect;
+ getSection64(DRI, Sect);
+ Result = uint64_t(1) << Sect->Align;
+ } else {
+ InMemoryStruct<macho::Section> Sect;
+ getSection(DRI, Sect);
+ Result = uint64_t(1) << Sect->Align;
+ }
+ return object_error::success;
+}
+
error_code MachOObjectFile::isSectionText(DataRefImpl DRI,
bool &Result) const {
if (is64BitLoadCommand(MachOObj, DRI)) {
@@ -385,35 +437,185 @@ error_code MachOObjectFile::isSectionText(DataRefImpl DRI,
return object_error::success;
}
+error_code MachOObjectFile::isSectionData(DataRefImpl DRI,
+ bool &Result) const {
+ // FIXME: Unimplemented.
+ Result = false;
+ return object_error::success;
+}
+
+error_code MachOObjectFile::isSectionBSS(DataRefImpl DRI,
+ bool &Result) const {
+ // FIXME: Unimplemented.
+ Result = false;
+ return object_error::success;
+}
+
error_code MachOObjectFile::sectionContainsSymbol(DataRefImpl Sec,
DataRefImpl Symb,
bool &Result) const {
+ SymbolRef::SymbolType ST;
+ getSymbolType(Symb, ST);
+ if (ST == SymbolRef::ST_External) {
+ Result = false;
+ return object_error::success;
+ }
+
+ uint64_t SectBegin, SectEnd;
+ getSectionAddress(Sec, SectBegin);
+ getSectionSize(Sec, SectEnd);
+ SectEnd += SectBegin;
+
if (MachOObj->is64Bit()) {
InMemoryStruct<macho::Symbol64TableEntry> Entry;
getSymbol64TableEntry(Symb, Entry);
- Result = Entry->SectionIndex == 1 + Sec.d.a + Sec.d.b;
+    uint64_t SymAddr = Entry->Value;
+ Result = (SymAddr >= SectBegin) && (SymAddr < SectEnd);
} else {
InMemoryStruct<macho::SymbolTableEntry> Entry;
getSymbolTableEntry(Symb, Entry);
- Result = Entry->SectionIndex == 1 + Sec.d.a + Sec.d.b;
+    uint64_t SymAddr = Entry->Value;
+ Result = (SymAddr >= SectBegin) && (SymAddr < SectEnd);
}
+
return object_error::success;
}
-ObjectFile::section_iterator MachOObjectFile::begin_sections() const {
+relocation_iterator MachOObjectFile::getSectionRelBegin(DataRefImpl Sec) const {
+ DataRefImpl ret;
+ ret.d.a = 0;
+ ret.d.b = getSectionIndex(Sec);
+ return relocation_iterator(RelocationRef(ret, this));
+}
+relocation_iterator MachOObjectFile::getSectionRelEnd(DataRefImpl Sec) const {
+ uint32_t last_reloc;
+ if (is64BitLoadCommand(MachOObj, Sec)) {
+ InMemoryStruct<macho::Section64> Sect;
+ getSection64(Sec, Sect);
+ last_reloc = Sect->NumRelocationTableEntries;
+ } else {
+ InMemoryStruct<macho::Section> Sect;
+ getSection(Sec, Sect);
+ last_reloc = Sect->NumRelocationTableEntries;
+ }
+ DataRefImpl ret;
+ ret.d.a = last_reloc;
+ ret.d.b = getSectionIndex(Sec);
+ return relocation_iterator(RelocationRef(ret, this));
+}
+
+section_iterator MachOObjectFile::begin_sections() const {
DataRefImpl DRI;
DRI.d.a = DRI.d.b = 0;
moveToNextSection(DRI);
return section_iterator(SectionRef(DRI, this));
}
-ObjectFile::section_iterator MachOObjectFile::end_sections() const {
+section_iterator MachOObjectFile::end_sections() const {
DataRefImpl DRI;
DRI.d.a = MachOObj->getHeader().NumLoadCommands;
DRI.d.b = 0;
return section_iterator(SectionRef(DRI, this));
}
+/*===-- Relocations -------------------------------------------------------===*/
+
+void MachOObjectFile::
+getRelocation(DataRefImpl Rel,
+ InMemoryStruct<macho::RelocationEntry> &Res) const {
+ uint32_t relOffset;
+ if (MachOObj->is64Bit()) {
+ InMemoryStruct<macho::Section64> Sect;
+ getSection64(Sections[Rel.d.b], Sect);
+ relOffset = Sect->RelocationTableOffset;
+ } else {
+ InMemoryStruct<macho::Section> Sect;
+ getSection(Sections[Rel.d.b], Sect);
+ relOffset = Sect->RelocationTableOffset;
+ }
+ MachOObj->ReadRelocationEntry(relOffset, Rel.d.a, Res);
+}
+error_code MachOObjectFile::getRelocationNext(DataRefImpl Rel,
+ RelocationRef &Res) const {
+ ++Rel.d.a;
+ Res = RelocationRef(Rel, this);
+ return object_error::success;
+}
+error_code MachOObjectFile::getRelocationAddress(DataRefImpl Rel,
+ uint64_t &Res) const {
+ const uint8_t* sectAddress = base();
+ if (MachOObj->is64Bit()) {
+ InMemoryStruct<macho::Section64> Sect;
+ getSection64(Sections[Rel.d.b], Sect);
+ sectAddress += Sect->Offset;
+ } else {
+ InMemoryStruct<macho::Section> Sect;
+ getSection(Sections[Rel.d.b], Sect);
+ sectAddress += Sect->Offset;
+ }
+ InMemoryStruct<macho::RelocationEntry> RE;
+ getRelocation(Rel, RE);
+ Res = reinterpret_cast<uintptr_t>(sectAddress + RE->Word0);
+ return object_error::success;
+}
+error_code MachOObjectFile::getRelocationSymbol(DataRefImpl Rel,
+ SymbolRef &Res) const {
+ InMemoryStruct<macho::RelocationEntry> RE;
+ getRelocation(Rel, RE);
+ uint32_t SymbolIdx = RE->Word1 & 0xffffff;
+ bool isExtern = (RE->Word1 >> 27) & 1;
+
+ DataRefImpl Sym;
+ Sym.d.a = Sym.d.b = 0;
+ moveToNextSymbol(Sym);
+ if (isExtern) {
+ for (unsigned i = 0; i < SymbolIdx; i++) {
+ Sym.d.b++;
+ moveToNextSymbol(Sym);
+ assert(Sym.d.a < MachOObj->getHeader().NumLoadCommands &&
+ "Relocation symbol index out of range!");
+ }
+ }
+ Res = SymbolRef(Sym, this);
+ return object_error::success;
+}
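For reference (an assumption from the standard Mach-O relocation_info layout, not
spelled out in the patch): a non-scattered entry packs Word1 as bits 0-23
r_symbolnum, bit 24 r_pcrel, bits 25-26 r_length, bit 27 r_extern, and bits 28-31
r_type, which is what the (Word1 & 0xffffff) and ((Word1 >> 27) & 1) extractions
above pull apart.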
+error_code MachOObjectFile::getRelocationType(DataRefImpl Rel,
+ uint32_t &Res) const {
+ InMemoryStruct<macho::RelocationEntry> RE;
+ getRelocation(Rel, RE);
+ Res = RE->Word1;
+ return object_error::success;
+}
+error_code MachOObjectFile::getRelocationTypeName(DataRefImpl Rel,
+ SmallVectorImpl<char> &Result) const {
+ return object_error::success;
+}
+error_code MachOObjectFile::getRelocationAdditionalInfo(DataRefImpl Rel,
+ int64_t &Res) const {
+ InMemoryStruct<macho::RelocationEntry> RE;
+ getRelocation(Rel, RE);
+ bool isExtern = (RE->Word1 >> 27) & 1;
+ Res = 0;
+ if (!isExtern) {
+ const uint8_t* sectAddress = base();
+ if (MachOObj->is64Bit()) {
+ InMemoryStruct<macho::Section64> Sect;
+ getSection64(Sections[Rel.d.b], Sect);
+ sectAddress += Sect->Offset;
+ } else {
+ InMemoryStruct<macho::Section> Sect;
+ getSection(Sections[Rel.d.b], Sect);
+ sectAddress += Sect->Offset;
+ }
+ Res = reinterpret_cast<uintptr_t>(sectAddress);
+ }
+ return object_error::success;
+}
+error_code MachOObjectFile::getRelocationValueString(DataRefImpl Rel,
+ SmallVectorImpl<char> &Result) const {
+ return object_error::success;
+}
+
/*===-- Miscellaneous -----------------------------------------------------===*/
uint8_t MachOObjectFile::getBytesInAddress() const {
@@ -465,5 +667,5 @@ unsigned MachOObjectFile::getArch() const {
}
}
+} // end namespace object
} // end namespace llvm
-
diff --git a/lib/Object/Object.cpp b/lib/Object/Object.cpp
index 9a373ad..2ea8db9 100644
--- a/lib/Object/Object.cpp
+++ b/lib/Object/Object.cpp
@@ -27,8 +27,8 @@ void LLVMDisposeObjectFile(LLVMObjectFileRef ObjectFile) {
}
LLVMSectionIteratorRef LLVMGetSections(LLVMObjectFileRef ObjectFile) {
- ObjectFile::section_iterator SI = unwrap(ObjectFile)->begin_sections();
- return wrap(new ObjectFile::section_iterator(SI));
+ section_iterator SI = unwrap(ObjectFile)->begin_sections();
+ return wrap(new section_iterator(SI));
}
void LLVMDisposeSectionIterator(LLVMSectionIteratorRef SI) {
diff --git a/lib/Object/ObjectFile.cpp b/lib/Object/ObjectFile.cpp
index a7798df..69d8ed0 100644
--- a/lib/Object/ObjectFile.cpp
+++ b/lib/Object/ObjectFile.cpp
@@ -45,6 +45,7 @@ ObjectFile *ObjectFile::createObjectFile(MemoryBuffer *Object) {
case sys::Mach_O_DynamicLinker_FileType:
case sys::Mach_O_Bundle_FileType:
case sys::Mach_O_DynamicallyLinkedSharedLibStub_FileType:
+ case sys::Mach_O_DSYMCompanion_FileType:
return createMachOObjectFile(Object);
case sys::COFF_FileType:
return createCOFFObjectFile(Object);
diff --git a/lib/Support/APFloat.cpp b/lib/Support/APFloat.cpp
index c64da6e..f238894 100644
--- a/lib/Support/APFloat.cpp
+++ b/lib/Support/APFloat.cpp
@@ -832,6 +832,7 @@ APFloat::incrementSignificand()
/* Our callers should never cause us to overflow. */
assert(carry == 0);
+ (void)carry;
}
/* Add the significand of the RHS. Returns the carry flag. */
@@ -926,6 +927,7 @@ APFloat::multiplySignificand(const APFloat &rhs, const APFloat *addend)
APFloat extendedAddend(*addend);
status = extendedAddend.convert(extendedSemantics, rmTowardZero, &ignored);
assert(status == opOK);
+ (void)status;
lost_fraction = addOrSubtractSignificand(extendedAddend, false);
/* Restore our state. */
@@ -1190,7 +1192,7 @@ APFloat::normalize(roundingMode rounding_mode,
if (omsb) {
/* OMSB is numbered from 1. We want to place it in the integer
- bit numbered PRECISON if possible, with a compensating change in
+ bit numbered PRECISION if possible, with a compensating change in
the exponent. */
exponentChange = omsb - semantics->precision;
@@ -1389,6 +1391,7 @@ APFloat::addOrSubtractSignificand(const APFloat &rhs, bool subtract)
/* The code above is intended to ensure that no borrow is
necessary. */
assert(!carry);
+ (void)carry;
} else {
if (bits > 0) {
APFloat temp_rhs(rhs);
@@ -1402,6 +1405,7 @@ APFloat::addOrSubtractSignificand(const APFloat &rhs, bool subtract)
/* We have a guard bit; generating a carry cannot happen. */
assert(!carry);
+ (void)carry;
}
return lost_fraction;
@@ -2098,7 +2102,7 @@ APFloat::convertToInteger(APSInt &result,
opStatus status = convertToInteger(
parts.data(), bitWidth, result.isSigned(), rounding_mode, isExact);
// Keeps the original signed-ness.
- result = APInt(bitWidth, (unsigned)parts.size(), parts.data());
+ result = APInt(bitWidth, parts);
return status;
}
@@ -2121,7 +2125,7 @@ APFloat::convertFromUnsignedParts(const integerPart *src,
dstCount = partCount();
precision = semantics->precision;
- /* We want the most significant PRECISON bits of SRC. There may not
+ /* We want the most significant PRECISION bits of SRC. There may not
be that many; extract what we can. */
if (precision <= omsb) {
exponent = omsb - 1;
@@ -2192,7 +2196,7 @@ APFloat::convertFromZeroExtendedInteger(const integerPart *parts,
roundingMode rounding_mode)
{
unsigned int partCount = partCountForBits(width);
- APInt api = APInt(width, partCount, parts);
+ APInt api = APInt(width, makeArrayRef(parts, partCount));
sign = false;
if (isSigned && APInt::tcExtractBit(parts, width - 1)) {
@@ -2746,7 +2750,7 @@ APFloat::convertF80LongDoubleAPFloatToAPInt() const
words[0] = mysignificand;
words[1] = ((uint64_t)(sign & 1) << 15) |
(myexponent & 0x7fffLL);
- return APInt(80, 2, words);
+ return APInt(80, words);
}
APInt
@@ -2791,7 +2795,7 @@ APFloat::convertPPCDoubleDoubleAPFloatToAPInt() const
words[1] = ((uint64_t)(sign2 & 1) << 63) |
((myexponent2 & 0x7ff) << 52) |
(mysignificand2 & 0xfffffffffffffLL);
- return APInt(128, 2, words);
+ return APInt(128, words);
}
APInt
@@ -2827,7 +2831,7 @@ APFloat::convertQuadrupleAPFloatToAPInt() const
((myexponent & 0x7fff) << 48) |
(mysignificand2 & 0xffffffffffffLL);
- return APInt(128, 2, words);
+ return APInt(128, words);
}
APInt
@@ -3239,8 +3243,9 @@ APFloat APFloat::getLargest(const fltSemantics &Sem, bool Negative) {
significand[i] = ~((integerPart) 0);
// ...and then clear the top bits for internal consistency.
- significand[N-1] &=
- (((integerPart) 1) << ((Sem.precision % integerPartWidth) - 1)) - 1;
+ if (Sem.precision % integerPartWidth != 0)
+ significand[N-1] &=
+ (((integerPart) 1) << (Sem.precision % integerPartWidth)) - 1;
return Val;
}
@@ -3270,7 +3275,7 @@ APFloat APFloat::getSmallestNormalized(const fltSemantics &Sem, bool Negative) {
Val.exponent = Sem.minExponent;
Val.zeroSignificand();
Val.significandParts()[partCountForBits(Sem.precision)-1] |=
- (((integerPart) 1) << ((Sem.precision % integerPartWidth) - 1));
+ (((integerPart) 1) << ((Sem.precision - 1) % integerPartWidth));
return Val;
}
@@ -3413,8 +3418,8 @@ void APFloat::toString(SmallVectorImpl<char> &Str,
// Decompose the number into an APInt and an exponent.
int exp = exponent - ((int) semantics->precision - 1);
APInt significand(semantics->precision,
- partCountForBits(semantics->precision),
- significandParts());
+ makeArrayRef(significandParts(),
+ partCountForBits(semantics->precision)));
// Set FormatPrecision if zero. We want to do this before we
// truncate trailing zeros, as those are part of the precision.
@@ -3451,7 +3456,7 @@ void APFloat::toString(SmallVectorImpl<char> &Str,
// <= semantics->precision + e * 137 / 59
// (log_2(5) ~ 2.321928 < 2.322034 ~ 137/59)
- unsigned precision = semantics->precision + 137 * texp / 59;
+ unsigned precision = semantics->precision + (137 * texp + 136) / 59;
// Multiply significand by 5^e.
// N * 5^0101 == N * 5^(1*1) * 5^(0*2) * 5^(1*4) * 5^(0*8)
diff --git a/lib/Support/APInt.cpp b/lib/Support/APInt.cpp
index 76265d4..3774c52 100644
--- a/lib/Support/APInt.cpp
+++ b/lib/Support/APInt.cpp
@@ -48,18 +48,20 @@ inline static uint64_t* getMemory(unsigned numWords) {
inline static unsigned getDigit(char cdigit, uint8_t radix) {
unsigned r;
- if (radix == 16) {
+ if (radix == 16 || radix == 36) {
r = cdigit - '0';
if (r <= 9)
return r;
r = cdigit - 'A';
- if (r <= 5)
+ if (r <= radix - 11U)
return r + 10;
r = cdigit - 'a';
- if (r <= 5)
+ if (r <= radix - 11U)
return r + 10;
+
+ radix = 10;
}
r = cdigit - '0';
@@ -83,25 +85,33 @@ void APInt::initSlowCase(const APInt& that) {
memcpy(pVal, that.pVal, getNumWords() * APINT_WORD_SIZE);
}
-
-APInt::APInt(unsigned numBits, unsigned numWords, const uint64_t bigVal[])
- : BitWidth(numBits), VAL(0) {
+void APInt::initFromArray(ArrayRef<uint64_t> bigVal) {
assert(BitWidth && "Bitwidth too small");
- assert(bigVal && "Null pointer detected!");
+ assert(bigVal.data() && "Null pointer detected!");
if (isSingleWord())
VAL = bigVal[0];
else {
// Get memory, cleared to 0
pVal = getClearedMemory(getNumWords());
// Calculate the number of words to copy
- unsigned words = std::min<unsigned>(numWords, getNumWords());
+ unsigned words = std::min<unsigned>(bigVal.size(), getNumWords());
// Copy the words from bigVal to pVal
- memcpy(pVal, bigVal, words * APINT_WORD_SIZE);
+ memcpy(pVal, bigVal.data(), words * APINT_WORD_SIZE);
}
// Make sure unused high bits are cleared
clearUnusedBits();
}
+APInt::APInt(unsigned numBits, ArrayRef<uint64_t> bigVal)
+ : BitWidth(numBits), VAL(0) {
+ initFromArray(bigVal);
+}
+
+APInt::APInt(unsigned numBits, unsigned numWords, const uint64_t bigVal[])
+ : BitWidth(numBits), VAL(0) {
+ initFromArray(makeArrayRef(bigVal, numWords));
+}
+
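A minimal usage sketch (not part of the patch), assuming the ArrayRef overload
declared alongside these definitions:

    #include "llvm/ADT/APInt.h"
    #include "llvm/ADT/ArrayRef.h"
    #include <cstdint>
    using namespace llvm;

    void apintCtorExample() {                 // illustrative name
      uint64_t words[2] = { 0x0123456789abcdefULL, 0xfedcba9876543210ULL };
      APInt A(128, makeArrayRef(words, 2));   // preferred form
      APInt B(128, 2, words);                 // legacy form; forwards to initFromArray
      (void)A; (void)B;
    }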
APInt::APInt(unsigned numbits, StringRef Str, uint8_t radix)
: BitWidth(numbits), VAL(0) {
assert(BitWidth && "Bitwidth too small");
@@ -376,6 +386,7 @@ APInt& APInt::operator*=(const APInt& RHS) {
clearAllBits();
unsigned wordsToCopy = destWords >= getNumWords() ? getNumWords() : destWords;
memcpy(pVal, dest, wordsToCopy * APINT_WORD_SIZE);
+ clearUnusedBits();
// delete dest array and return
delete[] dest;
@@ -461,7 +472,7 @@ APInt APInt::operator*(const APInt& RHS) const {
return APInt(BitWidth, VAL * RHS.VAL);
APInt Result(*this);
Result *= RHS;
- return Result.clearUnusedBits();
+ return Result;
}
APInt APInt::operator+(const APInt& RHS) const {
@@ -613,8 +624,9 @@ void APInt::flipBit(unsigned bitPosition) {
unsigned APInt::getBitsNeeded(StringRef str, uint8_t radix) {
assert(!str.empty() && "Invalid string length");
- assert((radix == 10 || radix == 8 || radix == 16 || radix == 2) &&
- "Radix should be 2, 8, 10, or 16!");
+ assert((radix == 10 || radix == 8 || radix == 16 || radix == 2 ||
+ radix == 36) &&
+ "Radix should be 2, 8, 10, 16, or 36!");
size_t slen = str.size();
@@ -636,6 +648,8 @@ unsigned APInt::getBitsNeeded(StringRef str, uint8_t radix) {
if (radix == 16)
return slen * 4 + isNegative;
+ // FIXME: base 36
+
// This is grossly inefficient but accurate. We could probably do something
// with a computation of roughly slen*64/20 and then adjust by the value of
// the first few digits. But, I'm not sure how accurate that could be.
@@ -644,7 +658,9 @@ unsigned APInt::getBitsNeeded(StringRef str, uint8_t radix) {
// be too large. This avoids the assertion in the constructor. This
// calculation doesn't work appropriately for the numbers 0-9, so just use 4
// bits in that case.
- unsigned sufficient = slen == 1 ? 4 : slen * 64/18;
+ unsigned sufficient
+ = radix == 10? (slen == 1 ? 4 : slen * 64/18)
+ : (slen == 1 ? 7 : slen * 16/3);
// Convert to the actual binary value.
APInt tmp(sufficient, StringRef(p, slen), radix);
@@ -2107,8 +2123,9 @@ APInt APInt::sshl_ov(unsigned ShAmt, bool &Overflow) const {
void APInt::fromString(unsigned numbits, StringRef str, uint8_t radix) {
// Check our assumptions here
assert(!str.empty() && "Invalid string length");
- assert((radix == 10 || radix == 8 || radix == 16 || radix == 2) &&
- "Radix should be 2, 8, 10, or 16!");
+ assert((radix == 10 || radix == 8 || radix == 16 || radix == 2 ||
+ radix == 36) &&
+ "Radix should be 2, 8, 10, 16, or 36!");
StringRef::iterator p = str.begin();
size_t slen = str.size();
@@ -2165,7 +2182,8 @@ void APInt::fromString(unsigned numbits, StringRef str, uint8_t radix) {
void APInt::toString(SmallVectorImpl<char> &Str, unsigned Radix,
bool Signed, bool formatAsCLiteral) const {
-  assert((Radix == 10 || Radix == 8 || Radix == 16 || Radix == 2) &&
-         "Radix should be 2, 8, 10, or 16!");
+  assert((Radix == 10 || Radix == 8 || Radix == 16 || Radix == 2 ||
+          Radix == 36) &&
+         "Radix should be 2, 8, 10, 16, or 36!");
const char *Prefix = "";
@@ -2195,7 +2213,7 @@ void APInt::toString(SmallVectorImpl<char> &Str, unsigned Radix,
return;
}
- static const char Digits[] = "0123456789ABCDEF";
+ static const char Digits[] = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ";
if (isSingleWord()) {
char Buffer[65];
@@ -2249,7 +2267,7 @@ void APInt::toString(SmallVectorImpl<char> &Str, unsigned Radix,
// For the 2, 8 and 16 bit cases, we can just shift instead of divide
// because the number of bits per digit (1, 3 and 4 respectively) divides
  // equally. We just shift until the value is zero.
- if (Radix != 10) {
+ if (Radix == 2 || Radix == 8 || Radix == 16) {
// Just shift tmp right for each digit width until it becomes zero
unsigned ShiftAmt = (Radix == 16 ? 4 : (Radix == 8 ? 3 : 1));
unsigned MaskAmt = Radix - 1;
@@ -2260,7 +2278,7 @@ void APInt::toString(SmallVectorImpl<char> &Str, unsigned Radix,
Tmp = Tmp.lshr(ShiftAmt);
}
} else {
- APInt divisor(4, 10);
+ APInt divisor(Radix == 10? 4 : 8, Radix);
while (Tmp != 0) {
APInt APdigit(1, 0);
APInt tmp2(Tmp.getBitWidth(), 0);
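A worked example (not part of the patch) of the new radix: with base 36,
APInt(16, StringRef("zz"), 36) parses to 35 * 36 + 35 == 1295, since getDigit maps
both 'Z' and 'z' to 35; printing back with Radix == 36 uses the extended
"0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ" digit table and takes the generic divide
loop, because 36 is not a power of two.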
diff --git a/lib/Support/Atomic.cpp b/lib/Support/Atomic.cpp
index c7b4bff..94760cc 100644
--- a/lib/Support/Atomic.cpp
+++ b/lib/Support/Atomic.cpp
@@ -22,7 +22,7 @@ using namespace llvm;
#endif
void sys::MemoryFence() {
-#if LLVM_MULTITHREADED==0
+#if LLVM_HAS_ATOMICS == 0
return;
#else
# if defined(__GNUC__)
@@ -38,7 +38,7 @@ void sys::MemoryFence() {
sys::cas_flag sys::CompareAndSwap(volatile sys::cas_flag* ptr,
sys::cas_flag new_value,
sys::cas_flag old_value) {
-#if LLVM_MULTITHREADED==0
+#if LLVM_HAS_ATOMICS == 0
sys::cas_flag result = *ptr;
if (result == old_value)
*ptr = new_value;
@@ -53,7 +53,7 @@ sys::cas_flag sys::CompareAndSwap(volatile sys::cas_flag* ptr,
}
sys::cas_flag sys::AtomicIncrement(volatile sys::cas_flag* ptr) {
-#if LLVM_MULTITHREADED==0
+#if LLVM_HAS_ATOMICS == 0
++(*ptr);
return *ptr;
#elif defined(__GNUC__)
@@ -66,7 +66,7 @@ sys::cas_flag sys::AtomicIncrement(volatile sys::cas_flag* ptr) {
}
sys::cas_flag sys::AtomicDecrement(volatile sys::cas_flag* ptr) {
-#if LLVM_MULTITHREADED==0
+#if LLVM_HAS_ATOMICS == 0
--(*ptr);
return *ptr;
#elif defined(__GNUC__)
@@ -79,7 +79,7 @@ sys::cas_flag sys::AtomicDecrement(volatile sys::cas_flag* ptr) {
}
sys::cas_flag sys::AtomicAdd(volatile sys::cas_flag* ptr, sys::cas_flag val) {
-#if LLVM_MULTITHREADED==0
+#if LLVM_HAS_ATOMICS == 0
*ptr += val;
return *ptr;
#elif defined(__GNUC__)
diff --git a/lib/Support/BlockFrequency.cpp b/lib/Support/BlockFrequency.cpp
new file mode 100644
index 0000000..a63bf83
--- /dev/null
+++ b/lib/Support/BlockFrequency.cpp
@@ -0,0 +1,126 @@
+//====--------------- lib/Support/BlockFrequency.cpp -----------*- C++ -*-====//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the BlockFrequency class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Support/BranchProbability.h"
+#include "llvm/Support/BlockFrequency.h"
+#include "llvm/Support/raw_ostream.h"
+#include <cassert>
+
+using namespace llvm;
+
+namespace {
+
+/// mult96bit - Multiply FREQ by N and store result in W array.
+void mult96bit(uint64_t freq, uint32_t N, uint64_t W[2]) {
+ uint64_t u0 = freq & UINT32_MAX;
+ uint64_t u1 = freq >> 32;
+
+ // Represent 96-bit value as w[2]:w[1]:w[0];
+ uint32_t w[3] = { 0, 0, 0 };
+
+ uint64_t t = u0 * N;
+ uint64_t k = t >> 32;
+ w[0] = t;
+ t = u1 * N + k;
+ w[1] = t;
+ w[2] = t >> 32;
+
+ // W[1] - higher bits.
+ // W[0] - lower bits.
+ W[0] = w[0] + ((uint64_t) w[1] << 32);
+ W[1] = w[2];
+}
+
+
+/// div96bit - Divide 96-bit value stored in W array by D. Return 64-bit frequency.
+uint64_t div96bit(uint64_t W[2], uint32_t D) {
+ uint64_t y = W[0];
+ uint64_t x = W[1];
+ int i;
+
+ for (i = 1; i <= 64 && x; ++i) {
+    uint64_t t = uint64_t((int64_t)x >> 63);
+ x = (x << 1) | (y >> 63);
+ y = y << 1;
+ if ((x | t) >= D) {
+ x -= D;
+ ++y;
+ }
+ }
+
+ return y << (64 - i + 1);
+}
+
+}
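On compilers with a 128-bit integer type, the mult96bit/div96bit pair can be
cross-checked against a direct computation. A sketch (not part of the patch),
assuming the GCC/Clang __uint128_t extension:

    #include <cstdint>

    // Reference value for freq * n / d computed without intermediate overflow,
    // for comparison against mult96bit + div96bit.
    uint64_t scaleRef(uint64_t freq, uint32_t n, uint32_t d) {
      return (uint64_t)(((__uint128_t)freq * n) / d);
    }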
+
+
+BlockFrequency &BlockFrequency::operator*=(const BranchProbability &Prob) {
+ uint32_t n = Prob.getNumerator();
+ uint32_t d = Prob.getDenominator();
+
+  assert(n <= d && "Probability must be less than or equal to 1.");
+
+  // If the multiplication could overflow, fall back to 96-bit operations.
+ if (n > 0 && Frequency > UINT64_MAX / n) {
+ // 96-bit value represented as W[1]:W[0].
+ uint64_t W[2];
+
+    // The probability is at most 1, so the result is guaranteed to fit in
+    // 64 bits.
+ mult96bit(Frequency, n, W);
+ Frequency = div96bit(W, d);
+ return *this;
+ }
+
+ Frequency *= n;
+ Frequency /= d;
+ return *this;
+}
+
+const BlockFrequency
+BlockFrequency::operator*(const BranchProbability &Prob) const {
+ BlockFrequency Freq(Frequency);
+ Freq *= Prob;
+ return Freq;
+}
+
+BlockFrequency &BlockFrequency::operator+=(const BlockFrequency &Freq) {
+ uint64_t Before = Freq.Frequency;
+ Frequency += Freq.Frequency;
+
+ // If overflow, set frequency to the maximum value.
+ if (Frequency < Before)
+ Frequency = UINT64_MAX;
+
+ return *this;
+}
+
+const BlockFrequency
+BlockFrequency::operator+(const BlockFrequency &Other) const {
+  BlockFrequency Freq(Frequency);
+  Freq += Other;
+ return Freq;
+}
+
+void BlockFrequency::print(raw_ostream &OS) const {
+ OS << Frequency;
+}
+
+namespace llvm {
+
+raw_ostream &operator<<(raw_ostream &OS, const BlockFrequency &Freq) {
+ Freq.print(OS);
+ return OS;
+}
+
+}
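A minimal usage sketch (not part of the patch), assuming the
BlockFrequency/BranchProbability headers introduced with this code:

    #include "llvm/Support/BlockFrequency.h"
    #include "llvm/Support/BranchProbability.h"
    using namespace llvm;

    void blockFreqExample() {             // illustrative name
      BlockFrequency Entry(1024);
      BranchProbability Taken(3, 4);      // 3/4
      BlockFrequency Out = Entry * Taken; // 1024 * 3 / 4 == 768
      Out += Entry;                       // 1792; saturates at UINT64_MAX on overflow
      (void)Out;
    }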
diff --git a/lib/Support/BranchProbability.cpp b/lib/Support/BranchProbability.cpp
index 97342da..49d04ed 100644
--- a/lib/Support/BranchProbability.cpp
+++ b/lib/Support/BranchProbability.cpp
@@ -24,9 +24,8 @@ BranchProbability::BranchProbability(uint32_t n, uint32_t d) {
D = d;
}
-raw_ostream &BranchProbability::print(raw_ostream &OS) const {
+void BranchProbability::print(raw_ostream &OS) const {
OS << N << " / " << D << " = " << ((double)N / D);
- return OS;
}
void BranchProbability::dump() const {
diff --git a/lib/Support/CMakeLists.txt b/lib/Support/CMakeLists.txt
index 867d930..63a833c 100644
--- a/lib/Support/CMakeLists.txt
+++ b/lib/Support/CMakeLists.txt
@@ -9,11 +9,13 @@ add_llvm_library(LLVMSupport
APInt.cpp
APSInt.cpp
Allocator.cpp
+ BlockFrequency.cpp
BranchProbability.cpp
circular_raw_ostream.cpp
CommandLine.cpp
ConstantRange.cpp
CrashRecoveryContext.cpp
+ DataExtractor.cpp
Debug.cpp
DeltaAlgorithm.cpp
DAGDeltaAlgorithm.cpp
@@ -42,7 +44,6 @@ add_llvm_library(LLVMSupport
StringPool.cpp
StringRef.cpp
SystemUtils.cpp
- TargetRegistry.cpp
Timer.cpp
ToolOutputFile.cpp
Triple.cpp
@@ -72,6 +73,7 @@ add_llvm_library(LLVMSupport
SearchForAddressOfSpecialSymbol.cpp
Signals.cpp
system_error.cpp
+ TargetRegistry.cpp
ThreadLocal.cpp
Threading.cpp
TimeValue.cpp
diff --git a/lib/Support/CommandLine.cpp b/lib/Support/CommandLine.cpp
index 2914337..238adcc 100644
--- a/lib/Support/CommandLine.cpp
+++ b/lib/Support/CommandLine.cpp
@@ -23,7 +23,6 @@
#include "llvm/Support/ManagedStatic.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Support/system_error.h"
-#include "llvm/Target/TargetRegistry.h"
#include "llvm/Support/Host.h"
#include "llvm/Support/Path.h"
#include "llvm/ADT/OwningPtr.h"
@@ -45,6 +44,7 @@ TEMPLATE_INSTANTIATION(class basic_parser<bool>);
TEMPLATE_INSTANTIATION(class basic_parser<boolOrDefault>);
TEMPLATE_INSTANTIATION(class basic_parser<int>);
TEMPLATE_INSTANTIATION(class basic_parser<unsigned>);
+TEMPLATE_INSTANTIATION(class basic_parser<unsigned long long>);
TEMPLATE_INSTANTIATION(class basic_parser<double>);
TEMPLATE_INSTANTIATION(class basic_parser<float>);
TEMPLATE_INSTANTIATION(class basic_parser<std::string>);
@@ -63,6 +63,7 @@ void parser<bool>::anchor() {}
void parser<boolOrDefault>::anchor() {}
void parser<int>::anchor() {}
void parser<unsigned>::anchor() {}
+void parser<unsigned long long>::anchor() {}
void parser<double>::anchor() {}
void parser<float>::anchor() {}
void parser<std::string>::anchor() {}
@@ -1006,6 +1007,16 @@ bool parser<unsigned>::parse(Option &O, StringRef ArgName,
return false;
}
+// parser<unsigned long long> implementation
+//
+bool parser<unsigned long long>::parse(Option &O, StringRef ArgName,
+ StringRef Arg, unsigned long long &Value){
+
+ if (Arg.getAsInteger(0, Value))
+ return O.error("'" + Arg + "' value invalid for uint argument!");
+ return false;
+}
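With the new specialization, 64-bit numeric options work like the existing unsigned
ones. A minimal usage sketch (not part of the patch; the option name is
illustrative):

    #include "llvm/Support/CommandLine.h"

    static llvm::cl::opt<unsigned long long>
    StartAddr("start-addr",
              llvm::cl::desc("Start address (0x-prefixed hex, octal, or decimal)"),
              llvm::cl::init(0)); // getAsInteger(0, ...) auto-detects the base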
+
// parser<double>/parser<float> implementation
//
static bool parseDouble(Option &O, StringRef Arg, double &Value) {
@@ -1151,6 +1162,7 @@ PRINT_OPT_DIFF(bool)
PRINT_OPT_DIFF(boolOrDefault)
PRINT_OPT_DIFF(int)
PRINT_OPT_DIFF(unsigned)
+PRINT_OPT_DIFF(unsigned long long)
PRINT_OPT_DIFF(double)
PRINT_OPT_DIFF(float)
PRINT_OPT_DIFF(char)
@@ -1330,10 +1342,7 @@ void cl::PrintOptionValues() {
static void (*OverrideVersionPrinter)() = 0;
-static int TargetArraySortFn(const void *LHS, const void *RHS) {
- typedef std::pair<const char *, const Target*> pair_ty;
- return strcmp(((const pair_ty*)LHS)->first, ((const pair_ty*)RHS)->first);
-}
+static std::vector<void (*)()>* ExtraVersionPrinters = 0;
namespace {
class VersionPrinter {
@@ -1361,37 +1370,27 @@ public:
<< " Built " << __DATE__ << " (" << __TIME__ << ").\n"
#endif
<< " Host: " << sys::getHostTriple() << '\n'
- << " Host CPU: " << CPU << '\n'
- << '\n'
- << " Registered Targets:\n";
-
- std::vector<std::pair<const char *, const Target*> > Targets;
- size_t Width = 0;
- for (TargetRegistry::iterator it = TargetRegistry::begin(),
- ie = TargetRegistry::end(); it != ie; ++it) {
- Targets.push_back(std::make_pair(it->getName(), &*it));
- Width = std::max(Width, strlen(Targets.back().first));
- }
- if (!Targets.empty())
- qsort(&Targets[0], Targets.size(), sizeof(Targets[0]),
- TargetArraySortFn);
-
- for (unsigned i = 0, e = Targets.size(); i != e; ++i) {
- OS << " " << Targets[i].first;
- OS.indent(Width - strlen(Targets[i].first)) << " - "
- << Targets[i].second->getShortDescription() << '\n';
- }
- if (Targets.empty())
- OS << " (none)\n";
+ << " Host CPU: " << CPU << '\n';
}
void operator=(bool OptionWasSpecified) {
if (!OptionWasSpecified) return;
- if (OverrideVersionPrinter == 0) {
- print();
+ if (OverrideVersionPrinter != 0) {
+ (*OverrideVersionPrinter)();
exit(1);
}
- (*OverrideVersionPrinter)();
+ print();
+
+ // Iterate over any registered extra printers and call them to add further
+ // information.
+ if (ExtraVersionPrinters != 0) {
+ outs() << '\n';
+ for (std::vector<void (*)()>::iterator I = ExtraVersionPrinters->begin(),
+ E = ExtraVersionPrinters->end();
+ I != E; ++I)
+ (*I)();
+ }
+
exit(1);
}
};
@@ -1424,3 +1423,10 @@ void cl::PrintVersionMessage() {
void cl::SetVersionPrinter(void (*func)()) {
OverrideVersionPrinter = func;
}
+
+void cl::AddExtraVersionPrinter(void (*func)()) {
+ if (ExtraVersionPrinters == 0)
+ ExtraVersionPrinters = new std::vector<void (*)()>;
+
+ ExtraVersionPrinters->push_back(func);
+}
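A minimal usage sketch (not part of the patch; names are illustrative) for a tool
that wants extra lines appended to --version output:

    #include "llvm/Support/CommandLine.h"
    #include "llvm/Support/raw_ostream.h"

    static void printMyToolVersion() {
      llvm::outs() << "  MyTool extensions built in\n";
    }

    int main(int argc, char **argv) {
      // Register before parsing so --version picks it up.
      llvm::cl::AddExtraVersionPrinter(printMyToolVersion);
      llvm::cl::ParseCommandLineOptions(argc, argv);
      return 0;
    }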
diff --git a/lib/Support/ConstantRange.cpp b/lib/Support/ConstantRange.cpp
index 81382d0..c29cb53 100644
--- a/lib/Support/ConstantRange.cpp
+++ b/lib/Support/ConstantRange.cpp
@@ -21,11 +21,10 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/Constants.h"
+#include "llvm/InstrTypes.h"
#include "llvm/Support/ConstantRange.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Instructions.h"
using namespace llvm;
/// Initialize a full (the default) or empty set for the specified type.
@@ -56,56 +55,56 @@ ConstantRange ConstantRange::makeICmpRegion(unsigned Pred,
uint32_t W = CR.getBitWidth();
switch (Pred) {
- default: assert(!"Invalid ICmp predicate to makeICmpRegion()");
- case ICmpInst::ICMP_EQ:
+ default: assert(0 && "Invalid ICmp predicate to makeICmpRegion()");
+ case CmpInst::ICMP_EQ:
return CR;
- case ICmpInst::ICMP_NE:
+ case CmpInst::ICMP_NE:
if (CR.isSingleElement())
return ConstantRange(CR.getUpper(), CR.getLower());
return ConstantRange(W);
- case ICmpInst::ICMP_ULT: {
+ case CmpInst::ICMP_ULT: {
APInt UMax(CR.getUnsignedMax());
if (UMax.isMinValue())
return ConstantRange(W, /* empty */ false);
return ConstantRange(APInt::getMinValue(W), UMax);
}
- case ICmpInst::ICMP_SLT: {
+ case CmpInst::ICMP_SLT: {
APInt SMax(CR.getSignedMax());
if (SMax.isMinSignedValue())
return ConstantRange(W, /* empty */ false);
return ConstantRange(APInt::getSignedMinValue(W), SMax);
}
- case ICmpInst::ICMP_ULE: {
+ case CmpInst::ICMP_ULE: {
APInt UMax(CR.getUnsignedMax());
if (UMax.isMaxValue())
return ConstantRange(W);
return ConstantRange(APInt::getMinValue(W), UMax + 1);
}
- case ICmpInst::ICMP_SLE: {
+ case CmpInst::ICMP_SLE: {
APInt SMax(CR.getSignedMax());
if (SMax.isMaxSignedValue())
return ConstantRange(W);
return ConstantRange(APInt::getSignedMinValue(W), SMax + 1);
}
- case ICmpInst::ICMP_UGT: {
+ case CmpInst::ICMP_UGT: {
APInt UMin(CR.getUnsignedMin());
if (UMin.isMaxValue())
return ConstantRange(W, /* empty */ false);
return ConstantRange(UMin + 1, APInt::getNullValue(W));
}
- case ICmpInst::ICMP_SGT: {
+ case CmpInst::ICMP_SGT: {
APInt SMin(CR.getSignedMin());
if (SMin.isMaxSignedValue())
return ConstantRange(W, /* empty */ false);
return ConstantRange(SMin + 1, APInt::getSignedMinValue(W));
}
- case ICmpInst::ICMP_UGE: {
+ case CmpInst::ICMP_UGE: {
APInt UMin(CR.getUnsignedMin());
if (UMin.isMinValue())
return ConstantRange(W);
return ConstantRange(UMin, APInt::getNullValue(W));
}
- case ICmpInst::ICMP_SGE: {
+ case CmpInst::ICMP_SGE: {
APInt SMin(CR.getSignedMin());
if (SMin.isMinSignedValue())
return ConstantRange(W);
diff --git a/lib/Support/CrashRecoveryContext.cpp b/lib/Support/CrashRecoveryContext.cpp
index 899c389..263114c 100644
--- a/lib/Support/CrashRecoveryContext.cpp
+++ b/lib/Support/CrashRecoveryContext.cpp
@@ -12,6 +12,7 @@
#include "llvm/Config/config.h"
#include "llvm/Support/Mutex.h"
#include "llvm/Support/ThreadLocal.h"
+#include "llvm/Support/ErrorHandling.h"
#include <setjmp.h>
#include <cstdio>
using namespace llvm;
@@ -123,7 +124,56 @@ CrashRecoveryContext::unregisterCleanup(CrashRecoveryContextCleanup *cleanup) {
#ifdef LLVM_ON_WIN32
-// FIXME: No real Win32 implementation currently.
+#include "Windows/Windows.h"
+
+// On Windows, we can make use of vectored exception handling to
+// catch most crashing situations. Note that this does mean
+// we will be alerted of exceptions *before* structured exception
+// handling has the opportunity to catch them. That is unlikely to
+// cause problems, however, because SEH is not used anywhere in the
+// project.
+//
+// Vectored exception handling is built on top of SEH, and so it
+// works on a per-thread basis.
+//
+// The vectored exception handler functionality was added in Windows
+// XP, so if support for older versions of Windows is required,
+// it will have to be added.
+//
+// If we want to support as far back as Win2k, we could use the
+// SetUnhandledExceptionFilter API, but there's a risk of that
+// being entirely overwritten (it's not a chain).
+
+static LONG CALLBACK ExceptionHandler(PEXCEPTION_POINTERS ExceptionInfo)
+{
+ // Lookup the current thread local recovery object.
+ const CrashRecoveryContextImpl *CRCI = CurrentContext.get();
+
+ if (!CRCI) {
+ // Something has gone horribly wrong, so let's just tell everyone
+ // to keep searching
+ CrashRecoveryContext::Disable();
+ return EXCEPTION_CONTINUE_SEARCH;
+ }
+
+ // TODO: We can capture the stack backtrace here and store it on the
+ // implementation if we so choose.
+
+ // Handle the crash
+ const_cast<CrashRecoveryContextImpl*>(CRCI)->HandleCrash();
+
+ // Note that we don't actually get here because HandleCrash calls
+ // longjmp, which means the HandleCrash function never returns.
+ llvm_unreachable("Handled the crash, should have longjmp'ed out of here");
+ return EXCEPTION_CONTINUE_SEARCH;
+}
+
+// Because the Enable and Disable calls are static, it means that
+// there may not actually be an Impl available, or even a current
+// CrashRecoveryContext at all. So we make use of a thread-local
+// exception table. The handles contained in here will either be
+// non-NULL, valid VEH handles, or NULL.
+static sys::ThreadLocal<const void> sCurrentExceptionHandle;
void CrashRecoveryContext::Enable() {
sys::ScopedLock L(gCrashRecoveryContexMutex);
@@ -132,6 +182,13 @@ void CrashRecoveryContext::Enable() {
return;
gCrashRecoveryEnabled = true;
+
+ // We can set up vectored exception handling now. We will install our
+ // handler as the front of the list, though there's no assurances that
+ // it will remain at the front (another call could install itself before
+ // our handler). This 1) isn't likely, and 2) shouldn't cause problems.
+ PVOID handle = ::AddVectoredExceptionHandler(1, ExceptionHandler);
+ sCurrentExceptionHandle.set(handle);
}
void CrashRecoveryContext::Disable() {
@@ -141,6 +198,15 @@ void CrashRecoveryContext::Disable() {
return;
gCrashRecoveryEnabled = false;
+
+ PVOID currentHandle = const_cast<PVOID>(sCurrentExceptionHandle.get());
+ if (currentHandle) {
+ // Now we can remove the vectored exception handler from the chain
+ ::RemoveVectoredExceptionHandler(currentHandle);
+
+ // Reset the handle in our thread-local set.
+ sCurrentExceptionHandle.set(NULL);
+ }
}
#else
diff --git a/lib/Support/DataExtractor.cpp b/lib/Support/DataExtractor.cpp
new file mode 100644
index 0000000..b946c1d
--- /dev/null
+++ b/lib/Support/DataExtractor.cpp
@@ -0,0 +1,175 @@
+//===-- DataExtractor.cpp -------------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Support/DataExtractor.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/Host.h"
+#include "llvm/Support/SwapByteOrder.h"
+using namespace llvm;
+
+template <typename T>
+static T getU(uint32_t *offset_ptr, const DataExtractor *de,
+ bool isLittleEndian, const char *Data) {
+ T val = 0;
+ uint32_t offset = *offset_ptr;
+ if (de->isValidOffsetForDataOfSize(offset, sizeof(val))) {
+ std::memcpy(&val, &Data[offset], sizeof(val));
+ if (sys::isLittleEndianHost() != isLittleEndian)
+ val = sys::SwapByteOrder(val);
+
+ // Advance the offset
+ *offset_ptr += sizeof(val);
+ }
+ return val;
+}
+
+template <typename T>
+static T *getUs(uint32_t *offset_ptr, T *dst, uint32_t count,
+ const DataExtractor *de, bool isLittleEndian, const char *Data){
+ uint32_t offset = *offset_ptr;
+
+ if (count > 0 && de->isValidOffsetForDataOfSize(offset, sizeof(*dst)*count)) {
+ for (T *value_ptr = dst, *end = dst + count; value_ptr != end;
+ ++value_ptr, offset += sizeof(*dst))
+ *value_ptr = getU<T>(offset_ptr, de, isLittleEndian, Data);
+ // Advance the offset
+ *offset_ptr = offset;
+ // Return a non-NULL pointer to the converted data as an indicator of
+ // success
+ return dst;
+ }
+ return NULL;
+}
+
+uint8_t DataExtractor::getU8(uint32_t *offset_ptr) const {
+ return getU<uint8_t>(offset_ptr, this, IsLittleEndian, Data.data());
+}
+
+uint8_t *
+DataExtractor::getU8(uint32_t *offset_ptr, uint8_t *dst, uint32_t count) const {
+ return getUs<uint8_t>(offset_ptr, dst, count, this, IsLittleEndian,
+ Data.data());
+}
+
+
+uint16_t DataExtractor::getU16(uint32_t *offset_ptr) const {
+ return getU<uint16_t>(offset_ptr, this, IsLittleEndian, Data.data());
+}
+
+uint16_t *DataExtractor::getU16(uint32_t *offset_ptr, uint16_t *dst,
+ uint32_t count) const {
+ return getUs<uint16_t>(offset_ptr, dst, count, this, IsLittleEndian,
+ Data.data());
+}
+
+uint32_t DataExtractor::getU32(uint32_t *offset_ptr) const {
+ return getU<uint32_t>(offset_ptr, this, IsLittleEndian, Data.data());
+}
+
+uint32_t *DataExtractor::getU32(uint32_t *offset_ptr, uint32_t *dst,
+ uint32_t count) const {
+ return getUs<uint32_t>(offset_ptr, dst, count, this, IsLittleEndian,
+                         Data.data());
+}
+
+uint64_t DataExtractor::getU64(uint32_t *offset_ptr) const {
+ return getU<uint64_t>(offset_ptr, this, IsLittleEndian, Data.data());
+}
+
+uint64_t *DataExtractor::getU64(uint32_t *offset_ptr, uint64_t *dst,
+ uint32_t count) const {
+ return getUs<uint64_t>(offset_ptr, dst, count, this, IsLittleEndian,
+ Data.data());
+}
+
+uint64_t
+DataExtractor::getUnsigned(uint32_t *offset_ptr, uint32_t byte_size) const {
+ switch (byte_size) {
+ case 1:
+ return getU8(offset_ptr);
+ case 2:
+ return getU16(offset_ptr);
+ case 4:
+ return getU32(offset_ptr);
+ case 8:
+ return getU64(offset_ptr);
+ }
+ llvm_unreachable("getUnsigned unhandled case!");
+}
+
+int64_t
+DataExtractor::getSigned(uint32_t *offset_ptr, uint32_t byte_size) const {
+ switch (byte_size) {
+ case 1:
+ return (int8_t)getU8(offset_ptr);
+ case 2:
+ return (int16_t)getU16(offset_ptr);
+ case 4:
+ return (int32_t)getU32(offset_ptr);
+ case 8:
+ return (int64_t)getU64(offset_ptr);
+ }
+ llvm_unreachable("getSigned unhandled case!");
+}
+
+const char *DataExtractor::getCStr(uint32_t *offset_ptr) const {
+ uint32_t offset = *offset_ptr;
+ StringRef::size_type pos = Data.find('\0', offset);
+ if (pos != StringRef::npos) {
+ *offset_ptr = pos + 1;
+ return Data.data() + offset;
+ }
+ return NULL;
+}
+
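A minimal usage sketch (not part of the patch) combining the fixed-width and string
readers, assuming a little-endian buffer:

    #include "llvm/Support/DataExtractor.h"
    using namespace llvm;

    void dataExtractorExample() {            // illustrative name
      const char bytes[] = "\x2a\x00hi";     // u16 LE 42, then C string "hi"
      DataExtractor DE(StringRef(bytes, 5), /*IsLittleEndian=*/true,
                       /*AddressSize=*/4);
      uint32_t off = 0;
      uint16_t v = DE.getU16(&off);          // 42; off advances to 2
      const char *s = DE.getCStr(&off);      // "hi"; off advances past the NUL
      (void)v; (void)s;
    }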
+uint64_t DataExtractor::getULEB128(uint32_t *offset_ptr) const {
+ uint64_t result = 0;
+ if (Data.empty())
+ return 0;
+
+ unsigned shift = 0;
+ uint32_t offset = *offset_ptr;
+ uint8_t byte = 0;
+
+ while (isValidOffset(offset)) {
+ byte = Data[offset++];
+    result |= uint64_t(byte & 0x7f) << shift;
+ shift += 7;
+ if ((byte & 0x80) == 0)
+ break;
+ }
+
+ *offset_ptr = offset;
+ return result;
+}
+
+int64_t DataExtractor::getSLEB128(uint32_t *offset_ptr) const {
+ int64_t result = 0;
+ if (Data.empty())
+ return 0;
+
+ unsigned shift = 0;
+ uint32_t offset = *offset_ptr;
+ uint8_t byte = 0;
+
+ while (isValidOffset(offset)) {
+ byte = Data[offset++];
+    result |= uint64_t(byte & 0x7f) << shift;
+ shift += 7;
+ if ((byte & 0x80) == 0)
+ break;
+ }
+
+ // Sign bit of byte is 2nd high order bit (0x40)
+ if (shift < 64 && (byte & 0x40))
+    result |= -(int64_t(1) << shift);
+
+ *offset_ptr = offset;
+ return result;
+}
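A worked example (not part of the patch): SLEB128 0x7f is a single byte, so the
loop ends with shift == 7 and the 0x40 sign bit set; the decoder then ORs in
-(1 << 7) and returns 0x7f - 0x80 == -1. Similarly { 0xff, 0x7e } decodes to
(0x7f | (0x7e << 7)) - (1 << 14) == 16255 - 16384 == -129.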
diff --git a/lib/Support/Disassembler.cpp b/lib/Support/Disassembler.cpp
index 6362aff..c6d73bc 100644
--- a/lib/Support/Disassembler.cpp
+++ b/lib/Support/Disassembler.cpp
@@ -1,4 +1,4 @@
-//===- lib/System/Disassembler.cpp ------------------------------*- C++ -*-===//
+//===- lib/Support/Disassembler.cpp -----------------------------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
diff --git a/lib/Support/Dwarf.cpp b/lib/Support/Dwarf.cpp
index 0813321..95a9550 100644
--- a/lib/Support/Dwarf.cpp
+++ b/lib/Support/Dwarf.cpp
@@ -82,6 +82,19 @@ const char *llvm::dwarf::TagString(unsigned Tag) {
case DW_TAG_arg_variable: return "DW_TAG_arg_variable";
case DW_TAG_return_variable: return "DW_TAG_return_variable";
case DW_TAG_vector_type: return "DW_TAG_vector_type";
+ case DW_TAG_rvalue_reference_type: return "DW_TAG_rvalue_reference_type";
+ case DW_TAG_template_alias: return "DW_TAG_template_alias";
+ case DW_TAG_MIPS_loop: return "DW_TAG_MIPS_loop";
+ case DW_TAG_type_unit: return "DW_TAG_type_unit";
+ case DW_TAG_format_label: return "DW_TAG_format_label";
+ case DW_TAG_function_template: return "DW_TAG_function_template";
+ case DW_TAG_class_template: return "DW_TAG_class_template";
+ case DW_TAG_GNU_template_template_param:
+ return "DW_TAG_GNU_template_template_param";
+ case DW_TAG_GNU_template_parameter_pack:
+ return "DW_TAG_GNU_template_parameter_pack";
+ case DW_TAG_GNU_formal_parameter_pack:
+ return "DW_TAG_GNU_formal_parameter_pack";
}
return 0;
}
@@ -186,7 +199,30 @@ const char *llvm::dwarf::AttributeString(unsigned Attribute) {
case DW_AT_elemental: return "DW_AT_elemental";
case DW_AT_pure: return "DW_AT_pure";
case DW_AT_recursive: return "DW_AT_recursive";
+ case DW_AT_signature: return "DW_AT_signature";
+ case DW_AT_main_subprogram: return "DW_AT_main_subprogram";
+ case DW_AT_data_bit_offset: return "DW_AT_data_bit_offset";
+ case DW_AT_const_expr: return "DW_AT_const_expr";
+ case DW_AT_enum_class: return "DW_AT_enum_class";
+ case DW_AT_linkage_name: return "DW_AT_linkage_name";
+ case DW_AT_MIPS_loop_begin: return "DW_AT_MIPS_loop_begin";
+ case DW_AT_MIPS_tail_loop_begin: return "DW_AT_MIPS_tail_loop_begin";
+ case DW_AT_MIPS_epilog_begin: return "DW_AT_MIPS_epilog_begin";
+ case DW_AT_MIPS_loop_unroll_factor: return "DW_AT_MIPS_loop_unroll_factor";
+ case DW_AT_MIPS_software_pipeline_depth:
+ return "DW_AT_MIPS_software_pipeline_depth";
case DW_AT_MIPS_linkage_name: return "DW_AT_MIPS_linkage_name";
+ case DW_AT_MIPS_stride: return "DW_AT_MIPS_stride";
+ case DW_AT_MIPS_abstract_name: return "DW_AT_MIPS_abstract_name";
+ case DW_AT_MIPS_clone_origin: return "DW_AT_MIPS_clone_origin";
+ case DW_AT_MIPS_has_inlines: return "DW_AT_MIPS_has_inlines";
+ case DW_AT_MIPS_stride_byte: return "DW_AT_MIPS_stride_byte";
+ case DW_AT_MIPS_stride_elem: return "DW_AT_MIPS_stride_elem";
+ case DW_AT_MIPS_ptr_dopetype: return "DW_AT_MIPS_ptr_dopetype";
+ case DW_AT_MIPS_allocatable_dopetype:
+ return "DW_AT_MIPS_allocatable_dopetype";
+ case DW_AT_MIPS_assumed_shape_dopetype:
+ return "DW_AT_MIPS_assumed_shape_dopetype";
case DW_AT_sf_names: return "DW_AT_sf_names";
case DW_AT_src_info: return "DW_AT_src_info";
case DW_AT_mac_info: return "DW_AT_mac_info";
@@ -194,6 +230,8 @@ const char *llvm::dwarf::AttributeString(unsigned Attribute) {
case DW_AT_body_begin: return "DW_AT_body_begin";
case DW_AT_body_end: return "DW_AT_body_end";
case DW_AT_GNU_vector: return "DW_AT_GNU_vector";
+ case DW_AT_GNU_template_name: return "DW_AT_GNU_template_name";
+ case DW_AT_MIPS_assumed_size: return "DW_AT_MIPS_assumed_size";
case DW_AT_lo_user: return "DW_AT_lo_user";
case DW_AT_hi_user: return "DW_AT_hi_user";
case DW_AT_APPLE_optimized: return "DW_AT_APPLE_optimized";
@@ -237,6 +275,10 @@ const char *llvm::dwarf::FormEncodingString(unsigned Encoding) {
case DW_FORM_ref8: return "DW_FORM_ref8";
case DW_FORM_ref_udata: return "DW_FORM_ref_udata";
case DW_FORM_indirect: return "DW_FORM_indirect";
+ case DW_FORM_sec_offset: return "DW_FORM_sec_offset";
+ case DW_FORM_exprloc: return "DW_FORM_exprloc";
+ case DW_FORM_flag_present: return "DW_FORM_flag_present";
+ case DW_FORM_ref_sig8: return "DW_FORM_ref_sig8";
}
return 0;
}
@@ -397,6 +439,8 @@ const char *llvm::dwarf::OperationEncodingString(unsigned Encoding) {
case DW_OP_form_tls_address: return "DW_OP_form_tls_address";
case DW_OP_call_frame_cfa: return "DW_OP_call_frame_cfa";
case DW_OP_bit_piece: return "DW_OP_bit_piece";
+ case DW_OP_implicit_value: return "DW_OP_implicit_value";
+ case DW_OP_stack_value: return "DW_OP_stack_value";
case DW_OP_lo_user: return "DW_OP_lo_user";
case DW_OP_hi_user: return "DW_OP_hi_user";
}
@@ -416,6 +460,7 @@ const char *llvm::dwarf::AttributeEncodingString(unsigned Encoding) {
case DW_ATE_unsigned: return "DW_ATE_unsigned";
case DW_ATE_unsigned_char: return "DW_ATE_unsigned_char";
case DW_ATE_imaginary_float: return "DW_ATE_imaginary_float";
+ case DW_ATE_UTF: return "DW_ATE_UTF";
case DW_ATE_packed_decimal: return "DW_ATE_packed_decimal";
case DW_ATE_numeric_string: return "DW_ATE_numeric_string";
case DW_ATE_edited: return "DW_ATE_edited";
@@ -602,6 +647,7 @@ const char *llvm::dwarf::LNExtendedString(unsigned Encoding) {
case DW_LNE_end_sequence: return "DW_LNE_end_sequence";
case DW_LNE_set_address: return "DW_LNE_set_address";
case DW_LNE_define_file: return "DW_LNE_define_file";
+ case DW_LNE_set_discriminator: return "DW_LNE_set_discriminator";
case DW_LNE_lo_user: return "DW_LNE_lo_user";
case DW_LNE_hi_user: return "DW_LNE_hi_user";
}
@@ -651,6 +697,9 @@ const char *llvm::dwarf::CallFrameString(unsigned Encoding) {
case DW_CFA_val_offset: return "DW_CFA_val_offset";
case DW_CFA_val_offset_sf: return "DW_CFA_val_offset_sf";
case DW_CFA_val_expression: return "DW_CFA_val_expression";
+ case DW_CFA_MIPS_advance_loc8: return "DW_CFA_MIPS_advance_loc8";
+ case DW_CFA_GNU_window_save: return "DW_CFA_GNU_window_save";
+ case DW_CFA_GNU_args_size: return "DW_CFA_GNU_args_size";
case DW_CFA_lo_user: return "DW_CFA_lo_user";
case DW_CFA_hi_user: return "DW_CFA_hi_user";
}
diff --git a/lib/Support/DynamicLibrary.cpp b/lib/Support/DynamicLibrary.cpp
index 455c380..fb02c07 100644
--- a/lib/Support/DynamicLibrary.cpp
+++ b/lib/Support/DynamicLibrary.cpp
@@ -9,28 +9,26 @@
//
// This header file implements the operating system DynamicLibrary concept.
//
-// FIXME: This file leaks the ExplicitSymbols and OpenedHandles vector, and is
-// not thread safe!
+// FIXME: This file leaks ExplicitSymbols and OpenedHandles!
//
//===----------------------------------------------------------------------===//
+#include "llvm/ADT/StringMap.h"
+#include "llvm/ADT/DenseSet.h"
#include "llvm/Support/DynamicLibrary.h"
#include "llvm/Support/Mutex.h"
#include "llvm/Config/config.h"
#include <cstdio>
#include <cstring>
-#include <map>
-#include <vector>
// Collection of symbol name/value pairs to be searched prior to any libraries.
-static std::map<std::string, void*> *ExplicitSymbols = 0;
+static llvm::StringMap<void *> *ExplicitSymbols = 0;
namespace {
struct ExplicitSymbolsDeleter {
~ExplicitSymbolsDeleter() {
- if (ExplicitSymbols)
- delete ExplicitSymbols;
+ delete ExplicitSymbols;
}
};
@@ -38,13 +36,22 @@ struct ExplicitSymbolsDeleter {
static ExplicitSymbolsDeleter Dummy;
-void llvm::sys::DynamicLibrary::AddSymbol(const char* symbolName,
+
+static llvm::sys::SmartMutex<true>& getMutex() {
+ static llvm::sys::SmartMutex<true> HandlesMutex;
+ return HandlesMutex;
+}
+
+void llvm::sys::DynamicLibrary::AddSymbol(StringRef symbolName,
void *symbolValue) {
+ SmartScopedLock<true> lock(getMutex());
if (ExplicitSymbols == 0)
- ExplicitSymbols = new std::map<std::string, void*>();
+ ExplicitSymbols = new llvm::StringMap<void*>();
(*ExplicitSymbols)[symbolName] = symbolValue;
}
+char llvm::sys::DynamicLibrary::Invalid = 0;
+
#ifdef LLVM_ON_WIN32
#include "Windows/DynamicLibrary.inc"
@@ -61,66 +68,78 @@ using namespace llvm::sys;
//=== independent code.
//===----------------------------------------------------------------------===//
-static std::vector<void *> *OpenedHandles = 0;
-
-
-static SmartMutex<true>& getMutex() {
- static SmartMutex<true> HandlesMutex;
- return HandlesMutex;
-}
+static DenseSet<void *> *OpenedHandles = 0;
+DynamicLibrary DynamicLibrary::getPermanentLibrary(const char *filename,
+ std::string *errMsg) {
+ SmartScopedLock<true> lock(getMutex());
-bool DynamicLibrary::LoadLibraryPermanently(const char *Filename,
- std::string *ErrMsg) {
- void *H = dlopen(Filename, RTLD_LAZY|RTLD_GLOBAL);
- if (H == 0) {
- if (ErrMsg) *ErrMsg = dlerror();
- return true;
+ void *handle = dlopen(filename, RTLD_LAZY|RTLD_GLOBAL);
+ if (handle == 0) {
+ if (errMsg) *errMsg = dlerror();
+ return DynamicLibrary();
}
+
#ifdef __CYGWIN__
// Cygwin searches symbols only in the main executable, using the
// handle from dlopen(NULL, RTLD_GLOBAL).
- if (Filename == NULL)
- H = RTLD_DEFAULT;
+ if (filename == NULL)
+ handle = RTLD_DEFAULT;
#endif
- SmartScopedLock<true> Lock(getMutex());
+
if (OpenedHandles == 0)
- OpenedHandles = new std::vector<void *>();
- OpenedHandles->push_back(H);
- return false;
+ OpenedHandles = new DenseSet<void *>();
+
+ // If we've already loaded this library, dlclose() the handle in order to
+ // keep the internal refcount at +1.
+ if (!OpenedHandles->insert(handle).second)
+ dlclose(handle);
+
+ return DynamicLibrary(handle);
+}
+
+void *DynamicLibrary::getAddressOfSymbol(const char *symbolName) {
+ if (!isValid())
+ return NULL;
+ return dlsym(Data, symbolName);
}
+
#else
using namespace llvm;
using namespace llvm::sys;
-bool DynamicLibrary::LoadLibraryPermanently(const char *Filename,
- std::string *ErrMsg) {
- if (ErrMsg) *ErrMsg = "dlopen() not supported on this platform";
- return true;
+DynamicLibrary DynamicLibrary::getPermanentLibrary(const char *filename,
+ std::string *errMsg) {
+ if (errMsg) *errMsg = "dlopen() not supported on this platform";
+ return DynamicLibrary();
}
+
+void *DynamicLibrary::getAddressOfSymbol(const char *symbolName) {
+ return NULL;
+}
+
#endif
namespace llvm {
void *SearchForAddressOfSpecialSymbol(const char* symbolName);
}
-void* DynamicLibrary::SearchForAddressOfSymbol(const char* symbolName) {
+void* DynamicLibrary::SearchForAddressOfSymbol(const char *symbolName) {
+ SmartScopedLock<true> Lock(getMutex());
+
// First check symbols added via AddSymbol().
if (ExplicitSymbols) {
- std::map<std::string, void *>::iterator I =
- ExplicitSymbols->find(symbolName);
- std::map<std::string, void *>::iterator E = ExplicitSymbols->end();
+ StringMap<void *>::iterator i = ExplicitSymbols->find(symbolName);
- if (I != E)
- return I->second;
+ if (i != ExplicitSymbols->end())
+ return i->second;
}
#if HAVE_DLFCN_H
// Now search the libraries.
- SmartScopedLock<true> Lock(getMutex());
if (OpenedHandles) {
- for (std::vector<void *>::iterator I = OpenedHandles->begin(),
+ for (DenseSet<void *>::iterator I = OpenedHandles->begin(),
E = OpenedHandles->end(); I != E; ++I) {
//lt_ptr ptr = lt_dlsym(*I, symbolName);
void *ptr = dlsym(*I, symbolName);
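The dlclose()-on-duplicate logic above works because dlopen() bumps a per-library reference count on every call and returns the same handle for an already-loaded library; keeping one entry per handle in the set pins the count at exactly +1. A standalone sketch of the trick (POSIX dlfcn API, illustrative names):

    #include <dlfcn.h>
    #include <set>

    static std::set<void *> Opened;

    // Open a library so that the set holds exactly one reference to it, no
    // matter how many times the same path is requested.
    void *openOnce(const char *Path) {
      void *H = dlopen(Path, RTLD_LAZY | RTLD_GLOBAL);
      if (!H)
        return 0;
      // A repeat dlopen() returns the old handle with the refcount bumped;
      // drop the extra reference right away.
      if (!Opened.insert(H).second)
        dlclose(H);
      return H;
    }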
diff --git a/lib/Support/FoldingSet.cpp b/lib/Support/FoldingSet.cpp
index 1568342..17b8271 100644
--- a/lib/Support/FoldingSet.cpp
+++ b/lib/Support/FoldingSet.cpp
@@ -64,10 +64,8 @@ void FoldingSetNodeID::AddPointer(const void *Ptr) {
// depend on the host. It doesn't matter, however, because hashing on
// pointer values is inherently unstable. Nothing should depend on the
// ordering of nodes in the folding set.
- intptr_t PtrI = (intptr_t)Ptr;
- Bits.push_back(unsigned(PtrI));
- if (sizeof(intptr_t) > sizeof(unsigned))
- Bits.push_back(unsigned(uint64_t(PtrI) >> 32));
+ Bits.append(reinterpret_cast<unsigned *>(&Ptr),
+ reinterpret_cast<unsigned *>(&Ptr+1));
}
void FoldingSetNodeID::AddInteger(signed I) {
Bits.push_back(I);
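The rewritten AddPointer() hashes the pointer's own bytes by viewing the pointer object as an array of unsigned words. This assumes sizeof(void*) is a multiple of sizeof(unsigned), which holds on the 32- and 64-bit hosts LLVM supports; a minimal sketch of the same reinterpretation:

    #include <vector>

    void appendPointerWords(std::vector<unsigned> &Bits, const void *Ptr) {
      // Reinterpret the pointer object itself (not the pointee) as words.
      const unsigned *Words = reinterpret_cast<const unsigned *>(&Ptr);
      const unsigned *End = reinterpret_cast<const unsigned *>(&Ptr + 1);
      Bits.insert(Bits.end(), Words, End);
    }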
diff --git a/lib/Support/Host.cpp b/lib/Support/Host.cpp
index c525a12..a19e4b4 100644
--- a/lib/Support/Host.cpp
+++ b/lib/Support/Host.cpp
@@ -213,13 +213,13 @@ std::string sys::getHostCPUName() {
case 30: // Intel(R) Core(TM) i7 CPU 870 @ 2.93GHz.
// As found in a Summer 2010 model iMac.
case 37: // Intel Core i7, laptop version.
+ case 44: // Intel Core i7 processor and Intel Xeon processor. All
+ // processors are manufactured using the 32 nm process.
return "corei7";
// SandyBridge:
case 42: // Intel Core i7 processor. All processors are manufactured
// using the 32 nm process.
- case 44: // Intel Core i7 processor and Intel Xeon processor. All
- // processors are manufactured using the 32 nm process.
case 45:
return "corei7-avx";
diff --git a/lib/Support/IncludeFile.cpp b/lib/Support/IncludeFile.cpp
index 5da8826..e67acb3 100644
--- a/lib/Support/IncludeFile.cpp
+++ b/lib/Support/IncludeFile.cpp
@@ -1,4 +1,4 @@
-//===- lib/System/IncludeFile.cpp - Ensure Linking Of Implementation -----===//
+//===- lib/Support/IncludeFile.cpp - Ensure Linking Of Implementation ----===//
//
// The LLVM Compiler Infrastructure
//
diff --git a/lib/Support/Memory.cpp b/lib/Support/Memory.cpp
index a9689b2..2a1642a 100644
--- a/lib/Support/Memory.cpp
+++ b/lib/Support/Memory.cpp
@@ -16,6 +16,10 @@
#include "llvm/Support/Valgrind.h"
#include "llvm/Config/config.h"
+#if defined(__mips__)
+#include <sys/cachectl.h>
+#endif
+
namespace llvm {
using namespace sys;
}
@@ -66,6 +70,8 @@ void llvm::sys::Memory::InvalidateInstructionCache(const void *Addr,
char *Start = (char*) Addr;
char *End = Start + Len;
__clear_cache(Start, End);
+# elif defined(__mips__)
+ cacheflush((char*)Addr, Len, BCACHE);
# endif
#endif // end apple
diff --git a/lib/Support/MemoryBuffer.cpp b/lib/Support/MemoryBuffer.cpp
index d264be9..0771af5 100644
--- a/lib/Support/MemoryBuffer.cpp
+++ b/lib/Support/MemoryBuffer.cpp
@@ -275,16 +275,16 @@ static bool shouldUseMmap(int FD,
error_code MemoryBuffer::getOpenFile(int FD, const char *Filename,
OwningPtr<MemoryBuffer> &result,
- size_t FileSize, size_t MapSize,
- off_t Offset,
+ uint64_t FileSize, uint64_t MapSize,
+ int64_t Offset,
bool RequiresNullTerminator) {
static int PageSize = sys::Process::GetPageSize();
// Default is to map the full file.
- if (MapSize == size_t(-1)) {
+ if (MapSize == uint64_t(-1)) {
// If we don't know the file size, use fstat to find out. fstat on an open
// file descriptor is cheaper than stat on a random path.
- if (FileSize == size_t(-1)) {
+ if (FileSize == uint64_t(-1)) {
struct stat FileInfo;
// TODO: This should use fstat64 when available.
if (fstat(FD, &FileInfo) == -1) {
diff --git a/lib/Support/MemoryObject.cpp b/lib/Support/MemoryObject.cpp
index 91e3ecd..b20ab89 100644
--- a/lib/Support/MemoryObject.cpp
+++ b/lib/Support/MemoryObject.cpp
@@ -19,8 +19,11 @@ int MemoryObject::readBytes(uint64_t address,
uint64_t* copied) const {
uint64_t current = address;
uint64_t limit = getBase() + getExtent();
-
- while (current - address < size && current < limit) {
+
+ if (current + size > limit)
+ return -1;
+
+ while (current - address < size) {
if (readByte(current, &buf[(current - address)]))
return -1;
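The hoisted test changes the contract slightly: a request that would run past the extent now fails up front instead of copying a partial prefix. Note that the check as written computes current + size, which could in principle wrap for extreme inputs; a more defensive variant of the same precheck (an alternative sketch, not what the code above does) avoids the addition:

    #include <stdint.h>

    // True iff [addr, addr+size) lies inside [base, base+extent), written
    // without computing addr + size so the test itself cannot overflow.
    static bool rangeInBounds(uint64_t addr, uint64_t size,
                              uint64_t base, uint64_t extent) {
      uint64_t offset = addr - base;
      return addr >= base && offset <= extent && size <= extent - offset;
    }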
diff --git a/lib/Support/Mutex.cpp b/lib/Support/Mutex.cpp
index b408973..8874e94 100644
--- a/lib/Support/Mutex.cpp
+++ b/lib/Support/Mutex.cpp
@@ -152,6 +152,6 @@ MutexImpl::tryacquire()
#elif defined( LLVM_ON_WIN32)
#include "Windows/Mutex.inc"
#else
-#warning Neither LLVM_ON_UNIX nor LLVM_ON_WIN32 was set in System/Mutex.cpp
+#warning Neither LLVM_ON_UNIX nor LLVM_ON_WIN32 was set in Support/Mutex.cpp
#endif
#endif
diff --git a/lib/Support/Path.cpp b/lib/Support/Path.cpp
index 8fbaf2d..e5b7cd3 100644
--- a/lib/Support/Path.cpp
+++ b/lib/Support/Path.cpp
@@ -121,7 +121,7 @@ sys::IdentifyFileType(const char *magic, unsigned length) {
case 7: return Mach_O_DynamicLinker_FileType;
case 8: return Mach_O_Bundle_FileType;
case 9: return Mach_O_DynamicallyLinkedSharedLibStub_FileType;
- case 10: break; // FIXME: MH_DSYM companion file with only debug.
+ case 10: return Mach_O_DSYMCompanion_FileType;
}
break;
}
diff --git a/lib/Support/PathV2.cpp b/lib/Support/PathV2.cpp
index 896c94c..bebe442 100644
--- a/lib/Support/PathV2.cpp
+++ b/lib/Support/PathV2.cpp
@@ -490,6 +490,36 @@ bool is_separator(char value) {
}
}
+void system_temp_directory(bool erasedOnReboot, SmallVectorImpl<char> &result) {
+ result.clear();
+
+ // Check whether the temporary directory is specified by an environment
+ // variable.
+ const char *EnvironmentVariable;
+#ifdef LLVM_ON_WIN32
+ EnvironmentVariable = "TEMP";
+#else
+ EnvironmentVariable = "TMPDIR";
+#endif
+ if (char *RequestedDir = getenv(EnvironmentVariable)) {
+ result.append(RequestedDir, RequestedDir + strlen(RequestedDir));
+ return;
+ }
+
+ // Fall back to a system default.
+ const char *DefaultResult;
+#ifdef LLVM_ON_WIN32
+ (void)erasedOnReboot;
+ DefaultResult = "C:\\TEMP";
+#else
+ if (erasedOnReboot)
+ DefaultResult = "/tmp";
+ else
+ DefaultResult = "/var/tmp";
+#endif
+ result.append(DefaultResult, DefaultResult + strlen(DefaultResult));
+}
+
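The lookup order here, environment variable first and platform default second, is the conventional pattern for temp-directory discovery. A compact sketch of the same logic outside LLVM's SmallVector API (function name is illustrative):

    #include <cstdlib>
    #include <string>

    std::string tempDirectory(bool erasedOnReboot) {
    #ifdef _WIN32
      const char *Env = std::getenv("TEMP");
      const char *Fallback = "C:\\TEMP";
    #else
      const char *Env = std::getenv("TMPDIR");
      const char *Fallback = erasedOnReboot ? "/tmp" : "/var/tmp";
    #endif
      return Env ? std::string(Env) : std::string(Fallback);
    }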
bool has_root_name(const Twine &path) {
SmallString<128> path_storage;
StringRef p = path.toStringRef(path_storage);
@@ -626,7 +656,7 @@ error_code create_directories(const Twine &path, bool &existed) {
if (error_code ec = fs::exists(parent, parent_exists)) return ec;
if (!parent_exists)
- return create_directories(parent, existed);
+ if (error_code ec = create_directories(parent, existed)) return ec;
return create_directory(p, existed);
}
@@ -682,14 +712,12 @@ bool is_other(file_status status) {
!is_symlink(status);
}
-void directory_entry::replace_filename(const Twine &filename, file_status st,
- file_status symlink_st) {
+void directory_entry::replace_filename(const Twine &filename, file_status st) {
SmallString<128> path(Path.begin(), Path.end());
path::remove_filename(path);
path::append(path, filename);
Path = path.str();
Status = st;
- SymlinkStatus = symlink_st;
}
error_code has_magic(const Twine &path, const Twine &magic, bool &result) {
diff --git a/lib/Support/PrettyStackTrace.cpp b/lib/Support/PrettyStackTrace.cpp
index 082b701..ef33073 100644
--- a/lib/Support/PrettyStackTrace.cpp
+++ b/lib/Support/PrettyStackTrace.cpp
@@ -62,7 +62,7 @@ extern "C" {
CRASH_REPORTER_CLIENT_HIDDEN
struct crashreporter_annotations_t gCRAnnotations
__attribute__((section("__DATA," CRASHREPORTER_ANNOTATIONS_SECTION)))
- = { CRASHREPORTER_ANNOTATIONS_VERSION, 0, 0, 0, 0 };
+ = { CRASHREPORTER_ANNOTATIONS_VERSION, 0, 0, 0, 0, 0, 0 };
}
#elif defined (__APPLE__) && HAVE_CRASHREPORTER_INFO
static const char *__crashreporter_info__ = 0;
diff --git a/lib/Support/RWMutex.cpp b/lib/Support/RWMutex.cpp
index fc02f9c..d0b1e10 100644
--- a/lib/Support/RWMutex.cpp
+++ b/lib/Support/RWMutex.cpp
@@ -152,6 +152,6 @@ RWMutexImpl::writer_release()
#elif defined( LLVM_ON_WIN32)
#include "Windows/RWMutex.inc"
#else
-#warning Neither LLVM_ON_UNIX nor LLVM_ON_WIN32 was set in System/Mutex.cpp
+#warning Neither LLVM_ON_UNIX nor LLVM_ON_WIN32 was set in Support/Mutex.cpp
#endif
#endif
diff --git a/lib/Support/SearchForAddressOfSpecialSymbol.cpp b/lib/Support/SearchForAddressOfSpecialSymbol.cpp
index d638301..2d23902 100644
--- a/lib/Support/SearchForAddressOfSpecialSymbol.cpp
+++ b/lib/Support/SearchForAddressOfSpecialSymbol.cpp
@@ -28,21 +28,6 @@ static void *DoSearch(const char* symbolName) {
#ifdef __APPLE__
{
- EXPLICIT_SYMBOL(__ashldi3);
- EXPLICIT_SYMBOL(__ashrdi3);
- EXPLICIT_SYMBOL(__cmpdi2);
- EXPLICIT_SYMBOL(__divdi3);
- EXPLICIT_SYMBOL(__fixdfdi);
- EXPLICIT_SYMBOL(__fixsfdi);
- EXPLICIT_SYMBOL(__fixunsdfdi);
- EXPLICIT_SYMBOL(__fixunssfdi);
- EXPLICIT_SYMBOL(__floatdidf);
- EXPLICIT_SYMBOL(__floatdisf);
- EXPLICIT_SYMBOL(__lshrdi3);
- EXPLICIT_SYMBOL(__moddi3);
- EXPLICIT_SYMBOL(__udivdi3);
- EXPLICIT_SYMBOL(__umoddi3);
-
// __eprintf is sometimes used for assert() handling on x86.
//
// FIXME: Currently disabled when using Clang, as we don't always have our
diff --git a/lib/Support/StringExtras.cpp b/lib/Support/StringExtras.cpp
index eb2fa08..49c5ac4 100644
--- a/lib/Support/StringExtras.cpp
+++ b/lib/Support/StringExtras.cpp
@@ -51,11 +51,10 @@ std::pair<StringRef, StringRef> llvm::getToken(StringRef Source,
void llvm::SplitString(StringRef Source,
SmallVectorImpl<StringRef> &OutFragments,
StringRef Delimiters) {
- StringRef S2, S;
- tie(S2, S) = getToken(Source, Delimiters);
- while (!S2.empty()) {
- OutFragments.push_back(S2);
- tie(S2, S) = getToken(S, Delimiters);
+ std::pair<StringRef, StringRef> S = getToken(Source, Delimiters);
+ while (!S.first.empty()) {
+ OutFragments.push_back(S.first);
+ S = getToken(S.second, Delimiters);
}
}
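The new loop pulls one token per iteration: getToken() returns the next fragment plus the unconsumed tail and skips leading delimiters, so runs of delimiters never yield empty fragments. A short usage sketch against the declarations above:

    #include "llvm/ADT/SmallVector.h"
    #include "llvm/ADT/StringExtras.h"
    #include "llvm/ADT/StringRef.h"
    using namespace llvm;

    void demo() {
      // "a,b,,c" split on "," yields {"a", "b", "c"}: the empty fragment
      // between the consecutive commas is skipped.
      SmallVector<StringRef, 4> Parts;
      SplitString("a,b,,c", Parts, ",");
    }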
diff --git a/lib/Support/StringRef.cpp b/lib/Support/StringRef.cpp
index 8c3fc09..b5b4f94 100644
--- a/lib/Support/StringRef.cpp
+++ b/lib/Support/StringRef.cpp
@@ -46,12 +46,12 @@ int StringRef::compare_lower(StringRef RHS) const {
/// compare_numeric - Compare strings, handle embedded numbers.
int StringRef::compare_numeric(StringRef RHS) const {
for (size_t I = 0, E = min(Length, RHS.Length); I != E; ++I) {
- if (Data[I] == RHS.Data[I])
- continue;
+ // Check for sequences of digits.
if (ascii_isdigit(Data[I]) && ascii_isdigit(RHS.Data[I])) {
- // The longer sequence of numbers is larger. This doesn't really handle
- // prefixed zeros well.
- for (size_t J = I+1; J != E+1; ++J) {
+ // The longer sequence of numbers is considered larger.
+      // This doesn't really handle leading zeros well.
+ size_t J;
+ for (J = I + 1; J != E + 1; ++J) {
bool ld = J < Length && ascii_isdigit(Data[J]);
bool rd = J < RHS.Length && ascii_isdigit(RHS.Data[J]);
if (ld != rd)
@@ -59,8 +59,15 @@ int StringRef::compare_numeric(StringRef RHS) const {
if (!rd)
break;
}
+ // The two number sequences have the same length (J-I), just memcmp them.
+ if (int Res = compareMemory(Data + I, RHS.Data + I, J - I))
+ return Res < 0 ? -1 : 1;
+ // Identical number sequences, continue search after the numbers.
+ I = J - 1;
+ continue;
}
- return (unsigned char)Data[I] < (unsigned char)RHS.Data[I] ? -1 : 1;
+ if (Data[I] != RHS.Data[I])
+ return (unsigned char)Data[I] < (unsigned char)RHS.Data[I] ? -1 : 1;
}
if (Length == RHS.Length)
return 0;
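The effect of the rewrite shows up on embedded numbers: digit runs of different lengths order by length, equal-length runs order by memcmp, and identical runs are stepped over so the scan continues after them. Illustrative expectations (a sketch; assumes llvm/ADT/StringRef.h):

    #include "llvm/ADT/StringRef.h"
    #include <cassert>
    using namespace llvm;

    void checks() {
      // Different-length digit runs order by length: 9 < 10.
      assert(StringRef("a9").compare_numeric("a10") < 0);
      // Equal-length runs fall back to memcmp; identical runs are skipped.
      assert(StringRef("x07y").compare_numeric("x07z") < 0);
      // Leading zeros are not normalized: "007" is the longer run, so it wins.
      assert(StringRef("007").compare_numeric("7") > 0);
    }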
diff --git a/lib/Support/TargetRegistry.cpp b/lib/Support/TargetRegistry.cpp
index 293a5d7..7497bfe 100644
--- a/lib/Support/TargetRegistry.cpp
+++ b/lib/Support/TargetRegistry.cpp
@@ -7,9 +7,13 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/Target/TargetRegistry.h"
+#include "llvm/Support/TargetRegistry.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/StringRef.h"
#include "llvm/Support/Host.h"
+#include "llvm/Support/raw_ostream.h"
#include <cassert>
+#include <vector>
using namespace llvm;
// Clients are responsible for avoid race conditions in registration.
@@ -90,3 +94,29 @@ const Target *TargetRegistry::getClosestTargetForJIT(std::string &Error) {
return TheTarget;
}
+static int TargetArraySortFn(const void *LHS, const void *RHS) {
+ typedef std::pair<StringRef, const Target*> pair_ty;
+ return ((const pair_ty*)LHS)->first.compare(((const pair_ty*)RHS)->first);
+}
+
+void TargetRegistry::printRegisteredTargetsForVersion() {
+ std::vector<std::pair<StringRef, const Target*> > Targets;
+ size_t Width = 0;
+ for (TargetRegistry::iterator I = TargetRegistry::begin(),
+ E = TargetRegistry::end();
+ I != E; ++I) {
+ Targets.push_back(std::make_pair(I->getName(), &*I));
+ Width = std::max(Width, Targets.back().first.size());
+ }
+ array_pod_sort(Targets.begin(), Targets.end(), TargetArraySortFn);
+
+ raw_ostream &OS = outs();
+ OS << " Registered Targets:\n";
+ for (unsigned i = 0, e = Targets.size(); i != e; ++i) {
+ OS << " " << Targets[i].first;
+ OS.indent(Width - Targets[i].first.size()) << " - "
+ << Targets[i].second->getShortDescription() << '\n';
+ }
+ if (Targets.empty())
+ OS << " (none)\n";
+}
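array_pod_sort() is LLVM's thin wrapper over qsort() for POD element types, which is why TargetArraySortFn takes opaque const void* arguments and casts them back. The comparator shape in isolation, with plain std::qsort and invented element names:

    #include <cstddef>
    #include <cstdlib>
    #include <cstring>

    struct Entry { const char *Name; int Id; };

    // qsort-style comparator: opaque pointers in, three-way int out.
    static int compareEntries(const void *LHS, const void *RHS) {
      return std::strcmp(static_cast<const Entry *>(LHS)->Name,
                         static_cast<const Entry *>(RHS)->Name);
    }

    void sortEntries(Entry *Entries, size_t N) {
      std::qsort(Entries, N, sizeof(Entry), compareEntries);
    }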
diff --git a/lib/Support/ThreadLocal.cpp b/lib/Support/ThreadLocal.cpp
index 6b43048..fdb251c 100644
--- a/lib/Support/ThreadLocal.cpp
+++ b/lib/Support/ThreadLocal.cpp
@@ -79,6 +79,6 @@ void ThreadLocalImpl::removeInstance() {
#elif defined( LLVM_ON_WIN32)
#include "Windows/ThreadLocal.inc"
#else
-#warning Neither LLVM_ON_UNIX nor LLVM_ON_WIN32 was set in System/ThreadLocal.cpp
+#warning Neither LLVM_ON_UNIX nor LLVM_ON_WIN32 set in Support/ThreadLocal.cpp
#endif
#endif
diff --git a/lib/Support/Threading.cpp b/lib/Support/Threading.cpp
index 2957956..8f0bb93 100644
--- a/lib/Support/Threading.cpp
+++ b/lib/Support/Threading.cpp
@@ -24,7 +24,7 @@ static bool multithreaded_mode = false;
static sys::Mutex* global_lock = 0;
bool llvm::llvm_start_multithreaded() {
-#ifdef LLVM_MULTITHREADED
+#if ENABLE_THREADS != 0
assert(!multithreaded_mode && "Already multithreaded!");
multithreaded_mode = true;
global_lock = new sys::Mutex(true);
@@ -39,7 +39,7 @@ bool llvm::llvm_start_multithreaded() {
}
void llvm::llvm_stop_multithreaded() {
-#ifdef LLVM_MULTITHREADED
+#if ENABLE_THREADS != 0
assert(multithreaded_mode && "Not currently multithreaded!");
// We fence here to ensure that all threaded operations are complete BEFORE we
@@ -63,7 +63,7 @@ void llvm::llvm_release_global_lock() {
if (multithreaded_mode) global_lock->release();
}
-#if defined(LLVM_MULTITHREADED) && defined(HAVE_PTHREAD_H)
+#if ENABLE_THREADS != 0 && defined(HAVE_PTHREAD_H)
#include <pthread.h>
struct ThreadInfo {
@@ -102,13 +102,44 @@ void llvm::llvm_execute_on_thread(void (*Fn)(void*), void *UserData,
error:
::pthread_attr_destroy(&Attr);
}
+#elif ENABLE_THREADS!=0 && defined(LLVM_ON_WIN32)
+#include "Windows/Windows.h"
+#include <process.h>
-#else
+struct ThreadInfo {
+ void (*func)(void*);
+ void *param;
+};
-// No non-pthread implementation, currently.
+static unsigned __stdcall ThreadCallback(void *param) {
+ struct ThreadInfo *info = reinterpret_cast<struct ThreadInfo *>(param);
+ info->func(info->param);
+
+ return 0;
+}
void llvm::llvm_execute_on_thread(void (*Fn)(void*), void *UserData,
unsigned RequestedStackSize) {
+ struct ThreadInfo param = { Fn, UserData };
+
+ HANDLE hThread = (HANDLE)::_beginthreadex(NULL,
+ RequestedStackSize, ThreadCallback,
+ &param, 0, NULL);
+
+ if (hThread) {
+ // We actually don't care whether the wait succeeds or fails, in
+ // the same way we don't care whether the pthread_join call succeeds
+ // or fails. There's not much we could do if this were to fail. But
+ // on success, this call will wait until the thread finishes executing
+ // before returning.
+ (void)::WaitForSingleObject(hThread, INFINITE);
+ ::CloseHandle(hThread);
+ }
+}
+#else
+// Fallback for hosts with neither Win32 threads nor pthreads: run inline.
+void llvm::llvm_execute_on_thread(void (*Fn)(void*), void *UserData,
+ unsigned RequestedStackSize) {
(void) RequestedStackSize;
Fn(UserData);
}
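The Win32 branch follows the usual create-join shape: _beginthreadex() (preferred over CreateThread so the CRT sets up per-thread state) yields a HANDLE that is joined with WaitForSingleObject() and then closed. A reduced Windows-only sketch with an illustrative worker:

    #include <windows.h>
    #include <process.h>

    static unsigned __stdcall worker(void *arg) {
      // ... do the work with arg ...
      return 0;
    }

    void runJoined(void *arg) {
      HANDLE h = (HANDLE)::_beginthreadex(NULL, 0, worker, arg, 0, NULL);
      if (h) {
        ::WaitForSingleObject(h, INFINITE);  // join
        ::CloseHandle(h);                    // release the kernel handle
      }
    }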
diff --git a/lib/Support/Triple.cpp b/lib/Support/Triple.cpp
index 7e094ee..c61af37 100644
--- a/lib/Support/Triple.cpp
+++ b/lib/Support/Triple.cpp
@@ -8,16 +8,11 @@
//===----------------------------------------------------------------------===//
#include "llvm/ADT/Triple.h"
-
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/STLExtras.h"
-#include "llvm/ADT/Twine.h"
-#include <cassert>
#include <cstring>
using namespace llvm;
-//
-
const char *Triple::getArchTypeName(ArchType Kind) {
switch (Kind) {
case InvalidArch: return "<invalid>";
@@ -29,6 +24,8 @@ const char *Triple::getArchTypeName(ArchType Kind) {
case cellspu: return "cellspu";
case mips: return "mips";
case mipsel: return "mipsel";
+ case mips64: return "mips64";
+ case mips64el:return "mips64el";
case msp430: return "msp430";
case ppc64: return "powerpc64";
case ppc: return "powerpc";
@@ -43,6 +40,8 @@ const char *Triple::getArchTypeName(ArchType Kind) {
case mblaze: return "mblaze";
case ptx32: return "ptx32";
case ptx64: return "ptx64";
+ case le32: return "le32";
+ case amdil: return "amdil";
}
return "<invalid>";
@@ -77,6 +76,8 @@ const char *Triple::getArchTypePrefix(ArchType Kind) {
case ptx32: return "ptx";
case ptx64: return "ptx";
+ case le32: return "le32";
+ case amdil: return "amdil";
}
}
@@ -102,6 +103,7 @@ const char *Triple::getOSTypeName(OSType Kind) {
case DragonFly: return "dragonfly";
case FreeBSD: return "freebsd";
case IOS: return "ios";
+ case KFreeBSD: return "kfreebsd";
case Linux: return "linux";
case Lv2: return "lv2";
case MacOSX: return "macosx";
@@ -114,6 +116,7 @@ const char *Triple::getOSTypeName(OSType Kind) {
case Haiku: return "haiku";
case Minix: return "minix";
case RTEMS: return "rtems";
+ case NativeClient: return "nacl";
}
return "<invalid>";
@@ -144,10 +147,16 @@ Triple::ArchType Triple::getArchTypeForLLVMName(StringRef Name) {
return mips;
if (Name == "mipsel")
return mipsel;
+ if (Name == "mips64")
+ return mips64;
+ if (Name == "mips64el")
+ return mips64el;
if (Name == "msp430")
return msp430;
if (Name == "ppc64")
return ppc64;
+ if (Name == "ppc32")
+ return ppc;
if (Name == "ppc")
return ppc;
if (Name == "mblaze")
@@ -172,6 +181,10 @@ Triple::ArchType Triple::getArchTypeForLLVMName(StringRef Name) {
return ptx32;
if (Name == "ptx64")
return ptx64;
+ if (Name == "le32")
+ return le32;
+ if (Name == "amdil")
+ return amdil;
return UnknownArch;
}
@@ -207,13 +220,16 @@ Triple::ArchType Triple::getArchTypeForDarwinArchName(StringRef Str) {
// This is derived from the driver driver.
if (Str == "arm" || Str == "armv4t" || Str == "armv5" || Str == "xscale" ||
- Str == "armv6" || Str == "armv7")
+ Str == "armv6" || Str == "armv7" || Str == "armv7f" || Str == "armv7k" ||
+ Str == "armv7s")
return Triple::arm;
if (Str == "ptx32")
return Triple::ptx32;
if (Str == "ptx64")
return Triple::ptx64;
+ if (Str == "amdil")
+ return Triple::amdil;
return Triple::UnknownArch;
}
@@ -249,6 +265,10 @@ const char *Triple::getArchNameForAssembler() {
return "ptx32";
if (Str == "ptx64")
return "ptx64";
+ if (Str == "le32")
+ return "le32";
+ if (Str == "amdil")
+ return "amdil";
return NULL;
}
@@ -288,6 +308,10 @@ Triple::ArchType Triple::ParseArch(StringRef ArchName) {
else if (ArchName == "mipsel" || ArchName == "mipsallegrexel" ||
ArchName == "psp")
return mipsel;
+ else if (ArchName == "mips64" || ArchName == "mips64eb")
+ return mips64;
+ else if (ArchName == "mips64el")
+ return mips64el;
else if (ArchName == "sparc")
return sparc;
else if (ArchName == "sparcv9")
@@ -302,6 +326,10 @@ Triple::ArchType Triple::ParseArch(StringRef ArchName) {
return ptx32;
else if (ArchName == "ptx64")
return ptx64;
+ else if (ArchName == "le32")
+ return le32;
+ else if (ArchName == "amdil")
+ return amdil;
else
return UnknownArch;
}
@@ -330,6 +358,8 @@ Triple::OSType Triple::ParseOS(StringRef OSName) {
return FreeBSD;
else if (OSName.startswith("ios"))
return IOS;
+ else if (OSName.startswith("kfreebsd"))
+ return KFreeBSD;
else if (OSName.startswith("linux"))
return Linux;
else if (OSName.startswith("lv2"))
@@ -354,6 +384,8 @@ Triple::OSType Triple::ParseOS(StringRef OSName) {
return Minix;
else if (OSName.startswith("rtems"))
return RTEMS;
+ else if (OSName.startswith("nacl"))
+ return NativeClient;
else
return UnknownOS;
}
diff --git a/lib/Support/Twine.cpp b/lib/Support/Twine.cpp
index d62123c..3d04bc3 100644
--- a/lib/Support/Twine.cpp
+++ b/lib/Support/Twine.cpp
@@ -16,7 +16,7 @@ using namespace llvm;
std::string Twine::str() const {
// If we're storing only a std::string, just return it.
if (LHSKind == StdStringKind && RHSKind == EmptyKind)
- return *static_cast<const std::string*>(LHS);
+ return *LHS.stdString;
// Otherwise, flatten and copy the contents first.
SmallString<256> Vec;
@@ -40,9 +40,9 @@ StringRef Twine::toNullTerminatedStringRef(SmallVectorImpl<char> &Out) const {
switch (getLHSKind()) {
case CStringKind:
// Already null terminated, yay!
- return StringRef(static_cast<const char*>(LHS));
+ return StringRef(LHS.cString);
case StdStringKind: {
- const std::string *str = static_cast<const std::string*>(LHS);
+ const std::string *str = LHS.stdString;
return StringRef(str->c_str(), str->size());
}
default:
@@ -55,48 +55,51 @@ StringRef Twine::toNullTerminatedStringRef(SmallVectorImpl<char> &Out) const {
return StringRef(Out.data(), Out.size());
}
-void Twine::printOneChild(raw_ostream &OS, const void *Ptr,
+void Twine::printOneChild(raw_ostream &OS, Child Ptr,
NodeKind Kind) const {
switch (Kind) {
case Twine::NullKind: break;
case Twine::EmptyKind: break;
case Twine::TwineKind:
- static_cast<const Twine*>(Ptr)->print(OS);
+ Ptr.twine->print(OS);
break;
case Twine::CStringKind:
- OS << static_cast<const char*>(Ptr);
+ OS << Ptr.cString;
break;
case Twine::StdStringKind:
- OS << *static_cast<const std::string*>(Ptr);
+ OS << *Ptr.stdString;
break;
case Twine::StringRefKind:
- OS << *static_cast<const StringRef*>(Ptr);
+ OS << *Ptr.stringRef;
+ break;
+ case Twine::CharKind:
+ OS << Ptr.character;
break;
case Twine::DecUIKind:
- OS << (unsigned)(uintptr_t)Ptr;
+ OS << Ptr.decUI;
break;
case Twine::DecIKind:
- OS << (int)(intptr_t)Ptr;
+ OS << Ptr.decI;
break;
case Twine::DecULKind:
- OS << *static_cast<const unsigned long*>(Ptr);
+ OS << *Ptr.decUL;
break;
case Twine::DecLKind:
- OS << *static_cast<const long*>(Ptr);
+ OS << *Ptr.decL;
break;
case Twine::DecULLKind:
- OS << *static_cast<const unsigned long long*>(Ptr);
+ OS << *Ptr.decULL;
break;
case Twine::DecLLKind:
- OS << *static_cast<const long long*>(Ptr);
+ OS << *Ptr.decLL;
break;
case Twine::UHexKind:
- OS.write_hex(*static_cast<const uint64_t*>(Ptr));
+ OS.write_hex(*Ptr.uHex);
break;
}
}
-void Twine::printOneChildRepr(raw_ostream &OS, const void *Ptr,
+void Twine::printOneChildRepr(raw_ostream &OS, Child Ptr,
NodeKind Kind) const {
switch (Kind) {
case Twine::NullKind:
@@ -105,40 +108,43 @@ void Twine::printOneChildRepr(raw_ostream &OS, const void *Ptr,
OS << "empty"; break;
case Twine::TwineKind:
OS << "rope:";
- static_cast<const Twine*>(Ptr)->printRepr(OS);
+ Ptr.twine->printRepr(OS);
break;
case Twine::CStringKind:
OS << "cstring:\""
- << static_cast<const char*>(Ptr) << "\"";
+ << Ptr.cString << "\"";
break;
case Twine::StdStringKind:
OS << "std::string:\""
- << static_cast<const std::string*>(Ptr) << "\"";
+ << Ptr.stdString << "\"";
break;
case Twine::StringRefKind:
OS << "stringref:\""
- << static_cast<const StringRef*>(Ptr) << "\"";
+ << Ptr.stringRef << "\"";
+ break;
+ case Twine::CharKind:
+ OS << "char:\"" << Ptr.character << "\"";
break;
case Twine::DecUIKind:
- OS << "decUI:\"" << (unsigned)(uintptr_t)Ptr << "\"";
+ OS << "decUI:\"" << Ptr.decUI << "\"";
break;
case Twine::DecIKind:
- OS << "decI:\"" << (int)(intptr_t)Ptr << "\"";
+ OS << "decI:\"" << Ptr.decI << "\"";
break;
case Twine::DecULKind:
- OS << "decUL:\"" << *static_cast<const unsigned long*>(Ptr) << "\"";
+ OS << "decUL:\"" << *Ptr.decUL << "\"";
break;
case Twine::DecLKind:
- OS << "decL:\"" << *static_cast<const long*>(Ptr) << "\"";
+ OS << "decL:\"" << *Ptr.decL << "\"";
break;
case Twine::DecULLKind:
- OS << "decULL:\"" << *static_cast<const unsigned long long*>(Ptr) << "\"";
+ OS << "decULL:\"" << *Ptr.decULL << "\"";
break;
case Twine::DecLLKind:
- OS << "decLL:\"" << *static_cast<const long long*>(Ptr) << "\"";
+ OS << "decLL:\"" << *Ptr.decLL << "\"";
break;
case Twine::UHexKind:
- OS << "uhex:\"" << static_cast<const uint64_t*>(Ptr) << "\"";
+ OS << "uhex:\"" << Ptr.uHex << "\"";
break;
}
}
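Replacing const void* with the Child union trades casts for tagged-union member access: the NodeKind tag says which member is live, and each case reads only that member. The shape in miniature, independent of Twine:

    #include <iostream>

    struct Node {
      enum Kind { IntKind, CStrKind } TheKind;   // tag: which member is live
      union Child {
        int IntVal;
        const char *CStr;
      } TheChild;
    };

    void printNode(std::ostream &OS, const Node &N) {
      switch (N.TheKind) {                       // dispatch on the tag, no casts
      case Node::IntKind:  OS << N.TheChild.IntVal; break;
      case Node::CStrKind: OS << N.TheChild.CStr;   break;
      }
    }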
diff --git a/lib/Support/Unix/Host.inc b/lib/Support/Unix/Host.inc
index 5fd0e5e..dda3ce2 100644
--- a/lib/Support/Unix/Host.inc
+++ b/lib/Support/Unix/Host.inc
@@ -22,6 +22,7 @@
#include <sys/utsname.h>
#include <cctype>
#include <string>
+#include <cstdlib> // ::getenv
using namespace llvm;
diff --git a/lib/Support/Unix/Path.inc b/lib/Support/Unix/Path.inc
index f295b92..85c7c40 100644
--- a/lib/Support/Unix/Path.inc
+++ b/lib/Support/Unix/Path.inc
@@ -252,8 +252,8 @@ Path::GetUserHomeDirectory() {
Path
Path::GetCurrentDirectory() {
char pathname[MAXPATHLEN];
- if (!getcwd(pathname,MAXPATHLEN)) {
- assert (false && "Could not query current working directory.");
+ if (!getcwd(pathname, MAXPATHLEN)) {
+ assert(false && "Could not query current working directory.");
return Path();
}
diff --git a/lib/Support/Unix/PathV2.inc b/lib/Support/Unix/PathV2.inc
index 03ff283..bbbc344 100644
--- a/lib/Support/Unix/PathV2.inc
+++ b/lib/Support/Unix/PathV2.inc
@@ -42,6 +42,9 @@
#if HAVE_STDIO_H
#include <stdio.h>
#endif
+#if HAVE_LIMITS_H
+#include <limits.h>
+#endif
using namespace llvm;
@@ -342,19 +345,22 @@ error_code status(const Twine &path, file_status &result) {
}
error_code unique_file(const Twine &model, int &result_fd,
- SmallVectorImpl<char> &result_path) {
+ SmallVectorImpl<char> &result_path,
+ bool makeAbsolute) {
SmallString<128> Model;
model.toVector(Model);
// Null terminate.
Model.c_str();
- // Make model absolute by prepending a temp directory if it's not already.
- bool absolute = path::is_absolute(Twine(Model));
- if (!absolute) {
- SmallString<128> TDir;
- if (error_code ec = TempDir(TDir)) return ec;
- path::append(TDir, Twine(Model));
- Model.swap(TDir);
+ if (makeAbsolute) {
+ // Make model absolute by prepending a temp directory if it's not already.
+ bool absolute = path::is_absolute(Twine(Model));
+ if (!absolute) {
+ SmallString<128> TDir;
+ if (error_code ec = TempDir(TDir)) return ec;
+ path::append(TDir, Twine(Model));
+ Model.swap(TDir);
+ }
}
// Replace '%' with random chars. From here on, DO NOT modify model. It may be
diff --git a/lib/Support/Unix/Process.inc b/lib/Support/Unix/Process.inc
index 5cdb11c..da440fd 100644
--- a/lib/Support/Unix/Process.inc
+++ b/lib/Support/Unix/Process.inc
@@ -293,3 +293,7 @@ const char *Process::OutputBold(bool bg) {
const char *Process::ResetColor() {
return "\033[0m";
}
+
+void Process::SetWorkingDirectory(std::string Path) {
+ ::chdir(Path.c_str());
+}
diff --git a/lib/Support/Windows/DynamicLibrary.inc b/lib/Support/Windows/DynamicLibrary.inc
index fc5f580..83da82a 100644
--- a/lib/Support/Windows/DynamicLibrary.inc
+++ b/lib/Support/Windows/DynamicLibrary.inc
@@ -39,7 +39,7 @@ using namespace sys;
//=== and must not be UNIX code.
//===----------------------------------------------------------------------===//
-static std::vector<HMODULE> OpenedHandles;
+static DenseSet<HMODULE> *OpenedHandles;
extern "C" {
@@ -63,30 +63,43 @@ extern "C" {
#endif
stricmp(ModuleName, "msvcrt20") != 0 &&
stricmp(ModuleName, "msvcrt40") != 0) {
- OpenedHandles.push_back((HMODULE)ModuleBase);
+ OpenedHandles->insert((HMODULE)ModuleBase);
}
return TRUE;
}
}
-bool DynamicLibrary::LoadLibraryPermanently(const char *filename,
- std::string *ErrMsg) {
- if (filename) {
- HMODULE a_handle = LoadLibrary(filename);
+DynamicLibrary DynamicLibrary::getPermanentLibrary(const char *filename,
+ std::string *errMsg) {
+ SmartScopedLock<true> lock(getMutex());
- if (a_handle == 0)
- return MakeErrMsg(ErrMsg, std::string(filename) + ": Can't open : ");
+ if (!filename) {
+ // When no file is specified, enumerate all DLLs and EXEs in the process.
+ if (OpenedHandles == 0)
+ OpenedHandles = new DenseSet<HMODULE>();
- OpenedHandles.push_back(a_handle);
- } else {
- // When no file is specified, enumerate all DLLs and EXEs in the
- // process.
EnumerateLoadedModules(GetCurrentProcess(), ELM_Callback, 0);
+ // Dummy library that represents "search all handles".
+ // This is mostly to ensure that the return value still shows up as "valid".
+ return DynamicLibrary(&OpenedHandles);
}
+
+ HMODULE a_handle = LoadLibrary(filename);
- // Because we don't remember the handle, we will never free it; hence,
- // it is loaded permanently.
- return false;
+ if (a_handle == 0) {
+ MakeErrMsg(errMsg, std::string(filename) + ": Can't open : ");
+ return DynamicLibrary();
+ }
+
+ if (OpenedHandles == 0)
+ OpenedHandles = new DenseSet<HMODULE>();
+
+ // If we've already loaded this library, FreeLibrary() the handle in order to
+ // keep the internal refcount at +1.
+ if (!OpenedHandles->insert(a_handle).second)
+ FreeLibrary(a_handle);
+
+ return DynamicLibrary(a_handle);
}
// Stack probing routines are in the support library (e.g. libgcc), but we don't
@@ -101,21 +114,24 @@ bool DynamicLibrary::LoadLibraryPermanently(const char *filename,
#undef EXPLICIT_SYMBOL2
void* DynamicLibrary::SearchForAddressOfSymbol(const char* symbolName) {
+ SmartScopedLock<true> Lock(getMutex());
+
// First check symbols added via AddSymbol().
if (ExplicitSymbols) {
- std::map<std::string, void *>::iterator I =
- ExplicitSymbols->find(symbolName);
- std::map<std::string, void *>::iterator E = ExplicitSymbols->end();
- if (I != E)
- return I->second;
+ StringMap<void *>::iterator i = ExplicitSymbols->find(symbolName);
+
+ if (i != ExplicitSymbols->end())
+ return i->second;
}
// Now search the libraries.
- for (std::vector<HMODULE>::iterator I = OpenedHandles.begin(),
- E = OpenedHandles.end(); I != E; ++I) {
- FARPROC ptr = GetProcAddress((HMODULE)*I, symbolName);
- if (ptr) {
- return (void *)(intptr_t)ptr;
+ if (OpenedHandles) {
+ for (DenseSet<HMODULE>::iterator I = OpenedHandles->begin(),
+ E = OpenedHandles->end(); I != E; ++I) {
+ FARPROC ptr = GetProcAddress((HMODULE)*I, symbolName);
+ if (ptr) {
+ return (void *)(intptr_t)ptr;
+ }
}
}
@@ -134,4 +150,14 @@ void* DynamicLibrary::SearchForAddressOfSymbol(const char* symbolName) {
return 0;
}
+
+void *DynamicLibrary::getAddressOfSymbol(const char *symbolName) {
+ if (!isValid())
+ return NULL;
+ if (Data == &OpenedHandles)
+ return SearchForAddressOfSymbol(symbolName);
+ return (void *)(intptr_t)GetProcAddress((HMODULE)Data, symbolName);
+}
+
+
}
diff --git a/lib/Support/Windows/Memory.inc b/lib/Support/Windows/Memory.inc
index 9f69e73..fcc7283 100644
--- a/lib/Support/Windows/Memory.inc
+++ b/lib/Support/Windows/Memory.inc
@@ -32,11 +32,16 @@ MemoryBlock Memory::AllocateRWX(size_t NumBytes,
static const size_t pageSize = Process::GetPageSize();
size_t NumPages = (NumBytes+pageSize-1)/pageSize;
- //FIXME: support NearBlock if ever needed on Win64.
+ PVOID start = NearBlock ? static_cast<unsigned char *>(NearBlock->base()) +
+ NearBlock->size() : NULL;
- void *pa = VirtualAlloc(NULL, NumPages*pageSize, MEM_COMMIT,
+ void *pa = VirtualAlloc(start, NumPages*pageSize, MEM_RESERVE | MEM_COMMIT,
PAGE_EXECUTE_READWRITE);
if (pa == NULL) {
+ if (NearBlock) {
+ // Try again without the NearBlock hint
+ return AllocateRWX(NumBytes, NULL, ErrMsg);
+ }
MakeErrMsg(ErrMsg, "Can't allocate RWX Memory: ");
return MemoryBlock();
}
@@ -54,20 +59,62 @@ bool Memory::ReleaseRWX(MemoryBlock &M, std::string *ErrMsg) {
return false;
}
+static DWORD getProtection(const void *addr) {
+ MEMORY_BASIC_INFORMATION info;
+ if (sizeof(info) == ::VirtualQuery(addr, &info, sizeof(info))) {
+ return info.Protect;
+ }
+ return 0;
+}
+
bool Memory::setWritable(MemoryBlock &M, std::string *ErrMsg) {
+ if (!setRangeWritable(M.Address, M.Size)) {
+ return MakeErrMsg(ErrMsg, "Cannot set memory to writeable: ");
+ }
return true;
}
bool Memory::setExecutable(MemoryBlock &M, std::string *ErrMsg) {
- return false;
+ if (!setRangeExecutable(M.Address, M.Size)) {
+ return MakeErrMsg(ErrMsg, "Cannot set memory to executable: ");
+ }
+ return true;
}
bool Memory::setRangeWritable(const void *Addr, size_t Size) {
- return true;
+ DWORD prot = getProtection(Addr);
+ if (!prot)
+ return false;
+
+ if (prot == PAGE_EXECUTE || prot == PAGE_EXECUTE_READ) {
+ prot = PAGE_EXECUTE_READWRITE;
+ } else if (prot == PAGE_NOACCESS || prot == PAGE_READONLY) {
+ prot = PAGE_READWRITE;
+ }
+
+ DWORD oldProt;
+ sys::Memory::InvalidateInstructionCache(Addr, Size);
+ return ::VirtualProtect(const_cast<LPVOID>(Addr), Size, prot, &oldProt)
+ == TRUE;
}
bool Memory::setRangeExecutable(const void *Addr, size_t Size) {
- return false;
+ DWORD prot = getProtection(Addr);
+ if (!prot)
+ return false;
+
+ if (prot == PAGE_NOACCESS) {
+ prot = PAGE_EXECUTE;
+ } else if (prot == PAGE_READONLY) {
+ prot = PAGE_EXECUTE_READ;
+ } else if (prot == PAGE_READWRITE) {
+ prot = PAGE_EXECUTE_READWRITE;
+ }
+
+ DWORD oldProt;
+ sys::Memory::InvalidateInstructionCache(Addr, Size);
+ return ::VirtualProtect(const_cast<LPVOID>(Addr), Size, prot, &oldProt)
+ == TRUE;
}
}
diff --git a/lib/Support/Windows/PathV2.inc b/lib/Support/Windows/PathV2.inc
index af71b73..bc597b2 100644
--- a/lib/Support/Windows/PathV2.inc
+++ b/lib/Support/Windows/PathV2.inc
@@ -445,13 +445,35 @@ error_code file_size(const Twine &path, uint64_t &result) {
return success;
}
+static bool isReservedName(StringRef path) {
+ // This list of reserved names comes from MSDN, at:
+ // http://msdn.microsoft.com/en-us/library/aa365247%28v=vs.85%29.aspx
+ static const char *sReservedNames[] = { "nul", "con", "prn", "aux",
+ "com1", "com2", "com3", "com4", "com5", "com6",
+ "com7", "com8", "com9", "lpt1", "lpt2", "lpt3",
+ "lpt4", "lpt5", "lpt6", "lpt7", "lpt8", "lpt9" };
+
+ // First, check to see if this is a device namespace, which always
+ // starts with \\.\, since device namespaces are not legal file paths.
+ if (path.startswith("\\\\.\\"))
+ return true;
+
+ // Then compare against the list of ancient reserved names
+ for (size_t i = 0; i < sizeof(sReservedNames) / sizeof(const char *); ++i) {
+ if (path.equals_lower(sReservedNames[i]))
+ return true;
+ }
+
+ // The path isn't what we consider reserved.
+ return false;
+}
+
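isReservedName() accepts any device-namespace path (the \\.\ prefix) and the bare legacy DOS device names, case-insensitively; names with extensions such as "con.txt" are not caught by the equals_lower() comparison. The individual checks it relies on, as a small sketch:

    #include "llvm/ADT/StringRef.h"
    #include <cassert>
    using namespace llvm;

    void expectations() {
      assert(StringRef("\\\\.\\C:").startswith("\\\\.\\")); // device namespace
      assert(StringRef("NUL").equals_lower("nul"));         // case-insensitive hit
      assert(!StringRef("con.txt").equals_lower("con"));    // extension defeats it
    }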
error_code status(const Twine &path, file_status &result) {
SmallString<128> path_storage;
SmallVector<wchar_t, 128> path_utf16;
StringRef path8 = path.toStringRef(path_storage);
- // FIXME: We should detect as many "special file name" as possible.
- if (path8.compare_lower("nul") == 0) {
+ if (isReservedName(path8)) {
result = file_status(file_type::character_file);
return success;
}
@@ -501,7 +523,8 @@ handle_status_error:
}
error_code unique_file(const Twine &model, int &result_fd,
- SmallVectorImpl<char> &result_path) {
+ SmallVectorImpl<char> &result_path,
+ bool makeAbsolute) {
// Use result_path as temp storage.
result_path.set_size(0);
StringRef m = model.toStringRef(result_path);
@@ -509,17 +532,19 @@ error_code unique_file(const Twine &model, int &result_fd,
SmallVector<wchar_t, 128> model_utf16;
if (error_code ec = UTF8ToUTF16(m, model_utf16)) return ec;
- // Make model absolute by prepending a temp directory if it's not already.
- bool absolute = path::is_absolute(m);
-
- if (!absolute) {
- SmallVector<wchar_t, 64> temp_dir;
- if (error_code ec = TempDir(temp_dir)) return ec;
- // Handle c: by removing it.
- if (model_utf16.size() > 2 && model_utf16[1] == L':') {
- model_utf16.erase(model_utf16.begin(), model_utf16.begin() + 2);
+ if (makeAbsolute) {
+ // Make model absolute by prepending a temp directory if it's not already.
+ bool absolute = path::is_absolute(m);
+
+ if (!absolute) {
+ SmallVector<wchar_t, 64> temp_dir;
+ if (error_code ec = TempDir(temp_dir)) return ec;
+ // Handle c: by removing it.
+ if (model_utf16.size() > 2 && model_utf16[1] == L':') {
+ model_utf16.erase(model_utf16.begin(), model_utf16.begin() + 2);
+ }
+ model_utf16.insert(model_utf16.begin(), temp_dir.begin(), temp_dir.end());
}
- model_utf16.insert(model_utf16.begin(), temp_dir.begin(), temp_dir.end());
}
// Replace '%' with random chars. From here on, DO NOT modify model. It may be
diff --git a/lib/Support/Windows/Process.inc b/lib/Support/Windows/Process.inc
index 06a7f00..fe54eb1 100644
--- a/lib/Support/Windows/Process.inc
+++ b/lib/Support/Windows/Process.inc
@@ -15,6 +15,7 @@
#include <psapi.h>
#include <malloc.h>
#include <io.h>
+#include <direct.h>
#ifdef __MINGW32__
#if (HAVE_LIBPSAPI != 1)
@@ -219,4 +220,8 @@ const char *Process::ResetColor() {
return 0;
}
+void Process::SetWorkingDirectory(std::string Path) {
+ ::_chdir(Path.c_str());
+}
+
}
diff --git a/lib/Support/Windows/RWMutex.inc b/lib/Support/Windows/RWMutex.inc
index 471f8fa..26b9bba 100644
--- a/lib/Support/Windows/RWMutex.inc
+++ b/lib/Support/Windows/RWMutex.inc
@@ -18,39 +18,115 @@
#include "Windows.h"
-// FIXME: Windows does not have reader-writer locks pre-Vista. If you want
-// real reader-writer locks, you a threads implementation for Windows.
-
namespace llvm {
using namespace sys;
+// Windows has slim reader/writer (SRW) lock support on Vista and higher, so we
+// will attempt to load the APIs. If they exist, we will use them, and
+// if not, we will fall back on critical sections. When we drop support
+// for XP, we can stop lazy-loading these APIs and just use them directly.
+#if defined(__MINGW32__)
+ // Taken from WinNT.h
+ typedef struct _RTL_SRWLOCK {
+ PVOID Ptr;
+ } RTL_SRWLOCK, *PRTL_SRWLOCK;
+
+ // Taken from WinBase.h
+ typedef RTL_SRWLOCK SRWLOCK, *PSRWLOCK;
+#endif
+
+static VOID (WINAPI *fpInitializeSRWLock)(PSRWLOCK lock) = NULL;
+static VOID (WINAPI *fpAcquireSRWLockExclusive)(PSRWLOCK lock) = NULL;
+static VOID (WINAPI *fpAcquireSRWLockShared)(PSRWLOCK lock) = NULL;
+static VOID (WINAPI *fpReleaseSRWLockExclusive)(PSRWLOCK lock) = NULL;
+static VOID (WINAPI *fpReleaseSRWLockShared)(PSRWLOCK lock) = NULL;
+
+static bool sHasSRW = false;
+
+static bool loadSRW() {
+ static bool sChecked = false;
+ if (!sChecked) {
+ sChecked = true;
+
+ HMODULE hLib = ::LoadLibrary(TEXT("Kernel32"));
+ if (hLib) {
+ fpInitializeSRWLock =
+ (VOID (WINAPI *)(PSRWLOCK))::GetProcAddress(hLib,
+ "InitializeSRWLock");
+ fpAcquireSRWLockExclusive =
+ (VOID (WINAPI *)(PSRWLOCK))::GetProcAddress(hLib,
+ "AcquireSRWLockExclusive");
+ fpAcquireSRWLockShared =
+ (VOID (WINAPI *)(PSRWLOCK))::GetProcAddress(hLib,
+ "AcquireSRWLockShared");
+ fpReleaseSRWLockExclusive =
+ (VOID (WINAPI *)(PSRWLOCK))::GetProcAddress(hLib,
+ "ReleaseSRWLockExclusive");
+ fpReleaseSRWLockShared =
+ (VOID (WINAPI *)(PSRWLOCK))::GetProcAddress(hLib,
+ "ReleaseSRWLockShared");
+ ::FreeLibrary(hLib);
+
+ if (fpInitializeSRWLock != NULL) {
+ sHasSRW = true;
+ }
+ }
+ }
+ return sHasSRW;
+}
+
RWMutexImpl::RWMutexImpl() {
- data_ = calloc(1, sizeof(CRITICAL_SECTION));
- InitializeCriticalSection(static_cast<LPCRITICAL_SECTION>(data_));
+ if (loadSRW()) {
+ data_ = calloc(1, sizeof(SRWLOCK));
+ fpInitializeSRWLock(static_cast<PSRWLOCK>(data_));
+ } else {
+ data_ = calloc(1, sizeof(CRITICAL_SECTION));
+ InitializeCriticalSection(static_cast<LPCRITICAL_SECTION>(data_));
+ }
}
RWMutexImpl::~RWMutexImpl() {
- DeleteCriticalSection(static_cast<LPCRITICAL_SECTION>(data_));
- free(data_);
+ if (sHasSRW) {
+ // Nothing to do in the case of slim reader/writers
+ } else {
+ DeleteCriticalSection(static_cast<LPCRITICAL_SECTION>(data_));
+ free(data_);
+ }
}
bool RWMutexImpl::reader_acquire() {
- EnterCriticalSection(static_cast<LPCRITICAL_SECTION>(data_));
+ if (sHasSRW) {
+ fpAcquireSRWLockShared(static_cast<PSRWLOCK>(data_));
+ } else {
+ EnterCriticalSection(static_cast<LPCRITICAL_SECTION>(data_));
+ }
return true;
}
bool RWMutexImpl::reader_release() {
- LeaveCriticalSection(static_cast<LPCRITICAL_SECTION>(data_));
+ if (sHasSRW) {
+ fpReleaseSRWLockShared(static_cast<PSRWLOCK>(data_));
+ } else {
+ LeaveCriticalSection(static_cast<LPCRITICAL_SECTION>(data_));
+ }
return true;
}
bool RWMutexImpl::writer_acquire() {
- EnterCriticalSection(static_cast<LPCRITICAL_SECTION>(data_));
+ if (sHasSRW) {
+ fpAcquireSRWLockExclusive(static_cast<PSRWLOCK>(data_));
+ } else {
+ EnterCriticalSection(static_cast<LPCRITICAL_SECTION>(data_));
+ }
return true;
}
bool RWMutexImpl::writer_release() {
- LeaveCriticalSection(static_cast<LPCRITICAL_SECTION>(data_));
+ if (sHasSRW) {
+ fpReleaseSRWLockExclusive(static_cast<PSRWLOCK>(data_));
+ } else {
+ LeaveCriticalSection(static_cast<LPCRITICAL_SECTION>(data_));
+ }
return true;
}
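The loader above is the classic pattern for using Vista-only APIs while still running on XP: resolve the entry points once with GetProcAddress(), remember whether they all exist, and branch to a fallback when they do not. The same shape reduced to one optional API (Windows-only; CancelIoEx is just a convenient Vista-era example, not something this file uses):

    #include <windows.h>

    typedef BOOL (WINAPI *CancelIoExFn)(HANDLE, LPOVERLAPPED);
    static CancelIoExFn fpCancelIoEx = NULL;

    static bool haveCancelIoEx() {
      static bool Checked = false;
      if (!Checked) {
        Checked = true;
        // Kernel32 is mapped into every process already, so GetModuleHandle
        // suffices and no matching FreeLibrary is needed.
        if (HMODULE h = ::GetModuleHandle(TEXT("Kernel32")))
          fpCancelIoEx = (CancelIoExFn)::GetProcAddress(h, "CancelIoEx");
      }
      return fpCancelIoEx != NULL;
    }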
diff --git a/lib/Support/Windows/Signals.inc b/lib/Support/Windows/Signals.inc
index 14f3f21..0d4b8a2 100644
--- a/lib/Support/Windows/Signals.inc
+++ b/lib/Support/Windows/Signals.inc
@@ -23,14 +23,133 @@
#endif
#include <psapi.h>
-#ifdef __MINGW32__
+#ifdef _MSC_VER
+ #pragma comment(lib, "psapi.lib")
+ #pragma comment(lib, "dbghelp.lib")
+#elif __MINGW32__
#if ((HAVE_LIBIMAGEHLP != 1) || (HAVE_LIBPSAPI != 1))
#error "libimagehlp.a & libpsapi.a should be present"
#endif
-#else
- #pragma comment(lib, "psapi.lib")
- #pragma comment(lib, "dbghelp.lib")
-#endif
+  // The version of g++ that comes with MinGW does *not* properly understand
+  // the ll format specifier for printf. However, MinGW passes the format
+  // specifiers on to the MSVCRT entirely, and the CRT understands the ll
+  // specifier. So these warnings are spurious in this case. Since we compile
+  // with -Wall, the spurious warnings would otherwise be emitted, so we turn
+  // them off for just this file. However, MinGW also does not support push
+  // and pop for diagnostics, so we have to manually turn them back on at the
+  // end of the file.
+ #pragma GCC diagnostic ignored "-Wformat"
+ #pragma GCC diagnostic ignored "-Wformat-extra-args"
+
+ #if !defined(__MINGW64_VERSION_MAJOR)
+ // MinGW.org does not have updated support for the 64-bit versions of the
+ // DebugHlp APIs. So we will have to load them manually. The structures and
+ // method signatures were pulled from DbgHelp.h in the Windows Platform SDK,
+ // and adjusted for brevity.
+ typedef struct _IMAGEHLP_LINE64 {
+ DWORD SizeOfStruct;
+ PVOID Key;
+ DWORD LineNumber;
+ PCHAR FileName;
+ DWORD64 Address;
+ } IMAGEHLP_LINE64, *PIMAGEHLP_LINE64;
+
+ typedef struct _IMAGEHLP_SYMBOL64 {
+ DWORD SizeOfStruct;
+ DWORD64 Address;
+ DWORD Size;
+ DWORD Flags;
+ DWORD MaxNameLength;
+ CHAR Name[1];
+ } IMAGEHLP_SYMBOL64, *PIMAGEHLP_SYMBOL64;
+
+ typedef struct _tagADDRESS64 {
+ DWORD64 Offset;
+ WORD Segment;
+ ADDRESS_MODE Mode;
+ } ADDRESS64, *LPADDRESS64;
+
+ typedef struct _KDHELP64 {
+ DWORD64 Thread;
+ DWORD ThCallbackStack;
+ DWORD ThCallbackBStore;
+ DWORD NextCallback;
+ DWORD FramePointer;
+ DWORD64 KiCallUserMode;
+ DWORD64 KeUserCallbackDispatcher;
+ DWORD64 SystemRangeStart;
+ DWORD64 KiUserExceptionDispatcher;
+ DWORD64 StackBase;
+ DWORD64 StackLimit;
+ DWORD64 Reserved[5];
+ } KDHELP64, *PKDHELP64;
+
+ typedef struct _tagSTACKFRAME64 {
+ ADDRESS64 AddrPC;
+ ADDRESS64 AddrReturn;
+ ADDRESS64 AddrFrame;
+ ADDRESS64 AddrStack;
+ ADDRESS64 AddrBStore;
+ PVOID FuncTableEntry;
+ DWORD64 Params[4];
+ BOOL Far;
+ BOOL Virtual;
+ DWORD64 Reserved[3];
+ KDHELP64 KdHelp;
+ } STACKFRAME64, *LPSTACKFRAME64;
+
+typedef BOOL (__stdcall *PREAD_PROCESS_MEMORY_ROUTINE64)(HANDLE hProcess,
+ DWORD64 qwBaseAddress, PVOID lpBuffer, DWORD nSize,
+ LPDWORD lpNumberOfBytesRead);
+
+typedef PVOID (__stdcall *PFUNCTION_TABLE_ACCESS_ROUTINE64)( HANDLE ahProcess,
+ DWORD64 AddrBase);
+
+typedef DWORD64 (__stdcall *PGET_MODULE_BASE_ROUTINE64)(HANDLE hProcess,
+ DWORD64 Address);
+
+typedef DWORD64 (__stdcall *PTRANSLATE_ADDRESS_ROUTINE64)(HANDLE hProcess,
+ HANDLE hThread, LPADDRESS64 lpaddr);
+
+typedef BOOL (WINAPI *fpStackWalk64)(DWORD, HANDLE, HANDLE, LPSTACKFRAME64,
+ PVOID, PREAD_PROCESS_MEMORY_ROUTINE64,
+ PFUNCTION_TABLE_ACCESS_ROUTINE64,
+ PGET_MODULE_BASE_ROUTINE64,
+ PTRANSLATE_ADDRESS_ROUTINE64);
+static fpStackWalk64 StackWalk64;
+
+typedef DWORD64 (WINAPI *fpSymGetModuleBase64)(HANDLE, DWORD64);
+static fpSymGetModuleBase64 SymGetModuleBase64;
+
+typedef BOOL (WINAPI *fpSymGetSymFromAddr64)(HANDLE, DWORD64,
+ PDWORD64, PIMAGEHLP_SYMBOL64);
+static fpSymGetSymFromAddr64 SymGetSymFromAddr64;
+
+typedef BOOL (WINAPI *fpSymGetLineFromAddr64)(HANDLE, DWORD64,
+ PDWORD, PIMAGEHLP_LINE64);
+static fpSymGetLineFromAddr64 SymGetLineFromAddr64;
+
+typedef PVOID (WINAPI *fpSymFunctionTableAccess64)(HANDLE, DWORD64);
+static fpSymFunctionTableAccess64 SymFunctionTableAccess64;
+
+static bool load64BitDebugHelp(void) {
+ HMODULE hLib = ::LoadLibrary("Dbghelp.dll");
+ if (hLib) {
+ StackWalk64 = (fpStackWalk64)
+ ::GetProcAddress(hLib, "StackWalk64");
+ SymGetModuleBase64 = (fpSymGetModuleBase64)
+ ::GetProcAddress(hLib, "SymGetModuleBase64");
+ SymGetSymFromAddr64 = (fpSymGetSymFromAddr64)
+ ::GetProcAddress(hLib, "SymGetSymFromAddr64");
+ SymGetLineFromAddr64 = (fpSymGetLineFromAddr64)
+ ::GetProcAddress(hLib, "SymGetLineFromAddr64");
+ SymFunctionTableAccess64 = (fpSymFunctionTableAccess64)
+ ::GetProcAddress(hLib, "SymFunctionTableAccess64");
+ }
+ return StackWalk64 != NULL;
+}
+ #endif // !defined(__MINGW64_VERSION_MAJOR)
+#endif // __MINGW32__
// Forward declare.
static LONG WINAPI LLVMUnhandledExceptionFilter(LPEXCEPTION_POINTERS ep);
@@ -90,6 +209,18 @@ static int CRTReportHook(int ReportType, char *Message, int *Return) {
#endif
static void RegisterHandler() {
+#if __MINGW32__ && !defined(__MINGW64_VERSION_MAJOR)
+ // On MinGW.org, we need to load up the symbols explicitly, because the
+ // Win32 framework they include does not have support for the 64-bit
+ // versions of the APIs we need. If we cannot load up the APIs (which
+ // would be unexpected as they should exist on every version of Windows
+ // we support), we will bail out since there would be nothing to report.
+ if (!load64BitDebugHelp()) {
+ assert(false && "These APIs should always be available");
+ return;
+ }
+#endif
+
if (RegisteredUnhandledExceptionFilter) {
EnterCriticalSection(&CriticalSection);
return;
@@ -213,20 +344,28 @@ void llvm::sys::RunInterruptHandlers() {
static LONG WINAPI LLVMUnhandledExceptionFilter(LPEXCEPTION_POINTERS ep) {
Cleanup();
-#ifdef _WIN64
- // TODO: provide a x64 friendly version of the following
-#else
-
// Initialize the STACKFRAME structure.
- STACKFRAME StackFrame;
+ STACKFRAME64 StackFrame;
memset(&StackFrame, 0, sizeof(StackFrame));
+ DWORD machineType;
+#if defined(_M_X64)
+ machineType = IMAGE_FILE_MACHINE_AMD64;
+ StackFrame.AddrPC.Offset = ep->ContextRecord->Rip;
+ StackFrame.AddrPC.Mode = AddrModeFlat;
+ StackFrame.AddrStack.Offset = ep->ContextRecord->Rsp;
+ StackFrame.AddrStack.Mode = AddrModeFlat;
+ StackFrame.AddrFrame.Offset = ep->ContextRecord->Rbp;
+ StackFrame.AddrFrame.Mode = AddrModeFlat;
+#elif defined(_M_IX86)
+ machineType = IMAGE_FILE_MACHINE_I386;
StackFrame.AddrPC.Offset = ep->ContextRecord->Eip;
StackFrame.AddrPC.Mode = AddrModeFlat;
StackFrame.AddrStack.Offset = ep->ContextRecord->Esp;
StackFrame.AddrStack.Mode = AddrModeFlat;
StackFrame.AddrFrame.Offset = ep->ContextRecord->Ebp;
StackFrame.AddrFrame.Mode = AddrModeFlat;
+#endif
HANDLE hProcess = GetCurrentProcess();
HANDLE hThread = GetCurrentThread();
@@ -236,9 +375,9 @@ static LONG WINAPI LLVMUnhandledExceptionFilter(LPEXCEPTION_POINTERS ep) {
SymInitialize(hProcess, NULL, TRUE);
while (true) {
- if (!StackWalk(IMAGE_FILE_MACHINE_I386, hProcess, hThread, &StackFrame,
- ep->ContextRecord, NULL, SymFunctionTableAccess,
- SymGetModuleBase, NULL)) {
+ if (!StackWalk64(machineType, hProcess, hThread, &StackFrame,
+ ep->ContextRecord, NULL, SymFunctionTableAccess64,
+ SymGetModuleBase64, NULL)) {
break;
}
@@ -246,54 +385,66 @@ static LONG WINAPI LLVMUnhandledExceptionFilter(LPEXCEPTION_POINTERS ep) {
break;
// Print the PC in hexadecimal.
- DWORD PC = StackFrame.AddrPC.Offset;
- fprintf(stderr, "%08lX", PC);
+ DWORD64 PC = StackFrame.AddrPC.Offset;
+#if defined(_M_X64)
+ fprintf(stderr, "0x%016llX", PC);
+#elif defined(_M_IX86)
+ fprintf(stderr, "0x%08lX", static_cast<DWORD>(PC));
+#endif
// Print the parameters. Assume there are four.
+#if defined(_M_X64)
+ fprintf(stderr, " (0x%016llX 0x%016llX 0x%016llX 0x%016llX)",
+ StackFrame.Params[0],
+ StackFrame.Params[1],
+ StackFrame.Params[2],
+ StackFrame.Params[3]);
+#elif defined(_M_IX86)
fprintf(stderr, " (0x%08lX 0x%08lX 0x%08lX 0x%08lX)",
- StackFrame.Params[0],
- StackFrame.Params[1], StackFrame.Params[2], StackFrame.Params[3]);
-
+ static_cast<DWORD>(StackFrame.Params[0]),
+ static_cast<DWORD>(StackFrame.Params[1]),
+ static_cast<DWORD>(StackFrame.Params[2]),
+ static_cast<DWORD>(StackFrame.Params[3]));
+#endif
// Verify the PC belongs to a module in this process.
- if (!SymGetModuleBase(hProcess, PC)) {
+ if (!SymGetModuleBase64(hProcess, PC)) {
fputs(" <unknown module>\n", stderr);
continue;
}
// Print the symbol name.
char buffer[512];
- IMAGEHLP_SYMBOL *symbol = reinterpret_cast<IMAGEHLP_SYMBOL *>(buffer);
- memset(symbol, 0, sizeof(IMAGEHLP_SYMBOL));
- symbol->SizeOfStruct = sizeof(IMAGEHLP_SYMBOL);
- symbol->MaxNameLength = 512 - sizeof(IMAGEHLP_SYMBOL);
+ IMAGEHLP_SYMBOL64 *symbol = reinterpret_cast<IMAGEHLP_SYMBOL64 *>(buffer);
+ memset(symbol, 0, sizeof(IMAGEHLP_SYMBOL64));
+ symbol->SizeOfStruct = sizeof(IMAGEHLP_SYMBOL64);
+ symbol->MaxNameLength = 512 - sizeof(IMAGEHLP_SYMBOL64);
- DWORD dwDisp;
- if (!SymGetSymFromAddr(hProcess, PC, &dwDisp, symbol)) {
+ DWORD64 dwDisp;
+ if (!SymGetSymFromAddr64(hProcess, PC, &dwDisp, symbol)) {
fputc('\n', stderr);
continue;
}
buffer[511] = 0;
if (dwDisp > 0)
- fprintf(stderr, ", %s()+%04lu bytes(s)", symbol->Name, dwDisp);
+ fprintf(stderr, ", %s() + 0x%llX bytes(s)", symbol->Name, dwDisp);
else
fprintf(stderr, ", %s", symbol->Name);
// Print the source file and line number information.
- IMAGEHLP_LINE line;
+ IMAGEHLP_LINE64 line;
+ DWORD dwLineDisp;
memset(&line, 0, sizeof(line));
line.SizeOfStruct = sizeof(line);
- if (SymGetLineFromAddr(hProcess, PC, &dwDisp, &line)) {
+ if (SymGetLineFromAddr64(hProcess, PC, &dwLineDisp, &line)) {
fprintf(stderr, ", %s, line %lu", line.FileName, line.LineNumber);
- if (dwDisp > 0)
- fprintf(stderr, "+%04lu byte(s)", dwDisp);
+ if (dwLineDisp > 0)
+ fprintf(stderr, " + 0x%lX byte(s)", dwLineDisp);
}
fputc('\n', stderr);
}
-#endif
-
if (ExitOnUnhandledExceptions)
_exit(-3);
@@ -326,3 +477,12 @@ static BOOL WINAPI LLVMConsoleCtrlHandler(DWORD dwCtrlType) {
LeaveCriticalSection(&CriticalSection);
return FALSE;
}
+
+#if __MINGW32__
+ // We turned these warnings off for this file so that MinGW-g++ doesn't
+ // complain about the ll format specifiers used. Now we are turning the
+ // warnings back on. If MinGW starts to support diagnostic stacks, we can
+ // replace this with a pop.
+ #pragma GCC diagnostic warning "-Wformat"
+ #pragma GCC diagnostic warning "-Wformat-extra-args"
+#endif
diff --git a/lib/Support/Windows/Windows.h b/lib/Support/Windows/Windows.h
index 4a1553b..67b6f01 100644
--- a/lib/Support/Windows/Windows.h
+++ b/lib/Support/Windows/Windows.h
@@ -19,9 +19,9 @@
// mingw-w64 tends to define it as 0x0502 in its headers.
#undef _WIN32_WINNT
-// Require at least Windows 2000 API.
-#define _WIN32_WINNT 0x0500
-#define _WIN32_IE 0x0500 // MinGW at it again.
+// Require at least the Windows XP (5.1) API.
+#define _WIN32_WINNT 0x0501
+#define _WIN32_IE 0x0600 // MinGW at it again.
#define WIN32_LEAN_AND_MEAN
#include "llvm/Config/config.h" // Get build system configuration settings
diff --git a/lib/Support/raw_ostream.cpp b/lib/Support/raw_ostream.cpp
index 5a71fa3..4927e9a 100644
--- a/lib/Support/raw_ostream.cpp
+++ b/lib/Support/raw_ostream.cpp
@@ -84,7 +84,7 @@ void raw_ostream::SetBuffered() {
}
void raw_ostream::SetBufferAndMode(char *BufferStart, size_t Size,
- BufferKind Mode) {
+ BufferKind Mode) {
assert(((Mode == Unbuffered && BufferStart == 0 && Size == 0) ||
(Mode != Unbuffered && BufferStart && Size)) &&
"stream must be unbuffered or have at least one byte");
@@ -121,7 +121,8 @@ raw_ostream &raw_ostream::operator<<(unsigned long N) {
raw_ostream &raw_ostream::operator<<(long N) {
if (N < 0) {
*this << '-';
- N = -N;
+ // Avoid undefined behavior on LONG_MIN with a cast.
+ N = -(unsigned long)N;
}
return this->operator<<(static_cast<unsigned long>(N));
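Negating LONG_MIN directly is signed overflow, hence undefined behavior; converting to unsigned first makes the negation well-defined (modulo 2^N) and produces the right magnitude for every input. The idiom in isolation:

    // Well-defined for every long, including LONG_MIN: unsigned negation
    // wraps modulo 2^N instead of overflowing.
    unsigned long magnitude(long N) {
      return N < 0 ? -(unsigned long)N : (unsigned long)N;
    }
    // e.g. magnitude(-42) == 42UL, and magnitude(LONG_MIN) is exact.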
@@ -284,7 +285,7 @@ raw_ostream &raw_ostream::write(unsigned char C) {
raw_ostream &raw_ostream::write(const char *Ptr, size_t Size) {
// Group exceptional cases into a single branch.
- if (BUILTIN_EXPECT(OutBufCur+Size > OutBufEnd, false)) {
+ if (BUILTIN_EXPECT(size_t(OutBufEnd - OutBufCur) < Size, false)) {
if (BUILTIN_EXPECT(!OutBufStart, false)) {
if (BufferMode == Unbuffered) {
write_impl(Ptr, Size);
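The rewritten condition compares the remaining buffer space against Size rather than computing OutBufCur + Size: subtracting two pointers into the same buffer is always well-defined, while the addition can wrap for a huge Size and make the overflow check itself pass. The guard in isolation, with the same roles as the members above:

    #include <cstddef>

    // 'End - Cur' is a valid pointer difference; 'Cur + Size' may wrap when
    // Size is enormous, so compare the space that is actually left.
    static inline bool fitsInBuffer(const char *Cur, const char *End,
                                    size_t Size) {
      return static_cast<size_t>(End - Cur) >= Size;
    }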
diff --git a/lib/TableGen/CMakeLists.txt b/lib/TableGen/CMakeLists.txt
new file mode 100644
index 0000000..0db4134
--- /dev/null
+++ b/lib/TableGen/CMakeLists.txt
@@ -0,0 +1,16 @@
+## FIXME: This only requires RTTI because tblgen uses it. Fix that.
+set(LLVM_REQUIRES_RTTI 1)
+set(LLVM_REQUIRES_EH 1)
+
+add_llvm_library(LLVMTableGen
+ Error.cpp
+ Main.cpp
+ Record.cpp
+ TableGenBackend.cpp
+ TGLexer.cpp
+ TGParser.cpp
+ )
+
+add_llvm_library_dependencies(LLVMTableGen
+ LLVMSupport
+ )
diff --git a/lib/TableGen/Error.cpp b/lib/TableGen/Error.cpp
new file mode 100644
index 0000000..5b2cbbf
--- /dev/null
+++ b/lib/TableGen/Error.cpp
@@ -0,0 +1,39 @@
+//===- Error.cpp - tblgen error handling helper routines --------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains error handling helper routines to pretty-print diagnostic
+// messages from tblgen.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/TableGen/Error.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/Support/raw_ostream.h"
+
+namespace llvm {
+
+SourceMgr SrcMgr;
+
+void PrintError(SMLoc ErrorLoc, const Twine &Msg) {
+ SrcMgr.PrintMessage(ErrorLoc, Msg, "error");
+}
+
+void PrintError(const char *Loc, const Twine &Msg) {
+ SrcMgr.PrintMessage(SMLoc::getFromPointer(Loc), Msg, "error");
+}
+
+void PrintError(const Twine &Msg) {
+ errs() << "error:" << Msg << "\n";
+}
+
+void PrintError(const TGError &Error) {
+ PrintError(Error.getLoc(), Error.getMessage());
+}
+
+} // end namespace llvm
diff --git a/lib/TableGen/Main.cpp b/lib/TableGen/Main.cpp
new file mode 100644
index 0000000..01bc55e
--- /dev/null
+++ b/lib/TableGen/Main.cpp
@@ -0,0 +1,124 @@
+//===- Main.cpp - Top-Level TableGen implementation -----------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// TableGen is a tool which can be used to build up a description of something,
+// then invoke one or more "tablegen backends" to emit information about the
+// description in some predefined format. In practice, this is used by the LLVM
+// code generators to automate generation of a code generator through a
+// high-level description of the target.
+//
+//===----------------------------------------------------------------------===//
+
+#include "TGParser.h"
+#include "llvm/ADT/OwningPtr.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/ToolOutputFile.h"
+#include "llvm/Support/system_error.h"
+#include "llvm/TableGen/Error.h"
+#include "llvm/TableGen/Record.h"
+#include "llvm/TableGen/TableGenAction.h"
+#include <algorithm>
+#include <cstdio>
+using namespace llvm;
+
+namespace {
+ cl::opt<std::string>
+ OutputFilename("o", cl::desc("Output filename"), cl::value_desc("filename"),
+ cl::init("-"));
+
+ cl::opt<std::string>
+ DependFilename("d", cl::desc("Dependency filename"), cl::value_desc("filename"),
+ cl::init(""));
+
+ cl::opt<std::string>
+ InputFilename(cl::Positional, cl::desc("<input file>"), cl::init("-"));
+
+ cl::list<std::string>
+ IncludeDirs("I", cl::desc("Directory of include files"),
+ cl::value_desc("directory"), cl::Prefix);
+}
+
+namespace llvm {
+
+int TableGenMain(char *argv0, TableGenAction &Action) {
+ RecordKeeper Records;
+
+ try {
+ // Parse the input file.
+ OwningPtr<MemoryBuffer> File;
+ if (error_code ec = MemoryBuffer::getFileOrSTDIN(InputFilename.c_str(), File)) {
+ errs() << "Could not open input file '" << InputFilename << "': "
+           << ec.message() << "\n";
+ return 1;
+ }
+ MemoryBuffer *F = File.take();
+
+ // Tell SrcMgr about this buffer, which is what TGParser will pick up.
+ SrcMgr.AddNewSourceBuffer(F, SMLoc());
+
+ // Record the location of the include directory so that the lexer can find
+ // it later.
+ SrcMgr.setIncludeDirs(IncludeDirs);
+
+ TGParser Parser(SrcMgr, Records);
+
+ if (Parser.ParseFile())
+ return 1;
+
+ std::string Error;
+ tool_output_file Out(OutputFilename.c_str(), Error);
+ if (!Error.empty()) {
+ errs() << argv0 << ": error opening " << OutputFilename
+ << ":" << Error << "\n";
+ return 1;
+ }
+ if (!DependFilename.empty()) {
+ if (OutputFilename == "-") {
+ errs() << argv0 << ": the option -d must be used together with -o\n";
+ return 1;
+ }
+ tool_output_file DepOut(DependFilename.c_str(), Error);
+ if (!Error.empty()) {
+ errs() << argv0 << ": error opening " << DependFilename
+ << ":" << Error << "\n";
+ return 1;
+ }
+ DepOut.os() << OutputFilename << ":";
+ const std::vector<std::string> &Dependencies = Parser.getDependencies();
+ for (std::vector<std::string>::const_iterator I = Dependencies.begin(),
+ E = Dependencies.end();
+ I != E; ++I) {
+ DepOut.os() << " " << (*I);
+ }
+ DepOut.os() << "\n";
+ DepOut.keep();
+ }
+
+ if (Action(Out.os(), Records))
+ return 1;
+
+ // Declare success.
+ Out.keep();
+ return 0;
+
+ } catch (const TGError &Error) {
+ PrintError(Error);
+ } catch (const std::string &Error) {
+ PrintError(Error);
+ } catch (const char *Error) {
+ PrintError(Error);
+ } catch (...) {
+    errs() << argv0 << ": Unknown exception occurred.\n";
+ }
+
+ return 1;
+}
+
+}
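+
+// Editor's sketch (illustrative, not part of the vendor import): a minimal
+// driver built on TableGenMain. The name PrintRecordsAction is hypothetical;
+// TableGenAction is the real extension point this file expects.
+#if 0
+struct PrintRecordsAction : public TableGenAction {
+  bool operator()(raw_ostream &OS, RecordKeeper &Records) {
+    OS << Records;  // Dump every parsed class and def.
+    return false;   // Returning false signals success to TableGenMain.
+  }
+};
+
+int main(int argc, char **argv) {
+  cl::ParseCommandLineOptions(argc, argv);
+  PrintRecordsAction Action;
+  return TableGenMain(argv[0], Action);
+}
+#endif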
diff --git a/lib/TableGen/Makefile b/lib/TableGen/Makefile
new file mode 100644
index 0000000..4472438
--- /dev/null
+++ b/lib/TableGen/Makefile
@@ -0,0 +1,18 @@
+##===- lib/TableGen/Makefile -------------------------------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+
+LEVEL = ../..
+LIBRARYNAME = LLVMTableGen
+BUILD_ARCHIVE = 1
+
+## FIXME: This only requires RTTI because tblgen uses it. Fix that.
+REQUIRES_RTTI = 1
+REQUIRES_EH = 1
+
+include $(LEVEL)/Makefile.common
diff --git a/lib/TableGen/Record.cpp b/lib/TableGen/Record.cpp
new file mode 100644
index 0000000..b7c51ca
--- /dev/null
+++ b/lib/TableGen/Record.cpp
@@ -0,0 +1,2019 @@
+//===- Record.cpp - Record implementation ---------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Implement the tablegen record classes.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/TableGen/Record.h"
+#include "llvm/TableGen/Error.h"
+#include "llvm/Support/DataTypes.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/Format.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/FoldingSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/StringMap.h"
+
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+// std::string wrapper for DenseMap purposes
+//===----------------------------------------------------------------------===//
+
+/// TableGenStringKey - This is a wrapper for std::string suitable for
+/// using as a key to a DenseMap. Because there isn't a particularly
+/// good way to indicate tombstone or empty keys for strings, we want
+/// to wrap std::string to indicate that this is a "special" string
+/// not expected to take on certain values (those of the tombstone and
+/// empty keys). This makes things a little safer as it clarifies
+/// that DenseMap is really not appropriate for general strings.
+
+class TableGenStringKey {
+public:
+ TableGenStringKey(const std::string &str) : data(str) {}
+ TableGenStringKey(const char *str) : data(str) {}
+
+ const std::string &str() const { return data; }
+
+private:
+ std::string data;
+};
+
+/// Specialize DenseMapInfo for TableGenStringKey.
+namespace llvm {
+
+template<> struct DenseMapInfo<TableGenStringKey> {
+ static inline TableGenStringKey getEmptyKey() {
+ TableGenStringKey Empty("<<<EMPTY KEY>>>");
+ return Empty;
+ }
+ static inline TableGenStringKey getTombstoneKey() {
+ TableGenStringKey Tombstone("<<<TOMBSTONE KEY>>>");
+ return Tombstone;
+ }
+ static unsigned getHashValue(const TableGenStringKey& Val) {
+ return HashString(Val.str());
+ }
+ static bool isEqual(const TableGenStringKey& LHS,
+ const TableGenStringKey& RHS) {
+ return LHS.str() == RHS.str();
+ }
+};
+
+}
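+
+// Editor's note (illustrative): with this specialization, a DenseMap keyed
+// by strings is safe to use, e.g.:
+//   DenseMap<TableGenStringKey, int> Uses;
+//   ++Uses[TableGenStringKey("NAME")];
+// A plain DenseMap<std::string, int> would have no reserved empty/tombstone
+// values, which is exactly why the wrapper exists.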
+
+//===----------------------------------------------------------------------===//
+// Type implementations
+//===----------------------------------------------------------------------===//
+
+BitRecTy BitRecTy::Shared;
+IntRecTy IntRecTy::Shared;
+StringRecTy StringRecTy::Shared;
+CodeRecTy CodeRecTy::Shared;
+DagRecTy DagRecTy::Shared;
+
+void RecTy::dump() const { print(errs()); }
+
+ListRecTy *RecTy::getListTy() {
+ if (!ListTy)
+ ListTy = new ListRecTy(this);
+ return ListTy;
+}
+
+Init *BitRecTy::convertValue(BitsInit *BI) {
+ if (BI->getNumBits() != 1) return 0; // Only accept if just one bit!
+ return BI->getBit(0);
+}
+
+bool BitRecTy::baseClassOf(const BitsRecTy *RHS) const {
+ return RHS->getNumBits() == 1;
+}
+
+Init *BitRecTy::convertValue(IntInit *II) {
+ int64_t Val = II->getValue();
+ if (Val != 0 && Val != 1) return 0; // Only accept 0 or 1 for a bit!
+
+ return BitInit::get(Val != 0);
+}
+
+Init *BitRecTy::convertValue(TypedInit *VI) {
+ if (dynamic_cast<BitRecTy*>(VI->getType()))
+ return VI; // Accept variable if it is already of bit type!
+ return 0;
+}
+
+BitsRecTy *BitsRecTy::get(unsigned Sz) {
+ static std::vector<BitsRecTy*> Shared;
+ if (Sz >= Shared.size())
+ Shared.resize(Sz + 1);
+ BitsRecTy *&Ty = Shared[Sz];
+ if (!Ty)
+ Ty = new BitsRecTy(Sz);
+ return Ty;
+}
+
+std::string BitsRecTy::getAsString() const {
+ return "bits<" + utostr(Size) + ">";
+}
+
+Init *BitsRecTy::convertValue(UnsetInit *UI) {
+ SmallVector<Init *, 16> NewBits(Size);
+
+ for (unsigned i = 0; i != Size; ++i)
+ NewBits[i] = UnsetInit::get();
+
+ return BitsInit::get(NewBits);
+}
+
+Init *BitsRecTy::convertValue(BitInit *UI) {
+ if (Size != 1) return 0; // Can only convert single bit.
+ return BitsInit::get(UI);
+}
+
+/// canFitInBitfield - Return true if the number of bits is large enough to hold
+/// the integer value.
+static bool canFitInBitfield(int64_t Value, unsigned NumBits) {
+  // For example, with NumBits == 4, we permit Values from [-8 .. 15].
+ return (NumBits >= sizeof(Value) * 8) ||
+ (Value >> NumBits == 0) || (Value >> (NumBits-1) == -1);
+}
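+
+// Editor's worked values (assuming arithmetic right shift):
+//   canFitInBitfield(15, 4) and canFitInBitfield(-8, 4) hold, while
+//   canFitInBitfield(16, 4) and canFitInBitfield(-9, 4) do not.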
+
+/// convertValue from Int initializer to bits type: Split the integer up into the
+/// appropriate bits.
+///
+Init *BitsRecTy::convertValue(IntInit *II) {
+ int64_t Value = II->getValue();
+ // Make sure this bitfield is large enough to hold the integer value.
+ if (!canFitInBitfield(Value, Size))
+ return 0;
+
+ SmallVector<Init *, 16> NewBits(Size);
+
+ for (unsigned i = 0; i != Size; ++i)
+ NewBits[i] = BitInit::get(Value & (1LL << i));
+
+ return BitsInit::get(NewBits);
+}
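+
+// Editor's example: converting IntInit 5 to bits<4> yields bit 0 = 1,
+// bit 1 = 0, bit 2 = 1, bit 3 = 0, which prints most-significant bit
+// first as { 0, 1, 0, 1 }.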
+
+Init *BitsRecTy::convertValue(BitsInit *BI) {
+ // If the number of bits is right, return it. Otherwise we need to expand or
+ // truncate.
+ if (BI->getNumBits() == Size) return BI;
+ return 0;
+}
+
+Init *BitsRecTy::convertValue(TypedInit *VI) {
+ if (BitsRecTy *BRT = dynamic_cast<BitsRecTy*>(VI->getType()))
+ if (BRT->Size == Size) {
+ SmallVector<Init *, 16> NewBits(Size);
+
+ for (unsigned i = 0; i != Size; ++i)
+ NewBits[i] = VarBitInit::get(VI, i);
+ return BitsInit::get(NewBits);
+ }
+
+ if (Size == 1 && dynamic_cast<BitRecTy*>(VI->getType()))
+ return BitsInit::get(VI);
+
+ if (TernOpInit *Tern = dynamic_cast<TernOpInit*>(VI)) {
+ if (Tern->getOpcode() == TernOpInit::IF) {
+ Init *LHS = Tern->getLHS();
+ Init *MHS = Tern->getMHS();
+ Init *RHS = Tern->getRHS();
+
+ IntInit *MHSi = dynamic_cast<IntInit*>(MHS);
+ IntInit *RHSi = dynamic_cast<IntInit*>(RHS);
+
+ if (MHSi && RHSi) {
+ int64_t MHSVal = MHSi->getValue();
+ int64_t RHSVal = RHSi->getValue();
+
+ if (canFitInBitfield(MHSVal, Size) && canFitInBitfield(RHSVal, Size)) {
+ SmallVector<Init *, 16> NewBits(Size);
+
+ for (unsigned i = 0; i != Size; ++i)
+ NewBits[i] =
+ TernOpInit::get(TernOpInit::IF, LHS,
+ IntInit::get((MHSVal & (1LL << i)) ? 1 : 0),
+ IntInit::get((RHSVal & (1LL << i)) ? 1 : 0),
+ VI->getType());
+
+ return BitsInit::get(NewBits);
+ }
+ } else {
+ BitsInit *MHSbs = dynamic_cast<BitsInit*>(MHS);
+ BitsInit *RHSbs = dynamic_cast<BitsInit*>(RHS);
+
+ if (MHSbs && RHSbs) {
+ SmallVector<Init *, 16> NewBits(Size);
+
+ for (unsigned i = 0; i != Size; ++i)
+ NewBits[i] = TernOpInit::get(TernOpInit::IF, LHS,
+ MHSbs->getBit(i),
+ RHSbs->getBit(i),
+ VI->getType());
+
+ return BitsInit::get(NewBits);
+ }
+ }
+ }
+ }
+
+ return 0;
+}
+
+Init *IntRecTy::convertValue(BitInit *BI) {
+ return IntInit::get(BI->getValue());
+}
+
+Init *IntRecTy::convertValue(BitsInit *BI) {
+ int64_t Result = 0;
+ for (unsigned i = 0, e = BI->getNumBits(); i != e; ++i)
+ if (BitInit *Bit = dynamic_cast<BitInit*>(BI->getBit(i))) {
+ Result |= Bit->getValue() << i;
+ } else {
+ return 0;
+ }
+ return IntInit::get(Result);
+}
+
+Init *IntRecTy::convertValue(TypedInit *TI) {
+ if (TI->getType()->typeIsConvertibleTo(this))
+ return TI; // Accept variable if already of the right type!
+ return 0;
+}
+
+Init *StringRecTy::convertValue(UnOpInit *BO) {
+ if (BO->getOpcode() == UnOpInit::CAST) {
+ Init *L = BO->getOperand()->convertInitializerTo(this);
+ if (L == 0) return 0;
+ if (L != BO->getOperand())
+      return UnOpInit::get(UnOpInit::CAST, L, StringRecTy::get());
+ return BO;
+ }
+
+ return convertValue((TypedInit*)BO);
+}
+
+Init *StringRecTy::convertValue(BinOpInit *BO) {
+ if (BO->getOpcode() == BinOpInit::STRCONCAT) {
+ Init *L = BO->getLHS()->convertInitializerTo(this);
+ Init *R = BO->getRHS()->convertInitializerTo(this);
+ if (L == 0 || R == 0) return 0;
+ if (L != BO->getLHS() || R != BO->getRHS())
+      return BinOpInit::get(BinOpInit::STRCONCAT, L, R, StringRecTy::get());
+ return BO;
+ }
+
+ return convertValue((TypedInit*)BO);
+}
+
+
+Init *StringRecTy::convertValue(TypedInit *TI) {
+ if (dynamic_cast<StringRecTy*>(TI->getType()))
+ return TI; // Accept variable if already of the right type!
+ return 0;
+}
+
+std::string ListRecTy::getAsString() const {
+ return "list<" + Ty->getAsString() + ">";
+}
+
+Init *ListRecTy::convertValue(ListInit *LI) {
+ std::vector<Init*> Elements;
+
+ // Verify that all of the elements of the list are subclasses of the
+ // appropriate class!
+ for (unsigned i = 0, e = LI->getSize(); i != e; ++i)
+ if (Init *CI = LI->getElement(i)->convertInitializerTo(Ty))
+ Elements.push_back(CI);
+ else
+ return 0;
+
+ ListRecTy *LType = dynamic_cast<ListRecTy*>(LI->getType());
+ if (LType == 0) {
+ return 0;
+ }
+
+ return ListInit::get(Elements, this);
+}
+
+Init *ListRecTy::convertValue(TypedInit *TI) {
+ // Ensure that TI is compatible with our class.
+ if (ListRecTy *LRT = dynamic_cast<ListRecTy*>(TI->getType()))
+ if (LRT->getElementType()->typeIsConvertibleTo(getElementType()))
+ return TI;
+ return 0;
+}
+
+Init *CodeRecTy::convertValue(TypedInit *TI) {
+ if (TI->getType()->typeIsConvertibleTo(this))
+ return TI;
+ return 0;
+}
+
+Init *DagRecTy::convertValue(TypedInit *TI) {
+ if (TI->getType()->typeIsConvertibleTo(this))
+ return TI;
+ return 0;
+}
+
+Init *DagRecTy::convertValue(UnOpInit *BO) {
+ if (BO->getOpcode() == UnOpInit::CAST) {
+ Init *L = BO->getOperand()->convertInitializerTo(this);
+ if (L == 0) return 0;
+ if (L != BO->getOperand())
+      return UnOpInit::get(UnOpInit::CAST, L, DagRecTy::get());
+ return BO;
+ }
+ return 0;
+}
+
+Init *DagRecTy::convertValue(BinOpInit *BO) {
+ if (BO->getOpcode() == BinOpInit::CONCAT) {
+ Init *L = BO->getLHS()->convertInitializerTo(this);
+ Init *R = BO->getRHS()->convertInitializerTo(this);
+ if (L == 0 || R == 0) return 0;
+ if (L != BO->getLHS() || R != BO->getRHS())
+      return BinOpInit::get(BinOpInit::CONCAT, L, R, DagRecTy::get());
+ return BO;
+ }
+ return 0;
+}
+
+RecordRecTy *RecordRecTy::get(Record *R) {
+ return &dynamic_cast<RecordRecTy&>(*R->getDefInit()->getType());
+}
+
+std::string RecordRecTy::getAsString() const {
+ return Rec->getName();
+}
+
+Init *RecordRecTy::convertValue(DefInit *DI) {
+ // Ensure that DI is a subclass of Rec.
+ if (!DI->getDef()->isSubClassOf(Rec))
+ return 0;
+ return DI;
+}
+
+Init *RecordRecTy::convertValue(TypedInit *TI) {
+ // Ensure that TI is compatible with Rec.
+ if (RecordRecTy *RRT = dynamic_cast<RecordRecTy*>(TI->getType()))
+ if (RRT->getRecord()->isSubClassOf(getRecord()) ||
+ RRT->getRecord() == getRecord())
+ return TI;
+ return 0;
+}
+
+bool RecordRecTy::baseClassOf(const RecordRecTy *RHS) const {
+ if (Rec == RHS->getRecord() || RHS->getRecord()->isSubClassOf(Rec))
+ return true;
+
+ const std::vector<Record*> &SC = Rec->getSuperClasses();
+ for (unsigned i = 0, e = SC.size(); i != e; ++i)
+ if (RHS->getRecord()->isSubClassOf(SC[i]))
+ return true;
+
+ return false;
+}
+
+
+/// resolveTypes - Find a common type that T1 and T2 convert to.
+/// Return 0 if no such type exists.
+///
+RecTy *llvm::resolveTypes(RecTy *T1, RecTy *T2) {
+ if (!T1->typeIsConvertibleTo(T2)) {
+ if (!T2->typeIsConvertibleTo(T1)) {
+ // If one is a Record type, check superclasses
+ RecordRecTy *RecTy1 = dynamic_cast<RecordRecTy*>(T1);
+ if (RecTy1) {
+ // See if T2 inherits from a type T1 also inherits from
+ const std::vector<Record *> &T1SuperClasses =
+ RecTy1->getRecord()->getSuperClasses();
+        for (std::vector<Record *>::const_iterator i = T1SuperClasses.begin(),
+ iend = T1SuperClasses.end();
+ i != iend;
+ ++i) {
+ RecordRecTy *SuperRecTy1 = RecordRecTy::get(*i);
+ RecTy *NewType1 = resolveTypes(SuperRecTy1, T2);
+ if (NewType1 != 0) {
+ if (NewType1 != SuperRecTy1) {
+ delete SuperRecTy1;
+ }
+ return NewType1;
+ }
+ }
+ }
+ RecordRecTy *RecTy2 = dynamic_cast<RecordRecTy*>(T2);
+ if (RecTy2) {
+ // See if T1 inherits from a type T2 also inherits from
+ const std::vector<Record *> &T2SuperClasses =
+ RecTy2->getRecord()->getSuperClasses();
+ for (std::vector<Record *>::const_iterator i = T2SuperClasses.begin(),
+ iend = T2SuperClasses.end();
+ i != iend;
+ ++i) {
+ RecordRecTy *SuperRecTy2 = RecordRecTy::get(*i);
+ RecTy *NewType2 = resolveTypes(T1, SuperRecTy2);
+ if (NewType2 != 0) {
+ if (NewType2 != SuperRecTy2) {
+ delete SuperRecTy2;
+ }
+ return NewType2;
+ }
+ }
+ }
+ return 0;
+ }
+ return T2;
+ }
+ return T1;
+}
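+
+// Editor's example: given hypothetical records "class A; class B : A;",
+// resolveTypes on the record types of B and A yields A's type, since B
+// converts to its superclass; two unrelated record types resolve to 0.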
+
+
+//===----------------------------------------------------------------------===//
+// Initializer implementations
+//===----------------------------------------------------------------------===//
+
+void Init::dump() const { return print(errs()); }
+
+UnsetInit *UnsetInit::get() {
+ static UnsetInit TheInit;
+ return &TheInit;
+}
+
+BitInit *BitInit::get(bool V) {
+ static BitInit True(true);
+ static BitInit False(false);
+
+ return V ? &True : &False;
+}
+
+static void
+ProfileBitsInit(FoldingSetNodeID &ID, ArrayRef<Init *> Range) {
+ ID.AddInteger(Range.size());
+
+ for (ArrayRef<Init *>::iterator i = Range.begin(),
+ iend = Range.end();
+ i != iend;
+ ++i)
+ ID.AddPointer(*i);
+}
+
+BitsInit *BitsInit::get(ArrayRef<Init *> Range) {
+ typedef FoldingSet<BitsInit> Pool;
+ static Pool ThePool;
+
+ FoldingSetNodeID ID;
+ ProfileBitsInit(ID, Range);
+
+ void *IP = 0;
+ if (BitsInit *I = ThePool.FindNodeOrInsertPos(ID, IP))
+ return I;
+
+ BitsInit *I = new BitsInit(Range);
+ ThePool.InsertNode(I, IP);
+
+ return I;
+}
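+
+// Editor's note: because inits are uniqued through the pool above, two
+// structurally identical BitsInits share one object, so pointer equality
+// (BitsInit::get(Bits) == BitsInit::get(Bits)) doubles as value equality.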
+
+void BitsInit::Profile(FoldingSetNodeID &ID) const {
+ ProfileBitsInit(ID, Bits);
+}
+
+Init *
+BitsInit::convertInitializerBitRange(const std::vector<unsigned> &Bits) const {
+ SmallVector<Init *, 16> NewBits(Bits.size());
+
+ for (unsigned i = 0, e = Bits.size(); i != e; ++i) {
+ if (Bits[i] >= getNumBits())
+ return 0;
+ NewBits[i] = getBit(Bits[i]);
+ }
+ return BitsInit::get(NewBits);
+}
+
+std::string BitsInit::getAsString() const {
+ std::string Result = "{ ";
+ for (unsigned i = 0, e = getNumBits(); i != e; ++i) {
+ if (i) Result += ", ";
+ if (Init *Bit = getBit(e-i-1))
+ Result += Bit->getAsString();
+ else
+ Result += "*";
+ }
+ return Result + " }";
+}
+
+// resolveReferences - If there are any field references that refer to fields
+// that have been filled in, we can propagate the values now.
+//
+Init *BitsInit::resolveReferences(Record &R, const RecordVal *RV) const {
+ bool Changed = false;
+ SmallVector<Init *, 16> NewBits(getNumBits());
+
+ for (unsigned i = 0, e = Bits.size(); i != e; ++i) {
+ Init *B;
+ Init *CurBit = getBit(i);
+
+ do {
+ B = CurBit;
+ CurBit = CurBit->resolveReferences(R, RV);
+ Changed |= B != CurBit;
+ } while (B != CurBit);
+ NewBits[i] = CurBit;
+ }
+
+ if (Changed)
+ return BitsInit::get(NewBits);
+
+ return const_cast<BitsInit *>(this);
+}
+
+IntInit *IntInit::get(int64_t V) {
+ typedef DenseMap<int64_t, IntInit *> Pool;
+ static Pool ThePool;
+
+ IntInit *&I = ThePool[V];
+ if (!I) I = new IntInit(V);
+ return I;
+}
+
+std::string IntInit::getAsString() const {
+ return itostr(Value);
+}
+
+Init *
+IntInit::convertInitializerBitRange(const std::vector<unsigned> &Bits) const {
+ SmallVector<Init *, 16> NewBits(Bits.size());
+
+ for (unsigned i = 0, e = Bits.size(); i != e; ++i) {
+ if (Bits[i] >= 64)
+ return 0;
+
+ NewBits[i] = BitInit::get(Value & (INT64_C(1) << Bits[i]));
+ }
+ return BitsInit::get(NewBits);
+}
+
+StringInit *StringInit::get(const std::string &V) {
+ typedef StringMap<StringInit *> Pool;
+ static Pool ThePool;
+
+ StringInit *&I = ThePool[V];
+ if (!I) I = new StringInit(V);
+ return I;
+}
+
+CodeInit *CodeInit::get(const std::string &V) {
+ typedef StringMap<CodeInit *> Pool;
+ static Pool ThePool;
+
+ CodeInit *&I = ThePool[V];
+ if (!I) I = new CodeInit(V);
+ return I;
+}
+
+static void ProfileListInit(FoldingSetNodeID &ID,
+ ArrayRef<Init *> Range,
+ RecTy *EltTy) {
+ ID.AddInteger(Range.size());
+ ID.AddPointer(EltTy);
+
+ for (ArrayRef<Init *>::iterator i = Range.begin(),
+ iend = Range.end();
+ i != iend;
+ ++i)
+ ID.AddPointer(*i);
+}
+
+ListInit *ListInit::get(ArrayRef<Init *> Range, RecTy *EltTy) {
+ typedef FoldingSet<ListInit> Pool;
+ static Pool ThePool;
+
+  // Unique ListInits in the FoldingSet pool, using FoldingSetNodeID to
+  // profile and hash their contents.
+ FoldingSetNodeID ID;
+ ProfileListInit(ID, Range, EltTy);
+
+ void *IP = 0;
+ if (ListInit *I = ThePool.FindNodeOrInsertPos(ID, IP))
+ return I;
+
+ ListInit *I = new ListInit(Range, EltTy);
+ ThePool.InsertNode(I, IP);
+ return I;
+}
+
+void ListInit::Profile(FoldingSetNodeID &ID) const {
+ ListRecTy *ListType = dynamic_cast<ListRecTy *>(getType());
+ assert(ListType && "Bad type for ListInit!");
+ RecTy *EltTy = ListType->getElementType();
+
+ ProfileListInit(ID, Values, EltTy);
+}
+
+Init *
+ListInit::convertInitListSlice(const std::vector<unsigned> &Elements) const {
+ std::vector<Init*> Vals;
+ for (unsigned i = 0, e = Elements.size(); i != e; ++i) {
+ if (Elements[i] >= getSize())
+ return 0;
+ Vals.push_back(getElement(Elements[i]));
+ }
+ return ListInit::get(Vals, getType());
+}
+
+Record *ListInit::getElementAsRecord(unsigned i) const {
+ assert(i < Values.size() && "List element index out of range!");
+ DefInit *DI = dynamic_cast<DefInit*>(Values[i]);
+ if (DI == 0) throw "Expected record in list!";
+ return DI->getDef();
+}
+
+Init *ListInit::resolveReferences(Record &R, const RecordVal *RV) const {
+ std::vector<Init*> Resolved;
+ Resolved.reserve(getSize());
+ bool Changed = false;
+
+ for (unsigned i = 0, e = getSize(); i != e; ++i) {
+ Init *E;
+ Init *CurElt = getElement(i);
+
+ do {
+ E = CurElt;
+ CurElt = CurElt->resolveReferences(R, RV);
+ Changed |= E != CurElt;
+ } while (E != CurElt);
+ Resolved.push_back(E);
+ }
+
+ if (Changed)
+ return ListInit::get(Resolved, getType());
+ return const_cast<ListInit *>(this);
+}
+
+Init *ListInit::resolveListElementReference(Record &R, const RecordVal *IRV,
+ unsigned Elt) const {
+ if (Elt >= getSize())
+ return 0; // Out of range reference.
+ Init *E = getElement(Elt);
+ // If the element is set to some value, or if we are resolving a reference
+ // to a specific variable and that variable is explicitly unset, then
+ // replace the VarListElementInit with it.
+ if (IRV || !dynamic_cast<UnsetInit*>(E))
+ return E;
+ return 0;
+}
+
+std::string ListInit::getAsString() const {
+ std::string Result = "[";
+ for (unsigned i = 0, e = Values.size(); i != e; ++i) {
+ if (i) Result += ", ";
+ Result += Values[i]->getAsString();
+ }
+ return Result + "]";
+}
+
+Init *OpInit::resolveBitReference(Record &R, const RecordVal *IRV,
+ unsigned Bit) const {
+ Init *Folded = Fold(&R, 0);
+
+ if (Folded != this) {
+ TypedInit *Typed = dynamic_cast<TypedInit *>(Folded);
+ if (Typed) {
+ return Typed->resolveBitReference(R, IRV, Bit);
+ }
+ }
+
+ return 0;
+}
+
+Init *OpInit::resolveListElementReference(Record &R, const RecordVal *IRV,
+ unsigned Elt) const {
+ Init *Resolved = resolveReferences(R, IRV);
+ OpInit *OResolved = dynamic_cast<OpInit *>(Resolved);
+ if (OResolved) {
+ Resolved = OResolved->Fold(&R, 0);
+ }
+
+ if (Resolved != this) {
+ TypedInit *Typed = dynamic_cast<TypedInit *>(Resolved);
+ assert(Typed && "Expected typed init for list reference");
+ if (Typed) {
+ Init *New = Typed->resolveListElementReference(R, IRV, Elt);
+ if (New)
+ return New;
+ return VarListElementInit::get(Typed, Elt);
+ }
+ }
+
+ return 0;
+}
+
+UnOpInit *UnOpInit::get(UnaryOp opc, Init *lhs, RecTy *Type) {
+ typedef std::pair<std::pair<unsigned, Init *>, RecTy *> Key;
+
+ typedef DenseMap<Key, UnOpInit *> Pool;
+ static Pool ThePool;
+
+ Key TheKey(std::make_pair(std::make_pair(opc, lhs), Type));
+
+ UnOpInit *&I = ThePool[TheKey];
+ if (!I) I = new UnOpInit(opc, lhs, Type);
+ return I;
+}
+
+Init *UnOpInit::Fold(Record *CurRec, MultiClass *CurMultiClass) const {
+ switch (getOpcode()) {
+ default: assert(0 && "Unknown unop");
+ case CAST: {
+ if (getType()->getAsString() == "string") {
+ StringInit *LHSs = dynamic_cast<StringInit*>(LHS);
+ if (LHSs) {
+ return LHSs;
+ }
+
+ DefInit *LHSd = dynamic_cast<DefInit*>(LHS);
+ if (LHSd) {
+ return StringInit::get(LHSd->getDef()->getName());
+ }
+ } else {
+ StringInit *LHSs = dynamic_cast<StringInit*>(LHS);
+ if (LHSs) {
+ std::string Name = LHSs->getValue();
+
+ // From TGParser::ParseIDValue
+ if (CurRec) {
+ if (const RecordVal *RV = CurRec->getValue(Name)) {
+ if (RV->getType() != getType())
+ throw "type mismatch in cast";
+ return VarInit::get(Name, RV->getType());
+ }
+
+ std::string TemplateArgName = CurRec->getName()+":"+Name;
+ if (CurRec->isTemplateArg(TemplateArgName)) {
+ const RecordVal *RV = CurRec->getValue(TemplateArgName);
+ assert(RV && "Template arg doesn't exist??");
+
+ if (RV->getType() != getType())
+ throw "type mismatch in cast";
+
+ return VarInit::get(TemplateArgName, RV->getType());
+ }
+ }
+
+ if (CurMultiClass) {
+ std::string MCName = CurMultiClass->Rec.getName()+"::"+Name;
+ if (CurMultiClass->Rec.isTemplateArg(MCName)) {
+ const RecordVal *RV = CurMultiClass->Rec.getValue(MCName);
+ assert(RV && "Template arg doesn't exist??");
+
+ if (RV->getType() != getType())
+ throw "type mismatch in cast";
+
+ return VarInit::get(MCName, RV->getType());
+ }
+ }
+
+ if (Record *D = (CurRec->getRecords()).getDef(Name))
+ return DefInit::get(D);
+
+        throw TGError(CurRec->getLoc(), "Undefined reference: '" + Name + "'\n");
+ }
+ }
+ break;
+ }
+ case HEAD: {
+ ListInit *LHSl = dynamic_cast<ListInit*>(LHS);
+ if (LHSl) {
+ if (LHSl->getSize() == 0) {
+ assert(0 && "Empty list in car");
+ return 0;
+ }
+ return LHSl->getElement(0);
+ }
+ break;
+ }
+ case TAIL: {
+ ListInit *LHSl = dynamic_cast<ListInit*>(LHS);
+ if (LHSl) {
+ if (LHSl->getSize() == 0) {
+ assert(0 && "Empty list in cdr");
+ return 0;
+ }
+ // Note the +1. We can't just pass the result of getValues()
+ // directly.
+ ArrayRef<Init *>::iterator begin = LHSl->getValues().begin()+1;
+ ArrayRef<Init *>::iterator end = LHSl->getValues().end();
+ ListInit *Result =
+ ListInit::get(ArrayRef<Init *>(begin, end - begin),
+ LHSl->getType());
+ return Result;
+ }
+ break;
+ }
+ case EMPTY: {
+ ListInit *LHSl = dynamic_cast<ListInit*>(LHS);
+ if (LHSl) {
+ if (LHSl->getSize() == 0) {
+ return IntInit::get(1);
+ } else {
+ return IntInit::get(0);
+ }
+ }
+ StringInit *LHSs = dynamic_cast<StringInit*>(LHS);
+ if (LHSs) {
+ if (LHSs->getValue().empty()) {
+ return IntInit::get(1);
+ } else {
+ return IntInit::get(0);
+ }
+ }
+
+ break;
+ }
+ }
+ return const_cast<UnOpInit *>(this);
+}
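+
+// Editor's examples of the folds above, in TableGen syntax:
+//   !head([1, 2, 3])  -->  1
+//   !tail([1, 2, 3])  -->  [2, 3]
+//   !empty([])        -->  1, and !empty("") --> 1
+//   !cast<string>(SomeDef) --> the def's name as a string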
+
+Init *UnOpInit::resolveReferences(Record &R, const RecordVal *RV) const {
+ Init *lhs = LHS->resolveReferences(R, RV);
+
+ if (LHS != lhs)
+ return (UnOpInit::get(getOpcode(), lhs, getType()))->Fold(&R, 0);
+ return Fold(&R, 0);
+}
+
+std::string UnOpInit::getAsString() const {
+ std::string Result;
+ switch (Opc) {
+ case CAST: Result = "!cast<" + getType()->getAsString() + ">"; break;
+ case HEAD: Result = "!head"; break;
+ case TAIL: Result = "!tail"; break;
+ case EMPTY: Result = "!empty"; break;
+ }
+ return Result + "(" + LHS->getAsString() + ")";
+}
+
+BinOpInit *BinOpInit::get(BinaryOp opc, Init *lhs,
+ Init *rhs, RecTy *Type) {
+ typedef std::pair<
+ std::pair<std::pair<unsigned, Init *>, Init *>,
+ RecTy *
+ > Key;
+
+ typedef DenseMap<Key, BinOpInit *> Pool;
+ static Pool ThePool;
+
+ Key TheKey(std::make_pair(std::make_pair(std::make_pair(opc, lhs), rhs),
+ Type));
+
+ BinOpInit *&I = ThePool[TheKey];
+ if (!I) I = new BinOpInit(opc, lhs, rhs, Type);
+ return I;
+}
+
+Init *BinOpInit::Fold(Record *CurRec, MultiClass *CurMultiClass) const {
+ switch (getOpcode()) {
+ default: assert(0 && "Unknown binop");
+ case CONCAT: {
+ DagInit *LHSs = dynamic_cast<DagInit*>(LHS);
+ DagInit *RHSs = dynamic_cast<DagInit*>(RHS);
+ if (LHSs && RHSs) {
+ DefInit *LOp = dynamic_cast<DefInit*>(LHSs->getOperator());
+ DefInit *ROp = dynamic_cast<DefInit*>(RHSs->getOperator());
+ if (LOp == 0 || ROp == 0 || LOp->getDef() != ROp->getDef())
+ throw "Concated Dag operators do not match!";
+ std::vector<Init*> Args;
+ std::vector<std::string> ArgNames;
+ for (unsigned i = 0, e = LHSs->getNumArgs(); i != e; ++i) {
+ Args.push_back(LHSs->getArg(i));
+ ArgNames.push_back(LHSs->getArgName(i));
+ }
+ for (unsigned i = 0, e = RHSs->getNumArgs(); i != e; ++i) {
+ Args.push_back(RHSs->getArg(i));
+ ArgNames.push_back(RHSs->getArgName(i));
+ }
+ return DagInit::get(LHSs->getOperator(), "", Args, ArgNames);
+ }
+ break;
+ }
+ case STRCONCAT: {
+ StringInit *LHSs = dynamic_cast<StringInit*>(LHS);
+ StringInit *RHSs = dynamic_cast<StringInit*>(RHS);
+ if (LHSs && RHSs)
+ return StringInit::get(LHSs->getValue() + RHSs->getValue());
+ break;
+ }
+ case EQ: {
+    // Try to fold an eq comparison of 'bit' or 'int' operands; otherwise
+    // fall back to comparing string objects.
+ IntInit* L =
+ dynamic_cast<IntInit*>(LHS->convertInitializerTo(IntRecTy::get()));
+ IntInit* R =
+ dynamic_cast<IntInit*>(RHS->convertInitializerTo(IntRecTy::get()));
+
+ if (L && R)
+ return IntInit::get(L->getValue() == R->getValue());
+
+ StringInit *LHSs = dynamic_cast<StringInit*>(LHS);
+ StringInit *RHSs = dynamic_cast<StringInit*>(RHS);
+
+    // Make sure both sides resolved to strings before comparing.
+ if (LHSs && RHSs)
+ return IntInit::get(LHSs->getValue() == RHSs->getValue());
+
+ break;
+ }
+ case SHL:
+ case SRA:
+ case SRL: {
+ IntInit *LHSi = dynamic_cast<IntInit*>(LHS);
+ IntInit *RHSi = dynamic_cast<IntInit*>(RHS);
+ if (LHSi && RHSi) {
+ int64_t LHSv = LHSi->getValue(), RHSv = RHSi->getValue();
+ int64_t Result;
+ switch (getOpcode()) {
+ default: assert(0 && "Bad opcode!");
+ case SHL: Result = LHSv << RHSv; break;
+ case SRA: Result = LHSv >> RHSv; break;
+ case SRL: Result = (uint64_t)LHSv >> (uint64_t)RHSv; break;
+ }
+ return IntInit::get(Result);
+ }
+ break;
+ }
+ }
+ return const_cast<BinOpInit *>(this);
+}
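+
+// Editor's examples of the folds above, in TableGen syntax:
+//   !strconcat("foo", "bar")  -->  "foobar"
+//   !shl(1, 3)  -->  8,  !srl(16, 2)  -->  4
+//   !eq("x", "x")  -->  1, and !eq(0, 1)  -->  0
+//   !con((ops a, b), (ops c))  -->  (ops a, b, c) when the operators match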
+
+Init *BinOpInit::resolveReferences(Record &R, const RecordVal *RV) const {
+ Init *lhs = LHS->resolveReferences(R, RV);
+ Init *rhs = RHS->resolveReferences(R, RV);
+
+ if (LHS != lhs || RHS != rhs)
+ return (BinOpInit::get(getOpcode(), lhs, rhs, getType()))->Fold(&R, 0);
+ return Fold(&R, 0);
+}
+
+std::string BinOpInit::getAsString() const {
+ std::string Result;
+ switch (Opc) {
+ case CONCAT: Result = "!con"; break;
+ case SHL: Result = "!shl"; break;
+ case SRA: Result = "!sra"; break;
+ case SRL: Result = "!srl"; break;
+ case EQ: Result = "!eq"; break;
+ case STRCONCAT: Result = "!strconcat"; break;
+ }
+ return Result + "(" + LHS->getAsString() + ", " + RHS->getAsString() + ")";
+}
+
+TernOpInit *TernOpInit::get(TernaryOp opc, Init *lhs,
+ Init *mhs, Init *rhs,
+ RecTy *Type) {
+ typedef std::pair<
+ std::pair<
+ std::pair<std::pair<unsigned, RecTy *>, Init *>,
+ Init *
+ >,
+ Init *
+ > Key;
+
+ typedef DenseMap<Key, TernOpInit *> Pool;
+ static Pool ThePool;
+
+ Key TheKey(std::make_pair(std::make_pair(std::make_pair(std::make_pair(opc,
+ Type),
+ lhs),
+ mhs),
+ rhs));
+
+ TernOpInit *&I = ThePool[TheKey];
+ if (!I) I = new TernOpInit(opc, lhs, mhs, rhs, Type);
+ return I;
+}
+
+static Init *ForeachHelper(Init *LHS, Init *MHS, Init *RHS, RecTy *Type,
+ Record *CurRec, MultiClass *CurMultiClass);
+
+static Init *EvaluateOperation(OpInit *RHSo, Init *LHS, Init *Arg,
+ RecTy *Type, Record *CurRec,
+ MultiClass *CurMultiClass) {
+ std::vector<Init *> NewOperands;
+
+ TypedInit *TArg = dynamic_cast<TypedInit*>(Arg);
+
+ // If this is a dag, recurse
+ if (TArg && TArg->getType()->getAsString() == "dag") {
+ Init *Result = ForeachHelper(LHS, Arg, RHSo, Type,
+ CurRec, CurMultiClass);
+ if (Result != 0) {
+ return Result;
+ } else {
+ return 0;
+ }
+ }
+
+ for (int i = 0; i < RHSo->getNumOperands(); ++i) {
+ OpInit *RHSoo = dynamic_cast<OpInit*>(RHSo->getOperand(i));
+
+ if (RHSoo) {
+ Init *Result = EvaluateOperation(RHSoo, LHS, Arg,
+ Type, CurRec, CurMultiClass);
+ if (Result != 0) {
+ NewOperands.push_back(Result);
+ } else {
+ NewOperands.push_back(Arg);
+ }
+ } else if (LHS->getAsString() == RHSo->getOperand(i)->getAsString()) {
+ NewOperands.push_back(Arg);
+ } else {
+ NewOperands.push_back(RHSo->getOperand(i));
+ }
+ }
+
+ // Now run the operator and use its result as the new leaf
+ const OpInit *NewOp = RHSo->clone(NewOperands);
+ Init *NewVal = NewOp->Fold(CurRec, CurMultiClass);
+ if (NewVal != NewOp)
+ return NewVal;
+
+ return 0;
+}
+
+static Init *ForeachHelper(Init *LHS, Init *MHS, Init *RHS, RecTy *Type,
+ Record *CurRec, MultiClass *CurMultiClass) {
+ DagInit *MHSd = dynamic_cast<DagInit*>(MHS);
+ ListInit *MHSl = dynamic_cast<ListInit*>(MHS);
+
+ DagRecTy *DagType = dynamic_cast<DagRecTy*>(Type);
+ ListRecTy *ListType = dynamic_cast<ListRecTy*>(Type);
+
+ OpInit *RHSo = dynamic_cast<OpInit*>(RHS);
+
+ if (!RHSo) {
+ throw TGError(CurRec->getLoc(), "!foreach requires an operator\n");
+ }
+
+ TypedInit *LHSt = dynamic_cast<TypedInit*>(LHS);
+
+ if (!LHSt) {
+ throw TGError(CurRec->getLoc(), "!foreach requires typed variable\n");
+ }
+
+ if ((MHSd && DagType) || (MHSl && ListType)) {
+ if (MHSd) {
+ Init *Val = MHSd->getOperator();
+ Init *Result = EvaluateOperation(RHSo, LHS, Val,
+ Type, CurRec, CurMultiClass);
+ if (Result != 0) {
+ Val = Result;
+ }
+
+ std::vector<std::pair<Init *, std::string> > args;
+ for (unsigned int i = 0; i < MHSd->getNumArgs(); ++i) {
+ Init *Arg;
+ std::string ArgName;
+ Arg = MHSd->getArg(i);
+ ArgName = MHSd->getArgName(i);
+
+ // Process args
+ Init *Result = EvaluateOperation(RHSo, LHS, Arg, Type,
+ CurRec, CurMultiClass);
+ if (Result != 0) {
+ Arg = Result;
+ }
+
+ // TODO: Process arg names
+ args.push_back(std::make_pair(Arg, ArgName));
+ }
+
+ return DagInit::get(Val, "", args);
+ }
+ if (MHSl) {
+ std::vector<Init *> NewOperands;
+ std::vector<Init *> NewList(MHSl->begin(), MHSl->end());
+
+ for (std::vector<Init *>::iterator li = NewList.begin(),
+ liend = NewList.end();
+ li != liend;
+ ++li) {
+ Init *Item = *li;
+ NewOperands.clear();
+        for (int i = 0; i < RHSo->getNumOperands(); ++i) {
+ // First, replace the foreach variable with the list item
+ if (LHS->getAsString() == RHSo->getOperand(i)->getAsString()) {
+ NewOperands.push_back(Item);
+ } else {
+ NewOperands.push_back(RHSo->getOperand(i));
+ }
+ }
+
+ // Now run the operator and use its result as the new list item
+ const OpInit *NewOp = RHSo->clone(NewOperands);
+ Init *NewItem = NewOp->Fold(CurRec, CurMultiClass);
+ if (NewItem != NewOp)
+ *li = NewItem;
+ }
+ return ListInit::get(NewList, MHSl->getType());
+ }
+ }
+ return 0;
+}
+
+Init *TernOpInit::Fold(Record *CurRec, MultiClass *CurMultiClass) const {
+ switch (getOpcode()) {
+  default: assert(0 && "Unknown ternop");
+ case SUBST: {
+ DefInit *LHSd = dynamic_cast<DefInit*>(LHS);
+ VarInit *LHSv = dynamic_cast<VarInit*>(LHS);
+ StringInit *LHSs = dynamic_cast<StringInit*>(LHS);
+
+ DefInit *MHSd = dynamic_cast<DefInit*>(MHS);
+ VarInit *MHSv = dynamic_cast<VarInit*>(MHS);
+ StringInit *MHSs = dynamic_cast<StringInit*>(MHS);
+
+ DefInit *RHSd = dynamic_cast<DefInit*>(RHS);
+ VarInit *RHSv = dynamic_cast<VarInit*>(RHS);
+ StringInit *RHSs = dynamic_cast<StringInit*>(RHS);
+
+ if ((LHSd && MHSd && RHSd)
+ || (LHSv && MHSv && RHSv)
+ || (LHSs && MHSs && RHSs)) {
+ if (RHSd) {
+ Record *Val = RHSd->getDef();
+ if (LHSd->getAsString() == RHSd->getAsString()) {
+ Val = MHSd->getDef();
+ }
+ return DefInit::get(Val);
+ }
+ if (RHSv) {
+ std::string Val = RHSv->getName();
+ if (LHSv->getAsString() == RHSv->getAsString()) {
+ Val = MHSv->getName();
+ }
+ return VarInit::get(Val, getType());
+ }
+ if (RHSs) {
+ std::string Val = RHSs->getValue();
+
+ std::string::size_type found;
+ std::string::size_type idx = 0;
+ do {
+ found = Val.find(LHSs->getValue(), idx);
+ if (found != std::string::npos) {
+ Val.replace(found, LHSs->getValue().size(), MHSs->getValue());
+ }
+ idx = found + MHSs->getValue().size();
+ } while (found != std::string::npos);
+
+ return StringInit::get(Val);
+ }
+ }
+ break;
+ }
+
+ case FOREACH: {
+ Init *Result = ForeachHelper(LHS, MHS, RHS, getType(),
+ CurRec, CurMultiClass);
+ if (Result != 0) {
+ return Result;
+ }
+ break;
+ }
+
+ case IF: {
+ IntInit *LHSi = dynamic_cast<IntInit*>(LHS);
+ if (Init *I = LHS->convertInitializerTo(IntRecTy::get()))
+ LHSi = dynamic_cast<IntInit*>(I);
+ if (LHSi) {
+ if (LHSi->getValue()) {
+ return MHS;
+ } else {
+ return RHS;
+ }
+ }
+ break;
+ }
+ }
+
+ return const_cast<TernOpInit *>(this);
+}
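+
+// Editor's examples of the folds above, in TableGen syntax:
+//   !if(1, a, b)                     -->  a
+//   !subst("x", "y", "axa")          -->  "aya"
+//   !foreach(V, [1, 2], !shl(V, 1))  -->  [2, 4]   (V a typed variable)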
+
+Init *TernOpInit::resolveReferences(Record &R,
+ const RecordVal *RV) const {
+ Init *lhs = LHS->resolveReferences(R, RV);
+
+ if (Opc == IF && lhs != LHS) {
+ IntInit *Value = dynamic_cast<IntInit*>(lhs);
+ if (Init *I = lhs->convertInitializerTo(IntRecTy::get()))
+ Value = dynamic_cast<IntInit*>(I);
+ if (Value != 0) {
+ // Short-circuit
+ if (Value->getValue()) {
+ Init *mhs = MHS->resolveReferences(R, RV);
+ return (TernOpInit::get(getOpcode(), lhs, mhs,
+ RHS, getType()))->Fold(&R, 0);
+ } else {
+ Init *rhs = RHS->resolveReferences(R, RV);
+ return (TernOpInit::get(getOpcode(), lhs, MHS,
+ rhs, getType()))->Fold(&R, 0);
+ }
+ }
+ }
+
+ Init *mhs = MHS->resolveReferences(R, RV);
+ Init *rhs = RHS->resolveReferences(R, RV);
+
+ if (LHS != lhs || MHS != mhs || RHS != rhs)
+ return (TernOpInit::get(getOpcode(), lhs, mhs, rhs,
+ getType()))->Fold(&R, 0);
+ return Fold(&R, 0);
+}
+
+std::string TernOpInit::getAsString() const {
+ std::string Result;
+ switch (Opc) {
+ case SUBST: Result = "!subst"; break;
+ case FOREACH: Result = "!foreach"; break;
+ case IF: Result = "!if"; break;
+ }
+ return Result + "(" + LHS->getAsString() + ", " + MHS->getAsString() + ", "
+ + RHS->getAsString() + ")";
+}
+
+RecTy *TypedInit::getFieldType(const std::string &FieldName) const {
+ RecordRecTy *RecordType = dynamic_cast<RecordRecTy *>(getType());
+ if (RecordType) {
+ RecordVal *Field = RecordType->getRecord()->getValue(FieldName);
+ if (Field) {
+ return Field->getType();
+ }
+ }
+ return 0;
+}
+
+Init *
+TypedInit::convertInitializerBitRange(const std::vector<unsigned> &Bits) const {
+ BitsRecTy *T = dynamic_cast<BitsRecTy*>(getType());
+ if (T == 0) return 0; // Cannot subscript a non-bits variable.
+ unsigned NumBits = T->getNumBits();
+
+ SmallVector<Init *, 16> NewBits(Bits.size());
+ for (unsigned i = 0, e = Bits.size(); i != e; ++i) {
+ if (Bits[i] >= NumBits)
+ return 0;
+
+ NewBits[i] = VarBitInit::get(const_cast<TypedInit *>(this), Bits[i]);
+ }
+ return BitsInit::get(NewBits);
+}
+
+Init *
+TypedInit::convertInitListSlice(const std::vector<unsigned> &Elements) const {
+ ListRecTy *T = dynamic_cast<ListRecTy*>(getType());
+ if (T == 0) return 0; // Cannot subscript a non-list variable.
+
+ if (Elements.size() == 1)
+ return VarListElementInit::get(const_cast<TypedInit *>(this), Elements[0]);
+
+ std::vector<Init*> ListInits;
+ ListInits.reserve(Elements.size());
+ for (unsigned i = 0, e = Elements.size(); i != e; ++i)
+ ListInits.push_back(VarListElementInit::get(const_cast<TypedInit *>(this),
+ Elements[i]));
+ return ListInit::get(ListInits, T);
+}
+
+
+VarInit *VarInit::get(const std::string &VN, RecTy *T) {
+ typedef std::pair<RecTy *, TableGenStringKey> Key;
+ typedef DenseMap<Key, VarInit *> Pool;
+ static Pool ThePool;
+
+ Key TheKey(std::make_pair(T, VN));
+
+ VarInit *&I = ThePool[TheKey];
+ if (!I) I = new VarInit(VN, T);
+ return I;
+}
+
+Init *VarInit::resolveBitReference(Record &R, const RecordVal *IRV,
+ unsigned Bit) const {
+ if (R.isTemplateArg(getName())) return 0;
+ if (IRV && IRV->getName() != getName()) return 0;
+
+ RecordVal *RV = R.getValue(getName());
+ assert(RV && "Reference to a non-existent variable?");
+ assert(dynamic_cast<BitsInit*>(RV->getValue()));
+ BitsInit *BI = (BitsInit*)RV->getValue();
+
+ assert(Bit < BI->getNumBits() && "Bit reference out of range!");
+ Init *B = BI->getBit(Bit);
+
+ // If the bit is set to some value, or if we are resolving a reference to a
+ // specific variable and that variable is explicitly unset, then replace the
+ // VarBitInit with it.
+ if (IRV || !dynamic_cast<UnsetInit*>(B))
+ return B;
+ return 0;
+}
+
+Init *VarInit::resolveListElementReference(Record &R,
+ const RecordVal *IRV,
+ unsigned Elt) const {
+ if (R.isTemplateArg(getName())) return 0;
+ if (IRV && IRV->getName() != getName()) return 0;
+
+ RecordVal *RV = R.getValue(getName());
+ assert(RV && "Reference to a non-existent variable?");
+ ListInit *LI = dynamic_cast<ListInit*>(RV->getValue());
+ if (!LI) {
+ TypedInit *VI = dynamic_cast<TypedInit*>(RV->getValue());
+ assert(VI && "Invalid list element!");
+ return VarListElementInit::get(VI, Elt);
+ }
+
+ if (Elt >= LI->getSize())
+ return 0; // Out of range reference.
+ Init *E = LI->getElement(Elt);
+ // If the element is set to some value, or if we are resolving a reference
+ // to a specific variable and that variable is explicitly unset, then
+ // replace the VarListElementInit with it.
+ if (IRV || !dynamic_cast<UnsetInit*>(E))
+ return E;
+ return 0;
+}
+
+
+RecTy *VarInit::getFieldType(const std::string &FieldName) const {
+ if (RecordRecTy *RTy = dynamic_cast<RecordRecTy*>(getType()))
+ if (const RecordVal *RV = RTy->getRecord()->getValue(FieldName))
+ return RV->getType();
+ return 0;
+}
+
+Init *VarInit::getFieldInit(Record &R, const RecordVal *RV,
+ const std::string &FieldName) const {
+ if (dynamic_cast<RecordRecTy*>(getType()))
+ if (const RecordVal *Val = R.getValue(VarName)) {
+ if (RV != Val && (RV || dynamic_cast<UnsetInit*>(Val->getValue())))
+ return 0;
+ Init *TheInit = Val->getValue();
+ assert(TheInit != this && "Infinite loop detected!");
+ if (Init *I = TheInit->getFieldInit(R, RV, FieldName))
+ return I;
+ else
+ return 0;
+ }
+ return 0;
+}
+
+/// resolveReferences - This method is used by classes that refer to other
+/// variables which may not be defined at the time the expression is formed.
+/// If a value is set for the variable later, this method will be called on
+/// users of the value to allow the value to propagate out.
+///
+Init *VarInit::resolveReferences(Record &R, const RecordVal *RV) const {
+ if (RecordVal *Val = R.getValue(VarName))
+ if (RV == Val || (RV == 0 && !dynamic_cast<UnsetInit*>(Val->getValue())))
+ return Val->getValue();
+ return const_cast<VarInit *>(this);
+}
+
+VarBitInit *VarBitInit::get(TypedInit *T, unsigned B) {
+ typedef std::pair<TypedInit *, unsigned> Key;
+ typedef DenseMap<Key, VarBitInit *> Pool;
+
+ static Pool ThePool;
+
+ Key TheKey(std::make_pair(T, B));
+
+ VarBitInit *&I = ThePool[TheKey];
+ if (!I) I = new VarBitInit(T, B);
+ return I;
+}
+
+std::string VarBitInit::getAsString() const {
+ return TI->getAsString() + "{" + utostr(Bit) + "}";
+}
+
+Init *VarBitInit::resolveReferences(Record &R, const RecordVal *RV) const {
+ if (Init *I = getVariable()->resolveBitReference(R, RV, getBitNum()))
+ return I;
+ return const_cast<VarBitInit *>(this);
+}
+
+VarListElementInit *VarListElementInit::get(TypedInit *T,
+ unsigned E) {
+ typedef std::pair<TypedInit *, unsigned> Key;
+ typedef DenseMap<Key, VarListElementInit *> Pool;
+
+ static Pool ThePool;
+
+ Key TheKey(std::make_pair(T, E));
+
+ VarListElementInit *&I = ThePool[TheKey];
+ if (!I) I = new VarListElementInit(T, E);
+ return I;
+}
+
+std::string VarListElementInit::getAsString() const {
+ return TI->getAsString() + "[" + utostr(Element) + "]";
+}
+
+Init *
+VarListElementInit::resolveReferences(Record &R, const RecordVal *RV) const {
+ if (Init *I = getVariable()->resolveListElementReference(R, RV,
+ getElementNum()))
+ return I;
+ return const_cast<VarListElementInit *>(this);
+}
+
+Init *VarListElementInit::resolveBitReference(Record &R, const RecordVal *RV,
+ unsigned Bit) const {
+ // FIXME: This should be implemented, to support references like:
+ // bit B = AA[0]{1};
+ return 0;
+}
+
+Init *VarListElementInit::resolveListElementReference(Record &R,
+ const RecordVal *RV,
+ unsigned Elt) const {
+ Init *Result = TI->resolveListElementReference(R, RV, Element);
+
+ if (Result) {
+ TypedInit *TInit = dynamic_cast<TypedInit *>(Result);
+ if (TInit) {
+ Init *Result2 = TInit->resolveListElementReference(R, RV, Elt);
+ if (Result2) return Result2;
+      return VarListElementInit::get(TInit, Elt);
+ }
+ return Result;
+ }
+
+ return 0;
+}
+
+DefInit *DefInit::get(Record *R) {
+ return R->getDefInit();
+}
+
+RecTy *DefInit::getFieldType(const std::string &FieldName) const {
+ if (const RecordVal *RV = Def->getValue(FieldName))
+ return RV->getType();
+ return 0;
+}
+
+Init *DefInit::getFieldInit(Record &R, const RecordVal *RV,
+ const std::string &FieldName) const {
+ return Def->getValue(FieldName)->getValue();
+}
+
+
+std::string DefInit::getAsString() const {
+ return Def->getName();
+}
+
+FieldInit *FieldInit::get(Init *R, const std::string &FN) {
+ typedef std::pair<Init *, TableGenStringKey> Key;
+ typedef DenseMap<Key, FieldInit *> Pool;
+ static Pool ThePool;
+
+ Key TheKey(std::make_pair(R, FN));
+
+ FieldInit *&I = ThePool[TheKey];
+ if (!I) I = new FieldInit(R, FN);
+ return I;
+}
+
+Init *FieldInit::resolveBitReference(Record &R, const RecordVal *RV,
+ unsigned Bit) const {
+ if (Init *BitsVal = Rec->getFieldInit(R, RV, FieldName))
+ if (BitsInit *BI = dynamic_cast<BitsInit*>(BitsVal)) {
+ assert(Bit < BI->getNumBits() && "Bit reference out of range!");
+ Init *B = BI->getBit(Bit);
+
+ if (dynamic_cast<BitInit*>(B)) // If the bit is set.
+ return B; // Replace the VarBitInit with it.
+ }
+ return 0;
+}
+
+Init *FieldInit::resolveListElementReference(Record &R, const RecordVal *RV,
+ unsigned Elt) const {
+ if (Init *ListVal = Rec->getFieldInit(R, RV, FieldName))
+ if (ListInit *LI = dynamic_cast<ListInit*>(ListVal)) {
+ if (Elt >= LI->getSize()) return 0;
+ Init *E = LI->getElement(Elt);
+
+ // If the element is set to some value, or if we are resolving a
+ // reference to a specific variable and that variable is explicitly
+ // unset, then replace the VarListElementInit with it.
+ if (RV || !dynamic_cast<UnsetInit*>(E))
+ return E;
+ }
+ return 0;
+}
+
+Init *FieldInit::resolveReferences(Record &R, const RecordVal *RV) const {
+ Init *NewRec = RV ? Rec->resolveReferences(R, RV) : Rec;
+
+ Init *BitsVal = NewRec->getFieldInit(R, RV, FieldName);
+ if (BitsVal) {
+ Init *BVR = BitsVal->resolveReferences(R, RV);
+ return BVR->isComplete() ? BVR : const_cast<FieldInit *>(this);
+ }
+
+ if (NewRec != Rec) {
+ return FieldInit::get(NewRec, FieldName);
+ }
+ return const_cast<FieldInit *>(this);
+}
+
+void ProfileDagInit(FoldingSetNodeID &ID,
+ Init *V,
+ const std::string &VN,
+ ArrayRef<Init *> ArgRange,
+ ArrayRef<std::string> NameRange) {
+ ID.AddPointer(V);
+ ID.AddString(VN);
+
+ ArrayRef<Init *>::iterator Arg = ArgRange.begin();
+ ArrayRef<std::string>::iterator Name = NameRange.begin();
+ while (Arg != ArgRange.end()) {
+ assert(Name != NameRange.end() && "Arg name underflow!");
+ ID.AddPointer(*Arg++);
+ ID.AddString(*Name++);
+ }
+ assert(Name == NameRange.end() && "Arg name overflow!");
+}
+
+DagInit *
+DagInit::get(Init *V, const std::string &VN,
+ ArrayRef<Init *> ArgRange,
+ ArrayRef<std::string> NameRange) {
+ typedef FoldingSet<DagInit> Pool;
+ static Pool ThePool;
+
+ FoldingSetNodeID ID;
+ ProfileDagInit(ID, V, VN, ArgRange, NameRange);
+
+ void *IP = 0;
+ if (DagInit *I = ThePool.FindNodeOrInsertPos(ID, IP))
+ return I;
+
+ DagInit *I = new DagInit(V, VN, ArgRange, NameRange);
+ ThePool.InsertNode(I, IP);
+
+ return I;
+}
+
+DagInit *
+DagInit::get(Init *V, const std::string &VN,
+ const std::vector<std::pair<Init*, std::string> > &args) {
+ typedef std::pair<Init*, std::string> PairType;
+
+ std::vector<Init *> Args;
+ std::vector<std::string> Names;
+
+ for (std::vector<PairType>::const_iterator i = args.begin(),
+ iend = args.end();
+ i != iend;
+ ++i) {
+ Args.push_back(i->first);
+ Names.push_back(i->second);
+ }
+
+ return DagInit::get(V, VN, Args, Names);
+}
+
+void DagInit::Profile(FoldingSetNodeID &ID) const {
+ ProfileDagInit(ID, Val, ValName, Args, ArgNames);
+}
+
+Init *DagInit::resolveReferences(Record &R, const RecordVal *RV) const {
+ std::vector<Init*> NewArgs;
+ for (unsigned i = 0, e = Args.size(); i != e; ++i)
+ NewArgs.push_back(Args[i]->resolveReferences(R, RV));
+
+ Init *Op = Val->resolveReferences(R, RV);
+
+ if (Args != NewArgs || Op != Val)
+ return DagInit::get(Op, ValName, NewArgs, ArgNames);
+
+ return const_cast<DagInit *>(this);
+}
+
+
+std::string DagInit::getAsString() const {
+ std::string Result = "(" + Val->getAsString();
+ if (!ValName.empty())
+ Result += ":" + ValName;
+ if (Args.size()) {
+ Result += " " + Args[0]->getAsString();
+ if (!ArgNames[0].empty()) Result += ":$" + ArgNames[0];
+ for (unsigned i = 1, e = Args.size(); i != e; ++i) {
+ Result += ", " + Args[i]->getAsString();
+ if (!ArgNames[i].empty()) Result += ":$" + ArgNames[i];
+ }
+ }
+ return Result + ")";
+}
+
+
+//===----------------------------------------------------------------------===//
+// Other implementations
+//===----------------------------------------------------------------------===//
+
+RecordVal::RecordVal(Init *N, RecTy *T, unsigned P)
+ : Name(N), Ty(T), Prefix(P) {
+ Value = Ty->convertValue(UnsetInit::get());
+ assert(Value && "Cannot create unset value for current type!");
+}
+
+RecordVal::RecordVal(const std::string &N, RecTy *T, unsigned P)
+ : Name(StringInit::get(N)), Ty(T), Prefix(P) {
+ Value = Ty->convertValue(UnsetInit::get());
+ assert(Value && "Cannot create unset value for current type!");
+}
+
+const std::string &RecordVal::getName() const {
+ StringInit *NameString = dynamic_cast<StringInit *>(Name);
+ assert(NameString && "RecordVal name is not a string!");
+ return NameString->getValue();
+}
+
+void RecordVal::dump() const { errs() << *this; }
+
+void RecordVal::print(raw_ostream &OS, bool PrintSem) const {
+ if (getPrefix()) OS << "field ";
+ OS << *getType() << " " << getName();
+
+ if (getValue())
+ OS << " = " << *getValue();
+
+ if (PrintSem) OS << ";\n";
+}
+
+unsigned Record::LastID = 0;
+
+void Record::checkName() {
+ // Ensure the record name has string type.
+ const TypedInit *TypedName = dynamic_cast<const TypedInit *>(Name);
+ assert(TypedName && "Record name is not typed!");
+ RecTy *Type = TypedName->getType();
+ if (dynamic_cast<StringRecTy *>(Type) == 0) {
+ llvm_unreachable("Record name is not a string!");
+ }
+}
+
+DefInit *Record::getDefInit() {
+ if (!TheInit)
+ TheInit = new DefInit(this, new RecordRecTy(this));
+ return TheInit;
+}
+
+const std::string &Record::getName() const {
+ const StringInit *NameString =
+ dynamic_cast<const StringInit *>(Name);
+ assert(NameString && "Record name is not a string!");
+ return NameString->getValue();
+}
+
+void Record::setName(Init *NewName) {
+ if (TrackedRecords.getDef(Name->getAsUnquotedString()) == this) {
+ TrackedRecords.removeDef(Name->getAsUnquotedString());
+ Name = NewName;
+ TrackedRecords.addDef(this);
+ } else {
+ TrackedRecords.removeClass(Name->getAsUnquotedString());
+ Name = NewName;
+ TrackedRecords.addClass(this);
+ }
+ checkName();
+ // Since the Init for the name was changed, see if we can resolve
+ // any of it using members of the Record.
+ Init *ComputedName = Name->resolveReferences(*this, 0);
+ if (ComputedName != Name) {
+ setName(ComputedName);
+ }
+ // DO NOT resolve record values to the name at this point because
+ // there might be default values for arguments of this def. Those
+ // arguments might not have been resolved yet so we don't want to
+ // prematurely assume values for those arguments were not passed to
+ // this def.
+ //
+ // Nonetheless, it may be that some of this Record's values
+ // reference the record name. Indeed, the reason for having the
+ // record name be an Init is to provide this flexibility. The extra
+// resolve steps after completely instantiating defs take care of
+ // this. See TGParser::ParseDef and TGParser::ParseDefm.
+}
+
+void Record::setName(const std::string &Name) {
+ setName(StringInit::get(Name));
+}
+
+/// resolveReferencesTo - If anything in this record refers to RV, replace the
+/// reference to RV with the RHS of RV. If RV is null, we resolve all possible
+/// references.
+void Record::resolveReferencesTo(const RecordVal *RV) {
+ for (unsigned i = 0, e = Values.size(); i != e; ++i) {
+ if (Init *V = Values[i].getValue())
+ Values[i].setValue(V->resolveReferences(*this, RV));
+ }
+}
+
+void Record::dump() const { errs() << *this; }
+
+raw_ostream &llvm::operator<<(raw_ostream &OS, const Record &R) {
+ OS << R.getName();
+
+ const std::vector<std::string> &TArgs = R.getTemplateArgs();
+ if (!TArgs.empty()) {
+ OS << "<";
+ for (unsigned i = 0, e = TArgs.size(); i != e; ++i) {
+ if (i) OS << ", ";
+ const RecordVal *RV = R.getValue(TArgs[i]);
+ assert(RV && "Template argument record not found??");
+ RV->print(OS, false);
+ }
+ OS << ">";
+ }
+
+ OS << " {";
+ const std::vector<Record*> &SC = R.getSuperClasses();
+ if (!SC.empty()) {
+ OS << "\t//";
+ for (unsigned i = 0, e = SC.size(); i != e; ++i)
+ OS << " " << SC[i]->getName();
+ }
+ OS << "\n";
+
+ const std::vector<RecordVal> &Vals = R.getValues();
+ for (unsigned i = 0, e = Vals.size(); i != e; ++i)
+ if (Vals[i].getPrefix() && !R.isTemplateArg(Vals[i].getName()))
+ OS << Vals[i];
+ for (unsigned i = 0, e = Vals.size(); i != e; ++i)
+ if (!Vals[i].getPrefix() && !R.isTemplateArg(Vals[i].getName()))
+ OS << Vals[i];
+
+ return OS << "}\n";
+}
+
+/// getValueInit - Return the initializer for a value with the specified name,
+/// or throw an exception if the field does not exist.
+///
+Init *Record::getValueInit(StringRef FieldName) const {
+ const RecordVal *R = getValue(FieldName);
+ if (R == 0 || R->getValue() == 0)
+ throw "Record `" + getName() + "' does not have a field named `" +
+ FieldName.str() + "'!\n";
+ return R->getValue();
+}
+
+
+/// getValueAsString - This method looks up the specified field and returns its
+/// value as a string, throwing an exception if the field does not exist or if
+/// the value is not a string.
+///
+std::string Record::getValueAsString(StringRef FieldName) const {
+ const RecordVal *R = getValue(FieldName);
+ if (R == 0 || R->getValue() == 0)
+ throw "Record `" + getName() + "' does not have a field named `" +
+ FieldName.str() + "'!\n";
+
+ if (StringInit *SI = dynamic_cast<StringInit*>(R->getValue()))
+ return SI->getValue();
+ throw "Record `" + getName() + "', field `" + FieldName.str() +
+ "' does not have a string initializer!";
+}
+
+/// getValueAsBitsInit - This method looks up the specified field and returns
+/// its value as a BitsInit, throwing an exception if the field does not exist
+/// or if the value is not the right type.
+///
+BitsInit *Record::getValueAsBitsInit(StringRef FieldName) const {
+ const RecordVal *R = getValue(FieldName);
+ if (R == 0 || R->getValue() == 0)
+ throw "Record `" + getName() + "' does not have a field named `" +
+ FieldName.str() + "'!\n";
+
+ if (BitsInit *BI = dynamic_cast<BitsInit*>(R->getValue()))
+ return BI;
+ throw "Record `" + getName() + "', field `" + FieldName.str() +
+ "' does not have a BitsInit initializer!";
+}
+
+/// getValueAsListInit - This method looks up the specified field and returns
+/// its value as a ListInit, throwing an exception if the field does not exist
+/// or if the value is not the right type.
+///
+ListInit *Record::getValueAsListInit(StringRef FieldName) const {
+ const RecordVal *R = getValue(FieldName);
+ if (R == 0 || R->getValue() == 0)
+ throw "Record `" + getName() + "' does not have a field named `" +
+ FieldName.str() + "'!\n";
+
+ if (ListInit *LI = dynamic_cast<ListInit*>(R->getValue()))
+ return LI;
+ throw "Record `" + getName() + "', field `" + FieldName.str() +
+ "' does not have a list initializer!";
+}
+
+/// getValueAsListOfDefs - This method looks up the specified field and returns
+/// its value as a vector of records, throwing an exception if the field does
+/// not exist or if the value is not the right type.
+///
+std::vector<Record*>
+Record::getValueAsListOfDefs(StringRef FieldName) const {
+ ListInit *List = getValueAsListInit(FieldName);
+ std::vector<Record*> Defs;
+ for (unsigned i = 0; i < List->getSize(); i++) {
+ if (DefInit *DI = dynamic_cast<DefInit*>(List->getElement(i))) {
+ Defs.push_back(DI->getDef());
+ } else {
+ throw "Record `" + getName() + "', field `" + FieldName.str() +
+ "' list is not entirely DefInit!";
+ }
+ }
+ return Defs;
+}
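+
+// Editor's example: for a hypothetical "def X { list<Reg> Uses = [R0, R1]; }",
+// X->getValueAsListOfDefs("Uses") returns the R0 and R1 records.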
+
+/// getValueAsInt - This method looks up the specified field and returns its
+/// value as an int64_t, throwing an exception if the field does not exist or if
+/// the value is not the right type.
+///
+int64_t Record::getValueAsInt(StringRef FieldName) const {
+ const RecordVal *R = getValue(FieldName);
+ if (R == 0 || R->getValue() == 0)
+ throw "Record `" + getName() + "' does not have a field named `" +
+ FieldName.str() + "'!\n";
+
+ if (IntInit *II = dynamic_cast<IntInit*>(R->getValue()))
+ return II->getValue();
+ throw "Record `" + getName() + "', field `" + FieldName.str() +
+ "' does not have an int initializer!";
+}
+
+/// getValueAsListOfInts - This method looks up the specified field and returns
+/// its value as a vector of integers, throwing an exception if the field does
+/// not exist or if the value is not the right type.
+///
+std::vector<int64_t>
+Record::getValueAsListOfInts(StringRef FieldName) const {
+ ListInit *List = getValueAsListInit(FieldName);
+ std::vector<int64_t> Ints;
+ for (unsigned i = 0; i < List->getSize(); i++) {
+ if (IntInit *II = dynamic_cast<IntInit*>(List->getElement(i))) {
+ Ints.push_back(II->getValue());
+ } else {
+ throw "Record `" + getName() + "', field `" + FieldName.str() +
+ "' does not have a list of ints initializer!";
+ }
+ }
+ return Ints;
+}
+
+/// getValueAsListOfStrings - This method looks up the specified field and
+/// returns its value as a vector of strings, throwing an exception if the
+/// field does not exist or if the value is not the right type.
+///
+std::vector<std::string>
+Record::getValueAsListOfStrings(StringRef FieldName) const {
+ ListInit *List = getValueAsListInit(FieldName);
+ std::vector<std::string> Strings;
+ for (unsigned i = 0; i < List->getSize(); i++) {
+ if (StringInit *II = dynamic_cast<StringInit*>(List->getElement(i))) {
+ Strings.push_back(II->getValue());
+ } else {
+ throw "Record `" + getName() + "', field `" + FieldName.str() +
+ "' does not have a list of strings initializer!";
+ }
+ }
+ return Strings;
+}
+
+/// getValueAsDef - This method looks up the specified field and returns its
+/// value as a Record, throwing an exception if the field does not exist or if
+/// the value is not the right type.
+///
+Record *Record::getValueAsDef(StringRef FieldName) const {
+ const RecordVal *R = getValue(FieldName);
+ if (R == 0 || R->getValue() == 0)
+ throw "Record `" + getName() + "' does not have a field named `" +
+ FieldName.str() + "'!\n";
+
+ if (DefInit *DI = dynamic_cast<DefInit*>(R->getValue()))
+ return DI->getDef();
+ throw "Record `" + getName() + "', field `" + FieldName.str() +
+ "' does not have a def initializer!";
+}
+
+/// getValueAsBit - This method looks up the specified field and returns its
+/// value as a bit, throwing an exception if the field does not exist or if
+/// the value is not the right type.
+///
+bool Record::getValueAsBit(StringRef FieldName) const {
+ const RecordVal *R = getValue(FieldName);
+ if (R == 0 || R->getValue() == 0)
+ throw "Record `" + getName() + "' does not have a field named `" +
+ FieldName.str() + "'!\n";
+
+ if (BitInit *BI = dynamic_cast<BitInit*>(R->getValue()))
+ return BI->getValue();
+ throw "Record `" + getName() + "', field `" + FieldName.str() +
+ "' does not have a bit initializer!";
+}
+
+/// getValueAsDag - This method looks up the specified field and returns its
+/// value as a Dag, throwing an exception if the field does not exist or if
+/// the value is not the right type.
+///
+DagInit *Record::getValueAsDag(StringRef FieldName) const {
+ const RecordVal *R = getValue(FieldName);
+ if (R == 0 || R->getValue() == 0)
+ throw "Record `" + getName() + "' does not have a field named `" +
+ FieldName.str() + "'!\n";
+
+ if (DagInit *DI = dynamic_cast<DagInit*>(R->getValue()))
+ return DI;
+ throw "Record `" + getName() + "', field `" + FieldName.str() +
+ "' does not have a dag initializer!";
+}
+
+std::string Record::getValueAsCode(StringRef FieldName) const {
+ const RecordVal *R = getValue(FieldName);
+ if (R == 0 || R->getValue() == 0)
+ throw "Record `" + getName() + "' does not have a field named `" +
+ FieldName.str() + "'!\n";
+
+ if (CodeInit *CI = dynamic_cast<CodeInit*>(R->getValue()))
+ return CI->getValue();
+ throw "Record `" + getName() + "', field `" + FieldName.str() +
+ "' does not have a code initializer!";
+}
+
+
+void MultiClass::dump() const {
+ errs() << "Record:\n";
+ Rec.dump();
+
+ errs() << "Defs:\n";
+ for (RecordVector::const_iterator r = DefPrototypes.begin(),
+ rend = DefPrototypes.end();
+ r != rend;
+ ++r) {
+ (*r)->dump();
+ }
+}
+
+
+void RecordKeeper::dump() const { errs() << *this; }
+
+raw_ostream &llvm::operator<<(raw_ostream &OS, const RecordKeeper &RK) {
+ OS << "------------- Classes -----------------\n";
+ const std::map<std::string, Record*> &Classes = RK.getClasses();
+ for (std::map<std::string, Record*>::const_iterator I = Classes.begin(),
+ E = Classes.end(); I != E; ++I)
+ OS << "class " << *I->second;
+
+ OS << "------------- Defs -----------------\n";
+ const std::map<std::string, Record*> &Defs = RK.getDefs();
+ for (std::map<std::string, Record*>::const_iterator I = Defs.begin(),
+ E = Defs.end(); I != E; ++I)
+ OS << "def " << *I->second;
+ return OS;
+}
+
+
+/// getAllDerivedDefinitions - This method returns all concrete definitions
+/// that derive from the specified class name. If a class with the specified
+/// name does not exist, an exception is thrown.
+std::vector<Record*>
+RecordKeeper::getAllDerivedDefinitions(const std::string &ClassName) const {
+ Record *Class = getClass(ClassName);
+ if (!Class)
+ throw "ERROR: Couldn't find the `" + ClassName + "' class!\n";
+
+ std::vector<Record*> Defs;
+ for (std::map<std::string, Record*>::const_iterator I = getDefs().begin(),
+ E = getDefs().end(); I != E; ++I)
+ if (I->second->isSubClassOf(Class))
+ Defs.push_back(I->second);
+
+ return Defs;
+}
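+
+// Illustrative usage (a sketch, not part of this file): a TableGen backend
+// typically combines getAllDerivedDefinitions() with the typed accessors
+// above. The class name "Instruction" and field "AsmString" here are only
+// placeholders:
+//
+//   std::vector<Record*> Insts =
+//       Records.getAllDerivedDefinitions("Instruction");
+//   for (unsigned i = 0, e = Insts.size(); i != e; ++i) {
+//     std::string Asm = Insts[i]->getValueAsString("AsmString");
+//     (void)Asm; // ... backend-specific emission would go here ...
+//   }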
+
diff --git a/lib/TableGen/TGLexer.cpp b/lib/TableGen/TGLexer.cpp
new file mode 100644
index 0000000..8c1b429
--- /dev/null
+++ b/lib/TableGen/TGLexer.cpp
@@ -0,0 +1,435 @@
+//===- TGLexer.cpp - Lexer for TableGen -----------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Implement the Lexer for TableGen.
+//
+//===----------------------------------------------------------------------===//
+
+#include "TGLexer.h"
+#include "llvm/TableGen/Error.h"
+#include "llvm/Support/SourceMgr.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Config/config.h"
+#include "llvm/ADT/StringSwitch.h"
+#include "llvm/ADT/Twine.h"
+#include <cctype>
+#include <cstdio>
+#include <cstdlib>
+#include <cstring>
+#include <cerrno>
+using namespace llvm;
+
+TGLexer::TGLexer(SourceMgr &SM) : SrcMgr(SM) {
+ CurBuffer = 0;
+ CurBuf = SrcMgr.getMemoryBuffer(CurBuffer);
+ CurPtr = CurBuf->getBufferStart();
+ TokStart = 0;
+}
+
+SMLoc TGLexer::getLoc() const {
+ return SMLoc::getFromPointer(TokStart);
+}
+
+/// ReturnError - Set the error to the specified string at the specified
+/// location. This is defined to always return tgtok::Error.
+tgtok::TokKind TGLexer::ReturnError(const char *Loc, const Twine &Msg) {
+ PrintError(Loc, Msg);
+ return tgtok::Error;
+}
+
+int TGLexer::getNextChar() {
+ char CurChar = *CurPtr++;
+ switch (CurChar) {
+ default:
+ return (unsigned char)CurChar;
+ case 0: {
+ // A nul character in the stream is either the end of the current buffer or
+ // a random nul in the file. Disambiguate that here.
+ if (CurPtr-1 != CurBuf->getBufferEnd())
+ return 0; // Just whitespace.
+
+ // If this is the end of an included file, pop the parent file off the
+ // include stack.
+ SMLoc ParentIncludeLoc = SrcMgr.getParentIncludeLoc(CurBuffer);
+ if (ParentIncludeLoc != SMLoc()) {
+ CurBuffer = SrcMgr.FindBufferContainingLoc(ParentIncludeLoc);
+ CurBuf = SrcMgr.getMemoryBuffer(CurBuffer);
+ CurPtr = ParentIncludeLoc.getPointer();
+ return getNextChar();
+ }
+
+ // Otherwise, return end of file.
+ --CurPtr; // Another call to lex will return EOF again.
+ return EOF;
+ }
+ case '\n':
+ case '\r':
+ // Handle newlines by collapsing them to a single '\n'. Be careful about
+ // 'dos style' files with \r\n (or \n\r) in them: treat each such
+ // two-character sequence as one newline.
+ if ((*CurPtr == '\n' || *CurPtr == '\r') &&
+ *CurPtr != CurChar)
+ ++CurPtr; // Eat the two char newline sequence.
+ return '\n';
+ }
+}
+
+tgtok::TokKind TGLexer::LexToken() {
+ TokStart = CurPtr;
+ // This always consumes at least one character.
+ int CurChar = getNextChar();
+
+ switch (CurChar) {
+ default:
+ // Handle letters: [a-zA-Z_#]
+ if (isalpha(CurChar) || CurChar == '_' || CurChar == '#')
+ return LexIdentifier();
+
+ // Unknown character, emit an error.
+ return ReturnError(TokStart, "Unexpected character");
+ case EOF: return tgtok::Eof;
+ case ':': return tgtok::colon;
+ case ';': return tgtok::semi;
+ case '.': return tgtok::period;
+ case ',': return tgtok::comma;
+ case '<': return tgtok::less;
+ case '>': return tgtok::greater;
+ case ']': return tgtok::r_square;
+ case '{': return tgtok::l_brace;
+ case '}': return tgtok::r_brace;
+ case '(': return tgtok::l_paren;
+ case ')': return tgtok::r_paren;
+ case '=': return tgtok::equal;
+ case '?': return tgtok::question;
+
+ case 0:
+ case ' ':
+ case '\t':
+ case '\n':
+ case '\r':
+ // Ignore whitespace.
+ return LexToken();
+ case '/':
+ // If this is the start of a // comment, skip until the end of the line or
+ // the end of the buffer.
+ if (*CurPtr == '/')
+ SkipBCPLComment();
+ else if (*CurPtr == '*') {
+ if (SkipCComment())
+ return tgtok::Error;
+ } else // Otherwise, this is an error.
+ return ReturnError(TokStart, "Unexpected character");
+ return LexToken();
+ case '-': case '+':
+ case '0': case '1': case '2': case '3': case '4': case '5': case '6':
+ case '7': case '8': case '9':
+ return LexNumber();
+ case '"': return LexString();
+ case '$': return LexVarName();
+ case '[': return LexBracket();
+ case '!': return LexExclaim();
+ }
+}
+
+/// LexString - Lex "[^"]*"
+tgtok::TokKind TGLexer::LexString() {
+ const char *StrStart = CurPtr;
+
+ CurStrVal = "";
+
+ while (*CurPtr != '"') {
+ // If we hit the end of the buffer, report an error.
+ if (*CurPtr == 0 && CurPtr == CurBuf->getBufferEnd())
+ return ReturnError(StrStart, "End of file in string literal");
+
+ if (*CurPtr == '\n' || *CurPtr == '\r')
+ return ReturnError(StrStart, "End of line in string literal");
+
+ if (*CurPtr != '\\') {
+ CurStrVal += *CurPtr++;
+ continue;
+ }
+
+ ++CurPtr;
+
+ switch (*CurPtr) {
+ case '\\': case '\'': case '"':
+ // These turn into their literal character.
+ CurStrVal += *CurPtr++;
+ break;
+ case 't':
+ CurStrVal += '\t';
+ ++CurPtr;
+ break;
+ case 'n':
+ CurStrVal += '\n';
+ ++CurPtr;
+ break;
+
+ case '\n':
+ case '\r':
+ return ReturnError(CurPtr, "escaped newlines not supported in tblgen");
+
+ // If we hit the end of the buffer, report an error.
+ case '\0':
+ if (CurPtr == CurBuf->getBufferEnd())
+ return ReturnError(StrStart, "End of file in string literal");
+ // FALL THROUGH
+ default:
+ return ReturnError(CurPtr, "invalid escape in string literal");
+ }
+ }
+
+ ++CurPtr;
+ return tgtok::StrVal;
+}
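+
+// For example (illustrative): the input "a\tb\\c" lexes to tgtok::StrVal
+// with CurStrVal holding 'a', a tab, 'b', a backslash, 'c'; escaped
+// newlines and unrecognized escapes are rejected by the loop above.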
+
+tgtok::TokKind TGLexer::LexVarName() {
+ if (!isalpha(CurPtr[0]) && CurPtr[0] != '_')
+ return ReturnError(TokStart, "Invalid variable name");
+
+ // Otherwise, we're ok, consume the rest of the characters.
+ const char *VarNameStart = CurPtr++;
+
+ while (isalpha(*CurPtr) || isdigit(*CurPtr) || *CurPtr == '_')
+ ++CurPtr;
+
+ CurStrVal.assign(VarNameStart, CurPtr);
+ return tgtok::VarName;
+}
+
+
+tgtok::TokKind TGLexer::LexIdentifier() {
+ // The first letter is [a-zA-Z_#].
+ const char *IdentStart = TokStart;
+
+ // Match the rest of the identifier regex: [0-9a-zA-Z_#]*
+ while (isalpha(*CurPtr) || isdigit(*CurPtr) || *CurPtr == '_' ||
+ *CurPtr == '#')
+ ++CurPtr;
+
+ // Check to see if this identifier is a keyword.
+ StringRef Str(IdentStart, CurPtr-IdentStart);
+
+ if (Str == "include") {
+ if (LexInclude()) return tgtok::Error;
+ return Lex();
+ }
+
+ tgtok::TokKind Kind = StringSwitch<tgtok::TokKind>(Str)
+ .Case("int", tgtok::Int)
+ .Case("bit", tgtok::Bit)
+ .Case("bits", tgtok::Bits)
+ .Case("string", tgtok::String)
+ .Case("list", tgtok::List)
+ .Case("code", tgtok::Code)
+ .Case("dag", tgtok::Dag)
+ .Case("class", tgtok::Class)
+ .Case("def", tgtok::Def)
+ .Case("defm", tgtok::Defm)
+ .Case("multiclass", tgtok::MultiClass)
+ .Case("field", tgtok::Field)
+ .Case("let", tgtok::Let)
+ .Case("in", tgtok::In)
+ .Default(tgtok::Id);
+
+ if (Kind == tgtok::Id)
+ CurStrVal.assign(Str.begin(), Str.end());
+ return Kind;
+}
+
+/// LexInclude - We just read the "include" token. Get the string token that
+/// comes next and enter the include.
+bool TGLexer::LexInclude() {
+ // The token after the include must be a string.
+ tgtok::TokKind Tok = LexToken();
+ if (Tok == tgtok::Error) return true;
+ if (Tok != tgtok::StrVal) {
+ PrintError(getLoc(), "Expected filename after include");
+ return true;
+ }
+
+ // Get the string.
+ std::string Filename = CurStrVal;
+ std::string IncludedFile;
+
+ CurBuffer = SrcMgr.AddIncludeFile(Filename, SMLoc::getFromPointer(CurPtr),
+ IncludedFile);
+ if (CurBuffer == -1) {
+ PrintError(getLoc(), "Could not find include file '" + Filename + "'");
+ return true;
+ }
+
+ Dependencies.push_back(IncludedFile);
+ // Switch the lexer over to the included buffer.
+ CurBuf = SrcMgr.getMemoryBuffer(CurBuffer);
+ CurPtr = CurBuf->getBufferStart();
+ return false;
+}
+
+void TGLexer::SkipBCPLComment() {
+ ++CurPtr; // skip the second slash.
+ while (1) {
+ switch (*CurPtr) {
+ case '\n':
+ case '\r':
+ return; // Newline is end of comment.
+ case 0:
+ // If this is the end of the buffer, end the comment.
+ if (CurPtr == CurBuf->getBufferEnd())
+ return;
+ break;
+ }
+ // Otherwise, skip the character.
+ ++CurPtr;
+ }
+}
+
+/// SkipCComment - This skips C-style /**/ comments. The only difference from C
+/// is that we allow nesting.
+bool TGLexer::SkipCComment() {
+ ++CurPtr; // skip the star.
+ unsigned CommentDepth = 1;
+
+ while (1) {
+ int CurChar = getNextChar();
+ switch (CurChar) {
+ case EOF:
+ PrintError(TokStart, "Unterminated comment!");
+ return true;
+ case '*':
+ // End of the comment?
+ if (CurPtr[0] != '/') break;
+
+ ++CurPtr; // End the */.
+ if (--CommentDepth == 0)
+ return false;
+ break;
+ case '/':
+ // Start of a nested comment?
+ if (CurPtr[0] != '*') break;
+ ++CurPtr;
+ ++CommentDepth;
+ break;
+ }
+ }
+}
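+
+// For example (illustrative): unlike C, the input
+//   /* outer /* inner */ still one comment */
+// is skipped in its entirety, because CommentDepth tracks the nesting.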
+
+/// LexNumber - Lex:
+/// [-+]?[0-9]+
+/// 0x[0-9a-fA-F]+
+/// 0b[01]+
+tgtok::TokKind TGLexer::LexNumber() {
+ if (CurPtr[-1] == '0') {
+ if (CurPtr[0] == 'x') {
+ ++CurPtr;
+ const char *NumStart = CurPtr;
+ while (isxdigit(CurPtr[0]))
+ ++CurPtr;
+
+ // Requires at least one hex digit.
+ if (CurPtr == NumStart)
+ return ReturnError(TokStart, "Invalid hexadecimal number");
+
+ errno = 0;
+ CurIntVal = strtoll(NumStart, 0, 16);
+ if (errno == EINVAL)
+ return ReturnError(TokStart, "Invalid hexadecimal number");
+ if (errno == ERANGE) {
+ errno = 0;
+ CurIntVal = (int64_t)strtoull(NumStart, 0, 16);
+ if (errno == EINVAL)
+ return ReturnError(TokStart, "Invalid hexadecimal number");
+ if (errno == ERANGE)
+ return ReturnError(TokStart, "Hexadecimal number out of range");
+ }
+ return tgtok::IntVal;
+ } else if (CurPtr[0] == 'b') {
+ ++CurPtr;
+ const char *NumStart = CurPtr;
+ while (CurPtr[0] == '0' || CurPtr[0] == '1')
+ ++CurPtr;
+
+ // Requires at least one binary digit.
+ if (CurPtr == NumStart)
+ return ReturnError(CurPtr-2, "Invalid binary number");
+ CurIntVal = strtoll(NumStart, 0, 2);
+ return tgtok::IntVal;
+ }
+ }
+
+ // Check for a sign without a digit.
+ if (!isdigit(CurPtr[0])) {
+ if (CurPtr[-1] == '-')
+ return tgtok::minus;
+ else if (CurPtr[-1] == '+')
+ return tgtok::plus;
+ }
+
+ while (isdigit(CurPtr[0]))
+ ++CurPtr;
+ CurIntVal = strtoll(TokStart, 0, 10);
+ return tgtok::IntVal;
+}
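+
+// For example (illustrative): "42", "-42", "0x2A" and "0b101010" all lex to
+// tgtok::IntVal with CurIntVal == 42, -42, 42 and 42 respectively, while a
+// bare '-' or '+' not followed by a digit lexes to tgtok::minus or
+// tgtok::plus.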
+
+/// LexBracket - We just read '['. If this is a code block, return it,
+/// otherwise return the bracket. Match: '[' and '[{ ( [^}]+ | }[^]] )* }]'
+tgtok::TokKind TGLexer::LexBracket() {
+ if (CurPtr[0] != '{')
+ return tgtok::l_square;
+ ++CurPtr;
+ const char *CodeStart = CurPtr;
+ while (1) {
+ int Char = getNextChar();
+ if (Char == EOF) break;
+
+ if (Char != '}') continue;
+
+ Char = getNextChar();
+ if (Char == EOF) break;
+ if (Char == ']') {
+ CurStrVal.assign(CodeStart, CurPtr-2);
+ return tgtok::CodeFragment;
+ }
+ }
+
+ return ReturnError(CodeStart-2, "Unterminated Code Block");
+}
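+
+// For example (illustrative): "[1, 2]" yields tgtok::l_square (the list
+// contents are parsed later), while "[{ return X; }]" yields a single
+// tgtok::CodeFragment whose CurStrVal is " return X; ".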
+
+/// LexExclaim - Lex '!' and '![a-zA-Z]+'.
+tgtok::TokKind TGLexer::LexExclaim() {
+ if (!isalpha(*CurPtr))
+ return ReturnError(CurPtr - 1, "Invalid \"!operator\"");
+
+ const char *Start = CurPtr++;
+ while (isalpha(*CurPtr))
+ ++CurPtr;
+
+ // Check to see which operator this is.
+ tgtok::TokKind Kind =
+ StringSwitch<tgtok::TokKind>(StringRef(Start, CurPtr - Start))
+ .Case("eq", tgtok::XEq)
+ .Case("if", tgtok::XIf)
+ .Case("head", tgtok::XHead)
+ .Case("tail", tgtok::XTail)
+ .Case("con", tgtok::XConcat)
+ .Case("shl", tgtok::XSHL)
+ .Case("sra", tgtok::XSRA)
+ .Case("srl", tgtok::XSRL)
+ .Case("cast", tgtok::XCast)
+ .Case("empty", tgtok::XEmpty)
+ .Case("subst", tgtok::XSubst)
+ .Case("foreach", tgtok::XForEach)
+ .Case("strconcat", tgtok::XStrConcat)
+ .Default(tgtok::Error);
+
+ return Kind != tgtok::Error ? Kind : ReturnError(Start-1, "Unknown operator");
+}
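+
+// For example (illustrative): "!strconcat" lexes to tgtok::XStrConcat and
+// "!eq" to tgtok::XEq, while an unrecognized name such as "!bogus" reports
+// "Unknown operator".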
+
diff --git a/lib/TableGen/TGLexer.h b/lib/TableGen/TGLexer.h
new file mode 100644
index 0000000..84d328b
--- /dev/null
+++ b/lib/TableGen/TGLexer.h
@@ -0,0 +1,125 @@
+//===- TGLexer.h - Lexer for TableGen Files ---------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This class represents the Lexer for tablegen files.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef TGLEXER_H
+#define TGLEXER_H
+
+#include "llvm/Support/DataTypes.h"
+#include <string>
+#include <vector>
+#include <cassert>
+
+namespace llvm {
+class MemoryBuffer;
+class SourceMgr;
+class SMLoc;
+class Twine;
+
+namespace tgtok {
+ enum TokKind {
+ // Markers
+ Eof, Error,
+
+ // Tokens with no info.
+ minus, plus, // - +
+ l_square, r_square, // [ ]
+ l_brace, r_brace, // { }
+ l_paren, r_paren, // ( )
+ less, greater, // < >
+ colon, semi, // : ;
+ comma, period, // , .
+ equal, question, // = ?
+
+ // Keywords.
+ Bit, Bits, Class, Code, Dag, Def, Defm, Field, In, Int, Let, List,
+ MultiClass, String,
+
+ // !keywords.
+ XConcat, XSRA, XSRL, XSHL, XStrConcat, XCast, XSubst,
+ XForEach, XHead, XTail, XEmpty, XIf, XEq,
+
+ // Integer value.
+ IntVal,
+
+ // String valued tokens.
+ Id, StrVal, VarName, CodeFragment
+ };
+}
+
+/// TGLexer - TableGen Lexer class.
+class TGLexer {
+ SourceMgr &SrcMgr;
+
+ const char *CurPtr;
+ const MemoryBuffer *CurBuf;
+
+ // Information about the current token.
+ const char *TokStart;
+ tgtok::TokKind CurCode;
+ std::string CurStrVal; // This is valid for ID, STRVAL, VARNAME, CODEFRAGMENT
+ int64_t CurIntVal; // This is valid for INTVAL.
+
+ /// CurBuffer - This is the current buffer index we're lexing from as managed
+ /// by the SourceMgr object.
+ int CurBuffer;
+ /// Dependencies - This is the list of all included files.
+ std::vector<std::string> Dependencies;
+
+public:
+ TGLexer(SourceMgr &SrcMgr);
+ ~TGLexer() {}
+
+ tgtok::TokKind Lex() {
+ return CurCode = LexToken();
+ }
+
+ const std::vector<std::string> &getDependencies() const {
+ return Dependencies;
+ }
+
+ tgtok::TokKind getCode() const { return CurCode; }
+
+ const std::string &getCurStrVal() const {
+ assert((CurCode == tgtok::Id || CurCode == tgtok::StrVal ||
+ CurCode == tgtok::VarName || CurCode == tgtok::CodeFragment) &&
+ "This token doesn't have a string value");
+ return CurStrVal;
+ }
+ int64_t getCurIntVal() const {
+ assert(CurCode == tgtok::IntVal && "This token isn't an integer");
+ return CurIntVal;
+ }
+
+ SMLoc getLoc() const;
+
+private:
+ /// LexToken - Read the next token and return its code.
+ tgtok::TokKind LexToken();
+
+ tgtok::TokKind ReturnError(const char *Loc, const Twine &Msg);
+
+ int getNextChar();
+ void SkipBCPLComment();
+ bool SkipCComment();
+ tgtok::TokKind LexIdentifier();
+ bool LexInclude();
+ tgtok::TokKind LexString();
+ tgtok::TokKind LexVarName();
+ tgtok::TokKind LexNumber();
+ tgtok::TokKind LexBracket();
+ tgtok::TokKind LexExclaim();
+};
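+
+// Illustrative driver (a sketch, not part of this header), assuming a
+// SourceMgr `SM` that already owns the main buffer:
+//
+//   TGLexer Lexer(SM);
+//   while (Lexer.Lex() != tgtok::Eof) {
+//     if (Lexer.getCode() == tgtok::Id)
+//       (void)Lexer.getCurStrVal(); // identifier text
+//   }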
+
+} // end namespace llvm
+
+#endif
diff --git a/lib/TableGen/TGParser.cpp b/lib/TableGen/TGParser.cpp
new file mode 100644
index 0000000..e7f00ba
--- /dev/null
+++ b/lib/TableGen/TGParser.cpp
@@ -0,0 +1,2194 @@
+//===- TGParser.cpp - Parser for TableGen Files ---------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Implement the Parser for TableGen.
+//
+//===----------------------------------------------------------------------===//
+
+#include "TGParser.h"
+#include "llvm/TableGen/Record.h"
+#include "llvm/ADT/StringExtras.h"
+#include <algorithm>
+#include <sstream>
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/Support/CommandLine.h"
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+// Support Code for the Semantic Actions.
+//===----------------------------------------------------------------------===//
+
+namespace llvm {
+struct SubClassReference {
+ SMLoc RefLoc;
+ Record *Rec;
+ std::vector<Init*> TemplateArgs;
+ SubClassReference() : Rec(0) {}
+
+ bool isInvalid() const { return Rec == 0; }
+};
+
+struct SubMultiClassReference {
+ SMLoc RefLoc;
+ MultiClass *MC;
+ std::vector<Init*> TemplateArgs;
+ SubMultiClassReference() : MC(0) {}
+
+ bool isInvalid() const { return MC == 0; }
+ void dump() const;
+};
+
+void SubMultiClassReference::dump() const {
+ errs() << "Multiclass:\n";
+
+ MC->dump();
+
+ errs() << "Template args:\n";
+ for (std::vector<Init *>::const_iterator i = TemplateArgs.begin(),
+ iend = TemplateArgs.end();
+ i != iend;
+ ++i) {
+ (*i)->dump();
+ }
+}
+
+} // end namespace llvm
+
+bool TGParser::AddValue(Record *CurRec, SMLoc Loc, const RecordVal &RV) {
+ if (CurRec == 0)
+ CurRec = &CurMultiClass->Rec;
+
+ if (RecordVal *ERV = CurRec->getValue(RV.getName())) {
+ // The value already exists in the class, treat this as a set.
+ if (ERV->setValue(RV.getValue()))
+ return Error(Loc, "New definition of '" + RV.getName() + "' of type '" +
+ RV.getType()->getAsString() + "' is incompatible with " +
+ "previous definition of type '" +
+ ERV->getType()->getAsString() + "'");
+ } else {
+ CurRec->addValue(RV);
+ }
+ return false;
+}
+
+/// SetValue - Set the value named ValName in CurRec, optionally restricted
+/// to the bits in BitList. Return true on error, false on success.
+bool TGParser::SetValue(Record *CurRec, SMLoc Loc, const std::string &ValName,
+ const std::vector<unsigned> &BitList, Init *V) {
+ if (!V) return false;
+
+ if (CurRec == 0) CurRec = &CurMultiClass->Rec;
+
+ RecordVal *RV = CurRec->getValue(ValName);
+ if (RV == 0)
+ return Error(Loc, "Value '" + ValName + "' unknown!");
+
+ // Do not allow assignments like 'X = X'. This will just cause infinite loops
+ // in the resolution machinery.
+ if (BitList.empty())
+ if (VarInit *VI = dynamic_cast<VarInit*>(V))
+ if (VI->getName() == ValName)
+ return false;
+
+ // If we are assigning to a subset of the bits in the value... then we must be
+ // assigning to a field of BitsRecTy, which must have a BitsInit
+ // initializer.
+ //
+ if (!BitList.empty()) {
+ BitsInit *CurVal = dynamic_cast<BitsInit*>(RV->getValue());
+ if (CurVal == 0)
+ return Error(Loc, "Value '" + ValName + "' is not a bits type");
+
+ // Convert the incoming value to a bits type of the appropriate size...
+ Init *BI = V->convertInitializerTo(BitsRecTy::get(BitList.size()));
+ if (BI == 0) {
+ V->convertInitializerTo(BitsRecTy::get(BitList.size()));
+ return Error(Loc, "Initializer is not compatible with bit range");
+ }
+
+ // We should have a BitsInit type now.
+ BitsInit *BInit = dynamic_cast<BitsInit*>(BI);
+ assert(BInit != 0);
+
+ SmallVector<Init *, 16> NewBits(CurVal->getNumBits());
+
+ // Loop over bits, assigning values as appropriate.
+ for (unsigned i = 0, e = BitList.size(); i != e; ++i) {
+ unsigned Bit = BitList[i];
+ if (NewBits[Bit])
+ return Error(Loc, "Cannot set bit #" + utostr(Bit) + " of value '" +
+ ValName + "' more than once");
+ NewBits[Bit] = BInit->getBit(i);
+ }
+
+ for (unsigned i = 0, e = CurVal->getNumBits(); i != e; ++i)
+ if (NewBits[i] == 0)
+ NewBits[i] = CurVal->getBit(i);
+
+ V = BitsInit::get(NewBits);
+ }
+
+ if (RV->setValue(V))
+ return Error(Loc, "Value '" + ValName + "' of type '" +
+ RV->getType()->getAsString() +
+ "' is incompatible with initializer '" + V->getAsString() +"'");
+ return false;
+}
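+
+// Illustrative TableGen input (a sketch) for the bit-subset path above:
+//
+//   class C { bits<8> Val; }
+//   def D : C { let Val{7-4} = 0xA; }
+//
+// Only bits 7..4 of Val are overwritten; the remaining bits keep whatever
+// value they already had.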
+
+/// AddSubClass - Add SubClass as a subclass to CurRec, resolving its template
+/// args as SubClass's template arguments.
+bool TGParser::AddSubClass(Record *CurRec, SubClassReference &SubClass) {
+ Record *SC = SubClass.Rec;
+ // Add all of the values in the subclass into the current class.
+ const std::vector<RecordVal> &Vals = SC->getValues();
+ for (unsigned i = 0, e = Vals.size(); i != e; ++i)
+ if (AddValue(CurRec, SubClass.RefLoc, Vals[i]))
+ return true;
+
+ const std::vector<std::string> &TArgs = SC->getTemplateArgs();
+
+ // Ensure that an appropriate number of template arguments are specified.
+ if (TArgs.size() < SubClass.TemplateArgs.size())
+ return Error(SubClass.RefLoc, "More template args specified than expected");
+
+ // Loop over all of the template arguments, setting them to the specified
+ // value or leaving them as the default if necessary.
+ for (unsigned i = 0, e = TArgs.size(); i != e; ++i) {
+ if (i < SubClass.TemplateArgs.size()) {
+ // If a value is specified for this template arg, set it now.
+ if (SetValue(CurRec, SubClass.RefLoc, TArgs[i], std::vector<unsigned>(),
+ SubClass.TemplateArgs[i]))
+ return true;
+
+ // Resolve it next.
+ CurRec->resolveReferencesTo(CurRec->getValue(TArgs[i]));
+
+ // Now remove it.
+ CurRec->removeValue(TArgs[i]);
+
+ } else if (!CurRec->getValue(TArgs[i])->getValue()->isComplete()) {
+ return Error(SubClass.RefLoc,"Value not specified for template argument #"
+ + utostr(i) + " (" + TArgs[i] + ") of subclass '" +
+ SC->getName() + "'!");
+ }
+ }
+
+ // Since everything went well, we can now set the "superclass" list for the
+ // current record.
+ const std::vector<Record*> &SCs = SC->getSuperClasses();
+ for (unsigned i = 0, e = SCs.size(); i != e; ++i) {
+ if (CurRec->isSubClassOf(SCs[i]))
+ return Error(SubClass.RefLoc,
+ "Already subclass of '" + SCs[i]->getName() + "'!\n");
+ CurRec->addSuperClass(SCs[i]);
+ }
+
+ if (CurRec->isSubClassOf(SC))
+ return Error(SubClass.RefLoc,
+ "Already subclass of '" + SC->getName() + "'!\n");
+ CurRec->addSuperClass(SC);
+ return false;
+}
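+
+// Illustrative TableGen input (a sketch) showing what AddSubClass resolves:
+//
+//   class Reg<int n> { int Num = n; }
+//   def R0 : Reg<0>;
+//
+// The template arg `n` is set to 0, references to it are resolved, and the
+// arg itself is removed, leaving R0 with the concrete field Num = 0.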
+
+/// AddSubMultiClass - Add SubMultiClass as a subclass to
+/// CurMC, resolving its template args as SubMultiClass's
+/// template arguments.
+bool TGParser::AddSubMultiClass(MultiClass *CurMC,
+ SubMultiClassReference &SubMultiClass) {
+ MultiClass *SMC = SubMultiClass.MC;
+ Record *CurRec = &CurMC->Rec;
+
+ const std::vector<RecordVal> &MCVals = CurRec->getValues();
+
+ // Add all of the values in the subclass into the current class.
+ const std::vector<RecordVal> &SMCVals = SMC->Rec.getValues();
+ for (unsigned i = 0, e = SMCVals.size(); i != e; ++i)
+ if (AddValue(CurRec, SubMultiClass.RefLoc, SMCVals[i]))
+ return true;
+
+ int newDefStart = CurMC->DefPrototypes.size();
+
+ // Add all of the defs in the subclass into the current multiclass.
+ for (MultiClass::RecordVector::const_iterator i = SMC->DefPrototypes.begin(),
+ iend = SMC->DefPrototypes.end();
+ i != iend;
+ ++i) {
+ // Clone the def and add it to the current multiclass
+ Record *NewDef = new Record(**i);
+
+ // Add all of the values in the superclass into the current def.
+ for (unsigned i = 0, e = MCVals.size(); i != e; ++i)
+ if (AddValue(NewDef, SubMultiClass.RefLoc, MCVals[i]))
+ return true;
+
+ CurMC->DefPrototypes.push_back(NewDef);
+ }
+
+ const std::vector<std::string> &SMCTArgs = SMC->Rec.getTemplateArgs();
+
+ // Ensure that an appropriate number of template arguments are
+ // specified.
+ if (SMCTArgs.size() < SubMultiClass.TemplateArgs.size())
+ return Error(SubMultiClass.RefLoc,
+ "More template args specified than expected");
+
+ // Loop over all of the template arguments, setting them to the specified
+ // value or leaving them as the default if necessary.
+ for (unsigned i = 0, e = SMCTArgs.size(); i != e; ++i) {
+ if (i < SubMultiClass.TemplateArgs.size()) {
+ // If a value is specified for this template arg, set it in the
+ // superclass now.
+ if (SetValue(CurRec, SubMultiClass.RefLoc, SMCTArgs[i],
+ std::vector<unsigned>(),
+ SubMultiClass.TemplateArgs[i]))
+ return true;
+
+ // Resolve it next.
+ CurRec->resolveReferencesTo(CurRec->getValue(SMCTArgs[i]));
+
+ // Now remove it.
+ CurRec->removeValue(SMCTArgs[i]);
+
+ // If a value is specified for this template arg, set it in the
+ // new defs now.
+ for (MultiClass::RecordVector::iterator j =
+ CurMC->DefPrototypes.begin() + newDefStart,
+ jend = CurMC->DefPrototypes.end();
+ j != jend;
+ ++j) {
+ Record *Def = *j;
+
+ if (SetValue(Def, SubMultiClass.RefLoc, SMCTArgs[i],
+ std::vector<unsigned>(),
+ SubMultiClass.TemplateArgs[i]))
+ return true;
+
+ // Resolve it next.
+ Def->resolveReferencesTo(Def->getValue(SMCTArgs[i]));
+
+ // Now remove it
+ Def->removeValue(SMCTArgs[i]);
+ }
+ } else if (!CurRec->getValue(SMCTArgs[i])->getValue()->isComplete()) {
+ return Error(SubMultiClass.RefLoc,
+ "Value not specified for template argument #"
+ + utostr(i) + " (" + SMCTArgs[i] + ") of subclass '" +
+ SMC->Rec.getName() + "'!");
+ }
+ }
+
+ return false;
+}
+
+//===----------------------------------------------------------------------===//
+// Parser Code
+//===----------------------------------------------------------------------===//
+
+/// isObjectStart - Return true if this is a valid first token for an Object.
+static bool isObjectStart(tgtok::TokKind K) {
+ return K == tgtok::Class || K == tgtok::Def ||
+ K == tgtok::Defm || K == tgtok::Let || K == tgtok::MultiClass;
+}
+
+static std::string GetNewAnonymousName() {
+ static unsigned AnonCounter = 0;
+ return "anonymous."+utostr(AnonCounter++);
+}
+
+/// ParseObjectName - If an object name is specified, return it. Otherwise,
+/// return an anonymous name.
+/// ObjectName ::= ID
+/// ObjectName ::= /*empty*/
+///
+std::string TGParser::ParseObjectName() {
+ if (Lex.getCode() != tgtok::Id)
+ return GetNewAnonymousName();
+
+ std::string Ret = Lex.getCurStrVal();
+ Lex.Lex();
+ return Ret;
+}
+
+
+/// ParseClassID - Parse and resolve a reference to a class name. This returns
+/// null on error.
+///
+/// ClassID ::= ID
+///
+Record *TGParser::ParseClassID() {
+ if (Lex.getCode() != tgtok::Id) {
+ TokError("expected name for ClassID");
+ return 0;
+ }
+
+ Record *Result = Records.getClass(Lex.getCurStrVal());
+ if (Result == 0)
+ TokError("Couldn't find class '" + Lex.getCurStrVal() + "'");
+
+ Lex.Lex();
+ return Result;
+}
+
+/// ParseMultiClassID - Parse and resolve a reference to a multiclass name.
+/// This returns null on error.
+///
+/// MultiClassID ::= ID
+///
+MultiClass *TGParser::ParseMultiClassID() {
+ if (Lex.getCode() != tgtok::Id) {
+ TokError("expected name for ClassID");
+ return 0;
+ }
+
+ MultiClass *Result = MultiClasses[Lex.getCurStrVal()];
+ if (Result == 0)
+ TokError("Couldn't find class '" + Lex.getCurStrVal() + "'");
+
+ Lex.Lex();
+ return Result;
+}
+
+Record *TGParser::ParseDefmID() {
+ if (Lex.getCode() != tgtok::Id) {
+ TokError("expected multiclass name");
+ return 0;
+ }
+
+ MultiClass *MC = MultiClasses[Lex.getCurStrVal()];
+ if (MC == 0) {
+ TokError("Couldn't find multiclass '" + Lex.getCurStrVal() + "'");
+ return 0;
+ }
+
+ Lex.Lex();
+ return &MC->Rec;
+}
+
+
+/// ParseSubClassReference - Parse a reference to a subclass or to a templated
+/// subclass. This returns a SubClassRefTy with a null Record* on error.
+///
+/// SubClassRef ::= ClassID
+/// SubClassRef ::= ClassID '<' ValueList '>'
+///
+SubClassReference TGParser::
+ParseSubClassReference(Record *CurRec, bool isDefm) {
+ SubClassReference Result;
+ Result.RefLoc = Lex.getLoc();
+
+ if (isDefm)
+ Result.Rec = ParseDefmID();
+ else
+ Result.Rec = ParseClassID();
+ if (Result.Rec == 0) return Result;
+
+ // If there is no template arg list, we're done.
+ if (Lex.getCode() != tgtok::less)
+ return Result;
+ Lex.Lex(); // Eat the '<'
+
+ if (Lex.getCode() == tgtok::greater) {
+ TokError("subclass reference requires a non-empty list of template values");
+ Result.Rec = 0;
+ return Result;
+ }
+
+ Result.TemplateArgs = ParseValueList(CurRec, Result.Rec);
+ if (Result.TemplateArgs.empty()) {
+ Result.Rec = 0; // Error parsing value list.
+ return Result;
+ }
+
+ if (Lex.getCode() != tgtok::greater) {
+ TokError("expected '>' in template value list");
+ Result.Rec = 0;
+ return Result;
+ }
+ Lex.Lex();
+
+ return Result;
+}
+
+/// ParseSubMultiClassReference - Parse a reference to a subclass or to a
+/// templated submulticlass. This returns a SubMultiClassRefTy with a null
+/// Record* on error.
+///
+/// SubMultiClassRef ::= MultiClassID
+/// SubMultiClassRef ::= MultiClassID '<' ValueList '>'
+///
+SubMultiClassReference TGParser::
+ParseSubMultiClassReference(MultiClass *CurMC) {
+ SubMultiClassReference Result;
+ Result.RefLoc = Lex.getLoc();
+
+ Result.MC = ParseMultiClassID();
+ if (Result.MC == 0) return Result;
+
+ // If there is no template arg list, we're done.
+ if (Lex.getCode() != tgtok::less)
+ return Result;
+ Lex.Lex(); // Eat the '<'
+
+ if (Lex.getCode() == tgtok::greater) {
+ TokError("subclass reference requires a non-empty list of template values");
+ Result.MC = 0;
+ return Result;
+ }
+
+ Result.TemplateArgs = ParseValueList(&CurMC->Rec, &Result.MC->Rec);
+ if (Result.TemplateArgs.empty()) {
+ Result.MC = 0; // Error parsing value list.
+ return Result;
+ }
+
+ if (Lex.getCode() != tgtok::greater) {
+ TokError("expected '>' in template value list");
+ Result.MC = 0;
+ return Result;
+ }
+ Lex.Lex();
+
+ return Result;
+}
+
+/// ParseRangePiece - Parse a bit/value range.
+/// RangePiece ::= INTVAL
+/// RangePiece ::= INTVAL '-' INTVAL
+/// RangePiece ::= INTVAL INTVAL
+bool TGParser::ParseRangePiece(std::vector<unsigned> &Ranges) {
+ if (Lex.getCode() != tgtok::IntVal) {
+ TokError("expected integer or bitrange");
+ return true;
+ }
+ int64_t Start = Lex.getCurIntVal();
+ int64_t End;
+
+ if (Start < 0)
+ return TokError("invalid range, cannot be negative");
+
+ switch (Lex.Lex()) { // eat the first INTVAL.
+ default:
+ Ranges.push_back(Start);
+ return false;
+ case tgtok::minus:
+ if (Lex.Lex() != tgtok::IntVal) {
+ TokError("expected integer value as end of range");
+ return true;
+ }
+ End = Lex.getCurIntVal();
+ break;
+ case tgtok::IntVal:
+ End = -Lex.getCurIntVal();
+ break;
+ }
+ if (End < 0)
+ return TokError("invalid range, cannot be negative");
+ Lex.Lex();
+
+ // Add to the range.
+ if (Start < End) {
+ for (; Start <= End; ++Start)
+ Ranges.push_back(Start);
+ } else {
+ for (; Start >= End; --Start)
+ Ranges.push_back(Start);
+ }
+ return false;
+}
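+
+// For example (illustrative): the piece "4-7" appends 4, 5, 6, 7 to Ranges,
+// "7-4" appends 7, 6, 5, 4, and a lone "3" appends just 3.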
+
+/// ParseRangeList - Parse a list of scalars and ranges into scalar values.
+///
+/// RangeList ::= RangePiece (',' RangePiece)*
+///
+std::vector<unsigned> TGParser::ParseRangeList() {
+ std::vector<unsigned> Result;
+
+ // Parse the first piece.
+ if (ParseRangePiece(Result))
+ return std::vector<unsigned>();
+ while (Lex.getCode() == tgtok::comma) {
+ Lex.Lex(); // Eat the comma.
+
+ // Parse the next range piece.
+ if (ParseRangePiece(Result))
+ return std::vector<unsigned>();
+ }
+ return Result;
+}
+
+/// ParseOptionalRangeList - Parse either a range list in <>'s or nothing.
+/// OptionalRangeList ::= '<' RangeList '>'
+/// OptionalRangeList ::= /*empty*/
+bool TGParser::ParseOptionalRangeList(std::vector<unsigned> &Ranges) {
+ if (Lex.getCode() != tgtok::less)
+ return false;
+
+ SMLoc StartLoc = Lex.getLoc();
+ Lex.Lex(); // eat the '<'
+
+ // Parse the range list.
+ Ranges = ParseRangeList();
+ if (Ranges.empty()) return true;
+
+ if (Lex.getCode() != tgtok::greater) {
+ TokError("expected '>' at end of range list");
+ return Error(StartLoc, "to match this '<'");
+ }
+ Lex.Lex(); // eat the '>'.
+ return false;
+}
+
+/// ParseOptionalBitList - Parse either a bit list in {}'s or nothing.
+/// OptionalBitList ::= '{' RangeList '}'
+/// OptionalBitList ::= /*empty*/
+bool TGParser::ParseOptionalBitList(std::vector<unsigned> &Ranges) {
+ if (Lex.getCode() != tgtok::l_brace)
+ return false;
+
+ SMLoc StartLoc = Lex.getLoc();
+ Lex.Lex(); // eat the '{'
+
+ // Parse the range list.
+ Ranges = ParseRangeList();
+ if (Ranges.empty()) return true;
+
+ if (Lex.getCode() != tgtok::r_brace) {
+ TokError("expected '}' at end of bit list");
+ return Error(StartLoc, "to match this '{'");
+ }
+ Lex.Lex(); // eat the '}'.
+ return false;
+}
+
+
+/// ParseType - Parse and return a tblgen type. This returns null on error.
+///
+/// Type ::= STRING // string type
+/// Type ::= BIT // bit type
+/// Type ::= BITS '<' INTVAL '>' // bits<x> type
+/// Type ::= INT // int type
+/// Type ::= LIST '<' Type '>' // list<x> type
+/// Type ::= CODE // code type
+/// Type ::= DAG // dag type
+/// Type ::= ClassID // Record Type
+///
+RecTy *TGParser::ParseType() {
+ switch (Lex.getCode()) {
+ default: TokError("Unknown token when expecting a type"); return 0;
+ case tgtok::String: Lex.Lex(); return StringRecTy::get();
+ case tgtok::Bit: Lex.Lex(); return BitRecTy::get();
+ case tgtok::Int: Lex.Lex(); return IntRecTy::get();
+ case tgtok::Code: Lex.Lex(); return CodeRecTy::get();
+ case tgtok::Dag: Lex.Lex(); return DagRecTy::get();
+ case tgtok::Id:
+ if (Record *R = ParseClassID()) return RecordRecTy::get(R);
+ return 0;
+ case tgtok::Bits: {
+ if (Lex.Lex() != tgtok::less) { // Eat 'bits'
+ TokError("expected '<' after bits type");
+ return 0;
+ }
+ if (Lex.Lex() != tgtok::IntVal) { // Eat '<'
+ TokError("expected integer in bits<n> type");
+ return 0;
+ }
+ uint64_t Val = Lex.getCurIntVal();
+ if (Lex.Lex() != tgtok::greater) { // Eat count.
+ TokError("expected '>' at end of bits<n> type");
+ return 0;
+ }
+ Lex.Lex(); // Eat '>'
+ return BitsRecTy::get(Val);
+ }
+ case tgtok::List: {
+ if (Lex.Lex() != tgtok::less) { // Eat 'list'
+ TokError("expected '<' after list type");
+ return 0;
+ }
+ Lex.Lex(); // Eat '<'
+ RecTy *SubType = ParseType();
+ if (SubType == 0) return 0;
+
+ if (Lex.getCode() != tgtok::greater) {
+ TokError("expected '>' at end of list<ty> type");
+ return 0;
+ }
+ Lex.Lex(); // Eat '>'
+ return ListRecTy::get(SubType);
+ }
+ }
+}
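+
+// For example (illustrative): "string", "bits<5>", "list<int>" and a class
+// name such as "Reg" (for a previously defined class) all parse through the
+// cases above.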
+
+/// ParseIDValue - Parse an ID as a value and decode what it means.
+///
+/// IDValue ::= ID [def local value]
+/// IDValue ::= ID [def template arg]
+/// IDValue ::= ID [multiclass local value]
+/// IDValue ::= ID [multiclass template argument]
+/// IDValue ::= ID [def name]
+///
+Init *TGParser::ParseIDValue(Record *CurRec) {
+ assert(Lex.getCode() == tgtok::Id && "Expected ID in ParseIDValue");
+ std::string Name = Lex.getCurStrVal();
+ SMLoc Loc = Lex.getLoc();
+ Lex.Lex();
+ return ParseIDValue(CurRec, Name, Loc);
+}
+
+/// ParseIDValue - This is just like ParseIDValue above, but it assumes the ID
+/// has already been read.
+Init *TGParser::ParseIDValue(Record *CurRec,
+ const std::string &Name, SMLoc NameLoc) {
+ if (CurRec) {
+ if (const RecordVal *RV = CurRec->getValue(Name))
+ return VarInit::get(Name, RV->getType());
+
+ std::string TemplateArgName = CurRec->getName()+":"+Name;
+ if (CurMultiClass)
+ TemplateArgName = CurMultiClass->Rec.getName()+"::"+TemplateArgName;
+
+ if (CurRec->isTemplateArg(TemplateArgName)) {
+ const RecordVal *RV = CurRec->getValue(TemplateArgName);
+ assert(RV && "Template arg doesn't exist??");
+ return VarInit::get(TemplateArgName, RV->getType());
+ }
+ }
+
+ if (CurMultiClass) {
+ std::string MCName = CurMultiClass->Rec.getName()+"::"+Name;
+ if (CurMultiClass->Rec.isTemplateArg(MCName)) {
+ const RecordVal *RV = CurMultiClass->Rec.getValue(MCName);
+ assert(RV && "Template arg doesn't exist??");
+ return VarInit::get(MCName, RV->getType());
+ }
+ }
+
+ if (Record *D = Records.getDef(Name))
+ return DefInit::get(D);
+
+ Error(NameLoc, "Variable not defined: '" + Name + "'");
+ return 0;
+}
+
+/// ParseOperation - Parse an operator. This returns null on error.
+///
+/// Operation ::= XOperator ['<' Type '>'] '(' Args ')'
+///
+Init *TGParser::ParseOperation(Record *CurRec) {
+ switch (Lex.getCode()) {
+ default:
+ TokError("unknown operation");
+ return 0;
+ break;
+ case tgtok::XHead:
+ case tgtok::XTail:
+ case tgtok::XEmpty:
+ case tgtok::XCast: { // Value ::= !unop '(' Value ')'
+ UnOpInit::UnaryOp Code;
+ RecTy *Type = 0;
+
+ switch (Lex.getCode()) {
+ default: assert(0 && "Unhandled code!");
+ case tgtok::XCast:
+ Lex.Lex(); // eat the operation
+ Code = UnOpInit::CAST;
+
+ Type = ParseOperatorType();
+
+ if (Type == 0) {
+ TokError("did not get type for unary operator");
+ return 0;
+ }
+
+ break;
+ case tgtok::XHead:
+ Lex.Lex(); // eat the operation
+ Code = UnOpInit::HEAD;
+ break;
+ case tgtok::XTail:
+ Lex.Lex(); // eat the operation
+ Code = UnOpInit::TAIL;
+ break;
+ case tgtok::XEmpty:
+ Lex.Lex(); // eat the operation
+ Code = UnOpInit::EMPTY;
+ Type = IntRecTy::get();
+ break;
+ }
+ if (Lex.getCode() != tgtok::l_paren) {
+ TokError("expected '(' after unary operator");
+ return 0;
+ }
+ Lex.Lex(); // eat the '('
+
+ Init *LHS = ParseValue(CurRec);
+ if (LHS == 0) return 0;
+
+ if (Code == UnOpInit::HEAD
+ || Code == UnOpInit::TAIL
+ || Code == UnOpInit::EMPTY) {
+ ListInit *LHSl = dynamic_cast<ListInit*>(LHS);
+ StringInit *LHSs = dynamic_cast<StringInit*>(LHS);
+ TypedInit *LHSt = dynamic_cast<TypedInit*>(LHS);
+ if (LHSl == 0 && LHSs == 0 && LHSt == 0) {
+ TokError("expected list or string type argument in unary operator");
+ return 0;
+ }
+ if (LHSt) {
+ ListRecTy *LType = dynamic_cast<ListRecTy*>(LHSt->getType());
+ StringRecTy *SType = dynamic_cast<StringRecTy*>(LHSt->getType());
+ if (LType == 0 && SType == 0) {
+ TokError("expected list or string type argumnet in unary operator");
+ return 0;
+ }
+ }
+
+ if (Code == UnOpInit::HEAD
+ || Code == UnOpInit::TAIL) {
+ if (LHSl == 0 && LHSt == 0) {
+ TokError("expected list type argumnet in unary operator");
+ return 0;
+ }
+
+ if (LHSl && LHSl->getSize() == 0) {
+ TokError("empty list argument in unary operator");
+ return 0;
+ }
+ if (LHSl) {
+ Init *Item = LHSl->getElement(0);
+ TypedInit *Itemt = dynamic_cast<TypedInit*>(Item);
+ if (Itemt == 0) {
+ TokError("untyped list element in unary operator");
+ return 0;
+ }
+ if (Code == UnOpInit::HEAD) {
+ Type = Itemt->getType();
+ } else {
+ Type = ListRecTy::get(Itemt->getType());
+ }
+ } else {
+ assert(LHSt && "expected list type argument in unary operator");
+ ListRecTy *LType = dynamic_cast<ListRecTy*>(LHSt->getType());
+ if (LType == 0) {
+ TokError("expected list type argumnet in unary operator");
+ return 0;
+ }
+ if (Code == UnOpInit::HEAD) {
+ Type = LType->getElementType();
+ } else {
+ Type = LType;
+ }
+ }
+ }
+ }
+
+ if (Lex.getCode() != tgtok::r_paren) {
+ TokError("expected ')' in unary operator");
+ return 0;
+ }
+ Lex.Lex(); // eat the ')'
+ return (UnOpInit::get(Code, LHS, Type))->Fold(CurRec, CurMultiClass);
+ }
+
+ case tgtok::XConcat:
+ case tgtok::XSRA:
+ case tgtok::XSRL:
+ case tgtok::XSHL:
+ case tgtok::XEq:
+ case tgtok::XStrConcat: { // Value ::= !binop '(' Value ',' Value ')'
+ tgtok::TokKind OpTok = Lex.getCode();
+ SMLoc OpLoc = Lex.getLoc();
+ Lex.Lex(); // eat the operation
+
+ BinOpInit::BinaryOp Code;
+ RecTy *Type = 0;
+
+ switch (OpTok) {
+ default: assert(0 && "Unhandled code!");
+ case tgtok::XConcat: Code = BinOpInit::CONCAT;Type = DagRecTy::get(); break;
+ case tgtok::XSRA: Code = BinOpInit::SRA; Type = IntRecTy::get(); break;
+ case tgtok::XSRL: Code = BinOpInit::SRL; Type = IntRecTy::get(); break;
+ case tgtok::XSHL: Code = BinOpInit::SHL; Type = IntRecTy::get(); break;
+ case tgtok::XEq: Code = BinOpInit::EQ; Type = BitRecTy::get(); break;
+ case tgtok::XStrConcat:
+ Code = BinOpInit::STRCONCAT;
+ Type = StringRecTy::get();
+ break;
+ }
+
+ if (Lex.getCode() != tgtok::l_paren) {
+ TokError("expected '(' after binary operator");
+ return 0;
+ }
+ Lex.Lex(); // eat the '('
+
+ SmallVector<Init*, 2> InitList;
+
+ InitList.push_back(ParseValue(CurRec));
+ if (InitList.back() == 0) return 0;
+
+ while (Lex.getCode() == tgtok::comma) {
+ Lex.Lex(); // eat the ','
+
+ InitList.push_back(ParseValue(CurRec));
+ if (InitList.back() == 0) return 0;
+ }
+
+ if (Lex.getCode() != tgtok::r_paren) {
+ TokError("expected ')' in operator");
+ return 0;
+ }
+ Lex.Lex(); // eat the ')'
+
+ // We allow multiple operands to associative operators like !strconcat as
+ // shorthand for nesting them.
+ if (Code == BinOpInit::STRCONCAT) {
+ while (InitList.size() > 2) {
+ Init *RHS = InitList.pop_back_val();
+ RHS = (BinOpInit::get(Code, InitList.back(), RHS, Type))
+ ->Fold(CurRec, CurMultiClass);
+ InitList.back() = RHS;
+ }
+ }
+
+ if (InitList.size() == 2)
+ return (BinOpInit::get(Code, InitList[0], InitList[1], Type))
+ ->Fold(CurRec, CurMultiClass);
+
+ Error(OpLoc, "expected two operands to operator");
+ return 0;
+ }
+
+ case tgtok::XIf:
+ case tgtok::XForEach:
+ case tgtok::XSubst: { // Value ::= !ternop '(' Value ',' Value ',' Value ')'
+ TernOpInit::TernaryOp Code;
+ RecTy *Type = 0;
+
+ tgtok::TokKind LexCode = Lex.getCode();
+ Lex.Lex(); // eat the operation
+ switch (LexCode) {
+ default: assert(0 && "Unhandled code!");
+ case tgtok::XIf:
+ Code = TernOpInit::IF;
+ break;
+ case tgtok::XForEach:
+ Code = TernOpInit::FOREACH;
+ break;
+ case tgtok::XSubst:
+ Code = TernOpInit::SUBST;
+ break;
+ }
+ if (Lex.getCode() != tgtok::l_paren) {
+ TokError("expected '(' after ternary operator");
+ return 0;
+ }
+ Lex.Lex(); // eat the '('
+
+ Init *LHS = ParseValue(CurRec);
+ if (LHS == 0) return 0;
+
+ if (Lex.getCode() != tgtok::comma) {
+ TokError("expected ',' in ternary operator");
+ return 0;
+ }
+ Lex.Lex(); // eat the ','
+
+ Init *MHS = ParseValue(CurRec);
+ if (MHS == 0) return 0;
+
+ if (Lex.getCode() != tgtok::comma) {
+ TokError("expected ',' in ternary operator");
+ return 0;
+ }
+ Lex.Lex(); // eat the ','
+
+ Init *RHS = ParseValue(CurRec);
+ if (RHS == 0) return 0;
+
+ if (Lex.getCode() != tgtok::r_paren) {
+ TokError("expected ')' in binary operator");
+ return 0;
+ }
+ Lex.Lex(); // eat the ')'
+
+ switch (LexCode) {
+ default: assert(0 && "Unhandled code!");
+ case tgtok::XIf: {
+ // FIXME: The `!if' operator doesn't handle non-TypedInit well at
+ // all. This can be made much more robust.
+ TypedInit *MHSt = dynamic_cast<TypedInit*>(MHS);
+ TypedInit *RHSt = dynamic_cast<TypedInit*>(RHS);
+
+ RecTy *MHSTy = 0;
+ RecTy *RHSTy = 0;
+
+ if (MHSt == 0 && RHSt == 0) {
+ BitsInit *MHSbits = dynamic_cast<BitsInit*>(MHS);
+ BitsInit *RHSbits = dynamic_cast<BitsInit*>(RHS);
+
+ if (MHSbits && RHSbits &&
+ MHSbits->getNumBits() == RHSbits->getNumBits()) {
+ Type = BitRecTy::get();
+ break;
+ } else {
+ BitInit *MHSbit = dynamic_cast<BitInit*>(MHS);
+ BitInit *RHSbit = dynamic_cast<BitInit*>(RHS);
+
+ if (MHSbit && RHSbit) {
+ Type = BitRecTy::get();
+ break;
+ }
+ }
+ } else if (MHSt != 0 && RHSt != 0) {
+ MHSTy = MHSt->getType();
+ RHSTy = RHSt->getType();
+ }
+
+ if (!MHSTy || !RHSTy) {
+ TokError("could not get type for !if");
+ return 0;
+ }
+
+ if (MHSTy->typeIsConvertibleTo(RHSTy)) {
+ Type = RHSTy;
+ } else if (RHSTy->typeIsConvertibleTo(MHSTy)) {
+ Type = MHSTy;
+ } else {
+ TokError("inconsistent types for !if");
+ return 0;
+ }
+ break;
+ }
+ case tgtok::XForEach: {
+ TypedInit *MHSt = dynamic_cast<TypedInit *>(MHS);
+ if (MHSt == 0) {
+ TokError("could not get type for !foreach");
+ return 0;
+ }
+ Type = MHSt->getType();
+ break;
+ }
+ case tgtok::XSubst: {
+ TypedInit *RHSt = dynamic_cast<TypedInit *>(RHS);
+ if (RHSt == 0) {
+ TokError("could not get type for !subst");
+ return 0;
+ }
+ Type = RHSt->getType();
+ break;
+ }
+ }
+ return (TernOpInit::get(Code, LHS, MHS, RHS, Type))->Fold(CurRec,
+ CurMultiClass);
+ }
+ }
+ TokError("could not parse operation");
+ return 0;
+}
+
+/// ParseOperatorType - Parse a type for an operator. This returns
+/// null on error.
+///
+/// OperatorType ::= '<' Type '>'
+///
+RecTy *TGParser::ParseOperatorType() {
+ RecTy *Type = 0;
+
+ if (Lex.getCode() != tgtok::less) {
+ TokError("expected type name for operator");
+ return 0;
+ }
+ Lex.Lex(); // eat the <
+
+ Type = ParseType();
+
+ if (Type == 0) {
+ TokError("expected type name for operator");
+ return 0;
+ }
+
+ if (Lex.getCode() != tgtok::greater) {
+ TokError("expected type name for operator");
+ return 0;
+ }
+ Lex.Lex(); // eat the >
+
+ return Type;
+}
+
+
+/// ParseSimpleValue - Parse a tblgen value. This returns null on error.
+///
+/// SimpleValue ::= IDValue
+/// SimpleValue ::= INTVAL
+/// SimpleValue ::= STRVAL+
+/// SimpleValue ::= CODEFRAGMENT
+/// SimpleValue ::= '?'
+/// SimpleValue ::= '{' ValueList '}'
+/// SimpleValue ::= ID '<' ValueListNE '>'
+/// SimpleValue ::= '[' ValueList ']'
+/// SimpleValue ::= '(' IDValue DagArgList ')'
+/// SimpleValue ::= CONCATTOK '(' Value ',' Value ')'
+/// SimpleValue ::= SHLTOK '(' Value ',' Value ')'
+/// SimpleValue ::= SRATOK '(' Value ',' Value ')'
+/// SimpleValue ::= SRLTOK '(' Value ',' Value ')'
+/// SimpleValue ::= STRCONCATTOK '(' Value ',' Value ')'
+///
+Init *TGParser::ParseSimpleValue(Record *CurRec, RecTy *ItemType) {
+ Init *R = 0;
+ switch (Lex.getCode()) {
+ default: TokError("Unknown token when parsing a value"); break;
+ case tgtok::IntVal: R = IntInit::get(Lex.getCurIntVal()); Lex.Lex(); break;
+ case tgtok::StrVal: {
+ std::string Val = Lex.getCurStrVal();
+ Lex.Lex();
+
+ // Handle multiple consecutive concatenated strings.
+ while (Lex.getCode() == tgtok::StrVal) {
+ Val += Lex.getCurStrVal();
+ Lex.Lex();
+ }
+
+ R = StringInit::get(Val);
+ break;
+ }
+ case tgtok::CodeFragment:
+ R = CodeInit::get(Lex.getCurStrVal());
+ Lex.Lex();
+ break;
+ case tgtok::question:
+ R = UnsetInit::get();
+ Lex.Lex();
+ break;
+ case tgtok::Id: {
+ SMLoc NameLoc = Lex.getLoc();
+ std::string Name = Lex.getCurStrVal();
+ if (Lex.Lex() != tgtok::less) // consume the Id.
+ return ParseIDValue(CurRec, Name, NameLoc); // Value ::= IDValue
+
+ // Value ::= ID '<' ValueListNE '>'
+ if (Lex.Lex() == tgtok::greater) {
+ TokError("expected non-empty value list");
+ return 0;
+ }
+
+ // This is a CLASS<initvalslist> expression. This is supposed to synthesize
+ // a new anonymous definition, deriving from CLASS<initvalslist> with no
+ // body.
+ Record *Class = Records.getClass(Name);
+ if (!Class) {
+ Error(NameLoc, "Expected a class name, got '" + Name + "'");
+ return 0;
+ }
+
+ std::vector<Init*> ValueList = ParseValueList(CurRec, Class);
+ if (ValueList.empty()) return 0;
+
+ if (Lex.getCode() != tgtok::greater) {
+ TokError("expected '>' at end of value list");
+ return 0;
+ }
+ Lex.Lex(); // eat the '>'
+
+ // Create the new record, set it as CurRec temporarily.
+ static unsigned AnonCounter = 0;
+ Record *NewRec = new Record("anonymous.val."+utostr(AnonCounter++),
+ NameLoc,
+ Records);
+ SubClassReference SCRef;
+ SCRef.RefLoc = NameLoc;
+ SCRef.Rec = Class;
+ SCRef.TemplateArgs = ValueList;
+ // Add info about the subclass to NewRec.
+ if (AddSubClass(NewRec, SCRef))
+ return 0;
+ NewRec->resolveReferences();
+ Records.addDef(NewRec);
+
+ // The result of the expression is a reference to the new record.
+ return DefInit::get(NewRec);
+ }
+ case tgtok::l_brace: { // Value ::= '{' ValueList '}'
+ SMLoc BraceLoc = Lex.getLoc();
+ Lex.Lex(); // eat the '{'
+ std::vector<Init*> Vals;
+
+ if (Lex.getCode() != tgtok::r_brace) {
+ Vals = ParseValueList(CurRec);
+ if (Vals.empty()) return 0;
+ }
+ if (Lex.getCode() != tgtok::r_brace) {
+ TokError("expected '}' at end of bit list value");
+ return 0;
+ }
+ Lex.Lex(); // eat the '}'
+
+ SmallVector<Init *, 16> NewBits(Vals.size());
+
+ for (unsigned i = 0, e = Vals.size(); i != e; ++i) {
+ Init *Bit = Vals[i]->convertInitializerTo(BitRecTy::get());
+ if (Bit == 0) {
+ Error(BraceLoc, "Element #" + utostr(i) + " (" + Vals[i]->getAsString()+
+ ") is not convertable to a bit");
+ return 0;
+ }
+ NewBits[Vals.size()-i-1] = Bit;
+ }
+ return BitsInit::get(NewBits);
+ }
+ case tgtok::l_square: { // Value ::= '[' ValueList ']'
+ Lex.Lex(); // eat the '['
+ std::vector<Init*> Vals;
+
+ RecTy *DeducedEltTy = 0;
+ ListRecTy *GivenListTy = 0;
+
+ if (ItemType != 0) {
+ ListRecTy *ListType = dynamic_cast<ListRecTy*>(ItemType);
+ if (ListType == 0) {
+ std::stringstream s;
+ s << "Type mismatch for list, expected list type, got "
+ << ItemType->getAsString();
+ TokError(s.str());
+ return 0;
+ }
+ GivenListTy = ListType;
+ }
+
+ if (Lex.getCode() != tgtok::r_square) {
+ Vals = ParseValueList(CurRec, 0,
+ GivenListTy ? GivenListTy->getElementType() : 0);
+ if (Vals.empty()) return 0;
+ }
+ if (Lex.getCode() != tgtok::r_square) {
+ TokError("expected ']' at end of list value");
+ return 0;
+ }
+ Lex.Lex(); // eat the ']'
+
+ RecTy *GivenEltTy = 0;
+ if (Lex.getCode() == tgtok::less) {
+ // Optional list element type
+ Lex.Lex(); // eat the '<'
+
+ GivenEltTy = ParseType();
+ if (GivenEltTy == 0) {
+ // Couldn't parse element type
+ return 0;
+ }
+
+ if (Lex.getCode() != tgtok::greater) {
+ TokError("expected '>' at end of list element type");
+ return 0;
+ }
+ Lex.Lex(); // eat the '>'
+ }
+
+ // Check elements
+ RecTy *EltTy = 0;
+ for (std::vector<Init *>::iterator i = Vals.begin(), ie = Vals.end();
+ i != ie;
+ ++i) {
+ TypedInit *TArg = dynamic_cast<TypedInit*>(*i);
+ if (TArg == 0) {
+ TokError("Untyped list element");
+ return 0;
+ }
+ if (EltTy != 0) {
+ EltTy = resolveTypes(EltTy, TArg->getType());
+ if (EltTy == 0) {
+ TokError("Incompatible types in list elements");
+ return 0;
+ }
+ } else {
+ EltTy = TArg->getType();
+ }
+ }
+
+ if (GivenEltTy != 0) {
+ if (EltTy != 0) {
+ // Verify consistency
+ if (!EltTy->typeIsConvertibleTo(GivenEltTy)) {
+ TokError("Incompatible types in list elements");
+ return 0;
+ }
+ }
+ EltTy = GivenEltTy;
+ }
+
+ if (EltTy == 0) {
+ if (ItemType == 0) {
+ TokError("No type for list");
+ return 0;
+ }
+ DeducedEltTy = GivenListTy->getElementType();
+ } else {
+ // Make sure the deduced type is compatible with the given type
+ if (GivenListTy) {
+ if (!EltTy->typeIsConvertibleTo(GivenListTy->getElementType())) {
+ TokError("Element type mismatch for list");
+ return 0;
+ }
+ }
+ DeducedEltTy = EltTy;
+ }
+
+ return ListInit::get(Vals, DeducedEltTy);
+ }
+ case tgtok::l_paren: { // Value ::= '(' IDValue DagArgList ')'
+ Lex.Lex(); // eat the '('
+ if (Lex.getCode() != tgtok::Id && Lex.getCode() != tgtok::XCast) {
+ TokError("expected identifier in dag init");
+ return 0;
+ }
+
+ Init *Operator = ParseValue(CurRec);
+ if (Operator == 0) return 0;
+
+ // If the operator name is present, parse it.
+ std::string OperatorName;
+ if (Lex.getCode() == tgtok::colon) {
+ if (Lex.Lex() != tgtok::VarName) { // eat the ':'
+ TokError("expected variable name in dag operator");
+ return 0;
+ }
+ OperatorName = Lex.getCurStrVal();
+ Lex.Lex(); // eat the VarName.
+ }
+
+ std::vector<std::pair<llvm::Init*, std::string> > DagArgs;
+ if (Lex.getCode() != tgtok::r_paren) {
+ DagArgs = ParseDagArgList(CurRec);
+ if (DagArgs.empty()) return 0;
+ }
+
+ if (Lex.getCode() != tgtok::r_paren) {
+ TokError("expected ')' in dag init");
+ return 0;
+ }
+ Lex.Lex(); // eat the ')'
+
+ return DagInit::get(Operator, OperatorName, DagArgs);
+ }
+
+ case tgtok::XHead:
+ case tgtok::XTail:
+ case tgtok::XEmpty:
+ case tgtok::XCast: // Value ::= !unop '(' Value ')'
+ case tgtok::XConcat:
+ case tgtok::XSRA:
+ case tgtok::XSRL:
+ case tgtok::XSHL:
+ case tgtok::XEq:
+ case tgtok::XStrConcat: // Value ::= !binop '(' Value ',' Value ')'
+ case tgtok::XIf:
+ case tgtok::XForEach:
+ case tgtok::XSubst: { // Value ::= !ternop '(' Value ',' Value ',' Value ')'
+ return ParseOperation(CurRec);
+ }
+ }
+
+ return R;
+}
+
+/// ParseValue - Parse a tblgen value. This returns null on error.
+///
+/// Value ::= SimpleValue ValueSuffix*
+/// ValueSuffix ::= '{' BitList '}'
+/// ValueSuffix ::= '[' BitList ']'
+/// ValueSuffix ::= '.' ID
+///
+Init *TGParser::ParseValue(Record *CurRec, RecTy *ItemType) {
+ Init *Result = ParseSimpleValue(CurRec, ItemType);
+ if (Result == 0) return 0;
+
+ // Parse the suffixes now if present.
+ while (1) {
+ switch (Lex.getCode()) {
+ default: return Result;
+ case tgtok::l_brace: {
+ SMLoc CurlyLoc = Lex.getLoc();
+ Lex.Lex(); // eat the '{'
+ std::vector<unsigned> Ranges = ParseRangeList();
+ if (Ranges.empty()) return 0;
+
+ // Reverse the bitlist.
+ std::reverse(Ranges.begin(), Ranges.end());
+ Result = Result->convertInitializerBitRange(Ranges);
+ if (Result == 0) {
+ Error(CurlyLoc, "Invalid bit range for value");
+ return 0;
+ }
+
+ // Eat the '}'.
+ if (Lex.getCode() != tgtok::r_brace) {
+ TokError("expected '}' at end of bit range list");
+ return 0;
+ }
+ Lex.Lex();
+ break;
+ }
+ case tgtok::l_square: {
+ SMLoc SquareLoc = Lex.getLoc();
+ Lex.Lex(); // eat the '['
+ std::vector<unsigned> Ranges = ParseRangeList();
+ if (Ranges.empty()) return 0;
+
+ Result = Result->convertInitListSlice(Ranges);
+ if (Result == 0) {
+ Error(SquareLoc, "Invalid range for list slice");
+ return 0;
+ }
+
+ // Eat the ']'.
+ if (Lex.getCode() != tgtok::r_square) {
+ TokError("expected ']' at end of list slice");
+ return 0;
+ }
+ Lex.Lex();
+ break;
+ }
+ case tgtok::period:
+ if (Lex.Lex() != tgtok::Id) { // eat the .
+ TokError("expected field identifier after '.'");
+ return 0;
+ }
+ if (!Result->getFieldType(Lex.getCurStrVal())) {
+ TokError("Cannot access field '" + Lex.getCurStrVal() + "' of value '" +
+ Result->getAsString() + "'");
+ return 0;
+ }
+ Result = FieldInit::get(Result, Lex.getCurStrVal());
+ Lex.Lex(); // eat field name
+ break;
+ }
+ }
+}
+
+/// ParseDagArgList - Parse the argument list for a dag literal expression.
+///
+/// ParseDagArgList ::= Value (':' VARNAME)?
+/// ParseDagArgList ::= ParseDagArgList ',' Value (':' VARNAME)?
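+///
+/// For illustration only (operator and names invented), the arguments in a
+/// dag value such as "(someop GPR:$dst, GPR:$src)" parse as two values,
+/// each carrying an optional ':' VARNAME.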
+std::vector<std::pair<llvm::Init*, std::string> >
+TGParser::ParseDagArgList(Record *CurRec) {
+ std::vector<std::pair<llvm::Init*, std::string> > Result;
+
+ while (1) {
+ Init *Val = ParseValue(CurRec);
+ if (Val == 0) return std::vector<std::pair<llvm::Init*, std::string> >();
+
+ // If the variable name is present, add it.
+ std::string VarName;
+ if (Lex.getCode() == tgtok::colon) {
+ if (Lex.Lex() != tgtok::VarName) { // eat the ':'
+ TokError("expected variable name in dag literal");
+ return std::vector<std::pair<llvm::Init*, std::string> >();
+ }
+ VarName = Lex.getCurStrVal();
+ Lex.Lex(); // eat the VarName.
+ }
+
+ Result.push_back(std::make_pair(Val, VarName));
+
+ if (Lex.getCode() != tgtok::comma) break;
+ Lex.Lex(); // eat the ','
+ }
+
+ return Result;
+}
+
+
+/// ParseValueList - Parse a comma-separated list of values, returning them
+/// as a vector. Note that this always expects to be able to parse at least
+/// one value. It returns an empty list if this is not possible.
+///
+/// ValueList ::= Value (',' Value)*
+///
+std::vector<Init*> TGParser::ParseValueList(Record *CurRec, Record *ArgsRec,
+ RecTy *EltTy) {
+ std::vector<Init*> Result;
+ RecTy *ItemType = EltTy;
+ unsigned int ArgN = 0;
+ if (ArgsRec != 0 && EltTy == 0) {
+ const std::vector<std::string> &TArgs = ArgsRec->getTemplateArgs();
+ const RecordVal *RV = ArgsRec->getValue(TArgs[ArgN]);
+ if (!RV) {
+ errs() << "Cannot find template arg " << ArgN << " (" << TArgs[ArgN]
+ << ")\n";
+ }
+ assert(RV && "Template argument record not found??");
+ ItemType = RV->getType();
+ ++ArgN;
+ }
+ Result.push_back(ParseValue(CurRec, ItemType));
+ if (Result.back() == 0) return std::vector<Init*>();
+
+ while (Lex.getCode() == tgtok::comma) {
+ Lex.Lex(); // Eat the comma
+
+ if (ArgsRec != 0 && EltTy == 0) {
+ const std::vector<std::string> &TArgs = ArgsRec->getTemplateArgs();
+ if (ArgN >= TArgs.size()) {
+ TokError("too many template arguments");
+ return std::vector<Init*>();
+ }
+ const RecordVal *RV = ArgsRec->getValue(TArgs[ArgN]);
+ assert(RV && "Template argument record not found??");
+ ItemType = RV->getType();
+ ++ArgN;
+ }
+ Result.push_back(ParseValue(CurRec, ItemType));
+ if (Result.back() == 0) return std::vector<Init*>();
+ }
+
+ return Result;
+}
+
+
+/// ParseDeclaration - Read a declaration, returning the name of the declared
+/// field or ID, or an empty string on error. This can happen in a number of
+/// different contexts, including within a def or in the template args for a
+/// def (in which case CurRec will be non-null) and within the template args
+/// for a multiclass (in which case CurRec will be null, but CurMultiClass
+/// will be set). This can also happen within a def that is within a
+/// multiclass, which will set both CurRec and CurMultiClass.
+///
+/// Declaration ::= FIELD? Type ID ('=' Value)?
+///
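+/// A hypothetical example matching this production: "field bits<32> Inst = 0"
+/// uses the optional FIELD prefix, a type, an identifier and an initializer;
+/// the terminating ';' is consumed by the caller.
+///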
+std::string TGParser::ParseDeclaration(Record *CurRec,
+ bool ParsingTemplateArgs) {
+ // Read the field prefix if present.
+ bool HasField = Lex.getCode() == tgtok::Field;
+ if (HasField) Lex.Lex();
+
+ RecTy *Type = ParseType();
+ if (Type == 0) return "";
+
+ if (Lex.getCode() != tgtok::Id) {
+ TokError("Expected identifier in declaration");
+ return "";
+ }
+
+ SMLoc IdLoc = Lex.getLoc();
+ std::string DeclName = Lex.getCurStrVal();
+ Lex.Lex();
+
+ if (ParsingTemplateArgs) {
+ if (CurRec) {
+ DeclName = CurRec->getName() + ":" + DeclName;
+ } else {
+ assert(CurMultiClass);
+ }
+ if (CurMultiClass)
+ DeclName = CurMultiClass->Rec.getName() + "::" + DeclName;
+ }
+
+ // Add the value.
+ if (AddValue(CurRec, IdLoc, RecordVal(DeclName, Type, HasField)))
+ return "";
+
+ // If a value is present, parse it.
+ if (Lex.getCode() == tgtok::equal) {
+ Lex.Lex();
+ SMLoc ValLoc = Lex.getLoc();
+ Init *Val = ParseValue(CurRec, Type);
+ if (Val == 0 ||
+ SetValue(CurRec, ValLoc, DeclName, std::vector<unsigned>(), Val))
+ return "";
+ }
+
+ return DeclName;
+}
+
+/// ParseTemplateArgList - Read a template argument list, which is a non-empty
+/// sequence of template-declarations in <>'s. If CurRec is non-null, these are
+/// template args for a def, which may or may not be in a multiclass. If null,
+/// these are the template args for a multiclass.
+///
+/// TemplateArgList ::= '<' Declaration (',' Declaration)* '>'
+///
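+/// A made-up example: "<int n, string s = "x">" declares two template
+/// arguments, the second with a default value.
+///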
+bool TGParser::ParseTemplateArgList(Record *CurRec) {
+ assert(Lex.getCode() == tgtok::less && "Not a template arg list!");
+ Lex.Lex(); // eat the '<'
+
+ Record *TheRecToAddTo = CurRec ? CurRec : &CurMultiClass->Rec;
+
+ // Read the first declaration.
+ std::string TemplArg = ParseDeclaration(CurRec, true/*templateargs*/);
+ if (TemplArg.empty())
+ return true;
+
+ TheRecToAddTo->addTemplateArg(TemplArg);
+
+ while (Lex.getCode() == tgtok::comma) {
+ Lex.Lex(); // eat the ','
+
+ // Read the following declarations.
+ TemplArg = ParseDeclaration(CurRec, true/*templateargs*/);
+ if (TemplArg.empty())
+ return true;
+ TheRecToAddTo->addTemplateArg(TemplArg);
+ }
+
+ if (Lex.getCode() != tgtok::greater)
+ return TokError("expected '>' at end of template argument list");
+ Lex.Lex(); // eat the '>'.
+ return false;
+}
+
+
+/// ParseBodyItem - Parse a single item within the body of a def or class.
+///
+/// BodyItem ::= Declaration ';'
+/// BodyItem ::= LET ID OptionalBitList '=' Value ';'
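+///
+/// e.g. (invented field name): "let Inst{3-0} = 0b1111;" overrides just
+/// bits 3-0 of an inherited field via the OptionalBitList.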
+bool TGParser::ParseBodyItem(Record *CurRec) {
+ if (Lex.getCode() != tgtok::Let) {
+ if (ParseDeclaration(CurRec, false).empty())
+ return true;
+
+ if (Lex.getCode() != tgtok::semi)
+ return TokError("expected ';' after declaration");
+ Lex.Lex();
+ return false;
+ }
+
+ // LET ID OptionalRangeList '=' Value ';'
+ if (Lex.Lex() != tgtok::Id)
+ return TokError("expected field identifier after let");
+
+ SMLoc IdLoc = Lex.getLoc();
+ std::string FieldName = Lex.getCurStrVal();
+ Lex.Lex(); // eat the field name.
+
+ std::vector<unsigned> BitList;
+ if (ParseOptionalBitList(BitList))
+ return true;
+ std::reverse(BitList.begin(), BitList.end());
+
+ if (Lex.getCode() != tgtok::equal)
+ return TokError("expected '=' in let expression");
+ Lex.Lex(); // eat the '='.
+
+ RecordVal *Field = CurRec->getValue(FieldName);
+ if (Field == 0)
+ return TokError("Value '" + FieldName + "' unknown!");
+
+ RecTy *Type = Field->getType();
+
+ Init *Val = ParseValue(CurRec, Type);
+ if (Val == 0) return true;
+
+ if (Lex.getCode() != tgtok::semi)
+ return TokError("expected ';' after let expression");
+ Lex.Lex();
+
+ return SetValue(CurRec, IdLoc, FieldName, BitList, Val);
+}
+
+/// ParseBody - Read the body of a class or def. Return true on error, false on
+/// success.
+///
+/// Body ::= ';'
+/// Body ::= '{' BodyList '}'
+/// BodyList ::= BodyItem*
+///
+bool TGParser::ParseBody(Record *CurRec) {
+ // If this is a null definition, just eat the semi and return.
+ if (Lex.getCode() == tgtok::semi) {
+ Lex.Lex();
+ return false;
+ }
+
+ if (Lex.getCode() != tgtok::l_brace)
+ return TokError("Expected ';' or '{' to start body");
+ // Eat the '{'.
+ Lex.Lex();
+
+ while (Lex.getCode() != tgtok::r_brace)
+ if (ParseBodyItem(CurRec))
+ return true;
+
+ // Eat the '}'.
+ Lex.Lex();
+ return false;
+}
+
+/// ParseObjectBody - Parse the body of a def or class. This consists of an
+/// optional ClassList followed by a Body. CurRec is the current def or class
+/// that is being parsed.
+///
+/// ObjectBody ::= BaseClassList Body
+/// BaseClassList ::= /*empty*/
+/// BaseClassList ::= ':' BaseClassListNE
+/// BaseClassListNE ::= SubClassRef (',' SubClassRef)*
+///
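+/// A sketch of what this accepts, with invented names:
+///   def Foo : Base1, Base2<5> { ... }
+/// where ':' introduces the base class list and the braces hold the Body.
+///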
+bool TGParser::ParseObjectBody(Record *CurRec) {
+ // If there is a baseclass list, read it.
+ if (Lex.getCode() == tgtok::colon) {
+ Lex.Lex();
+
+ // Read all of the subclasses.
+ SubClassReference SubClass = ParseSubClassReference(CurRec, false);
+ while (1) {
+ // Check for error.
+ if (SubClass.Rec == 0) return true;
+
+ // Add it.
+ if (AddSubClass(CurRec, SubClass))
+ return true;
+
+ if (Lex.getCode() != tgtok::comma) break;
+ Lex.Lex(); // eat ','.
+ SubClass = ParseSubClassReference(CurRec, false);
+ }
+ }
+
+ // Process any variables on the let stack.
+ for (unsigned i = 0, e = LetStack.size(); i != e; ++i)
+ for (unsigned j = 0, e = LetStack[i].size(); j != e; ++j)
+ if (SetValue(CurRec, LetStack[i][j].Loc, LetStack[i][j].Name,
+ LetStack[i][j].Bits, LetStack[i][j].Value))
+ return true;
+
+ return ParseBody(CurRec);
+}
+
+/// ParseDef - Parse a top level or multiclass def, adding the record it
+/// defines to the appropriate table. This returns true on error.
+///
+/// DefInst ::= DEF ObjectName ObjectBody
+///
+bool TGParser::ParseDef(MultiClass *CurMultiClass) {
+ SMLoc DefLoc = Lex.getLoc();
+ assert(Lex.getCode() == tgtok::Def && "Unknown tok");
+ Lex.Lex(); // Eat the 'def' token.
+
+ // Parse ObjectName and make a record for it.
+ Record *CurRec = new Record(ParseObjectName(), DefLoc, Records);
+
+ if (!CurMultiClass) {
+ // Top-level def definition.
+
+ // Ensure redefinition doesn't happen.
+ if (Records.getDef(CurRec->getName())) {
+ Error(DefLoc, "def '" + CurRec->getName() + "' already defined");
+ return true;
+ }
+ Records.addDef(CurRec);
+ } else {
+ // Otherwise, a def inside a multiclass, add it to the multiclass.
+ for (unsigned i = 0, e = CurMultiClass->DefPrototypes.size(); i != e; ++i)
+ if (CurMultiClass->DefPrototypes[i]->getName() == CurRec->getName()) {
+ Error(DefLoc, "def '" + CurRec->getName() +
+ "' already defined in this multiclass!");
+ return true;
+ }
+ CurMultiClass->DefPrototypes.push_back(CurRec);
+ }
+
+ if (ParseObjectBody(CurRec))
+ return true;
+
+ if (CurMultiClass == 0) // Defs in multiclasses aren't really defs.
+ // See Record::setName(). This resolve step will see any new name
+ // for the def that might have been created when resolving
+ // inheritance, values and arguments above.
+ CurRec->resolveReferences();
+
+ // If ObjectBody has template arguments, it's an error.
+ assert(CurRec->getTemplateArgs().empty() && "How'd this get template args?");
+
+ if (CurMultiClass) {
+ // Copy the template arguments for the multiclass into the def.
+ const std::vector<std::string> &TArgs =
+ CurMultiClass->Rec.getTemplateArgs();
+
+ for (unsigned i = 0, e = TArgs.size(); i != e; ++i) {
+ const RecordVal *RV = CurMultiClass->Rec.getValue(TArgs[i]);
+ assert(RV && "Template arg doesn't exist?");
+ CurRec->addValue(*RV);
+ }
+ }
+
+ return false;
+}
+
+/// ParseClass - Parse a tblgen class definition.
+///
+/// ClassInst ::= CLASS ID TemplateArgList? ObjectBody
+///
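+/// An invented example: "class Enc<int opc> { bits<4> Opcode = opc; }"
+/// declares a class with one template argument and one body item.
+///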
+bool TGParser::ParseClass() {
+ assert(Lex.getCode() == tgtok::Class && "Unexpected token!");
+ Lex.Lex();
+
+ if (Lex.getCode() != tgtok::Id)
+ return TokError("expected class name after 'class' keyword");
+
+ Record *CurRec = Records.getClass(Lex.getCurStrVal());
+ if (CurRec) {
+ // If the body was previously defined, this is an error.
+ if (!CurRec->getValues().empty() ||
+ !CurRec->getSuperClasses().empty() ||
+ !CurRec->getTemplateArgs().empty())
+ return TokError("Class '" + CurRec->getName() + "' already defined");
+ } else {
+ // If this is the first reference to this class, create and add it.
+ CurRec = new Record(Lex.getCurStrVal(), Lex.getLoc(), Records);
+ Records.addClass(CurRec);
+ }
+ Lex.Lex(); // eat the name.
+
+ // If there are template args, parse them.
+ if (Lex.getCode() == tgtok::less)
+ if (ParseTemplateArgList(CurRec))
+ return true;
+
+ // Finally, parse the object body.
+ return ParseObjectBody(CurRec);
+}
+
+/// ParseLetList - Parse a non-empty list of assignment expressions into a list
+/// of LetRecords.
+///
+/// LetList ::= LetItem (',' LetItem)*
+/// LetItem ::= ID OptionalRangeList '=' Value
+///
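+/// e.g. (invented names): "isCall = 1, AsmString = "nop"" would parse as
+/// two LetRecords, each binding a name to a value.
+///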
+std::vector<LetRecord> TGParser::ParseLetList() {
+ std::vector<LetRecord> Result;
+
+ while (1) {
+ if (Lex.getCode() != tgtok::Id) {
+ TokError("expected identifier in let definition");
+ return std::vector<LetRecord>();
+ }
+ std::string Name = Lex.getCurStrVal();
+ SMLoc NameLoc = Lex.getLoc();
+ Lex.Lex(); // Eat the identifier.
+
+ // Check for an optional RangeList.
+ std::vector<unsigned> Bits;
+ if (ParseOptionalRangeList(Bits))
+ return std::vector<LetRecord>();
+ std::reverse(Bits.begin(), Bits.end());
+
+ if (Lex.getCode() != tgtok::equal) {
+ TokError("expected '=' in let expression");
+ return std::vector<LetRecord>();
+ }
+ Lex.Lex(); // eat the '='.
+
+ Init *Val = ParseValue(0);
+ if (Val == 0) return std::vector<LetRecord>();
+
+ // Now that we have everything, add the record.
+ Result.push_back(LetRecord(Name, Bits, Val, NameLoc));
+
+ if (Lex.getCode() != tgtok::comma)
+ return Result;
+ Lex.Lex(); // eat the comma.
+ }
+}
+
+/// ParseTopLevelLet - Parse a 'let' at top level. This can be one of a
+/// couple of related productions, and it works inside multiclasses too.
+///
+/// Object ::= LET LetList IN '{' ObjectList '}'
+/// Object ::= LET LetList IN Object
+///
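+/// e.g. (invented): "let isBranch = 1 in def B : Instruction;" applies the
+/// binding to one object; a '{' ... '}' group applies it to a whole list.
+///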
+bool TGParser::ParseTopLevelLet(MultiClass *CurMultiClass) {
+ assert(Lex.getCode() == tgtok::Let && "Unexpected token");
+ Lex.Lex();
+
+ // Add this entry to the let stack.
+ std::vector<LetRecord> LetInfo = ParseLetList();
+ if (LetInfo.empty()) return true;
+ LetStack.push_back(LetInfo);
+
+ if (Lex.getCode() != tgtok::In)
+ return TokError("expected 'in' at end of top-level 'let'");
+ Lex.Lex();
+
+ // If this is a scalar let, just handle it now
+ if (Lex.getCode() != tgtok::l_brace) {
+ // LET LetList IN Object
+ if (ParseObject(CurMultiClass))
+ return true;
+ } else { // Object ::= LETCommand '{' ObjectList '}'
+ SMLoc BraceLoc = Lex.getLoc();
+ // Otherwise, this is a group let.
+ Lex.Lex(); // eat the '{'.
+
+ // Parse the object list.
+ if (ParseObjectList(CurMultiClass))
+ return true;
+
+ if (Lex.getCode() != tgtok::r_brace) {
+ TokError("expected '}' at end of top level let command");
+ return Error(BraceLoc, "to match this '{'");
+ }
+ Lex.Lex();
+ }
+
+ // Outside this let scope, this let block is not active.
+ LetStack.pop_back();
+ return false;
+}
+
+/// ParseMultiClass - Parse a multiclass definition.
+///
+/// MultiClassInst ::= MULTICLASS ID TemplateArgList?
+/// ':' BaseMultiClassList '{' MultiClassDef+ '}'
+///
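+/// A minimal invented sketch:
+///   multiclass Pair<int n> { def A; def B; }
+/// Each def inside becomes a prototype that a later 'defm' instantiates.
+///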
+bool TGParser::ParseMultiClass() {
+ assert(Lex.getCode() == tgtok::MultiClass && "Unexpected token");
+ Lex.Lex(); // Eat the multiclass token.
+
+ if (Lex.getCode() != tgtok::Id)
+ return TokError("expected identifier after multiclass for name");
+ std::string Name = Lex.getCurStrVal();
+
+ if (MultiClasses.count(Name))
+ return TokError("multiclass '" + Name + "' already defined");
+
+ CurMultiClass = MultiClasses[Name] = new MultiClass(Name,
+ Lex.getLoc(), Records);
+ Lex.Lex(); // Eat the identifier.
+
+ // If there are template args, parse them.
+ if (Lex.getCode() == tgtok::less)
+ if (ParseTemplateArgList(0))
+ return true;
+
+ bool inherits = false;
+
+ // If there are submulticlasses, parse them.
+ if (Lex.getCode() == tgtok::colon) {
+ inherits = true;
+
+ Lex.Lex();
+
+ // Read all of the submulticlasses.
+ SubMultiClassReference SubMultiClass =
+ ParseSubMultiClassReference(CurMultiClass);
+ while (1) {
+ // Check for error.
+ if (SubMultiClass.MC == 0) return true;
+
+ // Add it.
+ if (AddSubMultiClass(CurMultiClass, SubMultiClass))
+ return true;
+
+ if (Lex.getCode() != tgtok::comma) break;
+ Lex.Lex(); // eat ','.
+ SubMultiClass = ParseSubMultiClassReference(CurMultiClass);
+ }
+ }
+
+ if (Lex.getCode() != tgtok::l_brace) {
+ if (!inherits)
+ return TokError("expected '{' in multiclass definition");
+ else if (Lex.getCode() != tgtok::semi)
+ return TokError("expected ';' in multiclass definition");
+ else
+ Lex.Lex(); // eat the ';'.
+ } else {
+ if (Lex.Lex() == tgtok::r_brace) // eat the '{'.
+ return TokError("multiclass must contain at least one def");
+
+ while (Lex.getCode() != tgtok::r_brace) {
+ switch (Lex.getCode()) {
+ default:
+ return TokError("expected 'let', 'def' or 'defm' in multiclass body");
+ case tgtok::Let:
+ case tgtok::Def:
+ case tgtok::Defm:
+ if (ParseObject(CurMultiClass))
+ return true;
+ break;
+ }
+ }
+ Lex.Lex(); // eat the '}'.
+ }
+
+ CurMultiClass = 0;
+ return false;
+}
+
+Record *TGParser::
+InstantiateMulticlassDef(MultiClass &MC,
+ Record *DefProto,
+ const std::string &DefmPrefix,
+ SMLoc DefmPrefixLoc) {
+ // Add in the defm name. If the defm prefix is empty, give each
+ // instantiated def a unique name. Otherwise, if "#NAME#" exists in the
+ // name, substitute the prefix for #NAME#. Otherwise, use the defm name
+ // as a prefix.
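+ // For instance (hypothetical names): with prefix "ADD", a prototype named
+ // "rr" becomes "ADDrr", while "X#NAME#Y" becomes "XADDY".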
+ std::string DefName = DefProto->getName();
+ if (DefmPrefix.empty()) {
+ DefName = GetNewAnonymousName();
+ } else {
+ std::string::size_type idx = DefName.find("#NAME#");
+ if (idx != std::string::npos) {
+ DefName.replace(idx, 6, DefmPrefix);
+ } else {
+ // Add the suffix to the defm name to get the new name.
+ DefName = DefmPrefix + DefName;
+ }
+ }
+
+ Record *CurRec = new Record(DefName, DefmPrefixLoc, Records);
+
+ SubClassReference Ref;
+ Ref.RefLoc = DefmPrefixLoc;
+ Ref.Rec = DefProto;
+ AddSubClass(CurRec, Ref);
+
+ return CurRec;
+}
+
+bool TGParser::ResolveMulticlassDefArgs(MultiClass &MC,
+ Record *CurRec,
+ SMLoc DefmPrefixLoc,
+ SMLoc SubClassLoc,
+ const std::vector<std::string> &TArgs,
+ std::vector<Init *> &TemplateVals,
+ bool DeleteArgs) {
+ // Loop over all of the template arguments, setting them to the specified
+ // value or leaving them as the default if necessary.
+ for (unsigned i = 0, e = TArgs.size(); i != e; ++i) {
+ // Check if a value is specified for this temp-arg.
+ if (i < TemplateVals.size()) {
+ // Set it now.
+ if (SetValue(CurRec, DefmPrefixLoc, TArgs[i], std::vector<unsigned>(),
+ TemplateVals[i]))
+ return true;
+
+ // Resolve it next.
+ CurRec->resolveReferencesTo(CurRec->getValue(TArgs[i]));
+
+ if (DeleteArgs)
+ // Now remove it.
+ CurRec->removeValue(TArgs[i]);
+
+ } else if (!CurRec->getValue(TArgs[i])->getValue()->isComplete()) {
+ return Error(SubClassLoc, "value not specified for template argument #"+
+ utostr(i) + " (" + TArgs[i] + ") of multiclass '" +
+ MC.Rec.getName() + "'");
+ }
+ }
+ return false;
+}
+
+bool TGParser::ResolveMulticlassDef(MultiClass &MC,
+ Record *CurRec,
+ Record *DefProto,
+ SMLoc DefmPrefixLoc) {
+ // If the multiclass def is inside a 'let' expression, add to each def.
+ for (unsigned i = 0, e = LetStack.size(); i != e; ++i)
+ for (unsigned j = 0, e = LetStack[i].size(); j != e; ++j)
+ if (SetValue(CurRec, LetStack[i][j].Loc, LetStack[i][j].Name,
+ LetStack[i][j].Bits, LetStack[i][j].Value))
+ return Error(DefmPrefixLoc, "when instantiating this defm");
+
+ // Ensure redefinition doesn't happen.
+ if (Records.getDef(CurRec->getName()))
+ return Error(DefmPrefixLoc, "def '" + CurRec->getName() +
+ "' already defined, instantiating defm with subdef '" +
+ DefProto->getName() + "'");
+
+ // Don't create a top level definition for a defm inside multiclasses;
+ // instead, only update the prototypes and bind the template args
+ // to the newly created definition.
+ if (CurMultiClass) {
+ for (unsigned i = 0, e = CurMultiClass->DefPrototypes.size();
+ i != e; ++i)
+ if (CurMultiClass->DefPrototypes[i]->getName() == CurRec->getName())
+ return Error(DefmPrefixLoc, "defm '" + CurRec->getName() +
+ "' already defined in this multiclass!");
+ CurMultiClass->DefPrototypes.push_back(CurRec);
+
+ // Copy the template arguments for the multiclass into the new def.
+ const std::vector<std::string> &TA =
+ CurMultiClass->Rec.getTemplateArgs();
+
+ for (unsigned i = 0, e = TA.size(); i != e; ++i) {
+ const RecordVal *RV = CurMultiClass->Rec.getValue(TA[i]);
+ assert(RV && "Template arg doesn't exist?");
+ CurRec->addValue(*RV);
+ }
+ } else {
+ Records.addDef(CurRec);
+ }
+
+ return false;
+}
+
+/// ParseDefm - Parse the instantiation of a multiclass.
+///
+/// DefMInst ::= DEFM ID ':' DefmSubClassRef ';'
+///
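+/// A made-up example: "defm ADD : Pair<1>;" instantiates every def
+/// prototype in multiclass Pair with template argument 1, prefixing each
+/// resulting name with "ADD".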
+bool TGParser::ParseDefm(MultiClass *CurMultiClass) {
+ assert(Lex.getCode() == tgtok::Defm && "Unexpected token!");
+
+ std::string DefmPrefix;
+ if (Lex.Lex() == tgtok::Id) { // eat the defm.
+ DefmPrefix = Lex.getCurStrVal();
+ Lex.Lex(); // Eat the defm prefix.
+ }
+
+ SMLoc DefmPrefixLoc = Lex.getLoc();
+ if (Lex.getCode() != tgtok::colon)
+ return TokError("expected ':' after defm identifier");
+
+ // Keep track of the new generated record definitions.
+ std::vector<Record*> NewRecDefs;
+
+ // This record also inherits from a regular class (non-multiclass)?
+ bool InheritFromClass = false;
+
+ // eat the colon.
+ Lex.Lex();
+
+ SMLoc SubClassLoc = Lex.getLoc();
+ SubClassReference Ref = ParseSubClassReference(0, true);
+
+ while (1) {
+ if (Ref.Rec == 0) return true;
+
+ // To instantiate a multiclass, we need to first get the multiclass, then
+ // instantiate each def contained in the multiclass with the SubClassRef
+ // template parameters.
+ MultiClass *MC = MultiClasses[Ref.Rec->getName()];
+ assert(MC && "Didn't lookup multiclass correctly?");
+ std::vector<Init*> &TemplateVals = Ref.TemplateArgs;
+
+ // Verify that the correct number of template arguments were specified.
+ const std::vector<std::string> &TArgs = MC->Rec.getTemplateArgs();
+ if (TArgs.size() < TemplateVals.size())
+ return Error(SubClassLoc,
+ "more template args specified than multiclass expects");
+
+ // Loop over all the defs in the multiclass, instantiating each one.
+ for (unsigned i = 0, e = MC->DefPrototypes.size(); i != e; ++i) {
+ Record *DefProto = MC->DefPrototypes[i];
+
+ Record *CurRec = InstantiateMulticlassDef(*MC, DefProto, DefmPrefix,
+ DefmPrefixLoc);
+
+ if (ResolveMulticlassDefArgs(*MC, CurRec, DefmPrefixLoc, SubClassLoc,
+ TArgs, TemplateVals, true/*Delete args*/))
+ return Error(SubClassLoc, "could not instantiate def");
+
+ if (ResolveMulticlassDef(*MC, CurRec, DefProto, DefmPrefixLoc))
+ return Error(SubClassLoc, "could not instantiate def");
+
+ NewRecDefs.push_back(CurRec);
+ }
+
+
+ if (Lex.getCode() != tgtok::comma) break;
+ Lex.Lex(); // eat ','.
+
+ SubClassLoc = Lex.getLoc();
+
+ // A defm can inherit from regular classes (non-multiclass) as
+ // long as they come at the end of the inheritance list.
+ InheritFromClass = (Records.getClass(Lex.getCurStrVal()) != 0);
+
+ if (InheritFromClass)
+ break;
+
+ Ref = ParseSubClassReference(0, true);
+ }
+
+ if (InheritFromClass) {
+ // Process all the classes to inherit as if they were part of a
+ // regular 'def' and inherit all record values.
+ SubClassReference SubClass = ParseSubClassReference(0, false);
+ while (1) {
+ // Check for error.
+ if (SubClass.Rec == 0) return true;
+
+ // Get the expanded definition prototypes and teach them about
+ // the record values provided by the class being inherited from.
+ for (unsigned i = 0, e = NewRecDefs.size(); i != e; ++i) {
+ Record *CurRec = NewRecDefs[i];
+
+ // Add it.
+ if (AddSubClass(CurRec, SubClass))
+ return true;
+
+ // Process any variables on the let stack.
+ for (unsigned i = 0, e = LetStack.size(); i != e; ++i)
+ for (unsigned j = 0, e = LetStack[i].size(); j != e; ++j)
+ if (SetValue(CurRec, LetStack[i][j].Loc, LetStack[i][j].Name,
+ LetStack[i][j].Bits, LetStack[i][j].Value))
+ return true;
+ }
+
+ if (Lex.getCode() != tgtok::comma) break;
+ Lex.Lex(); // eat ','.
+ SubClass = ParseSubClassReference(0, false);
+ }
+ }
+
+ if (!CurMultiClass)
+ for (unsigned i = 0, e = NewRecDefs.size(); i != e; ++i)
+ // See Record::setName(). This resolve step will see any new
+ // name for the def that might have been created when resolving
+ // inheritance, values and arguments above.
+ NewRecDefs[i]->resolveReferences();
+
+ if (Lex.getCode() != tgtok::semi)
+ return TokError("expected ';' at end of defm");
+ Lex.Lex();
+
+ return false;
+}
+
+/// ParseObject
+/// Object ::= ClassInst
+/// Object ::= DefInst
+/// Object ::= MultiClassInst
+/// Object ::= DefMInst
+/// Object ::= LETCommand '{' ObjectList '}'
+/// Object ::= LETCommand Object
+bool TGParser::ParseObject(MultiClass *MC) {
+ switch (Lex.getCode()) {
+ default:
+ return TokError("Expected class, def, defm, multiclass or let definition");
+ case tgtok::Let: return ParseTopLevelLet(MC);
+ case tgtok::Def: return ParseDef(MC);
+ case tgtok::Defm: return ParseDefm(MC);
+ case tgtok::Class: return ParseClass();
+ case tgtok::MultiClass: return ParseMultiClass();
+ }
+}
+
+/// ParseObjectList
+/// ObjectList ::= Object*
+bool TGParser::ParseObjectList(MultiClass *MC) {
+ while (isObjectStart(Lex.getCode())) {
+ if (ParseObject(MC))
+ return true;
+ }
+ return false;
+}
+
+bool TGParser::ParseFile() {
+ Lex.Lex(); // Prime the lexer.
+ if (ParseObjectList()) return true;
+
+ // If we have unread input at the end of the file, report it.
+ if (Lex.getCode() == tgtok::Eof)
+ return false;
+
+ return TokError("Unexpected input at top level");
+}
+
diff --git a/lib/TableGen/TGParser.h b/lib/TableGen/TGParser.h
new file mode 100644
index 0000000..db8a620
--- /dev/null
+++ b/lib/TableGen/TGParser.h
@@ -0,0 +1,137 @@
+//===- TGParser.h - Parser for TableGen Files -------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This class represents the Parser for tablegen files.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef TGPARSER_H
+#define TGPARSER_H
+
+#include "TGLexer.h"
+#include "llvm/TableGen/Error.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/Support/SourceMgr.h"
+#include <map>
+
+namespace llvm {
+ class Record;
+ class RecordVal;
+ class RecordKeeper;
+ class RecTy;
+ class Init;
+ struct MultiClass;
+ struct SubClassReference;
+ struct SubMultiClassReference;
+
+ struct LetRecord {
+ std::string Name;
+ std::vector<unsigned> Bits;
+ Init *Value;
+ SMLoc Loc;
+ LetRecord(const std::string &N, const std::vector<unsigned> &B, Init *V,
+ SMLoc L)
+ : Name(N), Bits(B), Value(V), Loc(L) {
+ }
+ };
+
+class TGParser {
+ TGLexer Lex;
+ std::vector<std::vector<LetRecord> > LetStack;
+ std::map<std::string, MultiClass*> MultiClasses;
+
+ /// CurMultiClass - If we are parsing a 'multiclass' definition, this is
+ /// the multiclass currently being parsed; it is null otherwise.
+ MultiClass *CurMultiClass;
+
+ // Record tracker
+ RecordKeeper &Records;
+public:
+ TGParser(SourceMgr &SrcMgr, RecordKeeper &records) :
+ Lex(SrcMgr), CurMultiClass(0), Records(records) {}
+
+ /// ParseFile - Main entrypoint for parsing a tblgen file. These parser
+ /// routines return true on error, or false on success.
+ bool ParseFile();
+
+ bool Error(SMLoc L, const Twine &Msg) const {
+ PrintError(L, Msg);
+ return true;
+ }
+ bool TokError(const Twine &Msg) const {
+ return Error(Lex.getLoc(), Msg);
+ }
+ const std::vector<std::string> &getDependencies() const {
+ return Lex.getDependencies();
+ }
+private: // Semantic analysis methods.
+ bool AddValue(Record *TheRec, SMLoc Loc, const RecordVal &RV);
+ bool SetValue(Record *TheRec, SMLoc Loc, const std::string &ValName,
+ const std::vector<unsigned> &BitList, Init *V);
+ bool AddSubClass(Record *Rec, SubClassReference &SubClass);
+ bool AddSubMultiClass(MultiClass *CurMC,
+ SubMultiClassReference &SubMultiClass);
+
+private: // Parser methods.
+ bool ParseObjectList(MultiClass *MC = 0);
+ bool ParseObject(MultiClass *MC);
+ bool ParseClass();
+ bool ParseMultiClass();
+ Record *InstantiateMulticlassDef(MultiClass &MC,
+ Record *DefProto,
+ const std::string &DefmPrefix,
+ SMLoc DefmPrefixLoc);
+ bool ResolveMulticlassDefArgs(MultiClass &MC,
+ Record *CurRec,
+ SMLoc DefmPrefixLoc,
+ SMLoc SubClassLoc,
+ const std::vector<std::string> &TArgs,
+ std::vector<Init *> &TemplateVals,
+ bool DeleteArgs);
+ bool ResolveMulticlassDef(MultiClass &MC,
+ Record *CurRec,
+ Record *DefProto,
+ SMLoc DefmPrefixLoc);
+ bool ParseDefm(MultiClass *CurMultiClass);
+ bool ParseDef(MultiClass *CurMultiClass);
+ bool ParseTopLevelLet(MultiClass *CurMultiClass);
+ std::vector<LetRecord> ParseLetList();
+
+ bool ParseObjectBody(Record *CurRec);
+ bool ParseBody(Record *CurRec);
+ bool ParseBodyItem(Record *CurRec);
+
+ bool ParseTemplateArgList(Record *CurRec);
+ std::string ParseDeclaration(Record *CurRec, bool ParsingTemplateArgs);
+
+ SubClassReference ParseSubClassReference(Record *CurRec, bool isDefm);
+ SubMultiClassReference ParseSubMultiClassReference(MultiClass *CurMC);
+
+ Init *ParseIDValue(Record *CurRec);
+ Init *ParseIDValue(Record *CurRec, const std::string &Name, SMLoc NameLoc);
+ Init *ParseSimpleValue(Record *CurRec, RecTy *ItemType = 0);
+ Init *ParseValue(Record *CurRec, RecTy *ItemType = 0);
+ std::vector<Init*> ParseValueList(Record *CurRec, Record *ArgsRec = 0,
+ RecTy *EltTy = 0);
+ std::vector<std::pair<llvm::Init*, std::string> > ParseDagArgList(Record *);
+ bool ParseOptionalRangeList(std::vector<unsigned> &Ranges);
+ bool ParseOptionalBitList(std::vector<unsigned> &Ranges);
+ std::vector<unsigned> ParseRangeList();
+ bool ParseRangePiece(std::vector<unsigned> &Ranges);
+ RecTy *ParseType();
+ Init *ParseOperation(Record *CurRec);
+ RecTy *ParseOperatorType();
+ std::string ParseObjectName();
+ Record *ParseClassID();
+ MultiClass *ParseMultiClassID();
+ Record *ParseDefmID();
+};
+
+} // end namespace llvm
+
+#endif
diff --git a/lib/TableGen/TableGenBackend.cpp b/lib/TableGen/TableGenBackend.cpp
new file mode 100644
index 0000000..29588db
--- /dev/null
+++ b/lib/TableGen/TableGenBackend.cpp
@@ -0,0 +1,25 @@
+//===- TableGenBackend.cpp - Base class for TableGen Backends ---*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides useful services for TableGen backends...
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/TableGen/TableGenBackend.h"
+#include "llvm/TableGen/Record.h"
+using namespace llvm;
+
+void TableGenBackend::EmitSourceFileHeader(const std::string &Desc,
+ raw_ostream &OS) const {
+ OS << "//===- TableGen'erated file -------------------------------------*-"
+ " C++ -*-===//\n//\n// " << Desc << "\n//\n// Automatically generate"
+ "d file, do not edit!\n//\n//===------------------------------------"
+ "----------------------------------===//\n\n";
+}
+
diff --git a/lib/Target/ARM/ARM.h b/lib/Target/ARM/ARM.h
index 08dc340..16d0da3 100644
--- a/lib/Target/ARM/ARM.h
+++ b/lib/Target/ARM/ARM.h
@@ -15,7 +15,7 @@
#ifndef TARGET_ARM_H
#define TARGET_ARM_H
-#include "ARMBaseInfo.h"
+#include "MCTargetDesc/ARMBaseInfo.h"
#include "MCTargetDesc/ARMMCTargetDesc.h"
#include "llvm/Support/DataTypes.h"
#include "llvm/Support/ErrorHandling.h"
@@ -29,19 +29,7 @@ class ARMBaseTargetMachine;
class FunctionPass;
class JITCodeEmitter;
class MachineInstr;
-class MCCodeEmitter;
class MCInst;
-class MCInstrInfo;
-class MCObjectWriter;
-class MCSubtargetInfo;
-class TargetAsmBackend;
-class formatted_raw_ostream;
-
-MCCodeEmitter *createARMMCCodeEmitter(const MCInstrInfo &MCII,
- const MCSubtargetInfo &STI,
- MCContext &Ctx);
-
-TargetAsmBackend *createARMAsmBackend(const Target &, const std::string &);
FunctionPass *createARMISelDag(ARMBaseTargetMachine &TM,
CodeGenOpt::Level OptLevel);
@@ -53,7 +41,6 @@ FunctionPass *createARMLoadStoreOptimizationPass(bool PreAlloc = false);
FunctionPass *createARMExpandPseudoPass();
FunctionPass *createARMGlobalMergePass(const TargetLowering* tli);
FunctionPass *createARMConstantIslandPass();
-FunctionPass *createNEONMoveFixPass();
FunctionPass *createMLxExpansionPass();
FunctionPass *createThumb2ITBlockPass();
FunctionPass *createThumb2SizeReductionPass();
@@ -61,12 +48,6 @@ FunctionPass *createThumb2SizeReductionPass();
void LowerARMMachineInstrToMCInst(const MachineInstr *MI, MCInst &OutMI,
ARMAsmPrinter &AP);
-/// createARMMachObjectWriter - Construct an ARM Mach-O object writer.
-MCObjectWriter *createARMMachObjectWriter(raw_ostream &OS,
- bool Is64Bit,
- uint32_t CPUType,
- uint32_t CPUSubtype);
-
} // end namespace llvm;
#endif
diff --git a/lib/Target/ARM/ARM.td b/lib/Target/ARM/ARM.td
index cf333cc..5c727ad 100644
--- a/lib/Target/ARM/ARM.td
+++ b/lib/Target/ARM/ARM.td
@@ -23,6 +23,9 @@ include "llvm/Target/Target.td"
def ModeThumb : SubtargetFeature<"thumb-mode", "InThumbMode", "true",
"Thumb mode">;
+def ModeNaCl : SubtargetFeature<"nacl-mode", "InNaClMode", "true",
+ "Native client mode">;
+
//===----------------------------------------------------------------------===//
// ARM Subtarget features.
//
@@ -85,12 +88,16 @@ def FeatureAvoidPartialCPSR : SubtargetFeature<"avoid-partial-cpsr",
/// Some M architectures don't have the DSP extension (v7E-M vs. v7M)
def FeatureDSPThumb2 : SubtargetFeature<"t2dsp", "Thumb2DSP", "true",
- "Supports v7 DSP instructions in Thumb2.">;
+ "Supports v7 DSP instructions in Thumb2">;
// Multiprocessing extension.
def FeatureMP : SubtargetFeature<"mp", "HasMPExtension", "true",
"Supports Multiprocessing extension">;
+// M-series ISA?
+def FeatureMClass : SubtargetFeature<"mclass", "IsMClass", "true",
+ "Is microcontroller profile ('M' series)">;
+
// ARM ISAs.
def HasV4TOps : SubtargetFeature<"v4t", "HasV4TOps", "true",
"Support ARM v4T instructions">;
@@ -105,7 +112,7 @@ def HasV6Ops : SubtargetFeature<"v6", "HasV6Ops", "true",
[HasV5TEOps]>;
def HasV6T2Ops : SubtargetFeature<"v6t2", "HasV6T2Ops", "true",
"Support ARM v6t2 instructions",
- [HasV6Ops, FeatureThumb2, FeatureDSPThumb2]>;
+ [HasV6Ops, FeatureThumb2]>;
def HasV7Ops : SubtargetFeature<"v7", "HasV7Ops", "true",
"Support ARM v7 instructions",
[HasV6T2Ops]>;
@@ -182,12 +189,14 @@ def : Processor<"mpcore", ARMV6Itineraries, [HasV6Ops, FeatureVFP2,
// V6M Processors.
def : Processor<"cortex-m0", ARMV6Itineraries, [HasV6Ops, FeatureNoARM,
- FeatureDB]>;
+ FeatureDB, FeatureMClass]>;
// V6T2 Processors.
-def : Processor<"arm1156t2-s", ARMV6Itineraries, [HasV6T2Ops]>;
+def : Processor<"arm1156t2-s", ARMV6Itineraries, [HasV6T2Ops,
+ FeatureDSPThumb2]>;
def : Processor<"arm1156t2f-s", ARMV6Itineraries, [HasV6T2Ops, FeatureVFP2,
- FeatureHasSlowFPVMLx]>;
+ FeatureHasSlowFPVMLx,
+ FeatureDSPThumb2]>;
// V7a Processors.
def : Processor<"cortex-a8", CortexA8Itineraries,
@@ -203,14 +212,14 @@ def : Processor<"cortex-a9-mp", CortexA9Itineraries,
// V7M Processors.
def : ProcNoItin<"cortex-m3", [HasV7Ops,
FeatureThumb2, FeatureNoARM, FeatureDB,
- FeatureHWDiv]>;
+ FeatureHWDiv, FeatureMClass]>;
// V7EM Processors.
def : ProcNoItin<"cortex-m4", [HasV7Ops,
FeatureThumb2, FeatureNoARM, FeatureDB,
FeatureHWDiv, FeatureDSPThumb2,
FeatureT2XtPk, FeatureVFP2,
- FeatureVFPOnlySP]>;
+ FeatureVFPOnlySP, FeatureMClass]>;
//===----------------------------------------------------------------------===//
// Register File Description
diff --git a/lib/Target/ARM/ARMAsmPrinter.cpp b/lib/Target/ARM/ARMAsmPrinter.cpp
index dbc3ee4..ea3319f 100644
--- a/lib/Target/ARM/ARMAsmPrinter.cpp
+++ b/lib/Target/ARM/ARMAsmPrinter.cpp
@@ -15,15 +15,15 @@
#define DEBUG_TYPE "asm-printer"
#include "ARM.h"
#include "ARMAsmPrinter.h"
-#include "ARMAddressingModes.h"
#include "ARMBuildAttrs.h"
#include "ARMBaseRegisterInfo.h"
#include "ARMConstantPoolValue.h"
#include "ARMMachineFunctionInfo.h"
-#include "ARMMCExpr.h"
#include "ARMTargetMachine.h"
#include "ARMTargetObjectFile.h"
#include "InstPrinter/ARMInstPrinter.h"
+#include "MCTargetDesc/ARMAddressingModes.h"
+#include "MCTargetDesc/ARMMCExpr.h"
#include "llvm/Analysis/DebugInfo.h"
#include "llvm/Constants.h"
#include "llvm/Module.h"
@@ -45,13 +45,13 @@
#include "llvm/Target/TargetData.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
-#include "llvm/Target/TargetRegistry.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/raw_ostream.h"
#include <cctype>
using namespace llvm;
@@ -92,7 +92,7 @@ namespace {
case ARMBuildAttrs::Advanced_SIMD_arch:
case ARMBuildAttrs::VFP_arch:
Streamer.EmitRawText(StringRef("\t.fpu ") + LowercaseString(String));
- break;
+ break;
default: assert(0 && "Unsupported Text attribute in ASM Mode"); break;
}
}
@@ -100,13 +100,41 @@ namespace {
};
class ObjectAttributeEmitter : public AttributeEmitter {
+ // This structure holds all attributes, accounting for
+ // their string/numeric value, so we can later emit them
+ // in declaration order, keeping all in the same vector.
+ struct AttributeItemType {
+ enum {
+ HiddenAttribute = 0,
+ NumericAttribute,
+ TextAttribute
+ } Type;
+ unsigned Tag;
+ unsigned IntValue;
+ StringRef StringValue;
+ } AttributeItem;
+
MCObjectStreamer &Streamer;
StringRef CurrentVendor;
- SmallString<64> Contents;
+ SmallVector<AttributeItemType, 64> Contents;
+
+ // Account for the ULEB/String size of each item,
+ // not just the number of items
+ size_t ContentsSize;
+ // FIXME: this should be in a more generic place, but
+ // getULEBSize() is in MCAsmInfo and will be moved to MCDwarf
+ size_t getULEBSize(int Value) {
+ size_t Size = 0;
+ do {
+ Value >>= 7;
+ Size += sizeof(int8_t); // Is this really necessary?
+ } while (Value);
+ return Size;
+ }
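+ // For example, getULEBSize(127) == 1 (127 fits in one 7-bit group) while
+ // getULEBSize(128) == 2 (128 >> 7 == 1 forces a second group); negative
+ // values are not expected here.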
public:
ObjectAttributeEmitter(MCObjectStreamer &Streamer_) :
- Streamer(Streamer_), CurrentVendor("") { }
+ Streamer(Streamer_), CurrentVendor(""), ContentsSize(0) { }
void MaybeSwitchVendor(StringRef Vendor) {
assert(!Vendor.empty() && "Vendor cannot be empty.");
@@ -124,20 +152,32 @@ namespace {
}
void EmitAttribute(unsigned Attribute, unsigned Value) {
- // FIXME: should be ULEB
- Contents += Attribute;
- Contents += Value;
+ AttributeItemType attr = {
+ AttributeItemType::NumericAttribute,
+ Attribute,
+ Value,
+ StringRef("")
+ };
+ ContentsSize += getULEBSize(Attribute);
+ ContentsSize += getULEBSize(Value);
+ Contents.push_back(attr);
}
void EmitTextAttribute(unsigned Attribute, StringRef String) {
- Contents += Attribute;
- Contents += UppercaseString(String);
- Contents += 0;
+ AttributeItemType attr = {
+ AttributeItemType::TextAttribute,
+ Attribute,
+ 0,
+ String
+ };
+ ContentsSize += getULEBSize(Attribute);
+ // String + \0
+ ContentsSize += String.size()+1;
+
+ Contents.push_back(attr);
}
void Finish() {
- const size_t ContentsSize = Contents.size();
-
// Vendor size + Vendor name + '\0'
const size_t VendorHeaderSize = 4 + CurrentVendor.size() + 1;
@@ -151,7 +191,23 @@ namespace {
Streamer.EmitIntValue(ARMBuildAttrs::File, 1);
Streamer.EmitIntValue(TagHeaderSize + ContentsSize, 4);
- Streamer.EmitBytes(Contents, 0);
+ // Size should have been accounted for already; now
+ // emit each field as its type (ULEB or String).
+ for (unsigned int i=0; i<Contents.size(); ++i) {
+ AttributeItemType item = Contents[i];
+ Streamer.EmitULEB128IntValue(item.Tag, 0);
+ switch (item.Type) {
+ case AttributeItemType::NumericAttribute:
+ Streamer.EmitULEB128IntValue(item.IntValue, 0);
+ break;
+ case AttributeItemType::TextAttribute:
+ Streamer.EmitBytes(UppercaseString(item.StringValue), 0);
+ Streamer.EmitIntValue(0, 1); // '\0'
+ break;
+ default:
+ assert(0 && "Invalid attribute type");
+ }
+ }
Contents.clear();
}
@@ -184,7 +240,7 @@ void ARMAsmPrinter::EmitDwarfRegOp(const MachineLocation &MLoc) const {
// S registers are described as bit-pieces of a register
// S[2x] = DW_OP_regx(256 + (x>>1)) DW_OP_bit_piece(32, 0)
// S[2x+1] = DW_OP_regx(256 + (x>>1)) DW_OP_bit_piece(32, 32)
-
+
unsigned SReg = Reg - ARM::S0;
bool odd = SReg & 0x1;
unsigned Rx = 256 + (SReg >> 1);
@@ -209,12 +265,13 @@ void ARMAsmPrinter::EmitDwarfRegOp(const MachineLocation &MLoc) const {
} else if (Reg >= ARM::Q0 && Reg <= ARM::Q15) {
assert(ARM::Q0 + 15 == ARM::Q15 && "Unexpected ARM Q register numbering");
// Q registers Q0-Q15 are described by composing two D registers together.
- // Qx = DW_OP_regx(256+2x) DW_OP_piece(8) DW_OP_regx(256+2x+1) DW_OP_piece(8)
+ // Qx = DW_OP_regx(256+2x) DW_OP_piece(8) DW_OP_regx(256+2x+1)
+ // DW_OP_piece(8)
unsigned QReg = Reg - ARM::Q0;
unsigned D1 = 256 + 2 * QReg;
unsigned D2 = D1 + 1;
-
+
OutStreamer.AddComment("DW_OP_regx for Q register: D1");
EmitInt8(dwarf::DW_OP_regx);
EmitULEB128(D1);
@@ -233,6 +290,8 @@ void ARMAsmPrinter::EmitDwarfRegOp(const MachineLocation &MLoc) const {
}
void ARMAsmPrinter::EmitFunctionEntryLabel() {
+ OutStreamer.ForceCodeRegion();
+
if (AFI->isThumbFunction()) {
OutStreamer.EmitAssemblerFlag(MCAF_Code16);
OutStreamer.EmitThumbFunc(CurrentFnSym);
@@ -395,16 +454,16 @@ bool ARMAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNum,
// This takes advantage of the 2 operand-ness of ldm/stm and that we've
// already got the operands in registers that are operands to the
// inline asm statement.
-
+
O << "{" << ARMInstPrinter::getRegisterName(RegBegin);
-
+
// FIXME: The register allocator not only may not have given us the
// registers in sequence, but may not be in ascending registers. This
// will require changes in the register allocator that'll need to be
// propagated down here if the operands change.
unsigned RegOps = OpNum + 1;
while (MI->getOperand(RegOps).isReg()) {
- O << ", "
+ O << ", "
<< ARMInstPrinter::getRegisterName(MI->getOperand(RegOps).getReg());
RegOps++;
}
@@ -413,14 +472,34 @@ bool ARMAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNum,
return false;
}
+ case 'R': // The most significant register of a pair.
+ case 'Q': { // The least significant register of a pair.
+ if (OpNum == 0)
+ return true;
+ const MachineOperand &FlagsOP = MI->getOperand(OpNum - 1);
+ if (!FlagsOP.isImm())
+ return true;
+ unsigned Flags = FlagsOP.getImm();
+ unsigned NumVals = InlineAsm::getNumOperandRegisters(Flags);
+ if (NumVals != 2)
+ return true;
+ unsigned RegOp = ExtraCode[0] == 'Q' ? OpNum : OpNum + 1;
+ if (RegOp >= MI->getNumOperands())
+ return true;
+ const MachineOperand &MO = MI->getOperand(RegOp);
+ if (!MO.isReg())
+ return true;
+ unsigned Reg = MO.getReg();
+ O << ARMInstPrinter::getRegisterName(Reg);
+ return false;
+ }
+
// These modifiers are not yet supported.
case 'p': // The high single-precision register of a VFP double-precision
// register.
case 'e': // The low doubleword register of a NEON quad register.
case 'f': // The high doubleword register of a NEON quad register.
case 'h': // A range of VFP/NEON registers suitable for VLD1/VST1.
- case 'Q': // The least significant register of a pair.
- case 'R': // The most significant register of a pair.
case 'H': // The highest-numbered register of a pair.
return true;
}
@@ -437,7 +516,7 @@ bool ARMAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI,
// Does this asm operand have a single letter operand modifier?
if (ExtraCode && ExtraCode[0]) {
if (ExtraCode[1] != 0) return true; // Unknown modifier.
-
+
switch (ExtraCode[0]) {
case 'A': // A memory operand for a VLD1/VST1 instruction.
default: return true; // Unknown modifier.
@@ -448,7 +527,7 @@ bool ARMAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI,
return false;
}
}
-
+
const MachineOperand &MO = MI->getOperand(OpNum);
assert(MO.isReg() && "unexpected inline asm memory operand");
O << "[" << ARMInstPrinter::getRegisterName(MO.getReg()) << "]";
@@ -772,13 +851,19 @@ EmitMachineConstantPoolValue(MachineConstantPoolValue *MCPV) {
OS << MAI->getPrivateGlobalPrefix() << "_LSDA_" << getFunctionNumber();
MCSym = OutContext.GetOrCreateSymbol(OS.str());
} else if (ACPV->isBlockAddress()) {
- MCSym = GetBlockAddressSymbol(ACPV->getBlockAddress());
+ const BlockAddress *BA =
+ cast<ARMConstantPoolConstant>(ACPV)->getBlockAddress();
+ MCSym = GetBlockAddressSymbol(BA);
} else if (ACPV->isGlobalValue()) {
- const GlobalValue *GV = ACPV->getGV();
+ const GlobalValue *GV = cast<ARMConstantPoolConstant>(ACPV)->getGV();
MCSym = GetARMGVSymbol(GV);
+ } else if (ACPV->isMachineBasicBlock()) {
+ const MachineBasicBlock *MBB = cast<ARMConstantPoolMBB>(ACPV)->getMBB();
+ MCSym = MBB->getSymbol();
} else {
assert(ACPV->isExtSymbol() && "unrecognized constant pool value");
- MCSym = GetExternalSymbolSymbol(ACPV->getSymbol());
+ const char *Sym = cast<ARMConstantPoolSymbol>(ACPV)->getSymbol();
+ MCSym = GetExternalSymbolSymbol(Sym);
}
// Create an MCSymbol for the reference.
@@ -822,6 +907,9 @@ void ARMAsmPrinter::EmitJumpTable(const MachineInstr *MI) {
const MachineOperand &MO2 = MI->getOperand(OpNum+1); // Unique Id
unsigned JTI = MO1.getIndex();
+ // Tag the jump table appropriately for precise disassembly.
+ OutStreamer.EmitJumpTable32Region();
+
// Emit a label for the jump table.
MCSymbol *JTISymbol = GetARMJTIPICJumpTableLabel2(JTI, MO2.getImm());
OutStreamer.EmitLabel(JTISymbol);
@@ -847,6 +935,11 @@ void ARMAsmPrinter::EmitJumpTable(const MachineInstr *MI) {
Expr = MCBinaryExpr::CreateSub(Expr, MCSymbolRefExpr::Create(JTISymbol,
OutContext),
OutContext);
+ // If we're generating a table of Thumb addresses in the static
+ // relocation model, we need to add one to keep interworking correct.
+ else if (AFI->isThumbFunction())
+ Expr = MCBinaryExpr::CreateAdd(Expr, MCConstantExpr::Create(1,OutContext),
+ OutContext);
OutStreamer.EmitValue(Expr, 4);
}
}
@@ -859,6 +952,14 @@ void ARMAsmPrinter::EmitJump2Table(const MachineInstr *MI) {
unsigned JTI = MO1.getIndex();
// Emit a label for the jump table.
+ if (MI->getOpcode() == ARM::t2TBB_JT) {
+ OutStreamer.EmitJumpTable8Region();
+ } else if (MI->getOpcode() == ARM::t2TBH_JT) {
+ OutStreamer.EmitJumpTable16Region();
+ } else {
+ OutStreamer.EmitJumpTable32Region();
+ }
+
MCSymbol *JTISymbol = GetARMJTIPICJumpTableLabel2(JTI, MO2.getImm());
OutStreamer.EmitLabel(JTISymbol);
@@ -881,6 +982,8 @@ void ARMAsmPrinter::EmitJump2Table(const MachineInstr *MI) {
MCInst BrInst;
BrInst.setOpcode(ARM::t2B);
BrInst.addOperand(MCOperand::CreateExpr(MBBSymbolExpr));
+ BrInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
+ BrInst.addOperand(MCOperand::CreateReg(0));
OutStreamer.EmitInstruction(BrInst);
continue;
}
@@ -994,7 +1097,8 @@ void ARMAsmPrinter::EmitUnwindingInstruction(const MachineInstr *MI) {
i != NumOps; ++i)
RegList.push_back(MI->getOperand(i).getReg());
break;
- case ARM::STR_PRE:
+ case ARM::STR_PRE_IMM:
+ case ARM::STR_PRE_REG:
assert(MI->getOperand(2).getReg() == ARM::SP &&
"Only stack pointer as a source reg is supported");
RegList.push_back(SrcReg);
@@ -1074,10 +1178,20 @@ extern cl::opt<bool> EnableARMEHABI;
#include "ARMGenMCPseudoLowering.inc"
void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) {
+ if (MI->getOpcode() != ARM::CONSTPOOL_ENTRY)
+ OutStreamer.EmitCodeRegion();
+
+ // Emit unwinding stuff for frame-related instructions
+ if (EnableARMEHABI && MI->getFlag(MachineInstr::FrameSetup))
+ EmitUnwindingInstruction(MI);
+
// Do any auto-generated pseudo lowerings.
if (emitPseudoExpansionLowering(OutStreamer, MI))
return;
+ assert(!convertAddSubFlagsOpcode(MI->getOpcode()) &&
+ "Pseudo flag setting opcode should be expanded early");
+
// Check for manual lowerings.
unsigned Opc = MI->getOpcode();
switch (Opc) {
@@ -1372,6 +1486,10 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) {
unsigned CPIdx = (unsigned)MI->getOperand(1).getIndex();
EmitAlignment(2);
+
+ // Mark the constant pool entry as data if we're not already in a data
+ // region.
+ OutStreamer.EmitDataRegion();
OutStreamer.EmitLabel(GetCPISymbol(LabelId));
const MachineConstantPoolEntry &MCPE = MCP->getConstants()[CPIdx];
@@ -1379,7 +1497,6 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) {
EmitMachineConstantPoolValue(MCPE.Val.MachineCPVal);
else
EmitGlobalConstant(MCPE.Val.ConstVal);
-
return;
}
case ARM::t2BR_JT: {
@@ -1590,6 +1707,8 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) {
MCInst TmpInst;
TmpInst.setOpcode(ARM::tB);
TmpInst.addOperand(MCOperand::CreateExpr(SymbolExpr));
+ TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
+ TmpInst.addOperand(MCOperand::CreateReg(0));
OutStreamer.EmitInstruction(TmpInst);
}
{
@@ -1804,10 +1923,6 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) {
MCInst TmpInst;
LowerARMMachineInstrToMCInst(MI, TmpInst, *this);
- // Emit unwinding stuff for frame-related instructions
- if (EnableARMEHABI && MI->getFlag(MachineInstr::FrameSetup))
- EmitUnwindingInstruction(MI);
-
OutStreamer.EmitInstruction(TmpInst);
}
@@ -1815,20 +1930,9 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) {
// Target Registry Stuff
//===----------------------------------------------------------------------===//
-static MCInstPrinter *createARMMCInstPrinter(const Target &T,
- unsigned SyntaxVariant,
- const MCAsmInfo &MAI) {
- if (SyntaxVariant == 0)
- return new ARMInstPrinter(MAI);
- return 0;
-}
-
// Force static initialization.
extern "C" void LLVMInitializeARMAsmPrinter() {
RegisterAsmPrinter<ARMAsmPrinter> X(TheARMTarget);
RegisterAsmPrinter<ARMAsmPrinter> Y(TheThumbTarget);
-
- TargetRegistry::RegisterMCInstPrinter(TheARMTarget, createARMMCInstPrinter);
- TargetRegistry::RegisterMCInstPrinter(TheThumbTarget, createARMMCInstPrinter);
}
diff --git a/lib/Target/ARM/ARMBaseInstrInfo.cpp b/lib/Target/ARM/ARMBaseInstrInfo.cpp
index 649bd7d..408edfc 100644
--- a/lib/Target/ARM/ARMBaseInstrInfo.cpp
+++ b/lib/Target/ARM/ARMBaseInstrInfo.cpp
@@ -13,11 +13,11 @@
#include "ARMBaseInstrInfo.h"
#include "ARM.h"
-#include "ARMAddressingModes.h"
#include "ARMConstantPoolValue.h"
#include "ARMHazardRecognizer.h"
#include "ARMMachineFunctionInfo.h"
#include "ARMRegisterInfo.h"
+#include "MCTargetDesc/ARMAddressingModes.h"
#include "llvm/Constants.h"
#include "llvm/Function.h"
#include "llvm/GlobalValue.h"
@@ -29,6 +29,7 @@
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/PseudoSourceValue.h"
+#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/Support/BranchProbability.h"
#include "llvm/Support/CommandLine.h"
@@ -45,6 +46,10 @@ static cl::opt<bool>
EnableARM3Addr("enable-arm-3-addr-conv", cl::Hidden,
cl::desc("Enable ARM 2-addr to 3-addr conv"));
+static cl::opt<bool>
+WidenVMOVS("widen-vmovs", cl::Hidden,
+ cl::desc("Widen ARM vmovs to vmovd when possible"));
+
/// ARM_MLxEntry - Record information about MLA / MLS instructions.
struct ARM_MLxEntry {
unsigned MLxOpc; // MLA / MLS opcode
@@ -171,7 +176,7 @@ ARMBaseInstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
ARM_AM::ShiftOpc ShOpc = ARM_AM::getAM2ShiftOpc(OffImm);
unsigned SOOpc = ARM_AM::getSORegOpc(ShOpc, Amt);
UpdateMI = BuildMI(MF, MI->getDebugLoc(),
- get(isSub ? ARM::SUBrs : ARM::ADDrs), WBReg)
+ get(isSub ? ARM::SUBrsi : ARM::ADDrsi), WBReg)
.addReg(BaseReg).addReg(OffReg).addReg(0).addImm(SOOpc)
.addImm(Pred).addReg(0).addReg(0);
} else
@@ -399,6 +404,7 @@ ARMBaseInstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
? ARM::B : (AFI->isThumb2Function() ? ARM::t2B : ARM::tB);
int BccOpc = !AFI->isThumbFunction()
? ARM::Bcc : (AFI->isThumb2Function() ? ARM::t2Bcc : ARM::tBcc);
+ bool isThumb = AFI->isThumbFunction() || AFI->isThumb2Function();
// Shouldn't be a fall through.
assert(TBB && "InsertBranch must not be told to insert a fallthrough");
@@ -406,9 +412,12 @@ ARMBaseInstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
"ARM branch conditions have two components!");
if (FBB == 0) {
- if (Cond.empty()) // Unconditional branch?
- BuildMI(&MBB, DL, get(BOpc)).addMBB(TBB);
- else
+ if (Cond.empty()) { // Unconditional branch?
+ if (isThumb)
+ BuildMI(&MBB, DL, get(BOpc)).addMBB(TBB).addImm(ARMCC::AL).addReg(0);
+ else
+ BuildMI(&MBB, DL, get(BOpc)).addMBB(TBB);
+ } else
BuildMI(&MBB, DL, get(BccOpc)).addMBB(TBB)
.addImm(Cond[0].getImm()).addReg(Cond[1].getReg());
return 1;
@@ -417,7 +426,10 @@ ARMBaseInstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
// Two-way conditional branch.
BuildMI(&MBB, DL, get(BccOpc)).addMBB(TBB)
.addImm(Cond[0].getImm()).addReg(Cond[1].getReg());
- BuildMI(&MBB, DL, get(BOpc)).addMBB(FBB);
+ if (isThumb)
+ BuildMI(&MBB, DL, get(BOpc)).addMBB(FBB).addImm(ARMCC::AL).addReg(0);
+ else
+ BuildMI(&MBB, DL, get(BOpc)).addMBB(FBB);
return 2;
}
@@ -627,7 +639,7 @@ void ARMBaseInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
bool SPRDest = ARM::SPRRegClass.contains(DestReg);
bool SPRSrc = ARM::SPRRegClass.contains(SrcReg);
- unsigned Opc;
+ unsigned Opc = 0;
if (SPRDest && SPRSrc)
Opc = ARM::VMOVS;
else if (GPRDest && SPRSrc)
@@ -638,19 +650,40 @@ void ARMBaseInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
Opc = ARM::VMOVD;
else if (ARM::QPRRegClass.contains(DestReg, SrcReg))
Opc = ARM::VORRq;
- else if (ARM::QQPRRegClass.contains(DestReg, SrcReg))
- Opc = ARM::VMOVQQ;
- else if (ARM::QQQQPRRegClass.contains(DestReg, SrcReg))
- Opc = ARM::VMOVQQQQ;
- else
- llvm_unreachable("Impossible reg-to-reg copy");
- MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(Opc), DestReg);
- MIB.addReg(SrcReg, getKillRegState(KillSrc));
- if (Opc == ARM::VORRq)
+ if (Opc) {
+ MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(Opc), DestReg);
MIB.addReg(SrcReg, getKillRegState(KillSrc));
- if (Opc != ARM::VMOVQQ && Opc != ARM::VMOVQQQQ)
+ if (Opc == ARM::VORRq)
+ MIB.addReg(SrcReg, getKillRegState(KillSrc));
AddDefaultPred(MIB);
+ return;
+ }
+
+ // Generate instructions for VMOVQQ and VMOVQQQQ pseudos in place.
+ if (ARM::QQPRRegClass.contains(DestReg, SrcReg) ||
+ ARM::QQQQPRRegClass.contains(DestReg, SrcReg)) {
+ const TargetRegisterInfo *TRI = &getRegisterInfo();
+ assert(ARM::qsub_0 + 3 == ARM::qsub_3 && "Expected contiguous enum.");
+ unsigned EndSubReg = ARM::QQPRRegClass.contains(DestReg, SrcReg) ?
+ ARM::qsub_1 : ARM::qsub_3;
+ for (unsigned i = ARM::qsub_0, e = EndSubReg + 1; i != e; ++i) {
+ unsigned Dst = TRI->getSubReg(DestReg, i);
+ unsigned Src = TRI->getSubReg(SrcReg, i);
+ MachineInstrBuilder Mov =
+ AddDefaultPred(BuildMI(MBB, I, I->getDebugLoc(), get(ARM::VORRq))
+ .addReg(Dst, RegState::Define)
+ .addReg(Src, getKillRegState(KillSrc))
+ .addReg(Src, getKillRegState(KillSrc)));
+ if (i == EndSubReg) {
+ Mov->addRegisterDefined(DestReg, TRI);
+ if (KillSrc)
+ Mov->addRegisterKilled(SrcReg, TRI);
+ }
+ }
+ return;
+ }
+ llvm_unreachable("Impossible reg-to-reg copy");
}
static const
@@ -683,82 +716,84 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
MFI.getObjectSize(FI),
Align);
- // tGPR is used sometimes in ARM instructions that need to avoid using
- // certain registers. Just treat it as GPR here. Likewise, rGPR.
- if (RC == ARM::tGPRRegisterClass || RC == ARM::tcGPRRegisterClass
- || RC == ARM::rGPRRegisterClass)
- RC = ARM::GPRRegisterClass;
-
- switch (RC->getID()) {
- case ARM::GPRRegClassID:
- AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::STRi12))
+ switch (RC->getSize()) {
+ case 4:
+ if (ARM::GPRRegClass.hasSubClassEq(RC)) {
+ AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::STRi12))
.addReg(SrcReg, getKillRegState(isKill))
.addFrameIndex(FI).addImm(0).addMemOperand(MMO));
- break;
- case ARM::SPRRegClassID:
- AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VSTRS))
+ } else if (ARM::SPRRegClass.hasSubClassEq(RC)) {
+ AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VSTRS))
.addReg(SrcReg, getKillRegState(isKill))
.addFrameIndex(FI).addImm(0).addMemOperand(MMO));
- break;
- case ARM::DPRRegClassID:
- case ARM::DPR_VFP2RegClassID:
- case ARM::DPR_8RegClassID:
- AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VSTRD))
+ } else
+ llvm_unreachable("Unknown reg class!");
+ break;
+ case 8:
+ if (ARM::DPRRegClass.hasSubClassEq(RC)) {
+ AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VSTRD))
.addReg(SrcReg, getKillRegState(isKill))
.addFrameIndex(FI).addImm(0).addMemOperand(MMO));
- break;
- case ARM::QPRRegClassID:
- case ARM::QPR_VFP2RegClassID:
- case ARM::QPR_8RegClassID:
- if (Align >= 16 && getRegisterInfo().needsStackRealignment(MF)) {
- AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VST1q64Pseudo))
+ } else
+ llvm_unreachable("Unknown reg class!");
+ break;
+ case 16:
+ if (ARM::QPRRegClass.hasSubClassEq(RC)) {
+ if (Align >= 16 && getRegisterInfo().needsStackRealignment(MF)) {
+ AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VST1q64Pseudo))
.addFrameIndex(FI).addImm(16)
.addReg(SrcReg, getKillRegState(isKill))
.addMemOperand(MMO));
- } else {
- AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VSTMQIA))
+ } else {
+ AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VSTMQIA))
.addReg(SrcReg, getKillRegState(isKill))
.addFrameIndex(FI)
.addMemOperand(MMO));
- }
- break;
- case ARM::QQPRRegClassID:
- case ARM::QQPR_VFP2RegClassID:
- if (Align >= 16 && getRegisterInfo().canRealignStack(MF)) {
- // FIXME: It's possible to only store part of the QQ register if the
- // spilled def has a sub-register index.
- AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VST1d64QPseudo))
+ }
+ } else
+ llvm_unreachable("Unknown reg class!");
+ break;
+ case 32:
+ if (ARM::QQPRRegClass.hasSubClassEq(RC)) {
+ if (Align >= 16 && getRegisterInfo().canRealignStack(MF)) {
+ // FIXME: It's possible to only store part of the QQ register if the
+ // spilled def has a sub-register index.
+ AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VST1d64QPseudo))
.addFrameIndex(FI).addImm(16)
.addReg(SrcReg, getKillRegState(isKill))
.addMemOperand(MMO));
- } else {
- MachineInstrBuilder MIB =
- AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VSTMDIA))
+ } else {
+ MachineInstrBuilder MIB =
+ AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VSTMDIA))
.addFrameIndex(FI))
- .addMemOperand(MMO);
- MIB = AddDReg(MIB, SrcReg, ARM::dsub_0, getKillRegState(isKill), TRI);
- MIB = AddDReg(MIB, SrcReg, ARM::dsub_1, 0, TRI);
- MIB = AddDReg(MIB, SrcReg, ARM::dsub_2, 0, TRI);
- AddDReg(MIB, SrcReg, ARM::dsub_3, 0, TRI);
- }
- break;
- case ARM::QQQQPRRegClassID: {
- MachineInstrBuilder MIB =
- AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VSTMDIA))
- .addFrameIndex(FI))
- .addMemOperand(MMO);
- MIB = AddDReg(MIB, SrcReg, ARM::dsub_0, getKillRegState(isKill), TRI);
- MIB = AddDReg(MIB, SrcReg, ARM::dsub_1, 0, TRI);
- MIB = AddDReg(MIB, SrcReg, ARM::dsub_2, 0, TRI);
- MIB = AddDReg(MIB, SrcReg, ARM::dsub_3, 0, TRI);
- MIB = AddDReg(MIB, SrcReg, ARM::dsub_4, 0, TRI);
- MIB = AddDReg(MIB, SrcReg, ARM::dsub_5, 0, TRI);
- MIB = AddDReg(MIB, SrcReg, ARM::dsub_6, 0, TRI);
- AddDReg(MIB, SrcReg, ARM::dsub_7, 0, TRI);
- break;
- }
- default:
- llvm_unreachable("Unknown regclass!");
+ .addMemOperand(MMO);
+ MIB = AddDReg(MIB, SrcReg, ARM::dsub_0, getKillRegState(isKill), TRI);
+ MIB = AddDReg(MIB, SrcReg, ARM::dsub_1, 0, TRI);
+ MIB = AddDReg(MIB, SrcReg, ARM::dsub_2, 0, TRI);
+ AddDReg(MIB, SrcReg, ARM::dsub_3, 0, TRI);
+ }
+ } else
+ llvm_unreachable("Unknown reg class!");
+ break;
+ case 64:
+ if (ARM::QQQQPRRegClass.hasSubClassEq(RC)) {
+ MachineInstrBuilder MIB =
+ AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VSTMDIA))
+ .addFrameIndex(FI))
+ .addMemOperand(MMO);
+ MIB = AddDReg(MIB, SrcReg, ARM::dsub_0, getKillRegState(isKill), TRI);
+ MIB = AddDReg(MIB, SrcReg, ARM::dsub_1, 0, TRI);
+ MIB = AddDReg(MIB, SrcReg, ARM::dsub_2, 0, TRI);
+ MIB = AddDReg(MIB, SrcReg, ARM::dsub_3, 0, TRI);
+ MIB = AddDReg(MIB, SrcReg, ARM::dsub_4, 0, TRI);
+ MIB = AddDReg(MIB, SrcReg, ARM::dsub_5, 0, TRI);
+ MIB = AddDReg(MIB, SrcReg, ARM::dsub_6, 0, TRI);
+ AddDReg(MIB, SrcReg, ARM::dsub_7, 0, TRI);
+ } else
+ llvm_unreachable("Unknown reg class!");
+ break;
+ default:
+ llvm_unreachable("Unknown reg class!");
}
}
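
[editor's note] The rewritten spill code dispatches on the register class's spill size in bytes instead of enumerating class IDs, then narrows within each size bucket via hasSubClassEq. A standalone sketch of the outer size dispatch only; the opcode strings are placeholders, and the real code further distinguishes e.g. GPR vs. SPR inside the 4-byte case:

#include <cstdio>
#include <cstdlib>

// Pick a store opcode from a spill size in bytes, the way the rewritten
// storeRegToStackSlot() does. Names stand in for the ARM::* opcodes.
const char *storeOpcodeForSize(unsigned Bytes) {
  switch (Bytes) {
  case 4:  return "STRi12";   // GPR (the real code also handles SPR here)
  case 8:  return "VSTRD";    // DPR
  case 16: return "VSTMQIA";  // QPR (or VST1q64Pseudo when realigned)
  case 32: return "VSTMDIA";  // QQPR, spilled as four D sub-registers
  case 64: return "VSTMDIA";  // QQQQPR, spilled as eight D sub-registers
  default:
    std::fprintf(stderr, "Unknown reg class!\n");
    std::abort();
  }
}

int main() {
  std::printf("%s\n", storeOpcodeForSize(16)); // VSTMQIA
}
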
@@ -809,6 +844,12 @@ ARMBaseInstrInfo::isStoreToStackSlot(const MachineInstr *MI,
return 0;
}
+unsigned ARMBaseInstrInfo::isStoreToStackSlotPostFE(const MachineInstr *MI,
+ int &FrameIndex) const {
+ const MachineMemOperand *Dummy;
+ return MI->getDesc().mayStore() && hasStoreToStackSlot(MI, Dummy, FrameIndex);
+}
+
void ARMBaseInstrInfo::
loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
unsigned DestReg, int FI,
@@ -826,72 +867,77 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
MFI.getObjectSize(FI),
Align);
- // tGPR is used sometimes in ARM instructions that need to avoid using
- // certain registers. Just treat it as GPR here.
- if (RC == ARM::tGPRRegisterClass || RC == ARM::tcGPRRegisterClass
- || RC == ARM::rGPRRegisterClass)
- RC = ARM::GPRRegisterClass;
-
- switch (RC->getID()) {
- case ARM::GPRRegClassID:
- AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::LDRi12), DestReg)
+ switch (RC->getSize()) {
+ case 4:
+ if (ARM::GPRRegClass.hasSubClassEq(RC)) {
+ AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::LDRi12), DestReg)
.addFrameIndex(FI).addImm(0).addMemOperand(MMO));
- break;
- case ARM::SPRRegClassID:
- AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLDRS), DestReg)
+
+ } else if (ARM::SPRRegClass.hasSubClassEq(RC)) {
+ AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLDRS), DestReg)
.addFrameIndex(FI).addImm(0).addMemOperand(MMO));
+ } else
+ llvm_unreachable("Unknown reg class!");
break;
- case ARM::DPRRegClassID:
- case ARM::DPR_VFP2RegClassID:
- case ARM::DPR_8RegClassID:
- AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLDRD), DestReg)
+ case 8:
+ if (ARM::DPRRegClass.hasSubClassEq(RC)) {
+ AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLDRD), DestReg)
.addFrameIndex(FI).addImm(0).addMemOperand(MMO));
+ } else
+ llvm_unreachable("Unknown reg class!");
break;
- case ARM::QPRRegClassID:
- case ARM::QPR_VFP2RegClassID:
- case ARM::QPR_8RegClassID:
- if (Align >= 16 && getRegisterInfo().needsStackRealignment(MF)) {
- AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLD1q64Pseudo), DestReg)
+ case 16:
+ if (ARM::QPRRegClass.hasSubClassEq(RC)) {
+ if (Align >= 16 && getRegisterInfo().needsStackRealignment(MF)) {
+ AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLD1q64Pseudo), DestReg)
.addFrameIndex(FI).addImm(16)
.addMemOperand(MMO));
- } else {
- AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLDMQIA), DestReg)
- .addFrameIndex(FI)
- .addMemOperand(MMO));
- }
+ } else {
+ AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLDMQIA), DestReg)
+ .addFrameIndex(FI)
+ .addMemOperand(MMO));
+ }
+ } else
+ llvm_unreachable("Unknown reg class!");
break;
- case ARM::QQPRRegClassID:
- case ARM::QQPR_VFP2RegClassID:
- if (Align >= 16 && getRegisterInfo().canRealignStack(MF)) {
- AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLD1d64QPseudo), DestReg)
+ case 32:
+ if (ARM::QQPRRegClass.hasSubClassEq(RC)) {
+ if (Align >= 16 && getRegisterInfo().canRealignStack(MF)) {
+ AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLD1d64QPseudo), DestReg)
.addFrameIndex(FI).addImm(16)
.addMemOperand(MMO));
- } else {
- MachineInstrBuilder MIB =
+ } else {
+ MachineInstrBuilder MIB =
AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLDMDIA))
.addFrameIndex(FI))
- .addMemOperand(MMO);
- MIB = AddDReg(MIB, DestReg, ARM::dsub_0, RegState::Define, TRI);
- MIB = AddDReg(MIB, DestReg, ARM::dsub_1, RegState::Define, TRI);
- MIB = AddDReg(MIB, DestReg, ARM::dsub_2, RegState::Define, TRI);
- AddDReg(MIB, DestReg, ARM::dsub_3, RegState::Define, TRI);
- }
+ .addMemOperand(MMO);
+ MIB = AddDReg(MIB, DestReg, ARM::dsub_0, RegState::Define, TRI);
+ MIB = AddDReg(MIB, DestReg, ARM::dsub_1, RegState::Define, TRI);
+ MIB = AddDReg(MIB, DestReg, ARM::dsub_2, RegState::Define, TRI);
+ MIB = AddDReg(MIB, DestReg, ARM::dsub_3, RegState::Define, TRI);
+ MIB.addReg(DestReg, RegState::Define | RegState::Implicit);
+ }
+ } else
+ llvm_unreachable("Unknown reg class!");
break;
- case ARM::QQQQPRRegClassID: {
- MachineInstrBuilder MIB =
+ case 64:
+ if (ARM::QQQQPRRegClass.hasSubClassEq(RC)) {
+ MachineInstrBuilder MIB =
AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLDMDIA))
.addFrameIndex(FI))
- .addMemOperand(MMO);
- MIB = AddDReg(MIB, DestReg, ARM::dsub_0, RegState::Define, TRI);
- MIB = AddDReg(MIB, DestReg, ARM::dsub_1, RegState::Define, TRI);
- MIB = AddDReg(MIB, DestReg, ARM::dsub_2, RegState::Define, TRI);
- MIB = AddDReg(MIB, DestReg, ARM::dsub_3, RegState::Define, TRI);
- MIB = AddDReg(MIB, DestReg, ARM::dsub_4, RegState::Define, TRI);
- MIB = AddDReg(MIB, DestReg, ARM::dsub_5, RegState::Define, TRI);
- MIB = AddDReg(MIB, DestReg, ARM::dsub_6, RegState::Define, TRI);
- AddDReg(MIB, DestReg, ARM::dsub_7, RegState::Define, TRI);
+ .addMemOperand(MMO);
+ MIB = AddDReg(MIB, DestReg, ARM::dsub_0, RegState::Define, TRI);
+ MIB = AddDReg(MIB, DestReg, ARM::dsub_1, RegState::Define, TRI);
+ MIB = AddDReg(MIB, DestReg, ARM::dsub_2, RegState::Define, TRI);
+ MIB = AddDReg(MIB, DestReg, ARM::dsub_3, RegState::Define, TRI);
+ MIB = AddDReg(MIB, DestReg, ARM::dsub_4, RegState::Define, TRI);
+ MIB = AddDReg(MIB, DestReg, ARM::dsub_5, RegState::Define, TRI);
+ MIB = AddDReg(MIB, DestReg, ARM::dsub_6, RegState::Define, TRI);
+ MIB = AddDReg(MIB, DestReg, ARM::dsub_7, RegState::Define, TRI);
+ MIB.addReg(DestReg, RegState::Define | RegState::Implicit);
+ } else
+ llvm_unreachable("Unknown reg class!");
break;
- }
default:
llvm_unreachable("Unknown regclass!");
}
@@ -944,6 +990,78 @@ ARMBaseInstrInfo::isLoadFromStackSlot(const MachineInstr *MI,
return 0;
}
+unsigned ARMBaseInstrInfo::isLoadFromStackSlotPostFE(const MachineInstr *MI,
+ int &FrameIndex) const {
+ const MachineMemOperand *Dummy;
+ return MI->getDesc().mayLoad() && hasLoadFromStackSlot(MI, Dummy, FrameIndex);
+}
+
+bool ARMBaseInstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const{
+ // This hook gets to expand COPY instructions before they become
+ // copyPhysReg() calls. Look for VMOVS instructions that can legally be
+ // widened to VMOVD. We prefer the VMOVD when possible because it may be
+ // changed into a VORR that can go down the NEON pipeline.
+ if (!WidenVMOVS || !MI->isCopy())
+ return false;
+
+ // Look for a copy between even S-registers. That is where we keep floats
+ // when using NEON v2f32 instructions for f32 arithmetic.
+ unsigned DstRegS = MI->getOperand(0).getReg();
+ unsigned SrcRegS = MI->getOperand(1).getReg();
+ if (!ARM::SPRRegClass.contains(DstRegS, SrcRegS))
+ return false;
+
+ const TargetRegisterInfo *TRI = &getRegisterInfo();
+ unsigned DstRegD = TRI->getMatchingSuperReg(DstRegS, ARM::ssub_0,
+ &ARM::DPRRegClass);
+ unsigned SrcRegD = TRI->getMatchingSuperReg(SrcRegS, ARM::ssub_0,
+ &ARM::DPRRegClass);
+ if (!DstRegD || !SrcRegD)
+ return false;
+
+ // We want to widen this into a DstRegD = VMOVD SrcRegD copy. This is only
+ // legal if the COPY already defines the full DstRegD, and it isn't a
+ // sub-register insertion.
+ if (!MI->definesRegister(DstRegD, TRI) || MI->readsRegister(DstRegD, TRI))
+ return false;
+
+ // A dead copy shouldn't show up here, but reject it just in case.
+ if (MI->getOperand(0).isDead())
+ return false;
+
+ // All clear, widen the COPY.
+ DEBUG(dbgs() << "widening: " << *MI);
+
+ // Get rid of the old <imp-def> of DstRegD. Leave it if it defines a Q-reg
+ // or some other super-register.
+ int ImpDefIdx = MI->findRegisterDefOperandIdx(DstRegD);
+ if (ImpDefIdx != -1)
+ MI->RemoveOperand(ImpDefIdx);
+
+ // Change the opcode and operands.
+ MI->setDesc(get(ARM::VMOVD));
+ MI->getOperand(0).setReg(DstRegD);
+ MI->getOperand(1).setReg(SrcRegD);
+ AddDefaultPred(MachineInstrBuilder(MI));
+
+ // We are now reading SrcRegD instead of SrcRegS. This may upset the
+ // register scavenger and machine verifier, so we need to indicate that we
+ // are reading an undefined value from SrcRegD, but a proper value from
+ // SrcRegS.
+ MI->getOperand(1).setIsUndef();
+ MachineInstrBuilder(MI).addReg(SrcRegS, RegState::Implicit);
+
+ // SrcRegD may actually contain an unrelated value in the ssub_1
+ // sub-register. Don't kill it. Only kill the ssub_0 sub-register.
+ if (MI->getOperand(1).isKill()) {
+ MI->getOperand(1).setIsKill(false);
+ MI->addRegisterKilled(SrcRegS, TRI, true);
+ }
+
+ DEBUG(dbgs() << "replaced by: " << *MI);
+ return true;
+}
+
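
[editor's note] The widening above relies on VFP register aliasing: each D register overlaps two consecutive S registers (D0 = S0:S1, D1 = S2:S3, ...), so only a copy whose operands both sit in the even (ssub_0) lane has a matching D super-register. A standalone sketch of that index arithmetic, with plain integers standing in for the register enums; it models S0-S31/D0-D15 only, since higher D registers have no S aliases:

#include <cstdio>

// For an S-register index, return the D register whose low (ssub_0) lane
// it occupies, or -1 if it sits in the high lane and has no such super-reg.
// Mirrors the getMatchingSuperReg(..., ssub_0, ...) check in the pass.
int matchingDReg(int SIdx) {
  return (SIdx % 2 == 0) ? SIdx / 2 : -1;
}

int main() {
  std::printf("S4 -> D%d\n", matchingDReg(4));  // S4 is the low lane of D2
  std::printf("S5 -> D%d\n", matchingDReg(5));  // S5 is a high lane: -1
}
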
MachineInstr*
ARMBaseInstrInfo::emitFrameIndexDebugValue(MachineFunction &MF,
int FrameIx, uint64_t Offset,
@@ -974,17 +1092,24 @@ static unsigned duplicateCPV(MachineFunction &MF, unsigned &CPI) {
// instructions, so that's probably OK, but is PIC always correct when
// we get here?
if (ACPV->isGlobalValue())
- NewCPV = new ARMConstantPoolValue(ACPV->getGV(), PCLabelId,
- ARMCP::CPValue, 4);
+ NewCPV = ARMConstantPoolConstant::
+ Create(cast<ARMConstantPoolConstant>(ACPV)->getGV(), PCLabelId,
+ ARMCP::CPValue, 4);
else if (ACPV->isExtSymbol())
- NewCPV = new ARMConstantPoolValue(MF.getFunction()->getContext(),
- ACPV->getSymbol(), PCLabelId, 4);
+ NewCPV = ARMConstantPoolSymbol::
+ Create(MF.getFunction()->getContext(),
+ cast<ARMConstantPoolSymbol>(ACPV)->getSymbol(), PCLabelId, 4);
else if (ACPV->isBlockAddress())
- NewCPV = new ARMConstantPoolValue(ACPV->getBlockAddress(), PCLabelId,
- ARMCP::CPBlockAddress, 4);
+ NewCPV = ARMConstantPoolConstant::
+ Create(cast<ARMConstantPoolConstant>(ACPV)->getBlockAddress(), PCLabelId,
+ ARMCP::CPBlockAddress, 4);
else if (ACPV->isLSDA())
- NewCPV = new ARMConstantPoolValue(MF.getFunction(), PCLabelId,
- ARMCP::CPLSDA, 4);
+ NewCPV = ARMConstantPoolConstant::Create(MF.getFunction(), PCLabelId,
+ ARMCP::CPLSDA, 4);
+ else if (ACPV->isMachineBasicBlock())
+ NewCPV = ARMConstantPoolMBB::
+ Create(MF.getFunction()->getContext(),
+ cast<ARMConstantPoolMBB>(ACPV)->getMBB(), PCLabelId, 4);
else
llvm_unreachable("Unexpected ARM constantpool value type!!");
CPI = MCP->getConstantPoolIndex(NewCPV, MCPE.getAlignment());
@@ -1289,7 +1414,7 @@ isProfitableToIfCvt(MachineBasicBlock &TMBB,
// Attempt to estimate the relative costs of predication versus branching.
unsigned TUnpredCost = Probability.getNumerator() * TCycles;
TUnpredCost /= Probability.getDenominator();
-
+
uint32_t Comp = Probability.getDenominator() - Probability.getNumerator();
unsigned FUnpredCost = Comp * FCycles;
FUnpredCost /= Probability.getDenominator();
@@ -1330,6 +1455,57 @@ int llvm::getMatchingCondBranchOpcode(int Opc) {
}
+/// Map pseudo instructions that imply an 'S' bit onto real opcodes. Whether the
+/// instruction is encoded with an 'S' bit is determined by the optional CPSR
+/// def operand.
+///
+/// This will go away once we can teach tblgen how to set the optional CPSR def
+/// operand itself.
+struct AddSubFlagsOpcodePair {
+ unsigned PseudoOpc;
+ unsigned MachineOpc;
+};
+
+static AddSubFlagsOpcodePair AddSubFlagsOpcodeMap[] = {
+ {ARM::ADDSri, ARM::ADDri},
+ {ARM::ADDSrr, ARM::ADDrr},
+ {ARM::ADDSrsi, ARM::ADDrsi},
+ {ARM::ADDSrsr, ARM::ADDrsr},
+
+ {ARM::SUBSri, ARM::SUBri},
+ {ARM::SUBSrr, ARM::SUBrr},
+ {ARM::SUBSrsi, ARM::SUBrsi},
+ {ARM::SUBSrsr, ARM::SUBrsr},
+
+ {ARM::RSBSri, ARM::RSBri},
+ {ARM::RSBSrr, ARM::RSBrr},
+ {ARM::RSBSrsi, ARM::RSBrsi},
+ {ARM::RSBSrsr, ARM::RSBrsr},
+
+ {ARM::t2ADDSri, ARM::t2ADDri},
+ {ARM::t2ADDSrr, ARM::t2ADDrr},
+ {ARM::t2ADDSrs, ARM::t2ADDrs},
+
+ {ARM::t2SUBSri, ARM::t2SUBri},
+ {ARM::t2SUBSrr, ARM::t2SUBrr},
+ {ARM::t2SUBSrs, ARM::t2SUBrs},
+
+ {ARM::t2RSBSri, ARM::t2RSBri},
+ {ARM::t2RSBSrs, ARM::t2RSBrs},
+};
+
+unsigned llvm::convertAddSubFlagsOpcode(unsigned OldOpc) {
+ static const int NPairs =
+ sizeof(AddSubFlagsOpcodeMap) / sizeof(AddSubFlagsOpcodePair);
+ for (AddSubFlagsOpcodePair *OpcPair = &AddSubFlagsOpcodeMap[0],
+ *End = &AddSubFlagsOpcodeMap[NPairs]; OpcPair != End; ++OpcPair) {
+ if (OldOpc == OpcPair->PseudoOpc) {
+ return OpcPair->MachineOpc;
+ }
+ }
+ return 0;
+}
+
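
[editor's note] convertAddSubFlagsOpcode() is a plain linear scan over the static pseudo-to-real table, returning 0 when the opcode is not a flag-setting pseudo. A minimal standalone sketch of the same pattern; the opcode numbers are made up and merely stand in for the ARM::* enum values:

#include <cstdio>

// Hypothetical opcode numbers standing in for ARM::ADDSri, ARM::ADDri, etc.
enum { PSEUDO_ADDSri = 100, REAL_ADDri = 1, PSEUDO_SUBSri = 101, REAL_SUBri = 2 };

struct OpcodePair { unsigned PseudoOpc; unsigned MachineOpc; };

static const OpcodePair Map[] = {
  {PSEUDO_ADDSri, REAL_ADDri},
  {PSEUDO_SUBSri, REAL_SUBri},
};

// Return the real opcode for a flag-setting pseudo, or 0 if Opc is not in
// the table (the same contract as convertAddSubFlagsOpcode).
unsigned convert(unsigned Opc) {
  const unsigned N = sizeof(Map) / sizeof(Map[0]);
  for (unsigned i = 0; i != N; ++i)
    if (Map[i].PseudoOpc == Opc)
      return Map[i].MachineOpc;
  return 0;
}

int main() {
  std::printf("%u %u\n", convert(PSEUDO_ADDSri), convert(42)); // prints "1 0"
}
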
void llvm::emitARMRegPlusImmediate(MachineBasicBlock &MBB,
MachineBasicBlock::iterator &MBBI, DebugLoc dl,
unsigned DestReg, unsigned BaseReg, int NumBytes,
@@ -1862,7 +2038,6 @@ ARMBaseInstrInfo::getNumMicroOps(const InstrItineraryData *ItinData,
case ARM::STMIB_UPD:
case ARM::tLDMIA:
case ARM::tLDMIA_UPD:
- case ARM::tSTMIA:
case ARM::tSTMIA_UPD:
case ARM::tPOP_RET:
case ARM::tPOP:
@@ -2128,7 +2303,6 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
case ARM::STMDA_UPD:
case ARM::STMDB_UPD:
case ARM::STMIB_UPD:
- case ARM::tSTMIA:
case ARM::tSTMIA_UPD:
case ARM::tPOP_RET:
case ARM::tPOP:
@@ -2567,6 +2741,15 @@ hasLowDefLatency(const InstrItineraryData *ItinData,
return false;
}
+bool ARMBaseInstrInfo::verifyInstruction(const MachineInstr *MI,
+ StringRef &ErrInfo) const {
+ if (convertAddSubFlagsOpcode(MI->getOpcode())) {
+ ErrInfo = "Pseudo flag setting opcodes only exist in Selection DAG";
+ return false;
+ }
+ return true;
+}
+
bool
ARMBaseInstrInfo::isFpMLxInstruction(unsigned Opcode, unsigned &MulOpc,
unsigned &AddSubOpc,
@@ -2582,3 +2765,66 @@ ARMBaseInstrInfo::isFpMLxInstruction(unsigned Opcode, unsigned &MulOpc,
HasLane = Entry.HasLane;
return true;
}
+
+//===----------------------------------------------------------------------===//
+// Execution domains.
+//===----------------------------------------------------------------------===//
+//
+// Some instructions go down the NEON pipeline, some go down the VFP pipeline,
+// and some can go down both. The vmov instructions go down the VFP pipeline,
+// but they can be changed to vorr equivalents that are executed by the NEON
+// pipeline.
+//
+// We use the following execution domain numbering:
+//
+enum ARMExeDomain {
+ ExeGeneric = 0,
+ ExeVFP = 1,
+ ExeNEON = 2
+};
+//
+// Also see ARMInstrFormats.td and Domain* enums in ARMBaseInfo.h
+//
+std::pair<uint16_t, uint16_t>
+ARMBaseInstrInfo::getExecutionDomain(const MachineInstr *MI) const {
+ // VMOVD is a VFP instruction, but can be changed to NEON if it isn't
+ // predicated.
+ if (MI->getOpcode() == ARM::VMOVD && !isPredicated(MI))
+ return std::make_pair(ExeVFP, (1<<ExeVFP) | (1<<ExeNEON));
+
+ // No other instructions can be swizzled, so just determine their domain.
+ unsigned Domain = MI->getDesc().TSFlags & ARMII::DomainMask;
+
+ if (Domain & ARMII::DomainNEON)
+ return std::make_pair(ExeNEON, 0);
+
+ // Certain instructions can go either way on Cortex-A8.
+ // Treat them as NEON instructions.
+ if ((Domain & ARMII::DomainNEONA8) && Subtarget.isCortexA8())
+ return std::make_pair(ExeNEON, 0);
+
+ if (Domain & ARMII::DomainVFP)
+ return std::make_pair(ExeVFP, 0);
+
+ return std::make_pair(ExeGeneric, 0);
+}
+
+void
+ARMBaseInstrInfo::setExecutionDomain(MachineInstr *MI, unsigned Domain) const {
+ // We only know how to change VMOVD into VORR.
+ assert(MI->getOpcode() == ARM::VMOVD && "Can only swizzle VMOVD");
+ if (Domain != ExeNEON)
+ return;
+
+ // Zap the predicate operands.
+ assert(!isPredicated(MI) && "Cannot predicate a VORRd");
+ MI->RemoveOperand(3);
+ MI->RemoveOperand(2);
+
+ // Change to a VORRd which requires two identical use operands.
+ MI->setDesc(get(ARM::VORRd));
+
+ // Add the extra source operand and new predicates.
+ // This will go before any implicit ops.
+ AddDefaultPred(MachineInstrBuilder(MI).addOperand(MI->getOperand(1)));
+}
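
[editor's note] The pair returned by getExecutionDomain() encodes (current domain, bitmask of domains the instruction could be rewritten into); a zero mask means the instruction cannot be swizzled. A small standalone sketch decoding that encoding, reusing the enum values above (the describe() helper is invented for illustration):

#include <cstdio>
#include <cstdint>
#include <utility>

enum ARMExeDomain { ExeGeneric = 0, ExeVFP = 1, ExeNEON = 2 };

// Decode an (active domain, candidate mask) pair as produced by
// getExecutionDomain(). A zero mask means "leave the instruction alone".
void describe(std::pair<uint16_t, uint16_t> D) {
  static const char *Names[] = { "Generic", "VFP", "NEON" };
  std::printf("runs on %s;", Names[D.first]);
  if (D.second == 0) {
    std::printf(" not swizzlable\n");
    return;
  }
  std::printf(" may be rewritten to:");
  for (unsigned Dom = 0; Dom != 3; ++Dom)
    if (D.second & (1u << Dom))
      std::printf(" %s", Names[Dom]);
  std::printf("\n");
}

int main() {
  // An unpredicated VMOVD: currently VFP, convertible to VFP or NEON.
  describe(std::make_pair(ExeVFP, (1 << ExeVFP) | (1 << ExeNEON)));
  // A fixed NEON instruction.
  describe(std::make_pair(ExeNEON, 0));
}
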
diff --git a/lib/Target/ARM/ARMBaseInstrInfo.h b/lib/Target/ARM/ARMBaseInstrInfo.h
index 507e897..0f9f321 100644
--- a/lib/Target/ARM/ARMBaseInstrInfo.h
+++ b/lib/Target/ARM/ARMBaseInstrInfo.h
@@ -27,146 +27,6 @@ namespace llvm {
class ARMSubtarget;
class ARMBaseRegisterInfo;
-/// ARMII - This namespace holds all of the target specific flags that
-/// instruction info tracks.
-///
-namespace ARMII {
- enum {
- //===------------------------------------------------------------------===//
- // Instruction Flags.
-
- //===------------------------------------------------------------------===//
- // This four-bit field describes the addressing mode used.
- AddrModeMask = 0x1f, // The AddrMode enums are declared in ARMBaseInfo.h
-
- // IndexMode - Unindex, pre-indexed, or post-indexed are valid for load
- // and store ops only. Generic "updating" flag is used for ld/st multiple.
- // The index mode enums are declared in ARMBaseInfo.h
- IndexModeShift = 5,
- IndexModeMask = 3 << IndexModeShift,
-
- //===------------------------------------------------------------------===//
- // Instruction encoding formats.
- //
- FormShift = 7,
- FormMask = 0x3f << FormShift,
-
- // Pseudo instructions
- Pseudo = 0 << FormShift,
-
- // Multiply instructions
- MulFrm = 1 << FormShift,
-
- // Branch instructions
- BrFrm = 2 << FormShift,
- BrMiscFrm = 3 << FormShift,
-
- // Data Processing instructions
- DPFrm = 4 << FormShift,
- DPSoRegFrm = 5 << FormShift,
-
- // Load and Store
- LdFrm = 6 << FormShift,
- StFrm = 7 << FormShift,
- LdMiscFrm = 8 << FormShift,
- StMiscFrm = 9 << FormShift,
- LdStMulFrm = 10 << FormShift,
-
- LdStExFrm = 11 << FormShift,
-
- // Miscellaneous arithmetic instructions
- ArithMiscFrm = 12 << FormShift,
- SatFrm = 13 << FormShift,
-
- // Extend instructions
- ExtFrm = 14 << FormShift,
-
- // VFP formats
- VFPUnaryFrm = 15 << FormShift,
- VFPBinaryFrm = 16 << FormShift,
- VFPConv1Frm = 17 << FormShift,
- VFPConv2Frm = 18 << FormShift,
- VFPConv3Frm = 19 << FormShift,
- VFPConv4Frm = 20 << FormShift,
- VFPConv5Frm = 21 << FormShift,
- VFPLdStFrm = 22 << FormShift,
- VFPLdStMulFrm = 23 << FormShift,
- VFPMiscFrm = 24 << FormShift,
-
- // Thumb format
- ThumbFrm = 25 << FormShift,
-
- // Miscelleaneous format
- MiscFrm = 26 << FormShift,
-
- // NEON formats
- NGetLnFrm = 27 << FormShift,
- NSetLnFrm = 28 << FormShift,
- NDupFrm = 29 << FormShift,
- NLdStFrm = 30 << FormShift,
- N1RegModImmFrm= 31 << FormShift,
- N2RegFrm = 32 << FormShift,
- NVCVTFrm = 33 << FormShift,
- NVDupLnFrm = 34 << FormShift,
- N2RegVShLFrm = 35 << FormShift,
- N2RegVShRFrm = 36 << FormShift,
- N3RegFrm = 37 << FormShift,
- N3RegVShFrm = 38 << FormShift,
- NVExtFrm = 39 << FormShift,
- NVMulSLFrm = 40 << FormShift,
- NVTBLFrm = 41 << FormShift,
-
- //===------------------------------------------------------------------===//
- // Misc flags.
-
- // UnaryDP - Indicates this is a unary data processing instruction, i.e.
- // it doesn't have a Rn operand.
- UnaryDP = 1 << 13,
-
- // Xform16Bit - Indicates this Thumb2 instruction may be transformed into
- // a 16-bit Thumb instruction if certain conditions are met.
- Xform16Bit = 1 << 14,
-
- //===------------------------------------------------------------------===//
- // Code domain.
- DomainShift = 15,
- DomainMask = 7 << DomainShift,
- DomainGeneral = 0 << DomainShift,
- DomainVFP = 1 << DomainShift,
- DomainNEON = 2 << DomainShift,
- DomainNEONA8 = 4 << DomainShift,
-
- //===------------------------------------------------------------------===//
- // Field shifts - such shifts are used to set field while generating
- // machine instructions.
- //
- // FIXME: This list will need adjusting/fixing as the MC code emitter
- // takes shape and the ARMCodeEmitter.cpp bits go away.
- ShiftTypeShift = 4,
-
- M_BitShift = 5,
- ShiftImmShift = 5,
- ShiftShift = 7,
- N_BitShift = 7,
- ImmHiShift = 8,
- SoRotImmShift = 8,
- RegRsShift = 8,
- ExtRotImmShift = 10,
- RegRdLoShift = 12,
- RegRdShift = 12,
- RegRdHiShift = 16,
- RegRnShift = 16,
- S_BitShift = 20,
- W_BitShift = 21,
- AM3_I_BitShift = 22,
- D_BitShift = 22,
- U_BitShift = 23,
- P_BitShift = 24,
- I_BitShift = 25,
- CondShift = 28
- };
-}
-
class ARMBaseInstrInfo : public ARMGenInstrInfo {
const ARMSubtarget &Subtarget;
@@ -241,6 +101,10 @@ public:
int &FrameIndex) const;
virtual unsigned isStoreToStackSlot(const MachineInstr *MI,
int &FrameIndex) const;
+ virtual unsigned isLoadFromStackSlotPostFE(const MachineInstr *MI,
+ int &FrameIndex) const;
+ virtual unsigned isStoreToStackSlotPostFE(const MachineInstr *MI,
+ int &FrameIndex) const;
virtual void copyPhysReg(MachineBasicBlock &MBB,
MachineBasicBlock::iterator I, DebugLoc DL,
@@ -259,6 +123,8 @@ public:
const TargetRegisterClass *RC,
const TargetRegisterInfo *TRI) const;
+ virtual bool expandPostRAPseudo(MachineBasicBlock::iterator MI) const;
+
virtual MachineInstr *emitFrameIndexDebugValue(MachineFunction &MF,
int FrameIx,
uint64_t Offset,
@@ -346,6 +212,12 @@ public:
int getOperandLatency(const InstrItineraryData *ItinData,
SDNode *DefNode, unsigned DefIdx,
SDNode *UseNode, unsigned UseIdx) const;
+
+ /// VFP/NEON execution domains.
+ std::pair<uint16_t, uint16_t>
+ getExecutionDomain(const MachineInstr *MI) const;
+ void setExecutionDomain(MachineInstr *MI, unsigned Domain) const;
+
private:
int getVLDMDefCycle(const InstrItineraryData *ItinData,
const MCInstrDesc &DefMCID,
@@ -382,6 +254,9 @@ private:
bool hasLowDefLatency(const InstrItineraryData *ItinData,
const MachineInstr *DefMI, unsigned DefIdx) const;
+ /// verifyInstruction - Perform target specific instruction verification.
+ bool verifyInstruction(const MachineInstr *MI, StringRef &ErrInfo) const;
+
private:
/// Modeling special VFP / NEON fp MLA / MLS hazards.
@@ -464,6 +339,12 @@ ARMCC::CondCodes getInstrPredicate(const MachineInstr *MI, unsigned &PredReg);
int getMatchingCondBranchOpcode(int Opc);
+
+/// Map pseudo instructions that imply an 'S' bit onto real opcodes. Whether
+/// the instruction is encoded with an 'S' bit is determined by the optional
+/// CPSR def operand.
+unsigned convertAddSubFlagsOpcode(unsigned OldOpc);
+
/// emitARMRegPlusImmediate / emitT2RegPlusImmediate - Emits a series of
/// instructions to materialize a destreg = basereg + immediate in ARM / Thumb2
/// code.
diff --git a/lib/Target/ARM/ARMBaseRegisterInfo.cpp b/lib/Target/ARM/ARMBaseRegisterInfo.cpp
index ba42295..7c42342 100644
--- a/lib/Target/ARM/ARMBaseRegisterInfo.cpp
+++ b/lib/Target/ARM/ARMBaseRegisterInfo.cpp
@@ -12,13 +12,13 @@
//===----------------------------------------------------------------------===//
#include "ARM.h"
-#include "ARMAddressingModes.h"
#include "ARMBaseInstrInfo.h"
#include "ARMBaseRegisterInfo.h"
#include "ARMFrameLowering.h"
#include "ARMInstrInfo.h"
#include "ARMMachineFunctionInfo.h"
#include "ARMSubtarget.h"
+#include "MCTargetDesc/ARMAddressingModes.h"
#include "llvm/Constants.h"
#include "llvm/DerivedTypes.h"
#include "llvm/Function.h"
@@ -27,7 +27,6 @@
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/CodeGen/MachineLocation.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RegisterScavenging.h"
#include "llvm/Support/Debug.h"
@@ -57,7 +56,7 @@ EnableBasePointer("arm-use-base-pointer", cl::Hidden, cl::init(true),
ARMBaseRegisterInfo::ARMBaseRegisterInfo(const ARMBaseInstrInfo &tii,
const ARMSubtarget &sti)
- : ARMGenRegisterInfo(), TII(tii), STI(sti),
+ : ARMGenRegisterInfo(ARM::LR), TII(tii), STI(sti),
FramePtr((STI.isTargetDarwin() || STI.isThumb()) ? ARM::R7 : ARM::R11),
BasePtr(ARM::R6) {
}
@@ -354,7 +353,7 @@ const TargetRegisterClass*
ARMBaseRegisterInfo::getLargestLegalSuperClass(const TargetRegisterClass *RC)
const {
const TargetRegisterClass *Super = RC;
- TargetRegisterClass::sc_iterator I = RC->superclasses_begin();
+ TargetRegisterClass::sc_iterator I = RC->getSuperClasses();
do {
switch (Super->getID()) {
case ARM::GPRRegClassID:
@@ -375,6 +374,13 @@ ARMBaseRegisterInfo::getPointerRegClass(unsigned Kind) const {
return ARM::GPRRegisterClass;
}
+const TargetRegisterClass *
+ARMBaseRegisterInfo::getCrossCopyRegClass(const TargetRegisterClass *RC) const {
+ if (RC == &ARM::CCRRegClass)
+ return 0; // Can't copy CCR registers.
+ return RC;
+}
+
unsigned
ARMBaseRegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC,
MachineFunction &MF) const {
@@ -487,19 +493,19 @@ ARMBaseRegisterInfo::getRawAllocationOrder(const TargetRegisterClass *RC,
if (!TFI->hasFP(MF)) {
if (!STI.isR9Reserved())
- return ArrayRef<unsigned>(GPREven1);
+ return makeArrayRef(GPREven1);
else
- return ArrayRef<unsigned>(GPREven4);
+ return makeArrayRef(GPREven4);
} else if (FramePtr == ARM::R7) {
if (!STI.isR9Reserved())
- return ArrayRef<unsigned>(GPREven2);
+ return makeArrayRef(GPREven2);
else
- return ArrayRef<unsigned>(GPREven5);
+ return makeArrayRef(GPREven5);
} else { // FramePtr == ARM::R11
if (!STI.isR9Reserved())
- return ArrayRef<unsigned>(GPREven3);
+ return makeArrayRef(GPREven3);
else
- return ArrayRef<unsigned>(GPREven6);
+ return makeArrayRef(GPREven6);
}
} else if (HintType == ARMRI::RegPairOdd) {
if (isPhysicalRegister(HintReg) && getRegisterPairOdd(HintReg, MF) == 0)
@@ -509,19 +515,19 @@ ARMBaseRegisterInfo::getRawAllocationOrder(const TargetRegisterClass *RC,
if (!TFI->hasFP(MF)) {
if (!STI.isR9Reserved())
- return ArrayRef<unsigned>(GPROdd1);
+ return makeArrayRef(GPROdd1);
else
- return ArrayRef<unsigned>(GPROdd4);
+ return makeArrayRef(GPROdd4);
} else if (FramePtr == ARM::R7) {
if (!STI.isR9Reserved())
- return ArrayRef<unsigned>(GPROdd2);
+ return makeArrayRef(GPROdd2);
else
- return ArrayRef<unsigned>(GPROdd5);
+ return makeArrayRef(GPROdd5);
} else { // FramePtr == ARM::R11
if (!STI.isR9Reserved())
- return ArrayRef<unsigned>(GPROdd3);
+ return makeArrayRef(GPROdd3);
else
- return ArrayRef<unsigned>(GPROdd6);
+ return makeArrayRef(GPROdd6);
}
}
return RC->getRawAllocationOrder(MF);
@@ -649,10 +655,6 @@ cannotEliminateFrame(const MachineFunction &MF) const {
|| needsStackRealignment(MF);
}
-unsigned ARMBaseRegisterInfo::getRARegister() const {
- return ARM::LR;
-}
-
unsigned
ARMBaseRegisterInfo::getFrameRegister(const MachineFunction &MF) const {
const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
@@ -672,99 +674,54 @@ unsigned ARMBaseRegisterInfo::getEHHandlerRegister() const {
return 0;
}
-int ARMBaseRegisterInfo::getDwarfRegNum(unsigned RegNum, bool isEH) const {
- return ARMGenRegisterInfo::getDwarfRegNumFull(RegNum, 0);
-}
-
-int ARMBaseRegisterInfo::getLLVMRegNum(unsigned DwarfRegNo, bool isEH) const {
- return ARMGenRegisterInfo::getLLVMRegNumFull(DwarfRegNo,0);
-}
-
unsigned ARMBaseRegisterInfo::getRegisterPairEven(unsigned Reg,
const MachineFunction &MF) const {
switch (Reg) {
default: break;
// Return 0 if either register of the pair is a special register.
// So no R12, etc.
- case ARM::R1:
- return ARM::R0;
- case ARM::R3:
- return ARM::R2;
- case ARM::R5:
- return ARM::R4;
+ case ARM::R1: return ARM::R0;
+ case ARM::R3: return ARM::R2;
+ case ARM::R5: return ARM::R4;
case ARM::R7:
return (isReservedReg(MF, ARM::R7) || isReservedReg(MF, ARM::R6))
? 0 : ARM::R6;
- case ARM::R9:
- return isReservedReg(MF, ARM::R9) ? 0 :ARM::R8;
- case ARM::R11:
- return isReservedReg(MF, ARM::R11) ? 0 : ARM::R10;
-
- case ARM::S1:
- return ARM::S0;
- case ARM::S3:
- return ARM::S2;
- case ARM::S5:
- return ARM::S4;
- case ARM::S7:
- return ARM::S6;
- case ARM::S9:
- return ARM::S8;
- case ARM::S11:
- return ARM::S10;
- case ARM::S13:
- return ARM::S12;
- case ARM::S15:
- return ARM::S14;
- case ARM::S17:
- return ARM::S16;
- case ARM::S19:
- return ARM::S18;
- case ARM::S21:
- return ARM::S20;
- case ARM::S23:
- return ARM::S22;
- case ARM::S25:
- return ARM::S24;
- case ARM::S27:
- return ARM::S26;
- case ARM::S29:
- return ARM::S28;
- case ARM::S31:
- return ARM::S30;
-
- case ARM::D1:
- return ARM::D0;
- case ARM::D3:
- return ARM::D2;
- case ARM::D5:
- return ARM::D4;
- case ARM::D7:
- return ARM::D6;
- case ARM::D9:
- return ARM::D8;
- case ARM::D11:
- return ARM::D10;
- case ARM::D13:
- return ARM::D12;
- case ARM::D15:
- return ARM::D14;
- case ARM::D17:
- return ARM::D16;
- case ARM::D19:
- return ARM::D18;
- case ARM::D21:
- return ARM::D20;
- case ARM::D23:
- return ARM::D22;
- case ARM::D25:
- return ARM::D24;
- case ARM::D27:
- return ARM::D26;
- case ARM::D29:
- return ARM::D28;
- case ARM::D31:
- return ARM::D30;
+  case ARM::R9: return isReservedReg(MF, ARM::R9)  ? 0 : ARM::R8;
+ case ARM::R11: return isReservedReg(MF, ARM::R11) ? 0 : ARM::R10;
+
+ case ARM::S1: return ARM::S0;
+ case ARM::S3: return ARM::S2;
+ case ARM::S5: return ARM::S4;
+ case ARM::S7: return ARM::S6;
+ case ARM::S9: return ARM::S8;
+ case ARM::S11: return ARM::S10;
+ case ARM::S13: return ARM::S12;
+ case ARM::S15: return ARM::S14;
+ case ARM::S17: return ARM::S16;
+ case ARM::S19: return ARM::S18;
+ case ARM::S21: return ARM::S20;
+ case ARM::S23: return ARM::S22;
+ case ARM::S25: return ARM::S24;
+ case ARM::S27: return ARM::S26;
+ case ARM::S29: return ARM::S28;
+ case ARM::S31: return ARM::S30;
+
+ case ARM::D1: return ARM::D0;
+ case ARM::D3: return ARM::D2;
+ case ARM::D5: return ARM::D4;
+ case ARM::D7: return ARM::D6;
+ case ARM::D9: return ARM::D8;
+ case ARM::D11: return ARM::D10;
+ case ARM::D13: return ARM::D12;
+ case ARM::D15: return ARM::D14;
+ case ARM::D17: return ARM::D16;
+ case ARM::D19: return ARM::D18;
+ case ARM::D21: return ARM::D20;
+ case ARM::D23: return ARM::D22;
+ case ARM::D25: return ARM::D24;
+ case ARM::D27: return ARM::D26;
+ case ARM::D29: return ARM::D28;
+ case ARM::D31: return ARM::D30;
}
return 0;
@@ -776,85 +733,48 @@ unsigned ARMBaseRegisterInfo::getRegisterPairOdd(unsigned Reg,
default: break;
// Return 0 if either register of the pair is a special register.
// So no R12, etc.
- case ARM::R0:
- return ARM::R1;
- case ARM::R2:
- return ARM::R3;
- case ARM::R4:
- return ARM::R5;
+ case ARM::R0: return ARM::R1;
+ case ARM::R2: return ARM::R3;
+ case ARM::R4: return ARM::R5;
case ARM::R6:
return (isReservedReg(MF, ARM::R7) || isReservedReg(MF, ARM::R6))
? 0 : ARM::R7;
- case ARM::R8:
- return isReservedReg(MF, ARM::R9) ? 0 :ARM::R9;
- case ARM::R10:
- return isReservedReg(MF, ARM::R11) ? 0 : ARM::R11;
-
- case ARM::S0:
- return ARM::S1;
- case ARM::S2:
- return ARM::S3;
- case ARM::S4:
- return ARM::S5;
- case ARM::S6:
- return ARM::S7;
- case ARM::S8:
- return ARM::S9;
- case ARM::S10:
- return ARM::S11;
- case ARM::S12:
- return ARM::S13;
- case ARM::S14:
- return ARM::S15;
- case ARM::S16:
- return ARM::S17;
- case ARM::S18:
- return ARM::S19;
- case ARM::S20:
- return ARM::S21;
- case ARM::S22:
- return ARM::S23;
- case ARM::S24:
- return ARM::S25;
- case ARM::S26:
- return ARM::S27;
- case ARM::S28:
- return ARM::S29;
- case ARM::S30:
- return ARM::S31;
-
- case ARM::D0:
- return ARM::D1;
- case ARM::D2:
- return ARM::D3;
- case ARM::D4:
- return ARM::D5;
- case ARM::D6:
- return ARM::D7;
- case ARM::D8:
- return ARM::D9;
- case ARM::D10:
- return ARM::D11;
- case ARM::D12:
- return ARM::D13;
- case ARM::D14:
- return ARM::D15;
- case ARM::D16:
- return ARM::D17;
- case ARM::D18:
- return ARM::D19;
- case ARM::D20:
- return ARM::D21;
- case ARM::D22:
- return ARM::D23;
- case ARM::D24:
- return ARM::D25;
- case ARM::D26:
- return ARM::D27;
- case ARM::D28:
- return ARM::D29;
- case ARM::D30:
- return ARM::D31;
+  case ARM::R8: return isReservedReg(MF, ARM::R9)  ? 0 : ARM::R9;
+ case ARM::R10: return isReservedReg(MF, ARM::R11) ? 0 : ARM::R11;
+
+ case ARM::S0: return ARM::S1;
+ case ARM::S2: return ARM::S3;
+ case ARM::S4: return ARM::S5;
+ case ARM::S6: return ARM::S7;
+ case ARM::S8: return ARM::S9;
+ case ARM::S10: return ARM::S11;
+ case ARM::S12: return ARM::S13;
+ case ARM::S14: return ARM::S15;
+ case ARM::S16: return ARM::S17;
+ case ARM::S18: return ARM::S19;
+ case ARM::S20: return ARM::S21;
+ case ARM::S22: return ARM::S23;
+ case ARM::S24: return ARM::S25;
+ case ARM::S26: return ARM::S27;
+ case ARM::S28: return ARM::S29;
+ case ARM::S30: return ARM::S31;
+
+ case ARM::D0: return ARM::D1;
+ case ARM::D2: return ARM::D3;
+ case ARM::D4: return ARM::D5;
+ case ARM::D6: return ARM::D7;
+ case ARM::D8: return ARM::D9;
+ case ARM::D10: return ARM::D11;
+ case ARM::D12: return ARM::D13;
+ case ARM::D14: return ARM::D15;
+ case ARM::D16: return ARM::D17;
+ case ARM::D18: return ARM::D19;
+ case ARM::D20: return ARM::D21;
+ case ARM::D22: return ARM::D23;
+ case ARM::D24: return ARM::D25;
+ case ARM::D26: return ARM::D27;
+ case ARM::D28: return ARM::D29;
+ case ARM::D30: return ARM::D31;
}
return 0;
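
[editor's note] Numerically, both tables encode one rule: an LDRD/STRD-style pair is {2k, 2k+1}, so an even index pairs upward and an odd index pairs downward, with reserved or special registers mapping to 0. The switches spell every case out because the ARM::* enum values are tablegen-assigned and the code does not do arithmetic on them; as a plain-integer illustration of the underlying rule only:

#include <cstdio>

// Partner of a raw register *index* under the even/odd pairing rule used
// by getRegisterPairEven/getRegisterPairOdd. This ignores the reserved-
// register checks, and real code must switch on enum values instead.
int pairPartner(int Idx) {
  return (Idx % 2 == 0) ? Idx + 1 : Idx - 1;
}

int main() {
  std::printf("partner of 4 is %d\n", pairPartner(4)); // R4 <-> R5
  std::printf("partner of 5 is %d\n", pairPartner(5));
}
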
@@ -1111,11 +1031,11 @@ materializeFrameBaseRegister(MachineBasicBlock *MBB,
MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
MRI.constrainRegClass(BaseReg, TII.getRegClass(MCID, 0, this));
- MachineInstrBuilder MIB = BuildMI(*MBB, Ins, DL, MCID, BaseReg)
- .addFrameIndex(FrameIdx).addImm(Offset);
+ MachineInstrBuilder MIB = AddDefaultPred(BuildMI(*MBB, Ins, DL, MCID, BaseReg)
+ .addFrameIndex(FrameIdx).addImm(Offset));
if (!AFI->isThumb1OnlyFunction())
- AddDefaultCC(AddDefaultPred(MIB));
+ AddDefaultCC(MIB);
}
void
@@ -1143,6 +1063,7 @@ ARMBaseRegisterInfo::resolveFrameIndex(MachineBasicBlock::iterator I,
Done = rewriteT2FrameIndex(MI, i, BaseReg, Off, TII);
}
assert (Done && "Unable to resolve frame index!");
+ (void)Done;
}
bool ARMBaseRegisterInfo::isFrameOffsetLegal(const MachineInstr *MI,
diff --git a/lib/Target/ARM/ARMBaseRegisterInfo.h b/lib/Target/ARM/ARMBaseRegisterInfo.h
index b4b4059..fee17ff 100644
--- a/lib/Target/ARM/ARMBaseRegisterInfo.h
+++ b/lib/Target/ARM/ARMBaseRegisterInfo.h
@@ -33,19 +33,6 @@ namespace ARMRI {
};
}
-/// isARMLowRegister - Returns true if the register is low register r0-r7.
-///
-static inline bool isARMLowRegister(unsigned Reg) {
- using namespace ARM;
- switch (Reg) {
- case R0: case R1: case R2: case R3:
- case R4: case R5: case R6: case R7:
- return true;
- default:
- return false;
- }
-}
-
/// isARMArea1Register - Returns true if the register is a low register (r0-r7)
/// or a stack/pc register that we should push/pop.
static inline bool isARMArea1Register(unsigned Reg, bool isDarwin) {
@@ -129,6 +116,8 @@ public:
unsigned &NewSubIdx) const;
const TargetRegisterClass *getPointerRegClass(unsigned Kind = 0) const;
+ const TargetRegisterClass*
+ getCrossCopyRegClass(const TargetRegisterClass *RC) const;
const TargetRegisterClass*
getLargestLegalSuperClass(const TargetRegisterClass *RC) const;
@@ -164,7 +153,6 @@ public:
bool cannotEliminateFrame(const MachineFunction &MF) const;
// Debug information queries.
- unsigned getRARegister() const;
unsigned getFrameRegister(const MachineFunction &MF) const;
unsigned getBaseRegister() const { return BasePtr; }
@@ -172,9 +160,6 @@ public:
unsigned getEHExceptionRegister() const;
unsigned getEHHandlerRegister() const;
- int getDwarfRegNum(unsigned RegNum, bool isEH) const;
- int getLLVMRegNum(unsigned RegNum, bool isEH) const;
-
bool isLowRegister(unsigned Reg) const;
diff --git a/lib/Target/ARM/ARMCodeEmitter.cpp b/lib/Target/ARM/ARMCodeEmitter.cpp
index d6fca62..4148d4a 100644
--- a/lib/Target/ARM/ARMCodeEmitter.cpp
+++ b/lib/Target/ARM/ARMCodeEmitter.cpp
@@ -14,12 +14,12 @@
#define DEBUG_TYPE "jit"
#include "ARM.h"
-#include "ARMAddressingModes.h"
#include "ARMConstantPoolValue.h"
#include "ARMInstrInfo.h"
#include "ARMRelocations.h"
#include "ARMSubtarget.h"
#include "ARMTargetMachine.h"
+#include "MCTargetDesc/ARMAddressingModes.h"
#include "llvm/Constants.h"
#include "llvm/DerivedTypes.h"
#include "llvm/Function.h"
@@ -161,11 +161,11 @@ namespace {
// are already handled elsewhere. They are placeholders to allow this
// encoder to continue to function until the MC encoder is sufficiently
// far along that this one can be eliminated entirely.
- unsigned NEONThumb2DataIPostEncoder(const MachineInstr &MI, unsigned Val)
+ unsigned NEONThumb2DataIPostEncoder(const MachineInstr &MI, unsigned Val)
const { return 0; }
- unsigned NEONThumb2LoadStorePostEncoder(const MachineInstr &MI,unsigned Val)
+ unsigned NEONThumb2LoadStorePostEncoder(const MachineInstr &MI,unsigned Val)
const { return 0; }
- unsigned NEONThumb2DupPostEncoder(const MachineInstr &MI,unsigned Val)
+ unsigned NEONThumb2DupPostEncoder(const MachineInstr &MI,unsigned Val)
const { return 0; }
unsigned VFPThumb2PostEncoder(const MachineInstr&MI, unsigned Val)
const { return 0; }
@@ -189,13 +189,17 @@ namespace {
unsigned Op) const { return 0; }
unsigned getARMBranchTargetOpValue(const MachineInstr &MI, unsigned Op)
const { return 0; }
+ unsigned getARMBLXTargetOpValue(const MachineInstr &MI, unsigned Op)
+ const { return 0; }
unsigned getCCOutOpValue(const MachineInstr &MI, unsigned Op)
const { return 0; }
unsigned getSOImmOpValue(const MachineInstr &MI, unsigned Op)
const { return 0; }
unsigned getT2SOImmOpValue(const MachineInstr &MI, unsigned Op)
const { return 0; }
- unsigned getSORegOpValue(const MachineInstr &MI, unsigned Op)
+ unsigned getSORegRegOpValue(const MachineInstr &MI, unsigned Op)
+ const { return 0; }
+ unsigned getSORegImmOpValue(const MachineInstr &MI, unsigned Op)
const { return 0; }
unsigned getThumbAddrModeRegRegOpValue(const MachineInstr &MI, unsigned Op)
const { return 0; }
@@ -203,8 +207,12 @@ namespace {
const { return 0; }
unsigned getT2AddrModeImm8OpValue(const MachineInstr &MI, unsigned Op)
const { return 0; }
+ unsigned getT2Imm8s4OpValue(const MachineInstr &MI, unsigned Op)
+ const { return 0; }
unsigned getT2AddrModeImm8s4OpValue(const MachineInstr &MI, unsigned Op)
const { return 0; }
+ unsigned getT2AddrModeImm0_1020s4OpValue(const MachineInstr &MI,unsigned Op)
+ const { return 0; }
unsigned getT2AddrModeImm8OffsetOpValue(const MachineInstr &MI, unsigned Op)
const { return 0; }
unsigned getT2AddrModeImm12OffsetOpValue(const MachineInstr &MI,unsigned Op)
@@ -213,10 +221,6 @@ namespace {
const { return 0; }
unsigned getT2SORegOpValue(const MachineInstr &MI, unsigned Op)
const { return 0; }
- unsigned getRotImmOpValue(const MachineInstr &MI, unsigned Op)
- const { return 0; }
- unsigned getImmMinusOneOpValue(const MachineInstr &MI, unsigned Op)
- const { return 0; }
unsigned getT2AdrLabelOpValue(const MachineInstr &MI, unsigned Op)
const { return 0; }
unsigned getAddrMode6AddressOpValue(const MachineInstr &MI, unsigned Op)
@@ -230,8 +234,6 @@ namespace {
const { return 0; }
unsigned getBitfieldInvertedMaskOpValue(const MachineInstr &MI,
unsigned Op) const { return 0; }
- unsigned getMsbOpValue(const MachineInstr &MI,
- unsigned Op) const { return 0; }
unsigned getSsatBitPosValue(const MachineInstr &MI,
unsigned Op) const { return 0; }
uint32_t getLdStmModeOpValue(const MachineInstr &MI, unsigned OpIdx)
@@ -268,6 +270,8 @@ namespace {
const { return 0;}
uint32_t getAddrMode2OffsetOpValue(const MachineInstr &MI, unsigned OpIdx)
const { return 0;}
+ uint32_t getPostIdxRegOpValue(const MachineInstr &MI, unsigned OpIdx)
+ const { return 0;}
uint32_t getAddrMode3OffsetOpValue(const MachineInstr &MI, unsigned OpIdx)
const { return 0;}
uint32_t getAddrMode3OpValue(const MachineInstr &MI, unsigned Op)
@@ -632,15 +636,16 @@ void ARMCodeEmitter::emitConstPoolInstruction(const MachineInstr &MI) {
<< (void*)MCE.getCurrentPCValue() << " " << *ACPV << '\n');
assert(ACPV->isGlobalValue() && "unsupported constant pool value");
- const GlobalValue *GV = ACPV->getGV();
+ const GlobalValue *GV = cast<ARMConstantPoolConstant>(ACPV)->getGV();
if (GV) {
Reloc::Model RelocM = TM.getRelocationModel();
emitGlobalAddress(GV, ARM::reloc_arm_machine_cp_entry,
isa<Function>(GV),
Subtarget->GVIsIndirectSymbol(GV, RelocM),
(intptr_t)ACPV);
- } else {
- emitExternalSymbolAddress(ACPV->getSymbol(), ARM::reloc_arm_absolute);
+ } else {
+ const char *Sym = cast<ARMConstantPoolSymbol>(ACPV)->getSymbol();
+ emitExternalSymbolAddress(Sym, ARM::reloc_arm_absolute);
}
emitWordLE(0);
} else {
@@ -983,7 +988,7 @@ unsigned ARMCodeEmitter::getMachineSoImmOpValue(unsigned SoImm) {
unsigned ARMCodeEmitter::getAddrModeSBit(const MachineInstr &MI,
const MCInstrDesc &MCID) const {
- for (unsigned i = MI.getNumOperands(), e = MCID.getNumOperands(); i >= e; --i){
+ for (unsigned i = MI.getNumOperands(), e = MCID.getNumOperands(); i >= e;--i){
const MachineOperand &MO = MI.getOperand(i-1);
if (MO.isReg() && MO.isDef() && MO.getReg() == ARM::CPSR)
return 1 << ARMII::S_BitShift;
diff --git a/lib/Target/ARM/ARMConstantIslandPass.cpp b/lib/Target/ARM/ARMConstantIslandPass.cpp
index f45ebdc..3e3a413 100644
--- a/lib/Target/ARM/ARMConstantIslandPass.cpp
+++ b/lib/Target/ARM/ARMConstantIslandPass.cpp
@@ -15,10 +15,10 @@
#define DEBUG_TYPE "arm-cp-islands"
#include "ARM.h"
-#include "ARMAddressingModes.h"
#include "ARMMachineFunctionInfo.h"
#include "ARMInstrInfo.h"
#include "Thumb2InstrInfo.h"
+#include "MCTargetDesc/ARMAddressingModes.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineJumpTableInfo.h"
@@ -739,7 +739,11 @@ MachineBasicBlock *ARMConstantIslands::SplitBlockBeforeInstr(MachineInstr *MI) {
// There doesn't seem to be meaningful DebugInfo available; this doesn't
// correspond to anything in the source.
unsigned Opc = isThumb ? (isThumb2 ? ARM::t2B : ARM::tB) : ARM::B;
- BuildMI(OrigBB, DebugLoc(), TII->get(Opc)).addMBB(NewBB);
+ if (!isThumb)
+ BuildMI(OrigBB, DebugLoc(), TII->get(Opc)).addMBB(NewBB);
+ else
+ BuildMI(OrigBB, DebugLoc(), TII->get(Opc)).addMBB(NewBB)
+ .addImm(ARMCC::AL).addReg(0);
++NumSplit;
// Update the CFG. All succs of OrigBB are now succs of NewBB.
@@ -1151,7 +1155,11 @@ void ARMConstantIslands::CreateNewWater(unsigned CPUserIndex,
// targets will be exchanged, and the altered branch may be out of
// range, so the machinery has to know about it.
int UncondBr = isThumb ? ((isThumb2) ? ARM::t2B : ARM::tB) : ARM::B;
- BuildMI(UserMBB, DebugLoc(), TII->get(UncondBr)).addMBB(NewMBB);
+ if (!isThumb)
+ BuildMI(UserMBB, DebugLoc(), TII->get(UncondBr)).addMBB(NewMBB);
+ else
+ BuildMI(UserMBB, DebugLoc(), TII->get(UncondBr)).addMBB(NewMBB)
+ .addImm(ARMCC::AL).addReg(0);
unsigned MaxDisp = getUnconditionalBrDisp(UncondBr);
ImmBranches.push_back(ImmBranch(&UserMBB->back(),
MaxDisp, false, UncondBr));
@@ -1512,7 +1520,11 @@ ARMConstantIslands::FixUpConditionalBr(MachineFunction &MF, ImmBranch &Br) {
.addMBB(NextBB).addImm(CC).addReg(CCReg);
Br.MI = &MBB->back();
BBSizes[MBB->getNumber()] += TII->GetInstSizeInBytes(&MBB->back());
- BuildMI(MBB, DebugLoc(), TII->get(Br.UncondBr)).addMBB(DestBB);
+ if (isThumb)
+ BuildMI(MBB, DebugLoc(), TII->get(Br.UncondBr)).addMBB(DestBB)
+ .addImm(ARMCC::AL).addReg(0);
+ else
+ BuildMI(MBB, DebugLoc(), TII->get(Br.UncondBr)).addMBB(DestBB);
BBSizes[MBB->getNumber()] += TII->GetInstSizeInBytes(&MBB->back());
unsigned MaxDisp = getUnconditionalBrDisp(Br.UncondBr);
ImmBranches.push_back(ImmBranch(&MBB->back(), MaxDisp, false, Br.UncondBr));
@@ -1891,7 +1903,8 @@ AdjustJTTargetBlockForward(MachineBasicBlock *BB, MachineBasicBlock *JTBB)
// There doesn't seem to be meaningful DebugInfo available; this doesn't
// correspond directly to anything in the source.
assert (isThumb2 && "Adjusting for TB[BH] but not in Thumb2?");
- BuildMI(NewBB, DebugLoc(), TII->get(ARM::t2B)).addMBB(BB);
+ BuildMI(NewBB, DebugLoc(), TII->get(ARM::t2B)).addMBB(BB)
+ .addImm(ARMCC::AL).addReg(0);
// Update internal data structures to account for the newly inserted MBB.
MF.RenumberBlocks(NewBB);
diff --git a/lib/Target/ARM/ARMConstantPoolValue.cpp b/lib/Target/ARM/ARMConstantPoolValue.cpp
index 165a1d8..aadfd47 100644
--- a/lib/Target/ARM/ARMConstantPoolValue.cpp
+++ b/lib/Target/ARM/ARMConstantPoolValue.cpp
@@ -17,79 +17,57 @@
#include "llvm/Constants.h"
#include "llvm/GlobalValue.h"
#include "llvm/Type.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/Support/raw_ostream.h"
#include <cstdlib>
using namespace llvm;
-ARMConstantPoolValue::ARMConstantPoolValue(const Constant *cval, unsigned id,
- ARMCP::ARMCPKind K,
+//===----------------------------------------------------------------------===//
+// ARMConstantPoolValue
+//===----------------------------------------------------------------------===//
+
+ARMConstantPoolValue::ARMConstantPoolValue(Type *Ty, unsigned id,
+ ARMCP::ARMCPKind kind,
unsigned char PCAdj,
- ARMCP::ARMCPModifier Modif,
- bool AddCA)
- : MachineConstantPoolValue((const Type*)cval->getType()),
- CVal(cval), S(NULL), LabelId(id), Kind(K), PCAdjust(PCAdj),
- Modifier(Modif), AddCurrentAddress(AddCA) {}
-
-ARMConstantPoolValue::ARMConstantPoolValue(LLVMContext &C,
- const char *s, unsigned id,
+ ARMCP::ARMCPModifier modifier,
+ bool addCurrentAddress)
+ : MachineConstantPoolValue(Ty), LabelId(id), Kind(kind),
+ PCAdjust(PCAdj), Modifier(modifier),
+ AddCurrentAddress(addCurrentAddress) {}
+
+ARMConstantPoolValue::ARMConstantPoolValue(LLVMContext &C, unsigned id,
+ ARMCP::ARMCPKind kind,
unsigned char PCAdj,
- ARMCP::ARMCPModifier Modif,
- bool AddCA)
- : MachineConstantPoolValue((const Type*)Type::getInt32Ty(C)),
- CVal(NULL), S(strdup(s)), LabelId(id), Kind(ARMCP::CPExtSymbol),
- PCAdjust(PCAdj), Modifier(Modif), AddCurrentAddress(AddCA) {}
-
-ARMConstantPoolValue::ARMConstantPoolValue(const GlobalValue *gv,
- ARMCP::ARMCPModifier Modif)
- : MachineConstantPoolValue((const Type*)Type::getInt32Ty(gv->getContext())),
- CVal(gv), S(NULL), LabelId(0), Kind(ARMCP::CPValue), PCAdjust(0),
- Modifier(Modif), AddCurrentAddress(false) {}
-
-const GlobalValue *ARMConstantPoolValue::getGV() const {
- return dyn_cast_or_null<GlobalValue>(CVal);
-}
+ ARMCP::ARMCPModifier modifier,
+ bool addCurrentAddress)
+ : MachineConstantPoolValue((Type*)Type::getInt32Ty(C)),
+ LabelId(id), Kind(kind), PCAdjust(PCAdj), Modifier(modifier),
+ AddCurrentAddress(addCurrentAddress) {}
-const BlockAddress *ARMConstantPoolValue::getBlockAddress() const {
- return dyn_cast_or_null<BlockAddress>(CVal);
-}
+ARMConstantPoolValue::~ARMConstantPoolValue() {}
-static bool CPV_streq(const char *S1, const char *S2) {
- if (S1 == S2)
- return true;
- if (S1 && S2 && strcmp(S1, S2) == 0)
- return true;
- return false;
+const char *ARMConstantPoolValue::getModifierText() const {
+ switch (Modifier) {
+ default: llvm_unreachable("Unknown modifier!");
+ // FIXME: Are these case sensitive? It'd be nice to lower-case all the
+ // strings if that's legal.
+ case ARMCP::no_modifier: return "none";
+ case ARMCP::TLSGD: return "tlsgd";
+ case ARMCP::GOT: return "GOT";
+ case ARMCP::GOTOFF: return "GOTOFF";
+ case ARMCP::GOTTPOFF: return "gottpoff";
+ case ARMCP::TPOFF: return "tpoff";
+ }
}
int ARMConstantPoolValue::getExistingMachineCPValue(MachineConstantPool *CP,
unsigned Alignment) {
- unsigned AlignMask = Alignment - 1;
- const std::vector<MachineConstantPoolEntry> Constants = CP->getConstants();
- for (unsigned i = 0, e = Constants.size(); i != e; ++i) {
- if (Constants[i].isMachineConstantPoolEntry() &&
- (Constants[i].getAlignment() & AlignMask) == 0) {
- ARMConstantPoolValue *CPV =
- (ARMConstantPoolValue *)Constants[i].Val.MachineCPVal;
- if (CPV->CVal == CVal &&
- CPV->LabelId == LabelId &&
- CPV->PCAdjust == PCAdjust &&
- CPV_streq(CPV->S, S) &&
- CPV->Modifier == Modifier)
- return i;
- }
- }
-
+ assert(false && "Shouldn't be calling this directly!");
return -1;
}
-ARMConstantPoolValue::~ARMConstantPoolValue() {
- free((void*)S);
-}
-
void
-ARMConstantPoolValue::AddSelectionDAGCSEId(FoldingSetNodeID &ID) {
- ID.AddPointer(CVal);
- ID.AddPointer(S);
+ARMConstantPoolValue::addSelectionDAGCSEId(FoldingSetNodeID &ID) {
ID.AddInteger(LabelId);
ID.AddInteger(PCAdjust);
}
@@ -97,9 +75,7 @@ ARMConstantPoolValue::AddSelectionDAGCSEId(FoldingSetNodeID &ID) {
bool
ARMConstantPoolValue::hasSameValue(ARMConstantPoolValue *ACPV) {
if (ACPV->Kind == Kind &&
- ACPV->CVal == CVal &&
ACPV->PCAdjust == PCAdjust &&
- CPV_streq(ACPV->S, S) &&
ACPV->Modifier == Modifier) {
if (ACPV->LabelId == LabelId)
return true;
@@ -115,12 +91,7 @@ void ARMConstantPoolValue::dump() const {
errs() << " " << *this;
}
-
void ARMConstantPoolValue::print(raw_ostream &O) const {
- if (CVal)
- O << CVal->getName();
- else
- O << S;
if (Modifier) O << "(" << getModifierText() << ")";
if (PCAdjust != 0) {
O << "-(LPC" << LabelId << "+" << (unsigned)PCAdjust;
@@ -128,3 +99,221 @@ void ARMConstantPoolValue::print(raw_ostream &O) const {
O << ")";
}
}
+
+//===----------------------------------------------------------------------===//
+// ARMConstantPoolConstant
+//===----------------------------------------------------------------------===//
+
+ARMConstantPoolConstant::ARMConstantPoolConstant(Type *Ty,
+ const Constant *C,
+ unsigned ID,
+ ARMCP::ARMCPKind Kind,
+ unsigned char PCAdj,
+ ARMCP::ARMCPModifier Modifier,
+ bool AddCurrentAddress)
+ : ARMConstantPoolValue(Ty, ID, Kind, PCAdj, Modifier, AddCurrentAddress),
+ CVal(C) {}
+
+ARMConstantPoolConstant::ARMConstantPoolConstant(const Constant *C,
+ unsigned ID,
+ ARMCP::ARMCPKind Kind,
+ unsigned char PCAdj,
+ ARMCP::ARMCPModifier Modifier,
+ bool AddCurrentAddress)
+ : ARMConstantPoolValue((Type*)C->getType(), ID, Kind, PCAdj, Modifier,
+ AddCurrentAddress),
+ CVal(C) {}
+
+ARMConstantPoolConstant *
+ARMConstantPoolConstant::Create(const Constant *C, unsigned ID) {
+ return new ARMConstantPoolConstant(C, ID, ARMCP::CPValue, 0,
+ ARMCP::no_modifier, false);
+}
+
+ARMConstantPoolConstant *
+ARMConstantPoolConstant::Create(const GlobalValue *GV,
+ ARMCP::ARMCPModifier Modifier) {
+ return new ARMConstantPoolConstant((Type*)Type::getInt32Ty(GV->getContext()),
+ GV, 0, ARMCP::CPValue, 0,
+ Modifier, false);
+}
+
+ARMConstantPoolConstant *
+ARMConstantPoolConstant::Create(const Constant *C, unsigned ID,
+ ARMCP::ARMCPKind Kind, unsigned char PCAdj) {
+ return new ARMConstantPoolConstant(C, ID, Kind, PCAdj,
+ ARMCP::no_modifier, false);
+}
+
+ARMConstantPoolConstant *
+ARMConstantPoolConstant::Create(const Constant *C, unsigned ID,
+ ARMCP::ARMCPKind Kind, unsigned char PCAdj,
+ ARMCP::ARMCPModifier Modifier,
+ bool AddCurrentAddress) {
+ return new ARMConstantPoolConstant(C, ID, Kind, PCAdj, Modifier,
+ AddCurrentAddress);
+}
+
+const GlobalValue *ARMConstantPoolConstant::getGV() const {
+ return dyn_cast_or_null<GlobalValue>(CVal);
+}
+
+const BlockAddress *ARMConstantPoolConstant::getBlockAddress() const {
+ return dyn_cast_or_null<BlockAddress>(CVal);
+}
+
+int ARMConstantPoolConstant::getExistingMachineCPValue(MachineConstantPool *CP,
+ unsigned Alignment) {
+ unsigned AlignMask = Alignment - 1;
+ const std::vector<MachineConstantPoolEntry> Constants = CP->getConstants();
+ for (unsigned i = 0, e = Constants.size(); i != e; ++i) {
+ if (Constants[i].isMachineConstantPoolEntry() &&
+ (Constants[i].getAlignment() & AlignMask) == 0) {
+ ARMConstantPoolValue *CPV =
+ (ARMConstantPoolValue *)Constants[i].Val.MachineCPVal;
+ ARMConstantPoolConstant *APC = dyn_cast<ARMConstantPoolConstant>(CPV);
+ if (!APC) continue;
+ if (APC->CVal == CVal && equals(APC))
+ return i;
+ }
+ }
+
+ return -1;
+}
+
+bool ARMConstantPoolConstant::hasSameValue(ARMConstantPoolValue *ACPV) {
+ const ARMConstantPoolConstant *ACPC = dyn_cast<ARMConstantPoolConstant>(ACPV);
+ return ACPC && ACPC->CVal == CVal && ARMConstantPoolValue::hasSameValue(ACPV);
+}
+
+void ARMConstantPoolConstant::addSelectionDAGCSEId(FoldingSetNodeID &ID) {
+ ID.AddPointer(CVal);
+ ARMConstantPoolValue::addSelectionDAGCSEId(ID);
+}
+
+void ARMConstantPoolConstant::print(raw_ostream &O) const {
+ O << CVal->getName();
+ ARMConstantPoolValue::print(O);
+}
+
+//===----------------------------------------------------------------------===//
+// ARMConstantPoolSymbol
+//===----------------------------------------------------------------------===//
+
+ARMConstantPoolSymbol::ARMConstantPoolSymbol(LLVMContext &C, const char *s,
+ unsigned id,
+ unsigned char PCAdj,
+ ARMCP::ARMCPModifier Modifier,
+ bool AddCurrentAddress)
+ : ARMConstantPoolValue(C, id, ARMCP::CPExtSymbol, PCAdj, Modifier,
+ AddCurrentAddress),
+ S(strdup(s)) {}
+
+ARMConstantPoolSymbol::~ARMConstantPoolSymbol() {
+ free((void*)S);
+}
+
+ARMConstantPoolSymbol *
+ARMConstantPoolSymbol::Create(LLVMContext &C, const char *s,
+ unsigned ID, unsigned char PCAdj) {
+ return new ARMConstantPoolSymbol(C, s, ID, PCAdj, ARMCP::no_modifier, false);
+}
+
+static bool CPV_streq(const char *S1, const char *S2) {
+ if (S1 == S2)
+ return true;
+ if (S1 && S2 && strcmp(S1, S2) == 0)
+ return true;
+ return false;
+}
+
+int ARMConstantPoolSymbol::getExistingMachineCPValue(MachineConstantPool *CP,
+ unsigned Alignment) {
+ unsigned AlignMask = Alignment - 1;
+ const std::vector<MachineConstantPoolEntry> Constants = CP->getConstants();
+ for (unsigned i = 0, e = Constants.size(); i != e; ++i) {
+ if (Constants[i].isMachineConstantPoolEntry() &&
+ (Constants[i].getAlignment() & AlignMask) == 0) {
+ ARMConstantPoolValue *CPV =
+ (ARMConstantPoolValue *)Constants[i].Val.MachineCPVal;
+ ARMConstantPoolSymbol *APS = dyn_cast<ARMConstantPoolSymbol>(CPV);
+ if (!APS) continue;
+
+ if (CPV_streq(APS->S, S) && equals(APS))
+ return i;
+ }
+ }
+
+ return -1;
+}
+
+bool ARMConstantPoolSymbol::hasSameValue(ARMConstantPoolValue *ACPV) {
+ const ARMConstantPoolSymbol *ACPS = dyn_cast<ARMConstantPoolSymbol>(ACPV);
+ return ACPS && CPV_streq(ACPS->S, S) &&
+ ARMConstantPoolValue::hasSameValue(ACPV);
+}
+
+void ARMConstantPoolSymbol::addSelectionDAGCSEId(FoldingSetNodeID &ID) {
+ ID.AddPointer(S);
+ ARMConstantPoolValue::addSelectionDAGCSEId(ID);
+}
+
+void ARMConstantPoolSymbol::print(raw_ostream &O) const {
+ O << S;
+ ARMConstantPoolValue::print(O);
+}
+
+//===----------------------------------------------------------------------===//
+// ARMConstantPoolMBB
+//===----------------------------------------------------------------------===//
+
+ARMConstantPoolMBB::ARMConstantPoolMBB(LLVMContext &C,
+ const MachineBasicBlock *mbb,
+ unsigned id, unsigned char PCAdj,
+ ARMCP::ARMCPModifier Modifier,
+ bool AddCurrentAddress)
+ : ARMConstantPoolValue(C, id, ARMCP::CPMachineBasicBlock, PCAdj,
+ Modifier, AddCurrentAddress),
+ MBB(mbb) {}
+
+ARMConstantPoolMBB *ARMConstantPoolMBB::Create(LLVMContext &C,
+ const MachineBasicBlock *mbb,
+ unsigned ID,
+ unsigned char PCAdj) {
+ return new ARMConstantPoolMBB(C, mbb, ID, PCAdj, ARMCP::no_modifier, false);
+}
+
+int ARMConstantPoolMBB::getExistingMachineCPValue(MachineConstantPool *CP,
+ unsigned Alignment) {
+ unsigned AlignMask = Alignment - 1;
+ const std::vector<MachineConstantPoolEntry> Constants = CP->getConstants();
+ for (unsigned i = 0, e = Constants.size(); i != e; ++i) {
+ if (Constants[i].isMachineConstantPoolEntry() &&
+ (Constants[i].getAlignment() & AlignMask) == 0) {
+ ARMConstantPoolValue *CPV =
+ (ARMConstantPoolValue *)Constants[i].Val.MachineCPVal;
+ ARMConstantPoolMBB *APMBB = dyn_cast<ARMConstantPoolMBB>(CPV);
+ if (!APMBB) continue;
+
+ if (APMBB->MBB == MBB && equals(APMBB))
+ return i;
+ }
+ }
+
+ return -1;
+}
+
+bool ARMConstantPoolMBB::hasSameValue(ARMConstantPoolValue *ACPV) {
+ const ARMConstantPoolMBB *ACPMBB = dyn_cast<ARMConstantPoolMBB>(ACPV);
+ return ACPMBB && ACPMBB->MBB == MBB &&
+ ARMConstantPoolValue::hasSameValue(ACPV);
+}
+
+void ARMConstantPoolMBB::addSelectionDAGCSEId(FoldingSetNodeID &ID) {
+ ID.AddPointer(MBB);
+ ARMConstantPoolValue::addSelectionDAGCSEId(ID);
+}
+
+void ARMConstantPoolMBB::print(raw_ostream &O) const {
+ ARMConstantPoolValue::print(O);
+}
diff --git a/lib/Target/ARM/ARMConstantPoolValue.h b/lib/Target/ARM/ARMConstantPoolValue.h
index d008811..0d0def3 100644
--- a/lib/Target/ARM/ARMConstantPoolValue.h
+++ b/lib/Target/ARM/ARMConstantPoolValue.h
@@ -20,17 +20,19 @@
namespace llvm {
-class Constant;
class BlockAddress;
+class Constant;
class GlobalValue;
class LLVMContext;
+class MachineBasicBlock;
namespace ARMCP {
enum ARMCPKind {
CPValue,
CPExtSymbol,
CPBlockAddress,
- CPLSDA
+ CPLSDA,
+ CPMachineBasicBlock
};
enum ARMCPModifier {
@@ -47,8 +49,6 @@ namespace ARMCP {
/// represent PC-relative displacement between the address of the load
/// instruction and the constant being loaded, i.e. (&GV-(LPIC+8)).
class ARMConstantPoolValue : public MachineConstantPoolValue {
- const Constant *CVal; // Constant being loaded.
- const char *S; // ExtSymbol being loaded.
unsigned LabelId; // Label id of the load.
ARMCP::ARMCPKind Kind; // Kind of constant.
unsigned char PCAdjust; // Extra adjustment if constantpool is pc-relative.
@@ -56,60 +56,54 @@ class ARMConstantPoolValue : public MachineConstantPoolValue {
ARMCP::ARMCPModifier Modifier; // GV modifier i.e. (&GV(modifier)-(LPIC+8))
bool AddCurrentAddress;
+protected:
+ ARMConstantPoolValue(Type *Ty, unsigned id, ARMCP::ARMCPKind Kind,
+ unsigned char PCAdj, ARMCP::ARMCPModifier Modifier,
+ bool AddCurrentAddress);
+
+ ARMConstantPoolValue(LLVMContext &C, unsigned id, ARMCP::ARMCPKind Kind,
+ unsigned char PCAdj, ARMCP::ARMCPModifier Modifier,
+ bool AddCurrentAddress);
public:
- ARMConstantPoolValue(const Constant *cval, unsigned id,
- ARMCP::ARMCPKind Kind = ARMCP::CPValue,
- unsigned char PCAdj = 0,
- ARMCP::ARMCPModifier Modifier = ARMCP::no_modifier,
- bool AddCurrentAddress = false);
- ARMConstantPoolValue(LLVMContext &C, const char *s, unsigned id,
- unsigned char PCAdj = 0,
- ARMCP::ARMCPModifier Modifier = ARMCP::no_modifier,
- bool AddCurrentAddress = false);
- ARMConstantPoolValue(const GlobalValue *GV, ARMCP::ARMCPModifier Modifier);
- ARMConstantPoolValue();
- ~ARMConstantPoolValue();
+ virtual ~ARMConstantPoolValue();
- const GlobalValue *getGV() const;
- const char *getSymbol() const { return S; }
- const BlockAddress *getBlockAddress() const;
ARMCP::ARMCPModifier getModifier() const { return Modifier; }
- const char *getModifierText() const {
- switch (Modifier) {
- default: llvm_unreachable("Unknown modifier!");
- // FIXME: Are these case sensitive? It'd be nice to lower-case all the
- // strings if that's legal.
- case ARMCP::no_modifier: return "none";
- case ARMCP::TLSGD: return "tlsgd";
- case ARMCP::GOT: return "GOT";
- case ARMCP::GOTOFF: return "GOTOFF";
- case ARMCP::GOTTPOFF: return "gottpoff";
- case ARMCP::TPOFF: return "tpoff";
- }
- }
+ const char *getModifierText() const;
bool hasModifier() const { return Modifier != ARMCP::no_modifier; }
+
bool mustAddCurrentAddress() const { return AddCurrentAddress; }
+
unsigned getLabelId() const { return LabelId; }
unsigned char getPCAdjustment() const { return PCAdjust; }
+
bool isGlobalValue() const { return Kind == ARMCP::CPValue; }
bool isExtSymbol() const { return Kind == ARMCP::CPExtSymbol; }
- bool isBlockAddress() { return Kind == ARMCP::CPBlockAddress; }
- bool isLSDA() { return Kind == ARMCP::CPLSDA; }
+ bool isBlockAddress() const { return Kind == ARMCP::CPBlockAddress; }
+ bool isLSDA() const { return Kind == ARMCP::CPLSDA; }
+ bool isMachineBasicBlock() const{ return Kind == ARMCP::CPMachineBasicBlock; }
virtual unsigned getRelocationInfo() const { return 2; }
virtual int getExistingMachineCPValue(MachineConstantPool *CP,
unsigned Alignment);
- virtual void AddSelectionDAGCSEId(FoldingSetNodeID &ID);
+ virtual void addSelectionDAGCSEId(FoldingSetNodeID &ID);
- /// hasSameValue - Return true if this ARM constpool value
- /// can share the same constantpool entry as another ARM constpool value.
- bool hasSameValue(ARMConstantPoolValue *ACPV);
+ /// hasSameValue - Return true if this ARM constpool value can share the same
+ /// constantpool entry as another ARM constpool value.
+ virtual bool hasSameValue(ARMConstantPoolValue *ACPV);
+
+ bool equals(const ARMConstantPoolValue *A) const {
+ return this->LabelId == A->LabelId &&
+ this->PCAdjust == A->PCAdjust &&
+ this->Modifier == A->Modifier;
+ }
+ virtual void print(raw_ostream &O) const;
void print(raw_ostream *O) const { if (O) print(*O); }
- void print(raw_ostream &O) const;
void dump() const;
+
+ static bool classof(const ARMConstantPoolValue *) { return true; }
};
inline raw_ostream &operator<<(raw_ostream &O, const ARMConstantPoolValue &V) {
@@ -117,6 +111,123 @@ inline raw_ostream &operator<<(raw_ostream &O, const ARMConstantPoolValue &V) {
return O;
}
+/// ARMConstantPoolConstant - ARM-specific constant pool values for Constants,
+/// Functions, and BlockAddresses.
+class ARMConstantPoolConstant : public ARMConstantPoolValue {
+ const Constant *CVal; // Constant being loaded.
+
+ ARMConstantPoolConstant(const Constant *C,
+ unsigned ID,
+ ARMCP::ARMCPKind Kind,
+ unsigned char PCAdj,
+ ARMCP::ARMCPModifier Modifier,
+ bool AddCurrentAddress);
+ ARMConstantPoolConstant(Type *Ty, const Constant *C,
+ unsigned ID,
+ ARMCP::ARMCPKind Kind,
+ unsigned char PCAdj,
+ ARMCP::ARMCPModifier Modifier,
+ bool AddCurrentAddress);
+
+public:
+ static ARMConstantPoolConstant *Create(const Constant *C, unsigned ID);
+ static ARMConstantPoolConstant *Create(const GlobalValue *GV,
+ ARMCP::ARMCPModifier Modifier);
+ static ARMConstantPoolConstant *Create(const Constant *C, unsigned ID,
+ ARMCP::ARMCPKind Kind,
+ unsigned char PCAdj);
+ static ARMConstantPoolConstant *Create(const Constant *C, unsigned ID,
+ ARMCP::ARMCPKind Kind,
+ unsigned char PCAdj,
+ ARMCP::ARMCPModifier Modifier,
+ bool AddCurrentAddress);
+
+ const GlobalValue *getGV() const;
+ const BlockAddress *getBlockAddress() const;
+
+ virtual int getExistingMachineCPValue(MachineConstantPool *CP,
+ unsigned Alignment);
+
+ /// hasSameValue - Return true if this ARM constpool value can share the same
+ /// constantpool entry as another ARM constpool value.
+ virtual bool hasSameValue(ARMConstantPoolValue *ACPV);
+
+ virtual void addSelectionDAGCSEId(FoldingSetNodeID &ID);
+
+ virtual void print(raw_ostream &O) const;
+ static bool classof(const ARMConstantPoolValue *APV) {
+ return APV->isGlobalValue() || APV->isBlockAddress() || APV->isLSDA();
+ }
+ static bool classof(const ARMConstantPoolConstant *) { return true; }
+};
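+// Typical use of the factories above, as seen later in this patch (sketch):
+//   ARMConstantPoolValue *CPV =
+//       ARMConstantPoolConstant::Create(GV, Id, ARMCP::CPValue, PCAdj);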
+
+/// ARMConstantPoolSymbol - ARM-specific constantpool values for external
+/// symbols.
+class ARMConstantPoolSymbol : public ARMConstantPoolValue {
+ const char *S; // ExtSymbol being loaded.
+
+ ARMConstantPoolSymbol(LLVMContext &C, const char *s, unsigned id,
+ unsigned char PCAdj, ARMCP::ARMCPModifier Modifier,
+ bool AddCurrentAddress);
+
+public:
+ ~ARMConstantPoolSymbol();
+
+ static ARMConstantPoolSymbol *Create(LLVMContext &C, const char *s,
+ unsigned ID, unsigned char PCAdj);
+
+ const char *getSymbol() const { return S; }
+
+ virtual int getExistingMachineCPValue(MachineConstantPool *CP,
+ unsigned Alignment);
+
+ virtual void addSelectionDAGCSEId(FoldingSetNodeID &ID);
+
+ /// hasSameValue - Return true if this ARM constpool value can share the same
+ /// constantpool entry as another ARM constpool value.
+ virtual bool hasSameValue(ARMConstantPoolValue *ACPV);
+
+ virtual void print(raw_ostream &O) const;
+
+ static bool classof(const ARMConstantPoolValue *ACPV) {
+ return ACPV->isExtSymbol();
+ }
+ static bool classof(const ARMConstantPoolSymbol *) { return true; }
+};
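+// A minimal sketch of the LLVM-style RTTI the classof() overloads above
+// enable (IllustrateRTTI is a hypothetical helper, not defined elsewhere):
+//
+//   static const char *IllustrateRTTI(ARMConstantPoolValue *ACPV) {
+//     if (ARMConstantPoolSymbol *APS = dyn_cast<ARMConstantPoolSymbol>(ACPV))
+//       return APS->getSymbol(); // only reached when ACPV->isExtSymbol()
+//     return 0;
+//   }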
+
+/// ARMConstantPoolMBB - ARM-specific constantpool value of a machine basic
+/// block.
+class ARMConstantPoolMBB : public ARMConstantPoolValue {
+ const MachineBasicBlock *MBB; // Machine basic block.
+
+ ARMConstantPoolMBB(LLVMContext &C, const MachineBasicBlock *mbb, unsigned id,
+ unsigned char PCAdj, ARMCP::ARMCPModifier Modifier,
+ bool AddCurrentAddress);
+
+public:
+ static ARMConstantPoolMBB *Create(LLVMContext &C,
+ const MachineBasicBlock *mbb,
+ unsigned ID, unsigned char PCAdj);
+
+ const MachineBasicBlock *getMBB() const { return MBB; }
+
+ virtual int getExistingMachineCPValue(MachineConstantPool *CP,
+ unsigned Alignment);
+
+ virtual void addSelectionDAGCSEId(FoldingSetNodeID &ID);
+
+ /// hasSameValue - Return true if this ARM constpool value can share the same
+ /// constantpool entry as another ARM constpool value.
+ virtual bool hasSameValue(ARMConstantPoolValue *ACPV);
+
+ virtual void print(raw_ostream &O) const;
+
+ static bool classof(const ARMConstantPoolValue *ACPV) {
+ return ACPV->isMachineBasicBlock();
+ }
+ static bool classof(const ARMConstantPoolMBB *) { return true; }
+};
+
} // End llvm namespace
#endif
diff --git a/lib/Target/ARM/ARMExpandPseudoInsts.cpp b/lib/Target/ARM/ARMExpandPseudoInsts.cpp
index 94b72fd..7872cb9 100644
--- a/lib/Target/ARM/ARMExpandPseudoInsts.cpp
+++ b/lib/Target/ARM/ARMExpandPseudoInsts.cpp
@@ -16,19 +16,24 @@
#define DEBUG_TYPE "arm-pseudo"
#include "ARM.h"
-#include "ARMAddressingModes.h"
#include "ARMBaseInstrInfo.h"
#include "ARMBaseRegisterInfo.h"
#include "ARMMachineFunctionInfo.h"
#include "ARMRegisterInfo.h"
+#include "MCTargetDesc/ARMAddressingModes.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/Target/TargetFrameLowering.h"
#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Support/CommandLine.h"
#include "llvm/Support/raw_ostream.h" // FIXME: for debug only. remove!
using namespace llvm;
+static cl::opt<bool>
+VerifyARMPseudo("verify-arm-pseudo-expand", cl::Hidden,
+ cl::desc("Verify machine code after expanding ARM pseudos"));
+
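+// The option above is cl::Hidden; assuming a typical llc invocation, it can
+// still be enabled explicitly, e.g.:
+//   llc -march=arm -verify-arm-pseudo-expand foo.ll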
namespace {
class ARMExpandPseudo : public MachineFunctionPass {
public:
@@ -741,8 +746,22 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
MI.eraseFromParent();
return true;
}
- case ARM::MOVCCs: {
- BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::MOVs),
+ case ARM::MOVCCsi: {
+ BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::MOVsi),
+ (MI.getOperand(1).getReg()))
+ .addReg(MI.getOperand(2).getReg(),
+ getKillRegState(MI.getOperand(2).isKill()))
+ .addImm(MI.getOperand(3).getImm())
+ .addImm(MI.getOperand(4).getImm()) // 'pred'
+ .addReg(MI.getOperand(5).getReg())
+ .addReg(0); // 's' bit
+
+ MI.eraseFromParent();
+ return true;
+ }
+
+ case ARM::MOVCCsr: {
+ BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::MOVsr),
(MI.getOperand(1).getReg()))
.addReg(MI.getOperand(2).getReg(),
getKillRegState(MI.getOperand(2).isKill()))
@@ -837,10 +856,9 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
case ARM::MOVsrl_flag:
case ARM::MOVsra_flag: {
// These are just fancy MOVs instructions.
- AddDefaultPred(BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::MOVs),
+ AddDefaultPred(BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::MOVsi),
MI.getOperand(0).getReg())
.addOperand(MI.getOperand(1))
- .addReg(0)
.addImm(ARM_AM::getSORegOpc((Opcode == ARM::MOVsrl_flag ?
ARM_AM::lsr : ARM_AM::asr),
1)))
@@ -851,10 +869,9 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
case ARM::RRX: {
// This encodes as "MOVs Rd, Rm, rrx".
MachineInstrBuilder MIB =
- AddDefaultPred(BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::MOVs),
+ AddDefaultPred(BuildMI(MBB, MBBI, MI.getDebugLoc(),TII->get(ARM::MOVsi),
MI.getOperand(0).getReg())
.addOperand(MI.getOperand(1))
- .addOperand(MI.getOperand(1))
.addImm(ARM_AM::getSORegOpc(ARM_AM::rrx, 0)))
.addReg(0);
TransferImpOps(MI, MIB, MIB);
@@ -953,34 +970,6 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
ExpandMOV32BitImm(MBB, MBBI);
return true;
- case ARM::VMOVQQ: {
- unsigned DstReg = MI.getOperand(0).getReg();
- bool DstIsDead = MI.getOperand(0).isDead();
- unsigned EvenDst = TRI->getSubReg(DstReg, ARM::qsub_0);
- unsigned OddDst = TRI->getSubReg(DstReg, ARM::qsub_1);
- unsigned SrcReg = MI.getOperand(1).getReg();
- bool SrcIsKill = MI.getOperand(1).isKill();
- unsigned EvenSrc = TRI->getSubReg(SrcReg, ARM::qsub_0);
- unsigned OddSrc = TRI->getSubReg(SrcReg, ARM::qsub_1);
- MachineInstrBuilder Even =
- AddDefaultPred(BuildMI(MBB, MBBI, MI.getDebugLoc(),
- TII->get(ARM::VORRq))
- .addReg(EvenDst,
- RegState::Define | getDeadRegState(DstIsDead))
- .addReg(EvenSrc, getKillRegState(SrcIsKill))
- .addReg(EvenSrc, getKillRegState(SrcIsKill)));
- MachineInstrBuilder Odd =
- AddDefaultPred(BuildMI(MBB, MBBI, MI.getDebugLoc(),
- TII->get(ARM::VORRq))
- .addReg(OddDst,
- RegState::Define | getDeadRegState(DstIsDead))
- .addReg(OddSrc, getKillRegState(SrcIsKill))
- .addReg(OddSrc, getKillRegState(SrcIsKill)));
- TransferImpOps(MI, Even, Odd);
- MI.eraseFromParent();
- return true;
- }
-
case ARM::VLDMQIA: {
unsigned NewOpc = ARM::VLDMDIA;
MachineInstrBuilder MIB =
@@ -1316,6 +1305,8 @@ bool ARMExpandPseudo::runOnMachineFunction(MachineFunction &MF) {
for (MachineFunction::iterator MFI = MF.begin(), E = MF.end(); MFI != E;
++MFI)
Modified |= ExpandMBB(*MFI);
+ if (VerifyARMPseudo)
+ MF.verify(this, "After expanding ARM pseudo instructions.");
return Modified;
}
diff --git a/lib/Target/ARM/ARMFastISel.cpp b/lib/Target/ARM/ARMFastISel.cpp
index f469d7e..9bc7ef2 100644
--- a/lib/Target/ARM/ARMFastISel.cpp
+++ b/lib/Target/ARM/ARMFastISel.cpp
@@ -14,13 +14,13 @@
//===----------------------------------------------------------------------===//
#include "ARM.h"
-#include "ARMAddressingModes.h"
#include "ARMBaseInstrInfo.h"
#include "ARMCallingConv.h"
#include "ARMRegisterInfo.h"
#include "ARMTargetMachine.h"
#include "ARMSubtarget.h"
#include "ARMConstantPoolValue.h"
+#include "MCTargetDesc/ARMAddressingModes.h"
#include "llvm/CallingConv.h"
#include "llvm/DerivedTypes.h"
#include "llvm/GlobalVariable.h"
@@ -171,8 +171,8 @@ class ARMFastISel : public FastISel {
// Utility routines.
private:
- bool isTypeLegal(const Type *Ty, MVT &VT);
- bool isLoadTypeLegal(const Type *Ty, MVT &VT);
+ bool isTypeLegal(Type *Ty, MVT &VT);
+ bool isLoadTypeLegal(Type *Ty, MVT &VT);
bool ARMEmitLoad(EVT VT, unsigned &ResultReg, Address &Addr);
bool ARMEmitStore(EVT VT, unsigned SrcReg, Address &Addr);
bool ARMComputeAddress(const Value *Obj, Address &Addr);
@@ -502,11 +502,19 @@ unsigned ARMFastISel::ARMMaterializeFP(const ConstantFP *CFP, EVT VT) {
// This checks to see if we can use VFP3 instructions to materialize
// a constant, otherwise we have to go through the constant pool.
if (TLI.isFPImmLegal(Val, VT)) {
- unsigned Opc = is64bit ? ARM::FCONSTD : ARM::FCONSTS;
+ int Imm;
+ unsigned Opc;
+ if (is64bit) {
+ Imm = ARM_AM::getFP64Imm(Val);
+ Opc = ARM::FCONSTD;
+ } else {
+ Imm = ARM_AM::getFP32Imm(Val);
+ Opc = ARM::FCONSTS;
+ }
unsigned DestReg = createResultReg(TLI.getRegClassFor(VT));
AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc),
DestReg)
- .addFPImm(CFP));
+ .addImm(Imm));
return DestReg;
}
@@ -590,8 +598,9 @@ unsigned ARMFastISel::ARMMaterializeGV(const GlobalValue *GV, EVT VT) {
// Grab index.
unsigned PCAdj = (RelocM != Reloc::PIC_) ? 0 : (Subtarget->isThumb() ? 4 : 8);
unsigned Id = AFI->createPICLabelUId();
- ARMConstantPoolValue *CPV = new ARMConstantPoolValue(GV, Id,
- ARMCP::CPValue, PCAdj);
+ ARMConstantPoolValue *CPV = ARMConstantPoolConstant::Create(GV, Id,
+ ARMCP::CPValue,
+ PCAdj);
unsigned Idx = MCP.getConstantPoolIndex(CPV, Align);
// Load value.
@@ -615,8 +624,8 @@ unsigned ARMFastISel::ARMMaterializeGV(const GlobalValue *GV, EVT VT) {
if (Subtarget->GVIsIndirectSymbol(GV, RelocM)) {
unsigned NewDestReg = createResultReg(TLI.getRegClassFor(VT));
if (isThumb)
- MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(ARM::t2LDRi12),
- NewDestReg)
+ MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(ARM::t2LDRi12), NewDestReg)
.addReg(DestReg)
.addImm(0);
else
@@ -673,7 +682,7 @@ unsigned ARMFastISel::TargetMaterializeAlloca(const AllocaInst *AI) {
return 0;
}
-bool ARMFastISel::isTypeLegal(const Type *Ty, MVT &VT) {
+bool ARMFastISel::isTypeLegal(Type *Ty, MVT &VT) {
EVT evt = TLI.getValueType(Ty, true);
// Only handle simple types.
@@ -685,7 +694,7 @@ bool ARMFastISel::isTypeLegal(const Type *Ty, MVT &VT) {
return TLI.isTypeLegal(VT);
}
-bool ARMFastISel::isLoadTypeLegal(const Type *Ty, MVT &VT) {
+bool ARMFastISel::isLoadTypeLegal(Type *Ty, MVT &VT) {
if (isTypeLegal(Ty, VT)) return true;
// If this is a type that can be sign or zero-extended to a basic operation
@@ -714,7 +723,7 @@ bool ARMFastISel::ARMComputeAddress(const Value *Obj, Address &Addr) {
U = C;
}
- if (const PointerType *Ty = dyn_cast<PointerType>(Obj->getType()))
+ if (PointerType *Ty = dyn_cast<PointerType>(Obj->getType()))
if (Ty->getAddressSpace() > 255)
// Fast instruction selection doesn't support the special
// address spaces.
@@ -749,7 +758,7 @@ bool ARMFastISel::ARMComputeAddress(const Value *Obj, Address &Addr) {
for (User::const_op_iterator i = U->op_begin() + 1, e = U->op_end();
i != e; ++i, ++GTI) {
const Value *Op = *i;
- if (const StructType *STy = dyn_cast<StructType>(*GTI)) {
+ if (StructType *STy = dyn_cast<StructType>(*GTI)) {
const StructLayout *SL = TD.getStructLayout(STy);
unsigned Idx = cast<ConstantInt>(Op)->getZExtValue();
TmpOffset += SL->getElementOffset(Idx);
@@ -946,6 +955,10 @@ bool ARMFastISel::ARMEmitLoad(EVT VT, unsigned &ResultReg, Address &Addr) {
}
bool ARMFastISel::SelectLoad(const Instruction *I) {
+ // Atomic loads need special handling.
+ if (cast<LoadInst>(I)->isAtomic())
+ return false;
+
// Verify we have a legal type before going any further.
MVT VT;
if (!isLoadTypeLegal(I->getType(), VT))
@@ -1008,6 +1021,10 @@ bool ARMFastISel::SelectStore(const Instruction *I) {
Value *Op0 = I->getOperand(0);
unsigned SrcReg = 0;
+ // Atomic stores need special handling.
+ if (cast<StoreInst>(I)->isAtomic())
+ return false;
+
// Verify we have a legal type before going any further.
MVT VT;
if (!isLoadTypeLegal(I->getOperand(0)->getType(), VT))
@@ -1085,7 +1102,7 @@ bool ARMFastISel::SelectBranch(const Instruction *I) {
// TODO: Factor this out.
if (const CmpInst *CI = dyn_cast<CmpInst>(BI->getCondition())) {
MVT SourceVT;
- const Type *Ty = CI->getOperand(0)->getType();
+ Type *Ty = CI->getOperand(0)->getType();
if (CI->hasOneUse() && (CI->getParent() == I->getParent())
&& isTypeLegal(Ty, SourceVT)) {
bool isFloat = (Ty->isDoubleTy() || Ty->isFloatTy());
@@ -1201,7 +1218,7 @@ bool ARMFastISel::SelectCmp(const Instruction *I) {
const CmpInst *CI = cast<CmpInst>(I);
MVT VT;
- const Type *Ty = CI->getOperand(0)->getType();
+ Type *Ty = CI->getOperand(0)->getType();
if (!isTypeLegal(Ty, VT))
return false;
@@ -1309,7 +1326,7 @@ bool ARMFastISel::SelectSIToFP(const Instruction *I) {
if (!Subtarget->hasVFP2()) return false;
MVT DstVT;
- const Type *Ty = I->getType();
+ Type *Ty = I->getType();
if (!isTypeLegal(Ty, DstVT))
return false;
@@ -1328,7 +1345,7 @@ bool ARMFastISel::SelectSIToFP(const Instruction *I) {
unsigned Opc;
if (Ty->isFloatTy()) Opc = ARM::VSITOS;
else if (Ty->isDoubleTy()) Opc = ARM::VSITOD;
- else return 0;
+ else return false;
unsigned ResultReg = createResultReg(TLI.getRegClassFor(DstVT));
AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc),
@@ -1343,7 +1360,7 @@ bool ARMFastISel::SelectFPToSI(const Instruction *I) {
if (!Subtarget->hasVFP2()) return false;
MVT DstVT;
- const Type *RetTy = I->getType();
+ Type *RetTy = I->getType();
if (!isTypeLegal(RetTy, DstVT))
return false;
@@ -1351,10 +1368,10 @@ bool ARMFastISel::SelectFPToSI(const Instruction *I) {
if (Op == 0) return false;
unsigned Opc;
- const Type *OpTy = I->getOperand(0)->getType();
+ Type *OpTy = I->getOperand(0)->getType();
if (OpTy->isFloatTy()) Opc = ARM::VTOSIZS;
else if (OpTy->isDoubleTy()) Opc = ARM::VTOSIZD;
- else return 0;
+ else return false;
// f64->s32 or f32->s32 both need an intermediate f32 reg.
unsigned ResultReg = createResultReg(TLI.getRegClassFor(MVT::f32));
@@ -1401,7 +1418,7 @@ bool ARMFastISel::SelectSelect(const Instruction *I) {
bool ARMFastISel::SelectSDiv(const Instruction *I) {
MVT VT;
- const Type *Ty = I->getType();
+ Type *Ty = I->getType();
if (!isTypeLegal(Ty, VT))
return false;
@@ -1429,7 +1446,7 @@ bool ARMFastISel::SelectSDiv(const Instruction *I) {
bool ARMFastISel::SelectSRem(const Instruction *I) {
MVT VT;
- const Type *Ty = I->getType();
+ Type *Ty = I->getType();
if (!isTypeLegal(Ty, VT))
return false;
@@ -1456,7 +1473,7 @@ bool ARMFastISel::SelectBinaryOp(const Instruction *I, unsigned ISDOpcode) {
// operations, but can't figure out how to. Just use the vfp instructions
// if we have them.
// FIXME: It'd be nice to use NEON instructions.
- const Type *Ty = I->getType();
+ Type *Ty = I->getType();
bool isFloat = (Ty->isDoubleTy() || Ty->isFloatTy());
if (isFloat && !Subtarget->hasVFP2())
return false;
@@ -1711,7 +1728,7 @@ bool ARMFastISel::SelectRet(const Instruction *I) {
// Analyze operands of the call, assigning locations to each operand.
SmallVector<CCValAssign, 16> ValLocs;
- CCState CCInfo(CC, F.isVarArg(), *FuncInfo.MF, TM, ValLocs, I->getContext());
+ CCState CCInfo(CC, F.isVarArg(), *FuncInfo.MF, TM, ValLocs,I->getContext());
CCInfo.AnalyzeReturn(Outs, CCAssignFnForCall(CC, true /* is Ret */));
const Value *RV = Ret->getOperand(0);
@@ -1778,7 +1795,7 @@ bool ARMFastISel::ARMEmitLibcall(const Instruction *I, RTLIB::Libcall Call) {
CallingConv::ID CC = TLI.getLibcallCallingConv(Call);
// Handle *simple* calls for now.
- const Type *RetTy = I->getType();
+ Type *RetTy = I->getType();
MVT RetVT;
if (RetTy->isVoidTy())
RetVT = MVT::isVoid;
@@ -1802,7 +1819,7 @@ bool ARMFastISel::ARMEmitLibcall(const Instruction *I, RTLIB::Libcall Call) {
unsigned Arg = getRegForValue(Op);
if (Arg == 0) return false;
- const Type *ArgTy = Op->getType();
+ Type *ArgTy = Op->getType();
MVT ArgVT;
if (!isTypeLegal(ArgTy, ArgVT)) return false;
@@ -1870,13 +1887,13 @@ bool ARMFastISel::SelectCall(const Instruction *I) {
// TODO: Avoid some calling conventions?
// Let SDISel handle vararg functions.
- const PointerType *PT = cast<PointerType>(CS.getCalledValue()->getType());
- const FunctionType *FTy = cast<FunctionType>(PT->getElementType());
+ PointerType *PT = cast<PointerType>(CS.getCalledValue()->getType());
+ FunctionType *FTy = cast<FunctionType>(PT->getElementType());
if (FTy->isVarArg())
return false;
// Handle *simple* calls for now.
- const Type *RetTy = I->getType();
+ Type *RetTy = I->getType();
MVT RetVT;
if (RetTy->isVoidTy())
RetVT = MVT::isVoid;
@@ -1915,7 +1932,7 @@ bool ARMFastISel::SelectCall(const Instruction *I) {
CS.paramHasAttr(AttrInd, Attribute::ByVal))
return false;
- const Type *ArgTy = (*i)->getType();
+ Type *ArgTy = (*i)->getType();
MVT ArgVT;
if (!isTypeLegal(ArgTy, ArgVT))
return false;
@@ -1969,9 +1986,9 @@ bool ARMFastISel::SelectIntCast(const Instruction *I) {
// On ARM, in general, integer casts don't involve legal types; this code
// handles promotable integers. The high bits for a type smaller than
// the register size are assumed to be undefined.
- const Type *DestTy = I->getType();
+ Type *DestTy = I->getType();
Value *Op = I->getOperand(0);
- const Type *SrcTy = Op->getType();
+ Type *SrcTy = Op->getType();
EVT SrcVT, DestVT;
SrcVT = TLI.getValueType(SrcTy, true);
@@ -2002,16 +2019,18 @@ bool ARMFastISel::SelectIntCast(const Instruction *I) {
switch (SrcVT.getSimpleVT().SimpleTy) {
default: return false;
case MVT::i16:
+ if (!Subtarget->hasV6Ops()) return false;
if (isZext)
- Opc = isThumb ? ARM::t2UXTHr : ARM::UXTHr;
+ Opc = isThumb ? ARM::t2UXTH : ARM::UXTH;
else
- Opc = isThumb ? ARM::t2SXTHr : ARM::SXTHr;
+ Opc = isThumb ? ARM::t2SXTH : ARM::SXTH;
break;
case MVT::i8:
+ if (!Subtarget->hasV6Ops()) return false;
if (isZext)
- Opc = isThumb ? ARM::t2UXTBr : ARM::UXTBr;
+ Opc = isThumb ? ARM::t2UXTB : ARM::UXTB;
else
- Opc = isThumb ? ARM::t2SXTBr : ARM::SXTBr;
+ Opc = isThumb ? ARM::t2SXTB : ARM::SXTB;
break;
case MVT::i1:
if (isZext) {
@@ -2033,6 +2052,8 @@ bool ARMFastISel::SelectIntCast(const Instruction *I) {
.addReg(SrcReg);
if (isBoolZext)
MIB.addImm(1);
+ else
+    MIB.addImm(0); // rotate amount for the sxt/uxt instructions ("ror #0")
AddOptionalDefs(MIB);
UpdateValueMap(I, DestReg);
return true;
diff --git a/lib/Target/ARM/ARMFrameLowering.cpp b/lib/Target/ARM/ARMFrameLowering.cpp
index 381b404..2d1de6f 100644
--- a/lib/Target/ARM/ARMFrameLowering.cpp
+++ b/lib/Target/ARM/ARMFrameLowering.cpp
@@ -12,10 +12,10 @@
//===----------------------------------------------------------------------===//
#include "ARMFrameLowering.h"
-#include "ARMAddressingModes.h"
#include "ARMBaseInstrInfo.h"
#include "ARMBaseRegisterInfo.h"
#include "ARMMachineFunctionInfo.h"
+#include "MCTargetDesc/ARMAddressingModes.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
@@ -93,7 +93,8 @@ static bool isCSRestore(MachineInstr *MI,
return false;
return true;
}
- if ((MI->getOpcode() == ARM::LDR_POST ||
+ if ((MI->getOpcode() == ARM::LDR_POST_IMM ||
+ MI->getOpcode() == ARM::LDR_POST_REG ||
MI->getOpcode() == ARM::t2LDR_POST) &&
isCalleeSavedRegister(MI->getOperand(0).getReg(), CSRegs) &&
MI->getOperand(1).getReg() == ARM::SP)
@@ -413,6 +414,9 @@ void ARMFrameLowering::emitEpilogue(MachineFunction &MF,
MIB.addExternalSymbol(JumpTarget.getSymbolName(),
JumpTarget.getTargetFlags());
}
+
+ // Add the default predicate in Thumb mode.
+ if (STI.isThumb()) MIB.addImm(ARMCC::AL).addReg(0);
} else if (RetOpcode == ARM::TCRETURNri) {
BuildMI(MBB, MBBI, dl,
TII.get(STI.isThumb() ? ARM::tTAILJMPr : ARM::TAILJMPr)).
@@ -502,7 +506,7 @@ ARMFrameLowering::ResolveFrameIndexReference(const MachineFunction &MF,
}
}
} else if (AFI->isThumb2Function()) {
- // Use add <rd>, sp, #<imm8>
+ // Use add <rd>, sp, #<imm8>
// ldr <rd>, [sp, #<imm8>]
// if at all possible to save space.
if (Offset >= 0 && (Offset & 3) == 0 && Offset <= 1020)
@@ -587,14 +591,8 @@ void ARMFrameLowering::emitPushInst(MachineBasicBlock &MBB,
MachineInstrBuilder MIB = BuildMI(MBB, MI, DL, TII.get(StrOpc),
ARM::SP)
.addReg(Regs[0].first, getKillRegState(Regs[0].second))
- .addReg(ARM::SP).setMIFlags(MIFlags);
- // ARM mode needs an extra reg0 here due to addrmode2. Will go away once
- // that refactoring is complete (eventually).
- if (StrOpc == ARM::STR_PRE) {
- MIB.addReg(0);
- MIB.addImm(ARM_AM::getAM2Opc(ARM_AM::sub, 4, ARM_AM::no_shift));
- } else
- MIB.addImm(-4);
+ .addReg(ARM::SP).setMIFlags(MIFlags)
+ .addImm(-4);
AddDefaultPred(MIB);
}
Regs.clear();
@@ -651,8 +649,10 @@ void ARMFrameLowering::emitPopInst(MachineBasicBlock &MBB,
.addReg(ARM::SP));
for (unsigned i = 0, e = Regs.size(); i < e; ++i)
MIB.addReg(Regs[i], getDefRegState(true));
- if (DeleteRet)
+ if (DeleteRet) {
+ MIB->copyImplicitOps(&*MI);
MI->eraseFromParent();
+ }
MI = MIB;
} else if (Regs.size() == 1) {
// If we adjusted the reg to PC from LR above, switch it back here. We
@@ -665,7 +665,7 @@ void ARMFrameLowering::emitPopInst(MachineBasicBlock &MBB,
.addReg(ARM::SP);
// ARM mode needs an extra reg0 here due to addrmode2. Will go away once
// that refactoring is complete (eventually).
- if (LdrOpc == ARM::LDR_POST) {
+ if (LdrOpc == ARM::LDR_POST_REG || LdrOpc == ARM::LDR_POST_IMM) {
MIB.addReg(0);
MIB.addImm(ARM_AM::getAM2Opc(ARM_AM::add, 4, ARM_AM::no_shift));
} else
@@ -687,7 +687,8 @@ bool ARMFrameLowering::spillCalleeSavedRegisters(MachineBasicBlock &MBB,
ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
unsigned PushOpc = AFI->isThumbFunction() ? ARM::t2STMDB_UPD : ARM::STMDB_UPD;
- unsigned PushOneOpc = AFI->isThumbFunction() ? ARM::t2STR_PRE : ARM::STR_PRE;
+ unsigned PushOneOpc = AFI->isThumbFunction() ?
+ ARM::t2STR_PRE : ARM::STR_PRE_IMM;
unsigned FltOpc = ARM::VSTMDDB_UPD;
emitPushInst(MBB, MI, CSI, PushOpc, PushOneOpc, false, &isARMArea1Register,
MachineInstr::FrameSetup);
@@ -711,7 +712,7 @@ bool ARMFrameLowering::restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
bool isVarArg = AFI->getVarArgsRegSaveSize() > 0;
unsigned PopOpc = AFI->isThumbFunction() ? ARM::t2LDMIA_UPD : ARM::LDMIA_UPD;
- unsigned LdrOpc = AFI->isThumbFunction() ? ARM::t2LDR_POST : ARM::LDR_POST;
+ unsigned LdrOpc = AFI->isThumbFunction() ? ARM::t2LDR_POST :ARM::LDR_POST_IMM;
unsigned FltOpc = ARM::VLDMDIA_UPD;
emitPopInst(MBB, MI, CSI, FltOpc, 0, isVarArg, true, &isARMArea3Register);
emitPopInst(MBB, MI, CSI, PopOpc, LdrOpc, isVarArg, false,
diff --git a/lib/Target/ARM/ARMGlobalMerge.cpp b/lib/Target/ARM/ARMGlobalMerge.cpp
index 8d77b2d..5f863ea 100644
--- a/lib/Target/ARM/ARMGlobalMerge.cpp
+++ b/lib/Target/ARM/ARMGlobalMerge.cpp
@@ -100,8 +100,8 @@ namespace {
GlobalCmp(const TargetData *td) : TD(td) { }
bool operator()(const GlobalVariable *GV1, const GlobalVariable *GV2) {
- const Type *Ty1 = cast<PointerType>(GV1->getType())->getElementType();
- const Type *Ty2 = cast<PointerType>(GV2->getType())->getElementType();
+ Type *Ty1 = cast<PointerType>(GV1->getType())->getElementType();
+ Type *Ty2 = cast<PointerType>(GV2->getType())->getElementType();
return (TD->getTypeAllocSize(Ty1) < TD->getTypeAllocSize(Ty2));
}
@@ -123,7 +123,7 @@ bool ARMGlobalMerge::doMerge(SmallVectorImpl<GlobalVariable*> &Globals,
// FIXME: Find better heuristics
std::stable_sort(Globals.begin(), Globals.end(), GlobalCmp(TD));
- const Type *Int32Ty = Type::getInt32Ty(M.getContext());
+ Type *Int32Ty = Type::getInt32Ty(M.getContext());
for (size_t i = 0, e = Globals.size(); i != e; ) {
size_t j = 0;
@@ -150,7 +150,7 @@ bool ARMGlobalMerge::doMerge(SmallVectorImpl<GlobalVariable*> &Globals,
ConstantInt::get(Int32Ty, 0),
ConstantInt::get(Int32Ty, k-i)
};
- Constant *GEP = ConstantExpr::getInBoundsGetElementPtr(MergedGV, Idx, 2);
+ Constant *GEP = ConstantExpr::getInBoundsGetElementPtr(MergedGV, Idx);
Globals[k]->replaceAllUsesWith(GEP);
Globals[k]->eraseFromParent();
}
@@ -176,7 +176,7 @@ bool ARMGlobalMerge::doInitialization(Module &M) {
// Ignore fancy-aligned globals for now.
unsigned Alignment = I->getAlignment();
- const Type *Ty = I->getType()->getElementType();
+ Type *Ty = I->getType()->getElementType();
if (Alignment > TD->getABITypeAlignment(Ty))
continue;
diff --git a/lib/Target/ARM/ARMISelDAGToDAG.cpp b/lib/Target/ARM/ARMISelDAGToDAG.cpp
index 2c9481b..5ee009c 100644
--- a/lib/Target/ARM/ARMISelDAGToDAG.cpp
+++ b/lib/Target/ARM/ARMISelDAGToDAG.cpp
@@ -14,8 +14,8 @@
#define DEBUG_TYPE "arm-isel"
#include "ARM.h"
#include "ARMBaseInstrInfo.h"
-#include "ARMAddressingModes.h"
#include "ARMTargetMachine.h"
+#include "MCTargetDesc/ARMAddressingModes.h"
#include "llvm/CallingConv.h"
#include "llvm/Constants.h"
#include "llvm/DerivedTypes.h"
@@ -47,6 +47,11 @@ CheckVMLxHazard("check-vmlx-hazard", cl::Hidden,
cl::desc("Check fp vmla / vmls hazard at isel time"),
cl::init(true));
+static cl::opt<bool>
+DisableARMIntABS("disable-arm-int-abs", cl::Hidden,
+ cl::desc("Enable / disable ARM integer abs transform"),
+ cl::init(false));
+
//===--------------------------------------------------------------------===//
/// ARMDAGToDAGISel - ARM specific code to select ARM machine
/// instructions for SelectionDAG operations.
@@ -90,13 +95,20 @@ public:
bool hasNoVMLxHazardUse(SDNode *N) const;
bool isShifterOpProfitable(const SDValue &Shift,
ARM_AM::ShiftOpc ShOpcVal, unsigned ShAmt);
- bool SelectShifterOperandReg(SDValue N, SDValue &A,
+ bool SelectRegShifterOperand(SDValue N, SDValue &A,
SDValue &B, SDValue &C,
bool CheckProfitability = true);
- bool SelectShiftShifterOperandReg(SDValue N, SDValue &A,
+ bool SelectImmShifterOperand(SDValue N, SDValue &A,
+ SDValue &B, bool CheckProfitability = true);
+ bool SelectShiftRegShifterOperand(SDValue N, SDValue &A,
SDValue &B, SDValue &C) {
// Don't apply the profitability check
- return SelectShifterOperandReg(N, A, B, C, false);
+ return SelectRegShifterOperand(N, A, B, C, false);
+ }
+ bool SelectShiftImmShifterOperand(SDValue N, SDValue &A,
+ SDValue &B) {
+ // Don't apply the profitability check
+ return SelectImmShifterOperand(N, A, B, false);
}
bool SelectAddrModeImm12(SDValue N, SDValue &Base, SDValue &OffImm);
@@ -122,8 +134,13 @@ public:
return true;
}
- bool SelectAddrMode2Offset(SDNode *Op, SDValue N,
+ bool SelectAddrMode2OffsetReg(SDNode *Op, SDValue N,
SDValue &Offset, SDValue &Opc);
+ bool SelectAddrMode2OffsetImm(SDNode *Op, SDValue N,
+ SDValue &Offset, SDValue &Opc);
+ bool SelectAddrMode2OffsetImmPre(SDNode *Op, SDValue N,
+ SDValue &Offset, SDValue &Opc);
+ bool SelectAddrOffsetNone(SDValue N, SDValue &Base);
bool SelectAddrMode3(SDValue N, SDValue &Base,
SDValue &Offset, SDValue &Opc);
bool SelectAddrMode3Offset(SDNode *Op, SDValue N,
@@ -240,8 +257,13 @@ private:
ARMCC::CondCodes CCVal, SDValue CCR,
SDValue InFlag);
+  // Select special operations if the node forms an integer ABS pattern.
+ SDNode *SelectABSOp(SDNode *N);
+
SDNode *SelectConcatVector(SDNode *N);
+ SDNode *SelectAtomic64(SDNode *Node, unsigned Opc);
+
/// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
/// inline asm expressions.
virtual bool SelectInlineAsmMemoryOperand(const SDValue &Op,
@@ -291,10 +313,10 @@ static bool isOpcWithIntImmediate(SDNode *N, unsigned Opc, unsigned& Imm) {
/// (N * Scale) where N is in [\arg RangeMin, \arg RangeMax).
///
/// \param ScaledConstant [out] - On success, the pre-scaled constant value.
-static bool isScaledConstantInRange(SDValue Node, unsigned Scale,
+static bool isScaledConstantInRange(SDValue Node, int Scale,
int RangeMin, int RangeMax,
int &ScaledConstant) {
- assert(Scale && "Invalid scale!");
+ assert(Scale > 0 && "Invalid scale!");
// Check that this is a constant.
const ConstantSDNode *C = dyn_cast<ConstantSDNode>(Node);
@@ -365,7 +387,30 @@ bool ARMDAGToDAGISel::isShifterOpProfitable(const SDValue &Shift,
return ShOpcVal == ARM_AM::lsl && ShAmt == 2;
}
-bool ARMDAGToDAGISel::SelectShifterOperandReg(SDValue N,
+bool ARMDAGToDAGISel::SelectImmShifterOperand(SDValue N,
+ SDValue &BaseReg,
+ SDValue &Opc,
+ bool CheckProfitability) {
+ if (DisableShifterOp)
+ return false;
+
+ ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOpcode());
+
+ // Don't match base register only case. That is matched to a separate
+ // lower complexity pattern with explicit register operand.
+ if (ShOpcVal == ARM_AM::no_shift) return false;
+
+ BaseReg = N.getOperand(0);
+ unsigned ShImmVal = 0;
+ ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1));
+ if (!RHS) return false;
+ ShImmVal = RHS->getZExtValue() & 31;
+ Opc = CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ShOpcVal, ShImmVal),
+ MVT::i32);
+ return true;
+}
+
+bool ARMDAGToDAGISel::SelectRegShifterOperand(SDValue N,
SDValue &BaseReg,
SDValue &ShReg,
SDValue &Opc,
@@ -373,7 +418,7 @@ bool ARMDAGToDAGISel::SelectShifterOperandReg(SDValue N,
if (DisableShifterOp)
return false;
- ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N);
+ ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOpcode());
// Don't match base register only case. That is matched to a separate
// lower complexity pattern with explicit register operand.
@@ -381,19 +426,18 @@ bool ARMDAGToDAGISel::SelectShifterOperandReg(SDValue N,
BaseReg = N.getOperand(0);
unsigned ShImmVal = 0;
- if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
- ShReg = CurDAG->getRegister(0, MVT::i32);
- ShImmVal = RHS->getZExtValue() & 31;
- } else {
- ShReg = N.getOperand(1);
- if (CheckProfitability && !isShifterOpProfitable(N, ShOpcVal, ShImmVal))
- return false;
- }
+ ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1));
+ if (RHS) return false;
+
+ ShReg = N.getOperand(1);
+ if (CheckProfitability && !isShifterOpProfitable(N, ShOpcVal, ShImmVal))
+ return false;
Opc = CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ShOpcVal, ShImmVal),
MVT::i32);
return true;
}
+
bool ARMDAGToDAGISel::SelectAddrModeImm12(SDValue N,
SDValue &Base,
SDValue &OffImm) {
@@ -483,13 +527,10 @@ bool ARMDAGToDAGISel::SelectLdStSOReg(SDValue N, SDValue &Base, SDValue &Offset,
return false;
}
- if (Subtarget->isCortexA9() && !N.hasOneUse())
- // Compute R +/- (R << N) and reuse it.
- return false;
-
// Otherwise this is R +/- [possibly shifted] R.
ARM_AM::AddrOpc AddSub = N.getOpcode() == ISD::SUB ? ARM_AM::sub:ARM_AM::add;
- ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOperand(1));
+ ARM_AM::ShiftOpc ShOpcVal =
+ ARM_AM::getShiftOpcForNode(N.getOperand(1).getOpcode());
unsigned ShAmt = 0;
Base = N.getOperand(0);
@@ -515,16 +556,14 @@ bool ARMDAGToDAGISel::SelectLdStSOReg(SDValue N, SDValue &Base, SDValue &Offset,
// Try matching (R shl C) + (R).
if (N.getOpcode() != ISD::SUB && ShOpcVal == ARM_AM::no_shift &&
!(Subtarget->isCortexA9() || N.getOperand(0).hasOneUse())) {
- ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOperand(0));
+ ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOperand(0).getOpcode());
if (ShOpcVal != ARM_AM::no_shift) {
// Check to see if the RHS of the shift is a constant, if not, we can't
// fold it.
if (ConstantSDNode *Sh =
dyn_cast<ConstantSDNode>(N.getOperand(0).getOperand(1))) {
ShAmt = Sh->getZExtValue();
- if (!Subtarget->isCortexA9() ||
- (N.hasOneUse() &&
- isShifterOpProfitable(N.getOperand(0), ShOpcVal, ShAmt))) {
+ if (isShifterOpProfitable(N.getOperand(0), ShOpcVal, ShAmt)) {
Offset = N.getOperand(0).getOperand(0);
Base = N.getOperand(1);
} else {
@@ -630,7 +669,8 @@ AddrMode2Type ARMDAGToDAGISel::SelectAddrMode2Worker(SDValue N,
// Otherwise this is R +/- [possibly shifted] R.
ARM_AM::AddrOpc AddSub = N.getOpcode() != ISD::SUB ? ARM_AM::add:ARM_AM::sub;
- ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOperand(1));
+ ARM_AM::ShiftOpc ShOpcVal =
+ ARM_AM::getShiftOpcForNode(N.getOperand(1).getOpcode());
unsigned ShAmt = 0;
Base = N.getOperand(0);
@@ -656,16 +696,14 @@ AddrMode2Type ARMDAGToDAGISel::SelectAddrMode2Worker(SDValue N,
// Try matching (R shl C) + (R).
if (N.getOpcode() != ISD::SUB && ShOpcVal == ARM_AM::no_shift &&
!(Subtarget->isCortexA9() || N.getOperand(0).hasOneUse())) {
- ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOperand(0));
+ ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOperand(0).getOpcode());
if (ShOpcVal != ARM_AM::no_shift) {
// Check to see if the RHS of the shift is a constant, if not, we can't
// fold it.
if (ConstantSDNode *Sh =
dyn_cast<ConstantSDNode>(N.getOperand(0).getOperand(1))) {
ShAmt = Sh->getZExtValue();
- if (!Subtarget->isCortexA9() ||
- (N.hasOneUse() &&
- isShifterOpProfitable(N.getOperand(0), ShOpcVal, ShAmt))) {
+ if (isShifterOpProfitable(N.getOperand(0), ShOpcVal, ShAmt)) {
Offset = N.getOperand(0).getOperand(0);
Base = N.getOperand(1);
} else {
@@ -683,7 +721,7 @@ AddrMode2Type ARMDAGToDAGISel::SelectAddrMode2Worker(SDValue N,
return AM2_SHOP;
}
-bool ARMDAGToDAGISel::SelectAddrMode2Offset(SDNode *Op, SDValue N,
+bool ARMDAGToDAGISel::SelectAddrMode2OffsetReg(SDNode *Op, SDValue N,
SDValue &Offset, SDValue &Opc) {
unsigned Opcode = Op->getOpcode();
ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
@@ -692,16 +730,11 @@ bool ARMDAGToDAGISel::SelectAddrMode2Offset(SDNode *Op, SDValue N,
ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC)
? ARM_AM::add : ARM_AM::sub;
int Val;
- if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x1000, Val)) { // 12 bits.
- Offset = CurDAG->getRegister(0, MVT::i32);
- Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, Val,
- ARM_AM::no_shift),
- MVT::i32);
- return true;
- }
+ if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x1000, Val))
+ return false;
Offset = N;
- ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N);
+ ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOpcode());
unsigned ShAmt = 0;
if (ShOpcVal != ARM_AM::no_shift) {
// Check to see if the RHS of the shift is a constant, if not, we can't fold
@@ -724,6 +757,50 @@ bool ARMDAGToDAGISel::SelectAddrMode2Offset(SDNode *Op, SDValue N,
return true;
}
+bool ARMDAGToDAGISel::SelectAddrMode2OffsetImmPre(SDNode *Op, SDValue N,
+ SDValue &Offset, SDValue &Opc) {
+ unsigned Opcode = Op->getOpcode();
+ ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
+ ? cast<LoadSDNode>(Op)->getAddressingMode()
+ : cast<StoreSDNode>(Op)->getAddressingMode();
+ ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC)
+ ? ARM_AM::add : ARM_AM::sub;
+ int Val;
+ if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x1000, Val)) { // 12 bits.
+ if (AddSub == ARM_AM::sub) Val *= -1;
+ Offset = CurDAG->getRegister(0, MVT::i32);
+ Opc = CurDAG->getTargetConstant(Val, MVT::i32);
+ return true;
+ }
+
+ return false;
+}
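+// Unlike SelectAddrMode2OffsetImm below, which packs the offset into an
+// ARM_AM::getAM2Opc() immediate, the pre-indexed variant above returns a
+// plain signed offset (negated for ARM_AM::sub) to match the addressing mode
+// of the new *_PRE_IMM instructions.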
+
+
+bool ARMDAGToDAGISel::SelectAddrMode2OffsetImm(SDNode *Op, SDValue N,
+ SDValue &Offset, SDValue &Opc) {
+ unsigned Opcode = Op->getOpcode();
+ ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
+ ? cast<LoadSDNode>(Op)->getAddressingMode()
+ : cast<StoreSDNode>(Op)->getAddressingMode();
+ ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC)
+ ? ARM_AM::add : ARM_AM::sub;
+ int Val;
+ if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x1000, Val)) { // 12 bits.
+ Offset = CurDAG->getRegister(0, MVT::i32);
+ Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, Val,
+ ARM_AM::no_shift),
+ MVT::i32);
+ return true;
+ }
+
+ return false;
+}
+
+bool ARMDAGToDAGISel::SelectAddrOffsetNone(SDValue N, SDValue &Base) {
+ Base = N;
+ return true;
+}
bool ARMDAGToDAGISel::SelectAddrMode3(SDValue N,
SDValue &Base, SDValue &Offset,
@@ -1079,7 +1156,7 @@ bool ARMDAGToDAGISel::SelectT2ShifterOperandReg(SDValue N, SDValue &BaseReg,
if (DisableShifterOp)
return false;
- ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N);
+ ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOpcode());
// Don't match base register only case. That is matched to a separate
// lower complexity pattern with explicit register operand.
@@ -1208,21 +1285,15 @@ bool ARMDAGToDAGISel::SelectT2AddrModeSoReg(SDValue N,
return false;
}
- if (Subtarget->isCortexA9() && !N.hasOneUse()) {
- // Compute R + (R << [1,2,3]) and reuse it.
- Base = N;
- return false;
- }
-
// Look for (R + R) or (R + (R << [1,2,3])).
unsigned ShAmt = 0;
Base = N.getOperand(0);
OffReg = N.getOperand(1);
// Swap if it is ((R << c) + R).
- ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(OffReg);
+ ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(OffReg.getOpcode());
if (ShOpcVal != ARM_AM::lsl) {
- ShOpcVal = ARM_AM::getShiftOpcForNode(Base);
+ ShOpcVal = ARM_AM::getShiftOpcForNode(Base.getOpcode());
if (ShOpcVal == ARM_AM::lsl)
std::swap(Base, OffReg);
}
@@ -1266,10 +1337,19 @@ SDNode *ARMDAGToDAGISel::SelectARMIndexedLoad(SDNode *N) {
bool isPre = (AM == ISD::PRE_INC) || (AM == ISD::PRE_DEC);
unsigned Opcode = 0;
bool Match = false;
- if (LoadedVT == MVT::i32 &&
- SelectAddrMode2Offset(N, LD->getOffset(), Offset, AMOpc)) {
- Opcode = isPre ? ARM::LDR_PRE : ARM::LDR_POST;
+ if (LoadedVT == MVT::i32 && isPre &&
+ SelectAddrMode2OffsetImmPre(N, LD->getOffset(), Offset, AMOpc)) {
+ Opcode = ARM::LDR_PRE_IMM;
+ Match = true;
+ } else if (LoadedVT == MVT::i32 && !isPre &&
+ SelectAddrMode2OffsetImm(N, LD->getOffset(), Offset, AMOpc)) {
+ Opcode = ARM::LDR_POST_IMM;
Match = true;
+ } else if (LoadedVT == MVT::i32 &&
+ SelectAddrMode2OffsetReg(N, LD->getOffset(), Offset, AMOpc)) {
+ Opcode = isPre ? ARM::LDR_PRE_REG : ARM::LDR_POST_REG;
+ Match = true;
} else if (LoadedVT == MVT::i16 &&
SelectAddrMode3Offset(N, LD->getOffset(), Offset, AMOpc)) {
Match = true;
@@ -1283,20 +1363,37 @@ SDNode *ARMDAGToDAGISel::SelectARMIndexedLoad(SDNode *N) {
Opcode = isPre ? ARM::LDRSB_PRE : ARM::LDRSB_POST;
}
} else {
- if (SelectAddrMode2Offset(N, LD->getOffset(), Offset, AMOpc)) {
+ if (isPre &&
+ SelectAddrMode2OffsetImmPre(N, LD->getOffset(), Offset, AMOpc)) {
+ Match = true;
+ Opcode = ARM::LDRB_PRE_IMM;
+ } else if (!isPre &&
+ SelectAddrMode2OffsetImm(N, LD->getOffset(), Offset, AMOpc)) {
+ Match = true;
+ Opcode = ARM::LDRB_POST_IMM;
+ } else if (SelectAddrMode2OffsetReg(N, LD->getOffset(), Offset, AMOpc)) {
Match = true;
- Opcode = isPre ? ARM::LDRB_PRE : ARM::LDRB_POST;
+ Opcode = isPre ? ARM::LDRB_PRE_REG : ARM::LDRB_POST_REG;
}
}
}
if (Match) {
- SDValue Chain = LD->getChain();
- SDValue Base = LD->getBasePtr();
- SDValue Ops[]= { Base, Offset, AMOpc, getAL(CurDAG),
- CurDAG->getRegister(0, MVT::i32), Chain };
- return CurDAG->getMachineNode(Opcode, N->getDebugLoc(), MVT::i32, MVT::i32,
- MVT::Other, Ops, 6);
+ if (Opcode == ARM::LDR_PRE_IMM || Opcode == ARM::LDRB_PRE_IMM) {
+ SDValue Chain = LD->getChain();
+ SDValue Base = LD->getBasePtr();
+ SDValue Ops[]= { Base, AMOpc, getAL(CurDAG),
+ CurDAG->getRegister(0, MVT::i32), Chain };
+ return CurDAG->getMachineNode(Opcode, N->getDebugLoc(), MVT::i32,
+ MVT::i32, MVT::Other, Ops, 5);
+ } else {
+ SDValue Chain = LD->getChain();
+ SDValue Base = LD->getBasePtr();
+ SDValue Ops[]= { Base, Offset, AMOpc, getAL(CurDAG),
+ CurDAG->getRegister(0, MVT::i32), Chain };
+ return CurDAG->getMachineNode(Opcode, N->getDebugLoc(), MVT::i32,
+ MVT::i32, MVT::Other, Ops, 6);
+ }
}
return NULL;
@@ -1966,7 +2063,8 @@ SDNode *ARMDAGToDAGISel::SelectV6T2BitfieldExtractOp(SDNode *N,
Srl_imm)) {
assert(Srl_imm > 0 && Srl_imm < 32 && "bad amount in shift node!");
- unsigned Width = CountTrailingOnes_32(And_imm);
+ // Note: The width operand is encoded as width-1.
+ unsigned Width = CountTrailingOnes_32(And_imm) - 1;
unsigned LSB = Srl_imm;
SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
SDValue Ops[] = { N->getOperand(0).getOperand(0),
@@ -1986,7 +2084,8 @@ SDNode *ARMDAGToDAGISel::SelectV6T2BitfieldExtractOp(SDNode *N,
unsigned Srl_imm = 0;
if (isInt32Immediate(N->getOperand(1), Srl_imm)) {
assert(Srl_imm > 0 && Srl_imm < 32 && "bad amount in shift node!");
- unsigned Width = 32 - Srl_imm;
+ // Note: The width operand is encoded as width-1.
+ unsigned Width = 32 - Srl_imm - 1;
int LSB = Srl_imm - Shl_imm;
if (LSB < 0)
return NULL;
@@ -2034,10 +2133,16 @@ SelectARMCMOVShiftOp(SDNode *N, SDValue FalseVal, SDValue TrueVal,
SDValue CPTmp0;
SDValue CPTmp1;
SDValue CPTmp2;
- if (SelectShifterOperandReg(TrueVal, CPTmp0, CPTmp1, CPTmp2)) {
+ if (SelectImmShifterOperand(TrueVal, CPTmp0, CPTmp2)) {
+ SDValue CC = CurDAG->getTargetConstant(CCVal, MVT::i32);
+ SDValue Ops[] = { FalseVal, CPTmp0, CPTmp2, CC, CCR, InFlag };
+ return CurDAG->SelectNodeTo(N, ARM::MOVCCsi, MVT::i32, Ops, 6);
+ }
+
+ if (SelectRegShifterOperand(TrueVal, CPTmp0, CPTmp1, CPTmp2)) {
SDValue CC = CurDAG->getTargetConstant(CCVal, MVT::i32);
SDValue Ops[] = { FalseVal, CPTmp0, CPTmp1, CPTmp2, CC, CCR, InFlag };
- return CurDAG->SelectNodeTo(N, ARM::MOVCCs, MVT::i32, Ops, 7);
+ return CurDAG->SelectNodeTo(N, ARM::MOVCCsr, MVT::i32, Ops, 7);
}
return 0;
}
@@ -2198,6 +2303,56 @@ SDNode *ARMDAGToDAGISel::SelectCMOVOp(SDNode *N) {
return CurDAG->SelectNodeTo(N, Opc, VT, Ops, 5);
}
+/// Target-specific DAG combining for ISD::XOR.
+/// Target-independent combining lowers SELECT_CC nodes of the form
+/// select_cc setg[ge] X, 0, X, -X
+/// select_cc setgt X, -1, X, -X
+/// select_cc setl[te] X, 0, -X, X
+/// select_cc setlt X, 1, -X, X
+/// which represent integer ABS, into:
+///   Y = sra (X, size(X)-1); xor (add (X, Y), Y)
+/// ARM instruction selection detects the latter and matches it to an
+/// ARM::ABS or ARM::t2ABS machine node.
+SDNode *ARMDAGToDAGISel::SelectABSOp(SDNode *N) {
+ SDValue XORSrc0 = N->getOperand(0);
+ SDValue XORSrc1 = N->getOperand(1);
+ DebugLoc DL = N->getDebugLoc();
+ EVT VT = N->getValueType(0);
+
+ if (DisableARMIntABS)
+ return NULL;
+
+ if (Subtarget->isThumb1Only())
+ return NULL;
+
+ if (XORSrc0.getOpcode() != ISD::ADD ||
+ XORSrc1.getOpcode() != ISD::SRA)
+ return NULL;
+
+ SDValue ADDSrc0 = XORSrc0.getOperand(0);
+ SDValue ADDSrc1 = XORSrc0.getOperand(1);
+ SDValue SRASrc0 = XORSrc1.getOperand(0);
+ SDValue SRASrc1 = XORSrc1.getOperand(1);
+ ConstantSDNode *SRAConstant = dyn_cast<ConstantSDNode>(SRASrc1);
+ EVT XType = SRASrc0.getValueType();
+ unsigned Size = XType.getSizeInBits() - 1;
+
+ if (ADDSrc1 == XORSrc1 &&
+ ADDSrc0 == SRASrc0 &&
+ XType.isInteger() &&
+ SRAConstant != NULL &&
+ Size == SRAConstant->getZExtValue()) {
+
+ unsigned Opcode = ARM::ABS;
+ if (Subtarget->isThumb2())
+ Opcode = ARM::t2ABS;
+
+ return CurDAG->SelectNodeTo(N, Opcode, VT, ADDSrc0);
+ }
+
+ return NULL;
+}
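+// A compact, self-contained illustration of the identity matched above
+// (IllustrateABSIdentity is a hypothetical helper for exposition only; the
+// unsigned arithmetic avoids signed overflow, and INT_MIN maps to itself
+// just as the ABS pseudo does):
+static inline int IllustrateABSIdentity(int X) {
+  unsigned Y = (unsigned)(X >> 31);    // Y = sra (X, size(X)-1): 0 or ~0
+  return (int)(((unsigned)X + Y) ^ Y); // xor (add (X, Y), Y) == |X|
+}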
+
SDNode *ARMDAGToDAGISel::SelectConcatVector(SDNode *N) {
// The only time a CONCAT_VECTORS operation can have legal types is when
// two 64-bit vectors are concatenated to a 128-bit vector.
@@ -2207,6 +2362,25 @@ SDNode *ARMDAGToDAGISel::SelectConcatVector(SDNode *N) {
return PairDRegs(VT, N->getOperand(0), N->getOperand(1));
}
+SDNode *ARMDAGToDAGISel::SelectAtomic64(SDNode *Node, unsigned Opc) {
+ SmallVector<SDValue, 6> Ops;
+ Ops.push_back(Node->getOperand(1)); // Ptr
+ Ops.push_back(Node->getOperand(2)); // Low part of Val1
+ Ops.push_back(Node->getOperand(3)); // High part of Val1
+ if (Opc == ARM::ATOMCMPXCHG6432) {
+ Ops.push_back(Node->getOperand(4)); // Low part of Val2
+ Ops.push_back(Node->getOperand(5)); // High part of Val2
+ }
+ Ops.push_back(Node->getOperand(0)); // Chain
+ MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
+ MemOp[0] = cast<MemSDNode>(Node)->getMemOperand();
+ SDNode *ResNode = CurDAG->getMachineNode(Opc, Node->getDebugLoc(),
+ MVT::i32, MVT::i32, MVT::Other,
+                                           Ops.data(), Ops.size());
+ cast<MachineSDNode>(ResNode)->setMemRefs(MemOp, MemOp + 1);
+ return ResNode;
+}
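+// The machine node built above carries three results (the low and high i32
+// halves of the 64-bit value, plus the chain), mirroring how the i64
+// operands arrive split into register-sized halves.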
+
SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
DebugLoc dl = N->getDebugLoc();
@@ -2215,6 +2389,14 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
switch (N->getOpcode()) {
default: break;
+ case ISD::XOR: {
+    // Select special operations if the XOR node forms an integer ABS pattern.
+ SDNode *ResNode = SelectABSOp(N);
+ if (ResNode)
+ return ResNode;
+ // Other cases are autogenerated.
+ break;
+ }
case ISD::Constant: {
unsigned Val = cast<ConstantSDNode>(N)->getZExtValue();
bool UseCP = true;
@@ -2269,8 +2451,9 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
int FI = cast<FrameIndexSDNode>(N)->getIndex();
SDValue TFI = CurDAG->getTargetFrameIndex(FI, TLI.getPointerTy());
if (Subtarget->isThumb1Only()) {
- return CurDAG->SelectNodeTo(N, ARM::tADDrSPi, MVT::i32, TFI,
- CurDAG->getTargetConstant(0, MVT::i32));
+ SDValue Ops[] = { TFI, CurDAG->getTargetConstant(0, MVT::i32),
+ getAL(CurDAG), CurDAG->getRegister(0, MVT::i32) };
+ return CurDAG->SelectNodeTo(N, ARM::tADDrSPi, MVT::i32, Ops, 4);
} else {
unsigned Opc = ((Subtarget->isThumb() && Subtarget->hasThumb2()) ?
ARM::t2ADDri : ARM::ADDri);
@@ -2307,7 +2490,7 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
return CurDAG->SelectNodeTo(N, ARM::t2ADDrs, MVT::i32, Ops, 6);
} else {
SDValue Ops[] = { V, V, Reg0, ShImmOp, getAL(CurDAG), Reg0, Reg0 };
- return CurDAG->SelectNodeTo(N, ARM::ADDrs, MVT::i32, Ops, 7);
+ return CurDAG->SelectNodeTo(N, ARM::ADDrsi, MVT::i32, Ops, 7);
}
}
if (isPowerOf2_32(RHSV+1)) { // 2^n-1?
@@ -2323,7 +2506,7 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
return CurDAG->SelectNodeTo(N, ARM::t2RSBrs, MVT::i32, Ops, 6);
} else {
SDValue Ops[] = { V, V, Reg0, ShImmOp, getAL(CurDAG), Reg0, Reg0 };
- return CurDAG->SelectNodeTo(N, ARM::RSBrs, MVT::i32, Ops, 7);
+ return CurDAG->SelectNodeTo(N, ARM::RSBrsi, MVT::i32, Ops, 7);
}
}
}
@@ -2986,6 +3169,23 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
case ISD::CONCAT_VECTORS:
return SelectConcatVector(N);
+
+ case ARMISD::ATOMOR64_DAG:
+ return SelectAtomic64(N, ARM::ATOMOR6432);
+ case ARMISD::ATOMXOR64_DAG:
+ return SelectAtomic64(N, ARM::ATOMXOR6432);
+ case ARMISD::ATOMADD64_DAG:
+ return SelectAtomic64(N, ARM::ATOMADD6432);
+ case ARMISD::ATOMSUB64_DAG:
+ return SelectAtomic64(N, ARM::ATOMSUB6432);
+ case ARMISD::ATOMNAND64_DAG:
+ return SelectAtomic64(N, ARM::ATOMNAND6432);
+ case ARMISD::ATOMAND64_DAG:
+ return SelectAtomic64(N, ARM::ATOMAND6432);
+ case ARMISD::ATOMSWAP64_DAG:
+ return SelectAtomic64(N, ARM::ATOMSWAP6432);
+ case ARMISD::ATOMCMPXCHG64_DAG:
+ return SelectAtomic64(N, ARM::ATOMCMPXCHG6432);
}
return SelectCode(N);
diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp
index cf8c5ba..e44e356 100644
--- a/lib/Target/ARM/ARMISelLowering.cpp
+++ b/lib/Target/ARM/ARMISelLowering.cpp
@@ -14,7 +14,6 @@
#define DEBUG_TYPE "arm-isel"
#include "ARM.h"
-#include "ARMAddressingModes.h"
#include "ARMCallingConv.h"
#include "ARMConstantPoolValue.h"
#include "ARMISelLowering.h"
@@ -24,6 +23,7 @@
#include "ARMSubtarget.h"
#include "ARMTargetMachine.h"
#include "ARMTargetObjectFile.h"
+#include "MCTargetDesc/ARMAddressingModes.h"
#include "llvm/CallingConv.h"
#include "llvm/Constants.h"
#include "llvm/Function.h"
@@ -38,6 +38,7 @@
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/PseudoSourceValue.h"
#include "llvm/CodeGen/SelectionDAG.h"
@@ -106,7 +107,7 @@ void ARMTargetLowering::addTypeForNEON(EVT VT, EVT PromotedLdStVT,
EVT ElemTy = VT.getVectorElementType();
if (ElemTy != MVT::i64 && ElemTy != MVT::f64)
- setOperationAction(ISD::VSETCC, VT.getSimpleVT(), Custom);
+ setOperationAction(ISD::SETCC, VT.getSimpleVT(), Custom);
setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT.getSimpleVT(), Custom);
if (ElemTy != MVT::i32) {
setOperationAction(ISD::SINT_TO_FP, VT.getSimpleVT(), Expand);
@@ -178,6 +179,8 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
RegInfo = TM.getRegisterInfo();
Itins = TM.getInstrItineraryData();
+ setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
+
if (Subtarget->isTargetDarwin()) {
// Uses VFP for Thumb libfuncs if available.
if (Subtarget->isThumb() && Subtarget->hasVFP2()) {
@@ -419,6 +422,13 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
setLibcallName(RTLIB::MEMSET, "__aeabi_memset");
}
+ // Use divmod compiler-rt calls for iOS 5.0 and later.
+ if (Subtarget->getTargetTriple().getOS() == Triple::IOS &&
+ !Subtarget->getTargetTriple().isOSVersionLT(5, 0)) {
+ setLibcallName(RTLIB::SDIVREM_I32, "__divmodsi4");
+ setLibcallName(RTLIB::UDIVREM_I32, "__udivmodsi4");
+ }
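+  // For reference, the compiler-rt entry points selected above have C
+  // signatures along these lines (a sketch of compiler-rt's interface):
+  //   int __divmodsi4(int a, int b, int *rem);  // returns a/b, sets *rem = a%b
+  //   unsigned __udivmodsi4(unsigned a, unsigned b, unsigned *rem);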
+
if (Subtarget->isThumb1Only())
addRegisterClass(MVT::i32, ARM::tGPRRegisterClass);
else
@@ -453,7 +463,7 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
setOperationAction(ISD::FDIV, MVT::v2f64, Expand);
setOperationAction(ISD::FREM, MVT::v2f64, Expand);
setOperationAction(ISD::FCOPYSIGN, MVT::v2f64, Expand);
- setOperationAction(ISD::VSETCC, MVT::v2f64, Expand);
+ setOperationAction(ISD::SETCC, MVT::v2f64, Expand);
setOperationAction(ISD::FNEG, MVT::v2f64, Expand);
setOperationAction(ISD::FABS, MVT::v2f64, Expand);
setOperationAction(ISD::FSQRT, MVT::v2f64, Expand);
@@ -485,8 +495,8 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
setOperationAction(ISD::SDIV, MVT::v8i8, Custom);
setOperationAction(ISD::UDIV, MVT::v4i16, Custom);
setOperationAction(ISD::UDIV, MVT::v8i8, Custom);
- setOperationAction(ISD::VSETCC, MVT::v1i64, Expand);
- setOperationAction(ISD::VSETCC, MVT::v2i64, Expand);
+ setOperationAction(ISD::SETCC, MVT::v1i64, Expand);
+ setOperationAction(ISD::SETCC, MVT::v2i64, Expand);
// Neon does not have single instruction SINT_TO_FP and UINT_TO_FP with
// a destination type that is wider than the source.
setOperationAction(ISD::SINT_TO_FP, MVT::v4i16, Custom);
@@ -551,6 +561,14 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
setOperationAction(ISD::SRL, MVT::i64, Custom);
setOperationAction(ISD::SRA, MVT::i64, Custom);
+ if (!Subtarget->isThumb1Only()) {
+ // FIXME: We should do this for Thumb1 as well.
+ setOperationAction(ISD::ADDC, MVT::i32, Custom);
+ setOperationAction(ISD::ADDE, MVT::i32, Custom);
+ setOperationAction(ISD::SUBC, MVT::i32, Custom);
+ setOperationAction(ISD::SUBE, MVT::i32, Custom);
+ }
+
// ARM does not have ROTL.
setOperationAction(ISD::ROTL, MVT::i32, Expand);
setOperationAction(ISD::CTTZ, MVT::i32, Custom);
@@ -596,62 +614,46 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Expand);
// ARMv6 Thumb1 (except for CPUs that support dmb / dsb) and earlier use
// the default expansion.
+ // FIXME: This should be checking for v6k, not just v6.
if (Subtarget->hasDataBarrier() ||
(Subtarget->hasV6Ops() && !Subtarget->isThumb())) {
// membarrier needs custom lowering; the rest are legal and handled
// normally.
setOperationAction(ISD::MEMBARRIER, MVT::Other, Custom);
+ setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom);
+ // Custom lowering for 64-bit ops
+ setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i64, Custom);
+ setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i64, Custom);
+ setOperationAction(ISD::ATOMIC_LOAD_AND, MVT::i64, Custom);
+ setOperationAction(ISD::ATOMIC_LOAD_OR, MVT::i64, Custom);
+ setOperationAction(ISD::ATOMIC_LOAD_XOR, MVT::i64, Custom);
+ setOperationAction(ISD::ATOMIC_SWAP, MVT::i64, Custom);
+ setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i64, Custom);
+ // Automatically insert fences (dmb ish) around ATOMIC_SWAP etc.

+ setInsertFencesForAtomic(true);
} else {
// Set them all for expansion, which will force libcalls.
setOperationAction(ISD::MEMBARRIER, MVT::Other, Expand);
- setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i8, Expand);
- setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i16, Expand);
+ setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Expand);
setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i32, Expand);
- setOperationAction(ISD::ATOMIC_SWAP, MVT::i8, Expand);
- setOperationAction(ISD::ATOMIC_SWAP, MVT::i16, Expand);
setOperationAction(ISD::ATOMIC_SWAP, MVT::i32, Expand);
- setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i8, Expand);
- setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i16, Expand);
setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i32, Expand);
- setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i8, Expand);
- setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i16, Expand);
setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i32, Expand);
- setOperationAction(ISD::ATOMIC_LOAD_AND, MVT::i8, Expand);
- setOperationAction(ISD::ATOMIC_LOAD_AND, MVT::i16, Expand);
setOperationAction(ISD::ATOMIC_LOAD_AND, MVT::i32, Expand);
- setOperationAction(ISD::ATOMIC_LOAD_OR, MVT::i8, Expand);
- setOperationAction(ISD::ATOMIC_LOAD_OR, MVT::i16, Expand);
setOperationAction(ISD::ATOMIC_LOAD_OR, MVT::i32, Expand);
- setOperationAction(ISD::ATOMIC_LOAD_XOR, MVT::i8, Expand);
- setOperationAction(ISD::ATOMIC_LOAD_XOR, MVT::i16, Expand);
setOperationAction(ISD::ATOMIC_LOAD_XOR, MVT::i32, Expand);
- setOperationAction(ISD::ATOMIC_LOAD_NAND, MVT::i8, Expand);
- setOperationAction(ISD::ATOMIC_LOAD_NAND, MVT::i16, Expand);
setOperationAction(ISD::ATOMIC_LOAD_NAND, MVT::i32, Expand);
- setOperationAction(ISD::ATOMIC_LOAD_MIN, MVT::i8, Expand);
- setOperationAction(ISD::ATOMIC_LOAD_MIN, MVT::i16, Expand);
setOperationAction(ISD::ATOMIC_LOAD_MIN, MVT::i32, Expand);
- setOperationAction(ISD::ATOMIC_LOAD_MAX, MVT::i8, Expand);
- setOperationAction(ISD::ATOMIC_LOAD_MAX, MVT::i16, Expand);
setOperationAction(ISD::ATOMIC_LOAD_MAX, MVT::i32, Expand);
- setOperationAction(ISD::ATOMIC_LOAD_UMIN, MVT::i8, Expand);
- setOperationAction(ISD::ATOMIC_LOAD_UMIN, MVT::i16, Expand);
setOperationAction(ISD::ATOMIC_LOAD_UMIN, MVT::i32, Expand);
- setOperationAction(ISD::ATOMIC_LOAD_UMAX, MVT::i8, Expand);
- setOperationAction(ISD::ATOMIC_LOAD_UMAX, MVT::i16, Expand);
setOperationAction(ISD::ATOMIC_LOAD_UMAX, MVT::i32, Expand);
+ // Mark ATOMIC_LOAD and ATOMIC_STORE custom so we can handle the
+ // Unordered/Monotonic case.
+ setOperationAction(ISD::ATOMIC_LOAD, MVT::i32, Custom);
+ setOperationAction(ISD::ATOMIC_STORE, MVT::i32, Custom);
// Since the libcalls include locking, fold in the fences
setShouldFoldAtomicFences(true);
}
- // 64-bit versions are always libcalls (for now)
- setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i64, Expand);
- setOperationAction(ISD::ATOMIC_SWAP, MVT::i64, Expand);
- setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i64, Expand);
- setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i64, Expand);
- setOperationAction(ISD::ATOMIC_LOAD_AND, MVT::i64, Expand);
- setOperationAction(ISD::ATOMIC_LOAD_OR, MVT::i64, Expand);
- setOperationAction(ISD::ATOMIC_LOAD_XOR, MVT::i64, Expand);
- setOperationAction(ISD::ATOMIC_LOAD_NAND, MVT::i64, Expand);
setOperationAction(ISD::PREFETCH, MVT::Other, Custom);
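[Aside: marking the i32 atomics above Expand sends them down the libcall path, and because those runtime routines lock internally their ordering is already covered, which is why the fences are folded into them. The semantics being preserved, in portable C++11 terms rather than this file's code:]

    #include <atomic>
    #include <cstdint>

    // ATOMIC_LOAD_ADD has fetch-and-add semantics: the old value is returned
    // and the whole read-modify-write is atomic, ordering included.
    uint32_t atomic_add_example(std::atomic<uint32_t> &counter) {
      return counter.fetch_add(1, std::memory_order_seq_cst);
    }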
@@ -839,6 +841,11 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const {
case ARMISD::SRA_FLAG: return "ARMISD::SRA_FLAG";
case ARMISD::RRX: return "ARMISD::RRX";
+ case ARMISD::ADDC: return "ARMISD::ADDC";
+ case ARMISD::ADDE: return "ARMISD::ADDE";
+ case ARMISD::SUBC: return "ARMISD::SUBC";
+ case ARMISD::SUBE: return "ARMISD::SUBE";
+
case ARMISD::VMOVRRD: return "ARMISD::VMOVRRD";
case ARMISD::VMOVDRR: return "ARMISD::VMOVDRR";
@@ -935,6 +942,11 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const {
}
}
+EVT ARMTargetLowering::getSetCCResultType(EVT VT) const {
+ if (!VT.isVector()) return getPointerTy();
+ return VT.changeVectorElementTypeToInteger();
+}
+
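[Aside: the rule getSetCCResultType encodes is that a scalar compare yields a pointer-sized i32 while a vector compare yields a same-shaped integer mask whose lanes are all-zeros or all-ones, matching the setBooleanVectorContents(ZeroOrNegativeOneBooleanContent) call earlier in the constructor. A portable sketch of that lane convention:]

    #include <cstdint>

    // Lane-wise compare producing the 0 / ~0 masks that NEON vector compares
    // produce, i.e. the ZeroOrNegativeOneBooleanContent convention.
    void vsetcc_mask(const float *a, const float *b, uint32_t *mask, int n) {
      for (int i = 0; i < n; ++i)
        mask[i] = (a[i] < b[i]) ? 0xFFFFFFFFu : 0u;
    }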
/// getRegClassFor - Return the register class that should be used for the
/// specified value type.
TargetRegisterClass *ARMTargetLowering::getRegClassFor(EVT VT) const {
@@ -1210,8 +1222,8 @@ ARMTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
MachineFunction &MF = DAG.getMachineFunction();
bool IsStructRet = (Outs.empty()) ? false : Outs[0].Flags.isSRet();
bool IsSibCall = false;
- // Temporarily disable tail calls so things don't break.
- if (!EnableARMTailCalls)
+ // Disable tail calls if they're not supported.
+ if (!EnableARMTailCalls && !Subtarget->supportsTailCall())
isTailCall = false;
if (isTailCall) {
// Check if it's really possible to do a tail call.
@@ -1336,10 +1348,12 @@ ARMTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
SDValue Src = DAG.getNode(ISD::ADD, dl, getPointerTy(), Arg, SrcOffset);
SDValue SizeNode = DAG.getConstant(Flags.getByValSize() - 4*offset,
MVT::i32);
+ // TODO: Disable AlwaysInline when it becomes possible
+ // to emit a nested call sequence.
MemOpChains.push_back(DAG.getMemcpy(Chain, dl, Dst, Src, SizeNode,
Flags.getByValAlign(),
/*isVolatile=*/false,
- /*AlwaysInline=*/false,
+ /*AlwaysInline=*/true,
MachinePointerInfo(0),
MachinePointerInfo(0)));
@@ -1404,9 +1418,9 @@ ARMTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
const GlobalValue *GV = G->getGlobal();
// Create a constant pool entry for the callee address
unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
- ARMConstantPoolValue *CPV = new ARMConstantPoolValue(GV,
- ARMPCLabelIndex,
- ARMCP::CPValue, 0);
+ ARMConstantPoolValue *CPV =
+ ARMConstantPoolConstant::Create(GV, ARMPCLabelIndex, ARMCP::CPValue, 0);
+
// Get the address of the callee into a register
SDValue CPAddr = DAG.getTargetConstantPool(CPV, getPointerTy(), 4);
CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
@@ -1419,8 +1433,9 @@ ARMTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
// Create a constant pool entry for the callee address
unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
- ARMConstantPoolValue *CPV = new ARMConstantPoolValue(*DAG.getContext(),
- Sym, ARMPCLabelIndex, 0);
+ ARMConstantPoolValue *CPV =
+ ARMConstantPoolSymbol::Create(*DAG.getContext(), Sym,
+ ARMPCLabelIndex, 0);
// Get the address of the callee into a register
SDValue CPAddr = DAG.getTargetConstantPool(CPV, getPointerTy(), 4);
CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
@@ -1441,9 +1456,8 @@ ARMTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
// tBX takes a register source operand.
if (isARMFunc && Subtarget->isThumb1Only() && !Subtarget->hasV5TOps()) {
unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
- ARMConstantPoolValue *CPV = new ARMConstantPoolValue(GV,
- ARMPCLabelIndex,
- ARMCP::CPValue, 4);
+ ARMConstantPoolValue *CPV =
+ ARMConstantPoolConstant::Create(GV, ARMPCLabelIndex, ARMCP::CPValue, 4);
SDValue CPAddr = DAG.getTargetConstantPool(CPV, getPointerTy(), 4);
CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
Callee = DAG.getLoad(getPointerTy(), dl,
@@ -1470,8 +1484,9 @@ ARMTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
const char *Sym = S->getSymbol();
if (isARMFunc && Subtarget->isThumb1Only() && !Subtarget->hasV5TOps()) {
unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
- ARMConstantPoolValue *CPV = new ARMConstantPoolValue(*DAG.getContext(),
- Sym, ARMPCLabelIndex, 4);
+ ARMConstantPoolValue *CPV =
+ ARMConstantPoolSymbol::Create(*DAG.getContext(), Sym,
+ ARMPCLabelIndex, 4);
SDValue CPAddr = DAG.getTargetConstantPool(CPV, getPointerTy(), 4);
CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
Callee = DAG.getLoad(getPointerTy(), dl,
@@ -1940,9 +1955,9 @@ SDValue ARMTargetLowering::LowerBlockAddress(SDValue Op,
} else {
unsigned PCAdj = Subtarget->isThumb() ? 4 : 8;
ARMPCLabelIndex = AFI->createPICLabelUId();
- ARMConstantPoolValue *CPV = new ARMConstantPoolValue(BA, ARMPCLabelIndex,
- ARMCP::CPBlockAddress,
- PCAdj);
+ ARMConstantPoolValue *CPV =
+ ARMConstantPoolConstant::Create(BA, ARMPCLabelIndex,
+ ARMCP::CPBlockAddress, PCAdj);
CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4);
}
CPAddr = DAG.getNode(ARMISD::Wrapper, DL, PtrVT, CPAddr);
@@ -1966,8 +1981,8 @@ ARMTargetLowering::LowerToTLSGeneralDynamicModel(GlobalAddressSDNode *GA,
ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
ARMConstantPoolValue *CPV =
- new ARMConstantPoolValue(GA->getGlobal(), ARMPCLabelIndex,
- ARMCP::CPValue, PCAdj, ARMCP::TLSGD, true);
+ ARMConstantPoolConstant::Create(GA->getGlobal(), ARMPCLabelIndex,
+ ARMCP::CPValue, PCAdj, ARMCP::TLSGD, true);
SDValue Argument = DAG.getTargetConstantPool(CPV, PtrVT, 4);
Argument = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Argument);
Argument = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Argument,
@@ -1982,11 +1997,11 @@ ARMTargetLowering::LowerToTLSGeneralDynamicModel(GlobalAddressSDNode *GA,
ArgListTy Args;
ArgListEntry Entry;
Entry.Node = Argument;
- Entry.Ty = (const Type *) Type::getInt32Ty(*DAG.getContext());
+ Entry.Ty = (Type *) Type::getInt32Ty(*DAG.getContext());
Args.push_back(Entry);
// FIXME: is there useful debug info available here?
std::pair<SDValue, SDValue> CallResult =
- LowerCallTo(Chain, (const Type *) Type::getInt32Ty(*DAG.getContext()),
+ LowerCallTo(Chain, (Type *) Type::getInt32Ty(*DAG.getContext()),
false, false, false, false,
0, CallingConv::C, false, /*isReturnValueUsed=*/true,
DAG.getExternalSymbol("__tls_get_addr", PtrVT), Args, DAG, dl);
@@ -2013,8 +2028,9 @@ ARMTargetLowering::LowerToTLSExecModels(GlobalAddressSDNode *GA,
// Initial exec model.
unsigned char PCAdj = Subtarget->isThumb() ? 4 : 8;
ARMConstantPoolValue *CPV =
- new ARMConstantPoolValue(GA->getGlobal(), ARMPCLabelIndex,
- ARMCP::CPValue, PCAdj, ARMCP::GOTTPOFF, true);
+ ARMConstantPoolConstant::Create(GA->getGlobal(), ARMPCLabelIndex,
+ ARMCP::CPValue, PCAdj, ARMCP::GOTTPOFF,
+ true);
Offset = DAG.getTargetConstantPool(CPV, PtrVT, 4);
Offset = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Offset);
Offset = DAG.getLoad(PtrVT, dl, Chain, Offset,
@@ -2030,7 +2046,8 @@ ARMTargetLowering::LowerToTLSExecModels(GlobalAddressSDNode *GA,
false, false, 0);
} else {
// local exec model
- ARMConstantPoolValue *CPV = new ARMConstantPoolValue(GV, ARMCP::TPOFF);
+ ARMConstantPoolValue *CPV =
+ ARMConstantPoolConstant::Create(GV, ARMCP::TPOFF);
Offset = DAG.getTargetConstantPool(CPV, PtrVT, 4);
Offset = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Offset);
Offset = DAG.getLoad(PtrVT, dl, Chain, Offset,
@@ -2066,7 +2083,8 @@ SDValue ARMTargetLowering::LowerGlobalAddressELF(SDValue Op,
if (RelocM == Reloc::PIC_) {
bool UseGOTOFF = GV->hasLocalLinkage() || GV->hasHiddenVisibility();
ARMConstantPoolValue *CPV =
- new ARMConstantPoolValue(GV, UseGOTOFF ? ARMCP::GOTOFF : ARMCP::GOT);
+ ARMConstantPoolConstant::Create(GV,
+ UseGOTOFF ? ARMCP::GOTOFF : ARMCP::GOT);
SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4);
CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
SDValue Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(),
@@ -2135,7 +2153,8 @@ SDValue ARMTargetLowering::LowerGlobalAddressDarwin(SDValue Op,
ARMPCLabelIndex = AFI->createPICLabelUId();
unsigned PCAdj = (RelocM != Reloc::PIC_) ? 0 : (Subtarget->isThumb()?4:8);
ARMConstantPoolValue *CPV =
- new ARMConstantPoolValue(GV, ARMPCLabelIndex, ARMCP::CPValue, PCAdj);
+ ARMConstantPoolConstant::Create(GV, ARMPCLabelIndex, ARMCP::CPValue,
+ PCAdj);
CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4);
}
CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
@@ -2167,9 +2186,9 @@ SDValue ARMTargetLowering::LowerGLOBAL_OFFSET_TABLE(SDValue Op,
EVT PtrVT = getPointerTy();
DebugLoc dl = Op.getDebugLoc();
unsigned PCAdj = Subtarget->isThumb() ? 4 : 8;
- ARMConstantPoolValue *CPV = new ARMConstantPoolValue(*DAG.getContext(),
- "_GLOBAL_OFFSET_TABLE_",
- ARMPCLabelIndex, PCAdj);
+ ARMConstantPoolValue *CPV =
+ ARMConstantPoolSymbol::Create(*DAG.getContext(), "_GLOBAL_OFFSET_TABLE_",
+ ARMPCLabelIndex, PCAdj);
SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4);
CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
SDValue Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr,
@@ -2191,7 +2210,8 @@ SDValue
ARMTargetLowering::LowerEH_SJLJ_SETJMP(SDValue Op, SelectionDAG &DAG) const {
DebugLoc dl = Op.getDebugLoc();
SDValue Val = DAG.getConstant(0, MVT::i32);
- return DAG.getNode(ARMISD::EH_SJLJ_SETJMP, dl, MVT::i32, Op.getOperand(0),
+ return DAG.getNode(ARMISD::EH_SJLJ_SETJMP, dl,
+ DAG.getVTList(MVT::i32, MVT::Other), Op.getOperand(0),
Op.getOperand(1), Val);
}
@@ -2224,8 +2244,8 @@ ARMTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG,
unsigned PCAdj = (RelocM != Reloc::PIC_)
? 0 : (Subtarget->isThumb() ? 4 : 8);
ARMConstantPoolValue *CPV =
- new ARMConstantPoolValue(MF.getFunction(), ARMPCLabelIndex,
- ARMCP::CPLSDA, PCAdj);
+ ARMConstantPoolConstant::Create(MF.getFunction(), ARMPCLabelIndex,
+ ARMCP::CPLSDA, PCAdj);
CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4);
CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
SDValue Result =
@@ -2277,6 +2297,25 @@ static SDValue LowerMEMBARRIER(SDValue Op, SelectionDAG &DAG,
DAG.getConstant(DMBOpt, MVT::i32));
}
+
+static SDValue LowerATOMIC_FENCE(SDValue Op, SelectionDAG &DAG,
+ const ARMSubtarget *Subtarget) {
+ // FIXME: handle "fence singlethread" more efficiently.
+ DebugLoc dl = Op.getDebugLoc();
+ if (!Subtarget->hasDataBarrier()) {
+ // Some ARMv6 CPUs can support data barriers with an mcr instruction.
+ // Thumb1 and pre-v6 ARM mode use a libcall instead and should never get
+ // here.
+ assert(Subtarget->hasV6Ops() && !Subtarget->isThumb() &&
+ "Unexpected ISD::ATOMIC_FENCE encountered. Should be libcall!");
+ return DAG.getNode(ARMISD::MEMBARRIER_MCR, dl, MVT::Other, Op.getOperand(0),
+ DAG.getConstant(0, MVT::i32));
+ }
+
+ return DAG.getNode(ARMISD::MEMBARRIER, dl, MVT::Other, Op.getOperand(0),
+ DAG.getConstant(ARM_MB::ISH, MVT::i32));
+}
+
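[Aside: the source-level constructs this handles are C/C++11 fences; on targets with a data barrier every ordering is satisfied by a single dmb ish (ARM_MB::ISH above), and barrier-less v6 cores fall back to the mcr encoding. A sketch of the inputs, assuming the usual C++11 mapping:]

    #include <atomic>

    // Each of these lowers through ISD::ATOMIC_FENCE; on ARMv7 all three can
    // be implemented by "dmb ish".
    void fences() {
      std::atomic_thread_fence(std::memory_order_acquire);
      std::atomic_thread_fence(std::memory_order_release);
      std::atomic_thread_fence(std::memory_order_seq_cst);
    }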
static SDValue LowerPREFETCH(SDValue Op, SelectionDAG &DAG,
const ARMSubtarget *Subtarget) {
// ARM pre v5TE and Thumb1 do not have preload instructions.
@@ -2754,7 +2793,7 @@ SDValue ARMTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
SDValue ARMcc;
SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMcc, DAG, dl);
- return DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal, ARMcc, CCR, Cmp);
+ return DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal, ARMcc, CCR,Cmp);
}
ARMCC::CondCodes CondCode, CondCode2;
@@ -2993,8 +3032,8 @@ static SDValue LowerVectorINT_TO_FP(SDValue Op, SelectionDAG &DAG) {
EVT VT = Op.getValueType();
DebugLoc dl = Op.getDebugLoc();
- EVT OperandVT = Op.getOperand(0).getValueType();
- assert(OperandVT == MVT::v4i16 && "Invalid type for custom lowering!");
+ assert(Op.getOperand(0).getValueType() == MVT::v4i16 &&
+ "Invalid type for custom lowering!");
if (VT != MVT::v4f32)
return DAG.UnrollVectorOp(Op.getNode());
@@ -3905,8 +3944,7 @@ SDValue ARMTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
}
// Try an immediate VMVN.
- uint64_t NegatedImm = (SplatBits.getZExtValue() ^
- ((1LL << SplatBitSize) - 1));
+ uint64_t NegatedImm = (~SplatBits).getZExtValue();
Val = isNEONModifiedImm(NegatedImm,
SplatUndef.getZExtValue(), SplatBitSize,
DAG, VmovVT, VT.is128BitVector(),
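[Aside: the VMVN rewrite above is more than style. For a 64-bit splat the old expression shifted 1LL by the full bit width, which C++ leaves undefined, whereas complementing the APInt is well defined at every width. A minimal illustration of the equivalence at 64 bits:]

    #include <cstdint>

    // ~splat equals splat xor an all-ones mask of the value's own width;
    // unlike (1LL << 64) - 1, it never shifts by the full width of the type.
    uint64_t negate_splat(uint64_t splat) {
      return ~splat;
    }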
@@ -4019,6 +4057,14 @@ SDValue ARMTargetLowering::ReconstructShuffle(SDValue Op,
// A shuffle can only come from building a vector from various
// elements of other vectors.
return SDValue();
+ } else if (V.getOperand(0).getValueType().getVectorElementType() !=
+ VT.getVectorElementType()) {
+ // This code doesn't know how to handle shuffles where the vector
+ // element types do not match (this happens because type legalization
+ // promotes the return type of EXTRACT_VECTOR_ELT).
+ // FIXME: It might be appropriate to extend this code to handle
+ // mismatched types.
+ return SDValue();
}
// Record this extraction against the appropriate vector if possible...
@@ -4819,6 +4865,71 @@ static SDValue LowerUDIV(SDValue Op, SelectionDAG &DAG) {
return N0;
}
+static SDValue LowerADDC_ADDE_SUBC_SUBE(SDValue Op, SelectionDAG &DAG) {
+ EVT VT = Op.getNode()->getValueType(0);
+ SDVTList VTs = DAG.getVTList(VT, MVT::i32);
+
+ unsigned Opc;
+ bool ExtraOp = false;
+ switch (Op.getOpcode()) {
+ default: assert(0 && "Invalid code");
+ case ISD::ADDC: Opc = ARMISD::ADDC; break;
+ case ISD::ADDE: Opc = ARMISD::ADDE; ExtraOp = true; break;
+ case ISD::SUBC: Opc = ARMISD::SUBC; break;
+ case ISD::SUBE: Opc = ARMISD::SUBE; ExtraOp = true; break;
+ }
+
+ if (!ExtraOp)
+ return DAG.getNode(Opc, Op->getDebugLoc(), VTs, Op.getOperand(0),
+ Op.getOperand(1));
+ return DAG.getNode(Opc, Op->getDebugLoc(), VTs, Op.getOperand(0),
+ Op.getOperand(1), Op.getOperand(2));
+}
+
+static SDValue LowerAtomicLoadStore(SDValue Op, SelectionDAG &DAG) {
+ // Monotonic load/store is legal for all targets
+ if (cast<AtomicSDNode>(Op)->getOrdering() <= Monotonic)
+ return Op;
+
+ // Acquire/Release load/store is not legal for targets without a
+ // dmb or equivalent available.
+ return SDValue();
+}
+
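[Aside: legal here means a plain ldr/str is enough. Relaxed (Monotonic or weaker) atomic accesses on ARM need no barrier, while stronger orderings return SDValue() above and are expanded with explicit fences. The same distinction in C++11 terms, as a sketch:]

    #include <atomic>
    #include <cstdint>

    // Monotonic corresponds to memory_order_relaxed: a bare ldr/str suffices,
    // so LowerAtomicLoadStore returns the node unchanged for these.
    uint32_t relaxed_load(const std::atomic<uint32_t> &x) {
      return x.load(std::memory_order_relaxed);
    }
    void relaxed_store(std::atomic<uint32_t> &x, uint32_t v) {
      x.store(v, std::memory_order_relaxed);
    }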
+
+static void
+ReplaceATOMIC_OP_64(SDNode *Node, SmallVectorImpl<SDValue>& Results,
+ SelectionDAG &DAG, unsigned NewOp) {
+ DebugLoc dl = Node->getDebugLoc();
+ assert (Node->getValueType(0) == MVT::i64 &&
+ "Only know how to expand i64 atomics");
+
+ SmallVector<SDValue, 6> Ops;
+ Ops.push_back(Node->getOperand(0)); // Chain
+ Ops.push_back(Node->getOperand(1)); // Ptr
+ // Low part of Val1
+ Ops.push_back(DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32,
+ Node->getOperand(2), DAG.getIntPtrConstant(0)));
+ // High part of Val1
+ Ops.push_back(DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32,
+ Node->getOperand(2), DAG.getIntPtrConstant(1)));
+ if (NewOp == ARMISD::ATOMCMPXCHG64_DAG) {
+ // Low part of Val2
+ Ops.push_back(DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32,
+ Node->getOperand(3), DAG.getIntPtrConstant(0)));
+ // High part of Val2
+ Ops.push_back(DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32,
+ Node->getOperand(3), DAG.getIntPtrConstant(1)));
+ }
+ SDVTList Tys = DAG.getVTList(MVT::i32, MVT::i32, MVT::Other);
+ SDValue Result =
+ DAG.getMemIntrinsicNode(NewOp, dl, Tys, Ops.data(), Ops.size(), MVT::i64,
+ cast<MemSDNode>(Node)->getMemOperand());
+ SDValue OpsF[] = { Result.getValue(0), Result.getValue(1) };
+ Results.push_back(DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, OpsF, 2));
+ Results.push_back(Result.getValue(2));
+}
+
SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
switch (Op.getOpcode()) {
default: llvm_unreachable("Don't know how to custom lower this!");
@@ -4834,6 +4945,7 @@ SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::BR_JT: return LowerBR_JT(Op, DAG);
case ISD::VASTART: return LowerVASTART(Op, DAG);
case ISD::MEMBARRIER: return LowerMEMBARRIER(Op, DAG, Subtarget);
+ case ISD::ATOMIC_FENCE: return LowerATOMIC_FENCE(Op, DAG, Subtarget);
case ISD::PREFETCH: return LowerPREFETCH(Op, DAG, Subtarget);
case ISD::SINT_TO_FP:
case ISD::UINT_TO_FP: return LowerINT_TO_FP(Op, DAG);
@@ -4856,7 +4968,7 @@ SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::SRL_PARTS:
case ISD::SRA_PARTS: return LowerShiftRightParts(Op, DAG);
case ISD::CTTZ: return LowerCTTZ(Op.getNode(), DAG, Subtarget);
- case ISD::VSETCC: return LowerVSETCC(Op, DAG);
+ case ISD::SETCC: return LowerVSETCC(Op, DAG);
case ISD::BUILD_VECTOR: return LowerBUILD_VECTOR(Op, DAG, Subtarget);
case ISD::VECTOR_SHUFFLE: return LowerVECTOR_SHUFFLE(Op, DAG);
case ISD::EXTRACT_VECTOR_ELT: return LowerEXTRACT_VECTOR_ELT(Op, DAG);
@@ -4865,6 +4977,12 @@ SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::MUL: return LowerMUL(Op, DAG);
case ISD::SDIV: return LowerSDIV(Op, DAG);
case ISD::UDIV: return LowerUDIV(Op, DAG);
+ case ISD::ADDC:
+ case ISD::ADDE:
+ case ISD::SUBC:
+ case ISD::SUBE: return LowerADDC_ADDE_SUBC_SUBE(Op, DAG);
+ case ISD::ATOMIC_LOAD:
+ case ISD::ATOMIC_STORE: return LowerAtomicLoadStore(Op, DAG);
}
return SDValue();
}
@@ -4886,6 +5004,30 @@ void ARMTargetLowering::ReplaceNodeResults(SDNode *N,
case ISD::SRA:
Res = Expand64BitShift(N, DAG, Subtarget);
break;
+ case ISD::ATOMIC_LOAD_ADD:
+ ReplaceATOMIC_OP_64(N, Results, DAG, ARMISD::ATOMADD64_DAG);
+ return;
+ case ISD::ATOMIC_LOAD_AND:
+ ReplaceATOMIC_OP_64(N, Results, DAG, ARMISD::ATOMAND64_DAG);
+ return;
+ case ISD::ATOMIC_LOAD_NAND:
+ ReplaceATOMIC_OP_64(N, Results, DAG, ARMISD::ATOMNAND64_DAG);
+ return;
+ case ISD::ATOMIC_LOAD_OR:
+ ReplaceATOMIC_OP_64(N, Results, DAG, ARMISD::ATOMOR64_DAG);
+ return;
+ case ISD::ATOMIC_LOAD_SUB:
+ ReplaceATOMIC_OP_64(N, Results, DAG, ARMISD::ATOMSUB64_DAG);
+ return;
+ case ISD::ATOMIC_LOAD_XOR:
+ ReplaceATOMIC_OP_64(N, Results, DAG, ARMISD::ATOMXOR64_DAG);
+ return;
+ case ISD::ATOMIC_SWAP:
+ ReplaceATOMIC_OP_64(N, Results, DAG, ARMISD::ATOMSWAP64_DAG);
+ return;
+ case ISD::ATOMIC_CMP_SWAP:
+ ReplaceATOMIC_OP_64(N, Results, DAG, ARMISD::ATOMCMPXCHG64_DAG);
+ return;
}
if (Res.getNode())
Results.push_back(Res);
@@ -4963,7 +5105,10 @@ ARMTargetLowering::EmitAtomicCmpSwap(MachineInstr *MI,
// cmp dest, oldval
// bne exitMBB
BB = loop1MBB;
- AddDefaultPred(BuildMI(BB, dl, TII->get(ldrOpc), dest).addReg(ptr));
+ MachineInstrBuilder MIB = BuildMI(BB, dl, TII->get(ldrOpc), dest).addReg(ptr);
+ if (ldrOpc == ARM::t2LDREX)
+ MIB.addImm(0);
+ AddDefaultPred(MIB);
AddDefaultPred(BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPrr : ARM::CMPrr))
.addReg(dest).addReg(oldval));
BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc))
@@ -4976,8 +5121,10 @@ ARMTargetLowering::EmitAtomicCmpSwap(MachineInstr *MI,
// cmp scratch, #0
// bne loop1MBB
BB = loop2MBB;
- AddDefaultPred(BuildMI(BB, dl, TII->get(strOpc), scratch).addReg(newval)
- .addReg(ptr));
+ MIB = BuildMI(BB, dl, TII->get(strOpc), scratch).addReg(newval).addReg(ptr);
+ if (strOpc == ARM::t2STREX)
+ MIB.addImm(0);
+ AddDefaultPred(MIB);
AddDefaultPred(BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri))
.addReg(scratch).addImm(0));
BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc))
@@ -5063,7 +5210,10 @@ ARMTargetLowering::EmitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB,
// bne- loopMBB
// fallthrough --> exitMBB
BB = loopMBB;
- AddDefaultPred(BuildMI(BB, dl, TII->get(ldrOpc), dest).addReg(ptr));
+ MachineInstrBuilder MIB = BuildMI(BB, dl, TII->get(ldrOpc), dest).addReg(ptr);
+ if (ldrOpc == ARM::t2LDREX)
+ MIB.addImm(0);
+ AddDefaultPred(MIB);
if (BinOpcode) {
// operand order needs to go the other way for NAND
if (BinOpcode == ARM::BICrr || BinOpcode == ARM::t2BICrr)
@@ -5074,8 +5224,10 @@ ARMTargetLowering::EmitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB,
addReg(dest).addReg(incr)).addReg(0);
}
- AddDefaultPred(BuildMI(BB, dl, TII->get(strOpc), scratch).addReg(scratch2)
- .addReg(ptr));
+ MIB = BuildMI(BB, dl, TII->get(strOpc), scratch).addReg(scratch2).addReg(ptr);
+ if (strOpc == ARM::t2STREX)
+ MIB.addImm(0);
+ AddDefaultPred(MIB);
AddDefaultPred(BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri))
.addReg(scratch).addImm(0));
BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc))
@@ -5125,12 +5277,12 @@ ARMTargetLowering::EmitAtomicBinaryMinMax(MachineInstr *MI,
case 1:
ldrOpc = isThumb2 ? ARM::t2LDREXB : ARM::LDREXB;
strOpc = isThumb2 ? ARM::t2STREXB : ARM::STREXB;
- extendOpc = isThumb2 ? ARM::t2SXTBr : ARM::SXTBr;
+ extendOpc = isThumb2 ? ARM::t2SXTB : ARM::SXTB;
break;
case 2:
ldrOpc = isThumb2 ? ARM::t2LDREXH : ARM::LDREXH;
strOpc = isThumb2 ? ARM::t2STREXH : ARM::STREXH;
- extendOpc = isThumb2 ? ARM::t2SXTHr : ARM::SXTHr;
+ extendOpc = isThumb2 ? ARM::t2SXTH : ARM::SXTH;
break;
case 4:
ldrOpc = isThumb2 ? ARM::t2LDREX : ARM::LDREX;
@@ -5170,12 +5322,17 @@ ARMTargetLowering::EmitAtomicBinaryMinMax(MachineInstr *MI,
// bne- loopMBB
// fallthrough --> exitMBB
BB = loopMBB;
- AddDefaultPred(BuildMI(BB, dl, TII->get(ldrOpc), dest).addReg(ptr));
+ MachineInstrBuilder MIB = BuildMI(BB, dl, TII->get(ldrOpc), dest).addReg(ptr);
+ if (ldrOpc == ARM::t2LDREX)
+ MIB.addImm(0);
+ AddDefaultPred(MIB);
// Sign extend the value, if necessary.
if (signExtend && extendOpc) {
oldval = MRI.createVirtualRegister(ARM::GPRRegisterClass);
- AddDefaultPred(BuildMI(BB, dl, TII->get(extendOpc), oldval).addReg(dest));
+ AddDefaultPred(BuildMI(BB, dl, TII->get(extendOpc), oldval)
+ .addReg(dest)
+ .addImm(0));
}
// Build compare and cmov instructions.
@@ -5184,8 +5341,10 @@ ARMTargetLowering::EmitAtomicBinaryMinMax(MachineInstr *MI,
BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2MOVCCr : ARM::MOVCCr), scratch2)
.addReg(oldval).addReg(incr).addImm(Cond).addReg(ARM::CPSR);
- AddDefaultPred(BuildMI(BB, dl, TII->get(strOpc), scratch).addReg(scratch2)
- .addReg(ptr));
+ MIB = BuildMI(BB, dl, TII->get(strOpc), scratch).addReg(scratch2).addReg(ptr);
+ if (strOpc == ARM::t2STREX)
+ MIB.addImm(0);
+ AddDefaultPred(MIB);
AddDefaultPred(BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri))
.addReg(scratch).addImm(0));
BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc))
@@ -5203,79 +5362,596 @@ ARMTargetLowering::EmitAtomicBinaryMinMax(MachineInstr *MI,
return BB;
}
-static
-MachineBasicBlock *OtherSucc(MachineBasicBlock *MBB, MachineBasicBlock *Succ) {
- for (MachineBasicBlock::succ_iterator I = MBB->succ_begin(),
- E = MBB->succ_end(); I != E; ++I)
- if (*I != Succ)
- return *I;
- llvm_unreachable("Expecting a BB with two successors!");
-}
+MachineBasicBlock *
+ARMTargetLowering::EmitAtomicBinary64(MachineInstr *MI, MachineBasicBlock *BB,
+ unsigned Op1, unsigned Op2,
+ bool NeedsCarry, bool IsCmpxchg) const {
+ // This also handles ATOMIC_SWAP, indicated by Op1==0.
+ const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
-// FIXME: This opcode table should obviously be expressed in the target
-// description. We probably just need a "machine opcode" value in the pseudo
-// instruction. But the ideal solution maybe to simply remove the "S" version
-// of the opcode altogether.
-struct AddSubFlagsOpcodePair {
- unsigned PseudoOpc;
- unsigned MachineOpc;
-};
+ const BasicBlock *LLVM_BB = BB->getBasicBlock();
+ MachineFunction *MF = BB->getParent();
+ MachineFunction::iterator It = BB;
+ ++It;
-static AddSubFlagsOpcodePair AddSubFlagsOpcodeMap[] = {
- {ARM::ADCSri, ARM::ADCri},
- {ARM::ADCSrr, ARM::ADCrr},
- {ARM::ADCSrs, ARM::ADCrs},
- {ARM::SBCSri, ARM::SBCri},
- {ARM::SBCSrr, ARM::SBCrr},
- {ARM::SBCSrs, ARM::SBCrs},
- {ARM::RSBSri, ARM::RSBri},
- {ARM::RSBSrr, ARM::RSBrr},
- {ARM::RSBSrs, ARM::RSBrs},
- {ARM::RSCSri, ARM::RSCri},
- {ARM::RSCSrs, ARM::RSCrs},
- {ARM::t2ADCSri, ARM::t2ADCri},
- {ARM::t2ADCSrr, ARM::t2ADCrr},
- {ARM::t2ADCSrs, ARM::t2ADCrs},
- {ARM::t2SBCSri, ARM::t2SBCri},
- {ARM::t2SBCSrr, ARM::t2SBCrr},
- {ARM::t2SBCSrs, ARM::t2SBCrs},
- {ARM::t2RSBSri, ARM::t2RSBri},
- {ARM::t2RSBSrs, ARM::t2RSBrs},
-};
+ unsigned destlo = MI->getOperand(0).getReg();
+ unsigned desthi = MI->getOperand(1).getReg();
+ unsigned ptr = MI->getOperand(2).getReg();
+ unsigned vallo = MI->getOperand(3).getReg();
+ unsigned valhi = MI->getOperand(4).getReg();
+ DebugLoc dl = MI->getDebugLoc();
+ bool isThumb2 = Subtarget->isThumb2();
-// Convert and Add or Subtract with Carry and Flags to a generic opcode with
-// CPSR<def> operand. e.g. ADCS (...) -> ADC (... CPSR<def>).
-//
-// FIXME: Somewhere we should assert that CPSR<def> is in the correct
-// position to be recognized by the target descrition as the 'S' bit.
-bool ARMTargetLowering::RemapAddSubWithFlags(MachineInstr *MI,
- MachineBasicBlock *BB) const {
- unsigned OldOpc = MI->getOpcode();
- unsigned NewOpc = 0;
+ MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
+ if (isThumb2) {
+ MRI.constrainRegClass(destlo, ARM::rGPRRegisterClass);
+ MRI.constrainRegClass(desthi, ARM::rGPRRegisterClass);
+ MRI.constrainRegClass(ptr, ARM::rGPRRegisterClass);
+ }
- // This is only called for instructions that need remapping, so iterating over
- // the tiny opcode table is not costly.
- static const int NPairs =
- sizeof(AddSubFlagsOpcodeMap) / sizeof(AddSubFlagsOpcodePair);
- for (AddSubFlagsOpcodePair *Pair = &AddSubFlagsOpcodeMap[0],
- *End = &AddSubFlagsOpcodeMap[NPairs]; Pair != End; ++Pair) {
- if (OldOpc == Pair->PseudoOpc) {
- NewOpc = Pair->MachineOpc;
- break;
+ unsigned ldrOpc = isThumb2 ? ARM::t2LDREXD : ARM::LDREXD;
+ unsigned strOpc = isThumb2 ? ARM::t2STREXD : ARM::STREXD;
+
+ MachineBasicBlock *loopMBB = MF->CreateMachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *contBB = 0, *cont2BB = 0;
+ if (IsCmpxchg) {
+ contBB = MF->CreateMachineBasicBlock(LLVM_BB);
+ cont2BB = MF->CreateMachineBasicBlock(LLVM_BB);
+ }
+ MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB);
+ MF->insert(It, loopMBB);
+ if (IsCmpxchg) {
+ MF->insert(It, contBB);
+ MF->insert(It, cont2BB);
+ }
+ MF->insert(It, exitMBB);
+
+ // Transfer the remainder of BB and its successor edges to exitMBB.
+ exitMBB->splice(exitMBB->begin(), BB,
+ llvm::next(MachineBasicBlock::iterator(MI)),
+ BB->end());
+ exitMBB->transferSuccessorsAndUpdatePHIs(BB);
+
+ TargetRegisterClass *TRC =
+ isThumb2 ? ARM::tGPRRegisterClass : ARM::GPRRegisterClass;
+ unsigned storesuccess = MRI.createVirtualRegister(TRC);
+
+ // thisMBB:
+ // ...
+ // fallthrough --> loopMBB
+ BB->addSuccessor(loopMBB);
+
+ // loopMBB:
+ // ldrexd r2, r3, ptr
+ // <binopa> r0, r2, incr
+ // <binopb> r1, r3, incr
+ // strexd storesuccess, r0, r1, ptr
+ // cmp storesuccess, #0
+ // bne- loopMBB
+ // fallthrough --> exitMBB
+ //
+ // Note that the registers are explicitly specified because there is no way
+ // to force the register allocator to allocate a register pair.
+ //
+ // FIXME: The hardcoded registers are not necessary for Thumb2, but we
+ // need to properly enforce the restriction that the two output registers
+ // for ldrexd must be different.
+ BB = loopMBB;
+ // Load
+ AddDefaultPred(BuildMI(BB, dl, TII->get(ldrOpc))
+ .addReg(ARM::R2, RegState::Define)
+ .addReg(ARM::R3, RegState::Define).addReg(ptr));
+ // Copy r2/r3 into dest. (This copy will normally be coalesced.)
+ BuildMI(BB, dl, TII->get(TargetOpcode::COPY), destlo).addReg(ARM::R2);
+ BuildMI(BB, dl, TII->get(TargetOpcode::COPY), desthi).addReg(ARM::R3);
+
+ if (IsCmpxchg) {
+ // Add early exit
+ for (unsigned i = 0; i < 2; i++) {
+ AddDefaultPred(BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPrr :
+ ARM::CMPrr))
+ .addReg(i == 0 ? destlo : desthi)
+ .addReg(i == 0 ? vallo : valhi));
+ BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc))
+ .addMBB(exitMBB).addImm(ARMCC::NE).addReg(ARM::CPSR);
+ BB->addSuccessor(exitMBB);
+ BB->addSuccessor(i == 0 ? contBB : cont2BB);
+ BB = (i == 0 ? contBB : cont2BB);
}
+
+ // Copy to physregs for strexd
+ unsigned setlo = MI->getOperand(5).getReg();
+ unsigned sethi = MI->getOperand(6).getReg();
+ BuildMI(BB, dl, TII->get(TargetOpcode::COPY), ARM::R0).addReg(setlo);
+ BuildMI(BB, dl, TII->get(TargetOpcode::COPY), ARM::R1).addReg(sethi);
+ } else if (Op1) {
+ // Perform binary operation
+ AddDefaultPred(BuildMI(BB, dl, TII->get(Op1), ARM::R0)
+ .addReg(destlo).addReg(vallo))
+ .addReg(NeedsCarry ? ARM::CPSR : 0, getDefRegState(NeedsCarry));
+ AddDefaultPred(BuildMI(BB, dl, TII->get(Op2), ARM::R1)
+ .addReg(desthi).addReg(valhi)).addReg(0);
+ } else {
+ // Copy to physregs for strexd
+ BuildMI(BB, dl, TII->get(TargetOpcode::COPY), ARM::R0).addReg(vallo);
+ BuildMI(BB, dl, TII->get(TargetOpcode::COPY), ARM::R1).addReg(valhi);
}
- if (!NewOpc)
- return false;
+ // Store
+ AddDefaultPred(BuildMI(BB, dl, TII->get(strOpc), storesuccess)
+ .addReg(ARM::R0).addReg(ARM::R1).addReg(ptr));
+ // Cmp+jump
+ AddDefaultPred(BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri))
+ .addReg(storesuccess).addImm(0));
+ BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc))
+ .addMBB(loopMBB).addImm(ARMCC::NE).addReg(ARM::CPSR);
+
+ BB->addSuccessor(loopMBB);
+ BB->addSuccessor(exitMBB);
+
+ // exitMBB:
+ // ...
+ BB = exitMBB;
+
+ MI->eraseFromParent(); // The instruction is gone now.
+
+ return BB;
+}
+
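[Aside: the machine loop laid out in the loopMBB comments above has the familiar load-linked/store-conditional retry shape. Expressed portably, compare_exchange_weak maps to an ldrexd/strexd pair on ARM, so this sketch mirrors the generated loop:]

    #include <atomic>
    #include <cstdint>

    // Retry until the strexd-equivalent succeeds; on failure the expected
    // value is refreshed, matching the bne back to loopMBB.
    uint64_t atomic_add64(std::atomic<uint64_t> &p, uint64_t incr) {
      uint64_t old = p.load(std::memory_order_relaxed);
      while (!p.compare_exchange_weak(old, old + incr)) {
        // old now holds the current value; retry the add.
      }
      return old;
    }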
+/// EmitBasePointerRecalculation - For functions using a base pointer, we
+/// rematerialize it (via the frame pointer).
+void ARMTargetLowering::
+EmitBasePointerRecalculation(MachineInstr *MI, MachineBasicBlock *MBB,
+ MachineBasicBlock *DispatchBB) const {
+ const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+ const ARMBaseInstrInfo *AII = static_cast<const ARMBaseInstrInfo*>(TII);
+ MachineFunction &MF = *MI->getParent()->getParent();
+ ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+ const ARMBaseRegisterInfo &RI = AII->getRegisterInfo();
+
+ if (!RI.hasBasePointer(MF)) return;
+
+ MachineBasicBlock::iterator MBBI = MI;
+
+ int32_t NumBytes = AFI->getFramePtrSpillOffset();
+ unsigned FramePtr = RI.getFrameRegister(MF);
+ assert(MF.getTarget().getFrameLowering()->hasFP(MF) &&
+ "Base pointer without frame pointer?");
+
+ if (AFI->isThumb2Function())
+ llvm::emitT2RegPlusImmediate(*MBB, MBBI, MI->getDebugLoc(), ARM::R6,
+ FramePtr, -NumBytes, ARMCC::AL, 0, *AII);
+ else if (AFI->isThumbFunction())
+ llvm::emitThumbRegPlusImmediate(*MBB, MBBI, MI->getDebugLoc(), ARM::R6,
+ FramePtr, -NumBytes, *AII, RI);
+ else
+ llvm::emitARMRegPlusImmediate(*MBB, MBBI, MI->getDebugLoc(), ARM::R6,
+ FramePtr, -NumBytes, ARMCC::AL, 0, *AII);
+
+ if (!RI.needsStackRealignment(MF)) return;
+
+ // If there's dynamic realignment, adjust for it.
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ unsigned MaxAlign = MFI->getMaxAlignment();
+ assert(!AFI->isThumb1OnlyFunction());
+
+ // Emit bic r6, r6, MaxAlign
+ unsigned bicOpc = AFI->isThumbFunction() ? ARM::t2BICri : ARM::BICri;
+ AddDefaultCC(
+ AddDefaultPred(
+ BuildMI(*MBB, MBBI, MI->getDebugLoc(), TII->get(bicOpc), ARM::R6)
+ .addReg(ARM::R6, RegState::Kill)
+ .addImm(MaxAlign - 1)));
+}
+
+/// SetupEntryBlockForSjLj - Insert code into the entry block that creates and
+/// registers the function context.
+void ARMTargetLowering::
+SetupEntryBlockForSjLj(MachineInstr *MI, MachineBasicBlock *MBB,
+ MachineBasicBlock *DispatchBB, int FI) const {
const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
DebugLoc dl = MI->getDebugLoc();
- MachineInstrBuilder MIB = BuildMI(*BB, MI, dl, TII->get(NewOpc));
- for (unsigned i = 0; i < MI->getNumOperands(); ++i)
- MIB.addOperand(MI->getOperand(i));
- AddDefaultPred(MIB);
- MIB.addReg(ARM::CPSR, RegState::Define); // S bit
+ MachineFunction *MF = MBB->getParent();
+ MachineRegisterInfo *MRI = &MF->getRegInfo();
+ MachineConstantPool *MCP = MF->getConstantPool();
+ ARMFunctionInfo *AFI = MF->getInfo<ARMFunctionInfo>();
+ const Function *F = MF->getFunction();
+
+ bool isThumb = Subtarget->isThumb();
+ bool isThumb2 = Subtarget->isThumb2();
+
+ unsigned PCLabelId = AFI->createPICLabelUId();
+ unsigned PCAdj = (isThumb || isThumb2) ? 4 : 8;
+ ARMConstantPoolValue *CPV =
+ ARMConstantPoolMBB::Create(F->getContext(), DispatchBB, PCLabelId, PCAdj);
+ unsigned CPI = MCP->getConstantPoolIndex(CPV, 4);
+
+ const TargetRegisterClass *TRC =
+ isThumb ? ARM::tGPRRegisterClass : ARM::GPRRegisterClass;
+
+ // Grab constant pool and fixed stack memory operands.
+ MachineMemOperand *CPMMO =
+ MF->getMachineMemOperand(MachinePointerInfo::getConstantPool(),
+ MachineMemOperand::MOLoad, 4, 4);
+
+ MachineMemOperand *FIMMOSt =
+ MF->getMachineMemOperand(MachinePointerInfo::getFixedStack(FI),
+ MachineMemOperand::MOStore, 4, 4);
+
+ EmitBasePointerRecalculation(MI, MBB, DispatchBB);
+
+ // Load the address of the dispatch MBB into the jump buffer.
+ if (isThumb2) {
+ // Incoming value: jbuf
+ // ldr.n r5, LCPI1_1
+ // orr r5, r5, #1
+ // add r5, pc
+ // str r5, [$jbuf, #+4] ; &jbuf[1]
+ unsigned NewVReg1 = MRI->createVirtualRegister(TRC);
+ AddDefaultPred(BuildMI(*MBB, MI, dl, TII->get(ARM::t2LDRpci), NewVReg1)
+ .addConstantPoolIndex(CPI)
+ .addMemOperand(CPMMO));
+ // Set the low bit because of thumb mode.
+ unsigned NewVReg2 = MRI->createVirtualRegister(TRC);
+ AddDefaultCC(
+ AddDefaultPred(BuildMI(*MBB, MI, dl, TII->get(ARM::t2ORRri), NewVReg2)
+ .addReg(NewVReg1, RegState::Kill)
+ .addImm(0x01)));
+ unsigned NewVReg3 = MRI->createVirtualRegister(TRC);
+ BuildMI(*MBB, MI, dl, TII->get(ARM::tPICADD), NewVReg3)
+ .addReg(NewVReg2, RegState::Kill)
+ .addImm(PCLabelId);
+ AddDefaultPred(BuildMI(*MBB, MI, dl, TII->get(ARM::t2STRi12))
+ .addReg(NewVReg3, RegState::Kill)
+ .addFrameIndex(FI)
+ .addImm(36) // &jbuf[1] :: pc
+ .addMemOperand(FIMMOSt));
+ } else if (isThumb) {
+ // Incoming value: jbuf
+ // ldr.n r1, LCPI1_4
+ // add r1, pc
+ // mov r2, #1
+ // orrs r1, r2
+ // add r2, $jbuf, #+4 ; &jbuf[1]
+ // str r1, [r2]
+ unsigned NewVReg1 = MRI->createVirtualRegister(TRC);
+ AddDefaultPred(BuildMI(*MBB, MI, dl, TII->get(ARM::tLDRpci), NewVReg1)
+ .addConstantPoolIndex(CPI)
+ .addMemOperand(CPMMO));
+ unsigned NewVReg2 = MRI->createVirtualRegister(TRC);
+ BuildMI(*MBB, MI, dl, TII->get(ARM::tPICADD), NewVReg2)
+ .addReg(NewVReg1, RegState::Kill)
+ .addImm(PCLabelId);
+ // Set the low bit because of thumb mode.
+ unsigned NewVReg3 = MRI->createVirtualRegister(TRC);
+ AddDefaultPred(BuildMI(*MBB, MI, dl, TII->get(ARM::tMOVi8), NewVReg3)
+ .addReg(ARM::CPSR, RegState::Define)
+ .addImm(1));
+ unsigned NewVReg4 = MRI->createVirtualRegister(TRC);
+ AddDefaultPred(BuildMI(*MBB, MI, dl, TII->get(ARM::tORR), NewVReg4)
+ .addReg(ARM::CPSR, RegState::Define)
+ .addReg(NewVReg2, RegState::Kill)
+ .addReg(NewVReg3, RegState::Kill));
+ unsigned NewVReg5 = MRI->createVirtualRegister(TRC);
+ AddDefaultPred(BuildMI(*MBB, MI, dl, TII->get(ARM::tADDrSPi), NewVReg5)
+ .addFrameIndex(FI)
+ .addImm(36)); // &jbuf[1] :: pc
+ AddDefaultPred(BuildMI(*MBB, MI, dl, TII->get(ARM::tSTRi))
+ .addReg(NewVReg4, RegState::Kill)
+ .addReg(NewVReg5, RegState::Kill)
+ .addImm(0)
+ .addMemOperand(FIMMOSt));
+ } else {
+ // Incoming value: jbuf
+ // ldr r1, LCPI1_1
+ // add r1, pc, r1
+ // str r1, [$jbuf, #+4] ; &jbuf[1]
+ unsigned NewVReg1 = MRI->createVirtualRegister(TRC);
+ AddDefaultPred(BuildMI(*MBB, MI, dl, TII->get(ARM::LDRi12), NewVReg1)
+ .addConstantPoolIndex(CPI)
+ .addImm(0)
+ .addMemOperand(CPMMO));
+ unsigned NewVReg2 = MRI->createVirtualRegister(TRC);
+ AddDefaultPred(BuildMI(*MBB, MI, dl, TII->get(ARM::PICADD), NewVReg2)
+ .addReg(NewVReg1, RegState::Kill)
+ .addImm(PCLabelId));
+ AddDefaultPred(BuildMI(*MBB, MI, dl, TII->get(ARM::STRi12))
+ .addReg(NewVReg2, RegState::Kill)
+ .addFrameIndex(FI)
+ .addImm(36) // &jbuf[1] :: pc
+ .addMemOperand(FIMMOSt));
+ }
+}
+
+MachineBasicBlock *ARMTargetLowering::
+EmitSjLjDispatchBlock(MachineInstr *MI, MachineBasicBlock *MBB) const {
+ const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+ DebugLoc dl = MI->getDebugLoc();
+ MachineFunction *MF = MBB->getParent();
+ MachineRegisterInfo *MRI = &MF->getRegInfo();
+ ARMFunctionInfo *AFI = MF->getInfo<ARMFunctionInfo>();
+ MachineFrameInfo *MFI = MF->getFrameInfo();
+ int FI = MFI->getFunctionContextIndex();
+
+ const TargetRegisterClass *TRC =
+ Subtarget->isThumb() ? ARM::tGPRRegisterClass : ARM::GPRRegisterClass;
+
+ // Get a mapping of the call site numbers to all of the landing pads they're
+ // associated with.
+ DenseMap<unsigned, SmallVector<MachineBasicBlock*, 2> > CallSiteNumToLPad;
+ unsigned MaxCSNum = 0;
+ MachineModuleInfo &MMI = MF->getMMI();
+ for (MachineFunction::iterator BB = MF->begin(), E = MF->end(); BB != E; ++BB) {
+ if (!BB->isLandingPad()) continue;
+
+ // FIXME: We should assert that the EH_LABEL is the first MI in the landing
+ // pad.
+ for (MachineBasicBlock::iterator
+ II = BB->begin(), IE = BB->end(); II != IE; ++II) {
+ if (!II->isEHLabel()) continue;
+
+ MCSymbol *Sym = II->getOperand(0).getMCSymbol();
+ if (!MMI.hasCallSiteLandingPad(Sym)) continue;
+
+ SmallVectorImpl<unsigned> &CallSiteIdxs = MMI.getCallSiteLandingPad(Sym);
+ for (SmallVectorImpl<unsigned>::iterator
+ CSI = CallSiteIdxs.begin(), CSE = CallSiteIdxs.end();
+ CSI != CSE; ++CSI) {
+ CallSiteNumToLPad[*CSI].push_back(BB);
+ MaxCSNum = std::max(MaxCSNum, *CSI);
+ }
+ break;
+ }
+ }
+
+ // Get an ordered list of the machine basic blocks for the jump table.
+ std::vector<MachineBasicBlock*> LPadList;
+ SmallPtrSet<MachineBasicBlock*, 64> InvokeBBs;
+ LPadList.reserve(CallSiteNumToLPad.size());
+ for (unsigned I = 1; I <= MaxCSNum; ++I) {
+ SmallVectorImpl<MachineBasicBlock*> &MBBList = CallSiteNumToLPad[I];
+ for (SmallVectorImpl<MachineBasicBlock*>::iterator
+ II = MBBList.begin(), IE = MBBList.end(); II != IE; ++II) {
+ LPadList.push_back(*II);
+ InvokeBBs.insert((*II)->pred_begin(), (*II)->pred_end());
+ }
+ }
+
+ assert(!LPadList.empty() &&
+ "No landing pad destinations for the dispatch jump table!");
+
+ // Create the jump table and associated information.
+ MachineJumpTableInfo *JTI =
+ MF->getOrCreateJumpTableInfo(MachineJumpTableInfo::EK_Inline);
+ unsigned MJTI = JTI->createJumpTableIndex(LPadList);
+ unsigned UId = AFI->createJumpTableUId();
+
+ // Create the MBBs for the dispatch code.
+
+ // Shove the dispatch's address into the return slot in the function context.
+ MachineBasicBlock *DispatchBB = MF->CreateMachineBasicBlock();
+ DispatchBB->setIsLandingPad();
+
+ MachineBasicBlock *TrapBB = MF->CreateMachineBasicBlock();
+ BuildMI(TrapBB, dl, TII->get(Subtarget->isThumb() ? ARM::tTRAP : ARM::TRAP));
+ DispatchBB->addSuccessor(TrapBB);
+
+ MachineBasicBlock *DispContBB = MF->CreateMachineBasicBlock();
+ DispatchBB->addSuccessor(DispContBB);
+
+ // Insert and renumber MBBs.
+ MachineBasicBlock *Last = &MF->back();
+ MF->insert(MF->end(), DispatchBB);
+ MF->insert(MF->end(), DispContBB);
+ MF->insert(MF->end(), TrapBB);
+ MF->RenumberBlocks(Last);
+
+ // Insert code into the entry block that creates and registers the function
+ // context.
+ SetupEntryBlockForSjLj(MI, MBB, DispatchBB, FI);
+
+ MachineMemOperand *FIMMOLd =
+ MF->getMachineMemOperand(MachinePointerInfo::getFixedStack(FI),
+ MachineMemOperand::MOLoad |
+ MachineMemOperand::MOVolatile, 4, 4);
+
+ if (Subtarget->isThumb2()) {
+ unsigned NewVReg1 = MRI->createVirtualRegister(TRC);
+ AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::t2LDRi12), NewVReg1)
+ .addFrameIndex(FI)
+ .addImm(4)
+ .addMemOperand(FIMMOLd));
+ AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::t2CMPri))
+ .addReg(NewVReg1)
+ .addImm(LPadList.size()));
+ BuildMI(DispatchBB, dl, TII->get(ARM::t2Bcc))
+ .addMBB(TrapBB)
+ .addImm(ARMCC::HI)
+ .addReg(ARM::CPSR);
+
+ unsigned NewVReg2 = MRI->createVirtualRegister(TRC);
+ AddDefaultPred(BuildMI(DispContBB, dl, TII->get(ARM::t2LEApcrelJT),NewVReg2)
+ .addJumpTableIndex(MJTI)
+ .addImm(UId));
+
+ unsigned NewVReg3 = MRI->createVirtualRegister(TRC);
+ AddDefaultCC(
+ AddDefaultPred(
+ BuildMI(DispContBB, dl, TII->get(ARM::t2ADDrs), NewVReg3)
+ .addReg(NewVReg2, RegState::Kill)
+ .addReg(NewVReg1)
+ .addImm(ARM_AM::getSORegOpc(ARM_AM::lsl, 2))));
+
+ BuildMI(DispContBB, dl, TII->get(ARM::t2BR_JT))
+ .addReg(NewVReg3, RegState::Kill)
+ .addReg(NewVReg1)
+ .addJumpTableIndex(MJTI)
+ .addImm(UId);
+ } else if (Subtarget->isThumb()) {
+ unsigned NewVReg1 = MRI->createVirtualRegister(TRC);
+ AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::tLDRspi), NewVReg1)
+ .addFrameIndex(FI)
+ .addImm(1)
+ .addMemOperand(FIMMOLd));
+
+ AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::tCMPi8))
+ .addReg(NewVReg1)
+ .addImm(LPadList.size()));
+ BuildMI(DispatchBB, dl, TII->get(ARM::tBcc))
+ .addMBB(TrapBB)
+ .addImm(ARMCC::HI)
+ .addReg(ARM::CPSR);
+
+ unsigned NewVReg2 = MRI->createVirtualRegister(TRC);
+ AddDefaultPred(BuildMI(DispContBB, dl, TII->get(ARM::tLSLri), NewVReg2)
+ .addReg(ARM::CPSR, RegState::Define)
+ .addReg(NewVReg1)
+ .addImm(2));
+
+ unsigned NewVReg3 = MRI->createVirtualRegister(TRC);
+ AddDefaultPred(BuildMI(DispContBB, dl, TII->get(ARM::tLEApcrelJT), NewVReg3)
+ .addJumpTableIndex(MJTI)
+ .addImm(UId));
+
+ unsigned NewVReg4 = MRI->createVirtualRegister(TRC);
+ AddDefaultPred(BuildMI(DispContBB, dl, TII->get(ARM::tADDrr), NewVReg4)
+ .addReg(ARM::CPSR, RegState::Define)
+ .addReg(NewVReg2, RegState::Kill)
+ .addReg(NewVReg3));
+
+ MachineMemOperand *JTMMOLd =
+ MF->getMachineMemOperand(MachinePointerInfo::getJumpTable(),
+ MachineMemOperand::MOLoad, 4, 4);
+
+ unsigned NewVReg5 = MRI->createVirtualRegister(TRC);
+ AddDefaultPred(BuildMI(DispContBB, dl, TII->get(ARM::tLDRi), NewVReg5)
+ .addReg(NewVReg4, RegState::Kill)
+ .addImm(0)
+ .addMemOperand(JTMMOLd));
+
+ unsigned NewVReg6 = MRI->createVirtualRegister(TRC);
+ AddDefaultPred(BuildMI(DispContBB, dl, TII->get(ARM::tADDrr), NewVReg6)
+ .addReg(ARM::CPSR, RegState::Define)
+ .addReg(NewVReg5, RegState::Kill)
+ .addReg(NewVReg3));
+
+ BuildMI(DispContBB, dl, TII->get(ARM::tBR_JTr))
+ .addReg(NewVReg6, RegState::Kill)
+ .addJumpTableIndex(MJTI)
+ .addImm(UId);
+ } else {
+ unsigned NewVReg1 = MRI->createVirtualRegister(TRC);
+ AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::LDRi12), NewVReg1)
+ .addFrameIndex(FI)
+ .addImm(4)
+ .addMemOperand(FIMMOLd));
+ AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::CMPri))
+ .addReg(NewVReg1)
+ .addImm(LPadList.size()));
+ BuildMI(DispatchBB, dl, TII->get(ARM::Bcc))
+ .addMBB(TrapBB)
+ .addImm(ARMCC::HI)
+ .addReg(ARM::CPSR);
+
+ unsigned NewVReg2 = MRI->createVirtualRegister(TRC);
+ AddDefaultCC(
+ AddDefaultPred(BuildMI(DispContBB, dl, TII->get(ARM::MOVsi), NewVReg2)
+ .addReg(NewVReg1)
+ .addImm(ARM_AM::getSORegOpc(ARM_AM::lsl, 2))));
+ unsigned NewVReg3 = MRI->createVirtualRegister(TRC);
+ AddDefaultPred(BuildMI(DispContBB, dl, TII->get(ARM::LEApcrelJT), NewVReg3)
+ .addJumpTableIndex(MJTI)
+ .addImm(UId));
+
+ MachineMemOperand *JTMMOLd =
+ MF->getMachineMemOperand(MachinePointerInfo::getJumpTable(),
+ MachineMemOperand::MOLoad, 4, 4);
+ unsigned NewVReg4 = MRI->createVirtualRegister(TRC);
+ AddDefaultPred(
+ BuildMI(DispContBB, dl, TII->get(ARM::LDRrs), NewVReg4)
+ .addReg(NewVReg2, RegState::Kill)
+ .addReg(NewVReg3)
+ .addImm(0)
+ .addMemOperand(JTMMOLd));
+
+ BuildMI(DispContBB, dl, TII->get(ARM::BR_JTadd))
+ .addReg(NewVReg4, RegState::Kill)
+ .addReg(NewVReg3)
+ .addJumpTableIndex(MJTI)
+ .addImm(UId);
+ }
+
+ // Add the jump table entries as successors to the MBB.
+ MachineBasicBlock *PrevMBB = 0;
+ for (std::vector<MachineBasicBlock*>::iterator
+ I = LPadList.begin(), E = LPadList.end(); I != E; ++I) {
+ MachineBasicBlock *CurMBB = *I;
+ if (PrevMBB != CurMBB)
+ DispContBB->addSuccessor(CurMBB);
+ PrevMBB = CurMBB;
+ }
+
+ const ARMBaseInstrInfo *AII = static_cast<const ARMBaseInstrInfo*>(TII);
+ const ARMBaseRegisterInfo &RI = AII->getRegisterInfo();
+ const unsigned *SavedRegs = RI.getCalleeSavedRegs(MF);
+ for (SmallPtrSet<MachineBasicBlock*, 64>::iterator
+ I = InvokeBBs.begin(), E = InvokeBBs.end(); I != E; ++I) {
+ MachineBasicBlock *BB = *I;
+
+ // Remove the landing pad successor from the invoke block and replace it
+ // with the new dispatch block.
+ for (MachineBasicBlock::succ_iterator
+ SI = BB->succ_begin(), SE = BB->succ_end(); SI != SE; ++SI) {
+ MachineBasicBlock *SMBB = *SI;
+ if (SMBB->isLandingPad()) {
+ BB->removeSuccessor(SMBB);
+ SMBB->setIsLandingPad(false);
+ }
+ }
+
+ BB->addSuccessor(DispatchBB);
+
+ // Find the invoke call and mark all of the callee-saved registers as
+ // 'implicit defined' so that they're spilled. This prevents later passes
+ // from moving instructions to before the EH block, where they would never
+ // be executed.
+ for (MachineBasicBlock::reverse_iterator
+ II = BB->rbegin(), IE = BB->rend(); II != IE; ++II) {
+ if (!II->getDesc().isCall()) continue;
+
+ DenseMap<unsigned, bool> DefRegs;
+ for (MachineInstr::mop_iterator
+ OI = II->operands_begin(), OE = II->operands_end();
+ OI != OE; ++OI) {
+ if (!OI->isReg()) continue;
+ DefRegs[OI->getReg()] = true;
+ }
+
+ MachineInstrBuilder MIB(&*II);
+
+ for (unsigned i = 0; SavedRegs[i] != 0; ++i) {
+ if (!TRC->contains(SavedRegs[i])) continue;
+ if (!DefRegs[SavedRegs[i]])
+ MIB.addReg(SavedRegs[i], RegState::ImplicitDefine | RegState::Dead);
+ }
+
+ break;
+ }
+ }
+
+ // The instruction is gone now.
MI->eraseFromParent();
- return true;
+
+ return MBB;
+}
+
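[Aside: functionally, the dispatch block is a bounds-checked computed jump over the landing-pad table: load the call-site number stored in the function context, trap when it is out of range, otherwise branch through the inline jump table. A loose sketch with the index handling simplified; the names here are illustrative only:]

    #include <cstdlib>

    using Handler = void (*)();

    // cmp NewVReg1, LPadList.size(); bhi TrapBB; otherwise an indexed jump
    // through the table built from LPadList (call-site numbers start at 1).
    void dispatch(unsigned callSite, const Handler *lpadTable,
                  unsigned numLPads) {
      if (callSite == 0 || callSite > numLPads)
        std::abort();            // TrapBB
      lpadTable[callSite - 1](); // DispContBB
    }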
+static
+MachineBasicBlock *OtherSucc(MachineBasicBlock *MBB, MachineBasicBlock *Succ) {
+ for (MachineBasicBlock::succ_iterator I = MBB->succ_begin(),
+ E = MBB->succ_end(); I != E; ++I)
+ if (*I != Succ)
+ return *I;
+ llvm_unreachable("Expecting a BB with two successors!");
}
MachineBasicBlock *
@@ -5286,12 +5962,61 @@ ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
bool isThumb2 = Subtarget->isThumb2();
switch (MI->getOpcode()) {
default: {
- if (RemapAddSubWithFlags(MI, BB))
- return BB;
-
MI->dump();
llvm_unreachable("Unexpected instr type to insert");
}
+ // The Thumb2 pre-indexed stores have the same MI operands; the .td files
+ // just define them differently from the isel patterns, so they need
+ // pseudos.
+ case ARM::t2STR_preidx:
+ MI->setDesc(TII->get(ARM::t2STR_PRE));
+ return BB;
+ case ARM::t2STRB_preidx:
+ MI->setDesc(TII->get(ARM::t2STRB_PRE));
+ return BB;
+ case ARM::t2STRH_preidx:
+ MI->setDesc(TII->get(ARM::t2STRH_PRE));
+ return BB;
+
+ case ARM::STRi_preidx:
+ case ARM::STRBi_preidx: {
+ unsigned NewOpc = MI->getOpcode() == ARM::STRi_preidx ?
+ ARM::STR_PRE_IMM : ARM::STRB_PRE_IMM;
+ // Decode the offset.
+ unsigned Offset = MI->getOperand(4).getImm();
+ bool isSub = ARM_AM::getAM2Op(Offset) == ARM_AM::sub;
+ Offset = ARM_AM::getAM2Offset(Offset);
+ if (isSub)
+ Offset = -Offset;
+
+ MachineMemOperand *MMO = *MI->memoperands_begin();
+ BuildMI(*BB, MI, dl, TII->get(NewOpc))
+ .addOperand(MI->getOperand(0)) // Rn_wb
+ .addOperand(MI->getOperand(1)) // Rt
+ .addOperand(MI->getOperand(2)) // Rn
+ .addImm(Offset) // offset (skip GPR==zero_reg)
+ .addOperand(MI->getOperand(5)) // pred
+ .addOperand(MI->getOperand(6))
+ .addMemOperand(MMO);
+ MI->eraseFromParent();
+ return BB;
+ }
+ case ARM::STRr_preidx:
+ case ARM::STRBr_preidx:
+ case ARM::STRH_preidx: {
+ unsigned NewOpc;
+ switch (MI->getOpcode()) {
+ default: llvm_unreachable("unexpected opcode!");
+ case ARM::STRr_preidx: NewOpc = ARM::STR_PRE_REG; break;
+ case ARM::STRBr_preidx: NewOpc = ARM::STRB_PRE_REG; break;
+ case ARM::STRH_preidx: NewOpc = ARM::STRH_PRE; break;
+ }
+ MachineInstrBuilder MIB = BuildMI(*BB, MI, dl, TII->get(NewOpc));
+ for (unsigned i = 0; i < MI->getNumOperands(); ++i)
+ MIB.addOperand(MI->getOperand(i));
+ MI->eraseFromParent();
+ return BB;
+ }
case ARM::ATOMIC_LOAD_ADD_I8:
return EmitAtomicBinary(MI, BB, 1, isThumb2 ? ARM::t2ADDrr : ARM::ADDrr);
case ARM::ATOMIC_LOAD_ADD_I16:
@@ -5370,6 +6095,31 @@ ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
case ARM::ATOMIC_CMP_SWAP_I16: return EmitAtomicCmpSwap(MI, BB, 2);
case ARM::ATOMIC_CMP_SWAP_I32: return EmitAtomicCmpSwap(MI, BB, 4);
+
+ case ARM::ATOMADD6432:
+ return EmitAtomicBinary64(MI, BB, isThumb2 ? ARM::t2ADDrr : ARM::ADDrr,
+ isThumb2 ? ARM::t2ADCrr : ARM::ADCrr,
+ /*NeedsCarry*/ true);
+ case ARM::ATOMSUB6432:
+ return EmitAtomicBinary64(MI, BB, isThumb2 ? ARM::t2SUBrr : ARM::SUBrr,
+ isThumb2 ? ARM::t2SBCrr : ARM::SBCrr,
+ /*NeedsCarry*/ true);
+ case ARM::ATOMOR6432:
+ return EmitAtomicBinary64(MI, BB, isThumb2 ? ARM::t2ORRrr : ARM::ORRrr,
+ isThumb2 ? ARM::t2ORRrr : ARM::ORRrr);
+ case ARM::ATOMXOR6432:
+ return EmitAtomicBinary64(MI, BB, isThumb2 ? ARM::t2EORrr : ARM::EORrr,
+ isThumb2 ? ARM::t2EORrr : ARM::EORrr);
+ case ARM::ATOMAND6432:
+ return EmitAtomicBinary64(MI, BB, isThumb2 ? ARM::t2ANDrr : ARM::ANDrr,
+ isThumb2 ? ARM::t2ANDrr : ARM::ANDrr);
+ case ARM::ATOMSWAP6432:
+ return EmitAtomicBinary64(MI, BB, 0, 0, false);
+ case ARM::ATOMCMPXCHG6432:
+ return EmitAtomicBinary64(MI, BB, isThumb2 ? ARM::t2SUBrr : ARM::SUBrr,
+ isThumb2 ? ARM::t2SBCrr : ARM::SBCrr,
+ /*NeedsCarry*/ false, /*IsCmpxchg*/true);
+
case ARM::tMOVCCr_pseudo: {
// To "insert" a SELECT_CC instruction, we actually have to insert the
// diamond control-flow pattern. The incoming instruction knows the
@@ -5461,13 +6211,159 @@ ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc))
.addMBB(destMBB).addImm(ARMCC::EQ).addReg(ARM::CPSR);
- BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2B : ARM::B))
- .addMBB(exitMBB);
+ if (isThumb2)
+ AddDefaultPred(BuildMI(BB, dl, TII->get(ARM::t2B)).addMBB(exitMBB));
+ else
+ BuildMI(BB, dl, TII->get(ARM::B)).addMBB(exitMBB);
MI->eraseFromParent(); // The pseudo instruction is gone now.
return BB;
}
+
+ case ARM::ABS:
+ case ARM::t2ABS: {
+ // To insert an ABS instruction, we have to insert the
+ // diamond control-flow pattern. The incoming instruction knows the
+ // source vreg to test against 0, the destination vreg to set,
+ // the condition code register to branch on, the
+ // true/false values to select between, and a branch opcode to use.
+ // It transforms
+ // V1 = ABS V0
+ // into
+ // V2 = MOVS V0
+ // BCC (branch to SinkBB if V0 >= 0)
+ // RSBBB: V3 = RSBri V2, 0 (compute ABS if V2 < 0)
+ // SinkBB: V1 = PHI(V2, V3)
+ const BasicBlock *LLVM_BB = BB->getBasicBlock();
+ MachineFunction::iterator BBI = BB;
+ ++BBI;
+ MachineFunction *Fn = BB->getParent();
+ MachineBasicBlock *RSBBB = Fn->CreateMachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *SinkBB = Fn->CreateMachineBasicBlock(LLVM_BB);
+ Fn->insert(BBI, RSBBB);
+ Fn->insert(BBI, SinkBB);
+
+ unsigned int ABSSrcReg = MI->getOperand(1).getReg();
+ unsigned int ABSDstReg = MI->getOperand(0).getReg();
+ bool isThumb2 = Subtarget->isThumb2();
+ MachineRegisterInfo &MRI = Fn->getRegInfo();
+ // In Thumb mode S must not be specified if source register is the SP or
+ // PC and if destination register is the SP, so restrict register class
+ unsigned NewMovDstReg = MRI.createVirtualRegister(
+ isThumb2 ? ARM::rGPRRegisterClass : ARM::GPRRegisterClass);
+ unsigned NewRsbDstReg = MRI.createVirtualRegister(
+ isThumb2 ? ARM::rGPRRegisterClass : ARM::GPRRegisterClass);
+
+ // Transfer the remainder of BB and its successor edges to sinkMBB.
+ SinkBB->splice(SinkBB->begin(), BB,
+ llvm::next(MachineBasicBlock::iterator(MI)),
+ BB->end());
+ SinkBB->transferSuccessorsAndUpdatePHIs(BB);
+
+ BB->addSuccessor(RSBBB);
+ BB->addSuccessor(SinkBB);
+
+ // RSBBB falls through to SinkBB
+ RSBBB->addSuccessor(SinkBB);
+
+ // insert a movs at the end of BB
+ BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2MOVr : ARM::MOVr),
+ NewMovDstReg)
+ .addReg(ABSSrcReg, RegState::Kill)
+ .addImm((unsigned)ARMCC::AL).addReg(0)
+ .addReg(ARM::CPSR, RegState::Define);
+
+ // insert a bcc with opposite CC to ARMCC::MI at the end of BB
+ BuildMI(BB, dl,
+ TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc)).addMBB(SinkBB)
+ .addImm(ARMCC::getOppositeCondition(ARMCC::MI)).addReg(ARM::CPSR);
+
+ // insert rsbri in RSBBB
+ // Note: BCC and rsbri will be converted into a predicated rsbmi
+ // by the if-conversion pass.
+ BuildMI(*RSBBB, RSBBB->begin(), dl,
+ TII->get(isThumb2 ? ARM::t2RSBri : ARM::RSBri), NewRsbDstReg)
+ .addReg(NewMovDstReg, RegState::Kill)
+ .addImm(0).addImm((unsigned)ARMCC::AL).addReg(0).addReg(0);
+
+ // insert PHI in SinkBB,
+ // reuse ABSDstReg to not change uses of ABS instruction
+ BuildMI(*SinkBB, SinkBB->begin(), dl,
+ TII->get(ARM::PHI), ABSDstReg)
+ .addReg(NewRsbDstReg).addMBB(RSBBB)
+ .addReg(NewMovDstReg).addMBB(BB);
+
+ // remove ABS instruction
+ MI->eraseFromParent();
+
+ // return last added BB
+ return SinkBB;
+ }
+ }
+}
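For orientation, the diamond built above computes an ordinary branchy absolute value. A minimal C++ sketch of the dataflow, with comments mapping each step to the blocks created in the code (an illustration, not the emitted MI sequence):

int absLowered(int V) {
  int Moved = V;        // MOVS: copy the value and set CPSR from it
  if (Moved < 0)        // Bcc skips to SinkBB unless ARMCC::MI holds
    Moved = 0 - Moved;  // RSBri in RSBBB negates on the negative path
  return Moved;         // the PHI in SinkBB merges the two values
}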
+
+void ARMTargetLowering::AdjustInstrPostInstrSelection(MachineInstr *MI,
+ SDNode *Node) const {
+ const MCInstrDesc &MCID = MI->getDesc();
+ if (!MCID.hasPostISelHook()) {
+ assert(!convertAddSubFlagsOpcode(MI->getOpcode()) &&
+ "Pseudo flag-setting opcodes must be marked with 'hasPostISelHook'");
+ return;
+ }
+
+ // Adjust instructions that may set the 's' bit after isel, i.e. ADC, SBC, RSB,
+ // RSC. Coming out of isel, they have an implicit CPSR def, but the optional
+ // operand is still set to noreg. If needed, set the optional operand's
+ // register to CPSR, and remove the redundant implicit def.
+ //
+ // e.g. ADCS (...opt:%noreg, CPSR<imp-def>) -> ADC (... opt:CPSR<def>).
+
+ // Rename pseudo opcodes.
+ unsigned NewOpc = convertAddSubFlagsOpcode(MI->getOpcode());
+ if (NewOpc) {
+ const ARMBaseInstrInfo *TII =
+ static_cast<const ARMBaseInstrInfo*>(getTargetMachine().getInstrInfo());
+ MI->setDesc(TII->get(NewOpc));
+ }
+ unsigned ccOutIdx = MCID.getNumOperands() - 1;
+
+ // Any ARM instruction that sets the 's' bit should specify an optional
+ // "cc_out" operand in the last operand position.
+ if (!MCID.hasOptionalDef() || !MCID.OpInfo[ccOutIdx].isOptionalDef()) {
+ assert(!NewOpc && "Optional cc_out operand required");
+ return;
+ }
+ // Look for an implicit def of CPSR added by MachineInstr ctor. Remove it
+ // since we already have an optional CPSR def.
+ bool definesCPSR = false;
+ bool deadCPSR = false;
+ for (unsigned i = MCID.getNumOperands(), e = MI->getNumOperands();
+ i != e; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+ if (MO.isReg() && MO.isDef() && MO.getReg() == ARM::CPSR) {
+ definesCPSR = true;
+ if (MO.isDead())
+ deadCPSR = true;
+ MI->RemoveOperand(i);
+ break;
+ }
+ }
+ if (!definesCPSR) {
+ assert(!NewOpc && "Optional cc_out operand required");
+ return;
+ }
+ assert(deadCPSR == !Node->hasAnyUseOfValue(1) && "inconsistent dead flag");
+ if (deadCPSR) {
+ assert(!MI->getOperand(ccOutIdx).getReg() &&
+ "expect uninitialized optional cc_out operand");
+ return;
}
+
+ // If this instruction was defined with an optional CPSR def and its dag node
+ // had a live implicit CPSR def, then activate the optional CPSR def.
+ MachineOperand &MO = MI->getOperand(ccOutIdx);
+ MO.setReg(ARM::CPSR);
+ MO.setIsDef(true);
}
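The operand surgery above is easy to lose in the MachineInstr plumbing. Below is a stand-alone model of the same decision logic; Operand is a hypothetical stand-in for MachineOperand (a sketch under that assumption, not the real API):

#include <cstddef>
#include <vector>

struct Operand { unsigned Reg; bool IsDef; bool IsDead; };
enum { NoReg = 0, CPSR = 1 };   // cc_out defaults to noreg (0)

// Mirrors AdjustInstrPostInstrSelection: find the implicit CPSR def
// past the static operands, remove it, and either leave cc_out as
// noreg (dead CPSR) or point it at CPSR (live CPSR).
void adjustCCOut(std::vector<Operand> &Ops, unsigned CcOutIdx) {
  bool DefinesCPSR = false, DeadCPSR = false;
  for (std::size_t I = CcOutIdx + 1; I < Ops.size(); ++I) {
    if (Ops[I].IsDef && Ops[I].Reg == CPSR) {
      DefinesCPSR = true;
      DeadCPSR = Ops[I].IsDead;
      Ops.erase(Ops.begin() + I);
      break;
    }
  }
  if (!DefinesCPSR || DeadCPSR)
    return;                                  // cc_out stays noreg
  Ops[CcOutIdx] = {CPSR, /*IsDef=*/true, /*IsDead=*/false};
}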
//===----------------------------------------------------------------------===//
@@ -6975,7 +7871,8 @@ ARMTargetLowering::PerformCMOVCombine(SDNode *N, SelectionDAG &DAG) const {
SDValue FalseVal = N->getOperand(0);
SDValue TrueVal = N->getOperand(1);
SDValue ARMcc = N->getOperand(2);
- ARMCC::CondCodes CC = (ARMCC::CondCodes)cast<ConstantSDNode>(ARMcc)->getZExtValue();
+ ARMCC::CondCodes CC =
+ (ARMCC::CondCodes)cast<ConstantSDNode>(ARMcc)->getZExtValue();
// Simplify
// mov r1, r0
@@ -6995,7 +7892,7 @@ ARMTargetLowering::PerformCMOVCombine(SDNode *N, SelectionDAG &DAG) const {
// movne r0, y
/// FIXME: Turn this into a target neutral optimization?
SDValue Res;
- if (CC == ARMCC::NE && FalseVal == RHS) {
+ if (CC == ARMCC::NE && FalseVal == RHS && FalseVal != LHS) {
Res = DAG.getNode(ARMISD::CMOV, dl, VT, LHS, TrueVal, ARMcc,
N->getOperand(3), Cmp);
} else if (CC == ARMCC::EQ && TrueVal == RHS) {
@@ -7235,7 +8132,7 @@ bool ARMTargetLowering::isLegalT2ScaledAddressingMode(const AddrMode &AM,
/// isLegalAddressingMode - Return true if the addressing mode represented
/// by AM is legal for this target, for a load/store of the specified type.
bool ARMTargetLowering::isLegalAddressingMode(const AddrMode &AM,
- const Type *Ty) const {
+ Type *Ty) const {
EVT VT = getValueType(Ty, true);
if (!isLegalAddressImmediate(AM.BaseOffs, VT, Subtarget))
return false;
@@ -7351,7 +8248,8 @@ static bool getARMIndexedAddressParts(SDNode *Ptr, EVT VT,
if (Ptr->getOpcode() == ISD::ADD) {
isInc = true;
- ARM_AM::ShiftOpc ShOpcVal= ARM_AM::getShiftOpcForNode(Ptr->getOperand(0));
+ ARM_AM::ShiftOpc ShOpcVal=
+ ARM_AM::getShiftOpcForNode(Ptr->getOperand(0).getOpcode());
if (ShOpcVal != ARM_AM::no_shift) {
Base = Ptr->getOperand(1);
Offset = Ptr->getOperand(0);
@@ -7536,7 +8434,7 @@ bool ARMTargetLowering::ExpandInlineAsm(CallInst *CI) const {
if (AsmPieces.size() == 3 &&
AsmPieces[0] == "rev" && AsmPieces[1] == "$0" && AsmPieces[2] == "$1" &&
IA->getConstraintString().compare(0, 4, "=l,l") == 0) {
- const IntegerType *Ty = dyn_cast<IntegerType>(CI->getType());
+ IntegerType *Ty = dyn_cast<IntegerType>(CI->getType());
if (Ty && Ty->getBitWidth() == 32)
return IntrinsicLowering::LowerToByteSwap(CI);
}
@@ -7559,6 +8457,9 @@ ARMTargetLowering::getConstraintType(const std::string &Constraint) const {
case 'x': return C_RegisterClass;
case 't': return C_RegisterClass;
case 'j': return C_Other; // Constant for movw.
+ // An address with a single base register. Due to the way we
+ // currently handle addresses, it is the same as an 'r' memory constraint.
+ case 'Q': return C_Memory;
}
} else if (Constraint.size() == 2) {
switch (Constraint[0]) {
@@ -7582,7 +8483,7 @@ ARMTargetLowering::getSingleConstraintMatchWeight(
// but allow it at the lowest weight.
if (CallOperandVal == NULL)
return CW_Default;
- const Type *type = CallOperandVal->getType();
+ Type *type = CallOperandVal->getType();
// Look at the constraint type.
switch (*constraint) {
default:
@@ -7618,7 +8519,7 @@ ARMTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
return RCPair(0U, ARM::GPRRegisterClass);
case 'h': // High regs or no regs.
if (Subtarget->isThumb())
- return RCPair(0U, ARM::hGPRRegisterClass);
+ return RCPair(0U, ARM::hGPRRegisterClass);
break;
case 'r':
return RCPair(0U, ARM::GPRRegisterClass);
@@ -7632,15 +8533,15 @@ ARMTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
break;
case 'x':
if (VT == MVT::f32)
- return RCPair(0U, ARM::SPR_8RegisterClass);
+ return RCPair(0U, ARM::SPR_8RegisterClass);
if (VT.getSizeInBits() == 64)
- return RCPair(0U, ARM::DPR_8RegisterClass);
+ return RCPair(0U, ARM::DPR_8RegisterClass);
if (VT.getSizeInBits() == 128)
- return RCPair(0U, ARM::QPR_8RegisterClass);
+ return RCPair(0U, ARM::QPR_8RegisterClass);
break;
case 't':
if (VT == MVT::f32)
- return RCPair(0U, ARM::SPRRegisterClass);
+ return RCPair(0U, ARM::SPRRegisterClass);
break;
}
}
@@ -7680,12 +8581,12 @@ void ARMTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
switch (ConstraintLetter) {
case 'j':
- // Constant suitable for movw, must be between 0 and
- // 65535.
- if (Subtarget->hasV6T2Ops())
- if (CVal >= 0 && CVal <= 65535)
- break;
- return;
+ // Constant suitable for movw, must be between 0 and
+ // 65535.
+ if (Subtarget->hasV6T2Ops())
+ if (CVal >= 0 && CVal <= 65535)
+ break;
+ return;
case 'I':
if (Subtarget->isThumb1Only()) {
// This must be a constant between 0 and 255, for ADD
@@ -7823,50 +8724,6 @@ ARMTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
return false;
}
-int ARM::getVFPf32Imm(const APFloat &FPImm) {
- APInt Imm = FPImm.bitcastToAPInt();
- uint32_t Sign = Imm.lshr(31).getZExtValue() & 1;
- int32_t Exp = (Imm.lshr(23).getSExtValue() & 0xff) - 127; // -126 to 127
- int64_t Mantissa = Imm.getZExtValue() & 0x7fffff; // 23 bits
-
- // We can handle 4 bits of mantissa.
- // mantissa = (16+UInt(e:f:g:h))/16.
- if (Mantissa & 0x7ffff)
- return -1;
- Mantissa >>= 19;
- if ((Mantissa & 0xf) != Mantissa)
- return -1;
-
- // We can handle 3 bits of exponent: exp == UInt(NOT(b):c:d)-3
- if (Exp < -3 || Exp > 4)
- return -1;
- Exp = ((Exp+3) & 0x7) ^ 4;
-
- return ((int)Sign << 7) | (Exp << 4) | Mantissa;
-}
-
-int ARM::getVFPf64Imm(const APFloat &FPImm) {
- APInt Imm = FPImm.bitcastToAPInt();
- uint64_t Sign = Imm.lshr(63).getZExtValue() & 1;
- int64_t Exp = (Imm.lshr(52).getSExtValue() & 0x7ff) - 1023; // -1022 to 1023
- uint64_t Mantissa = Imm.getZExtValue() & 0xfffffffffffffLL;
-
- // We can handle 4 bits of mantissa.
- // mantissa = (16+UInt(e:f:g:h))/16.
- if (Mantissa & 0xffffffffffffLL)
- return -1;
- Mantissa >>= 48;
- if ((Mantissa & 0xf) != Mantissa)
- return -1;
-
- // We can handle 3 bits of exponent: exp == UInt(NOT(b):c:d)-3
- if (Exp < -3 || Exp > 4)
- return -1;
- Exp = ((Exp+3) & 0x7) ^ 4;
-
- return ((int)Sign << 7) | (Exp << 4) | Mantissa;
-}
-
bool ARM::isBitFieldInvertedMask(unsigned v) {
if (v == 0xffffffff)
return 0;
@@ -7889,9 +8746,9 @@ bool ARMTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const {
if (!Subtarget->hasVFP3())
return false;
if (VT == MVT::f32)
- return ARM::getVFPf32Imm(Imm) != -1;
+ return ARM_AM::getFP32Imm(Imm) != -1;
if (VT == MVT::f64)
- return ARM::getVFPf64Imm(Imm) != -1;
+ return ARM_AM::getFP64Imm(Imm) != -1;
return false;
}
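The deleted ARM::getVFPf32Imm above (living on as ARM_AM::getFP32Imm) is the whole story of which floats VMOV.f32 can materialize: a sign bit, a 3-bit exponent in [-3,4], and 4 mantissa bits. A self-checking restatement of that logic with a worked example (a sketch mirroring the removed code, not the in-tree implementation):

#include <cassert>
#include <cstdint>
#include <cstring>

static int vfpF32Imm(float F) {
  uint32_t Bits;
  std::memcpy(&Bits, &F, sizeof(Bits));
  uint32_t Sign = (Bits >> 31) & 1;
  int32_t Exp = int32_t((Bits >> 23) & 0xff) - 127;
  uint32_t Mantissa = Bits & 0x7fffff;
  if (Mantissa & 0x7ffff)              // only 4 mantissa bits survive
    return -1;
  Mantissa >>= 19;
  if (Exp < -3 || Exp > 4)             // only 3 exponent bits
    return -1;
  Exp = ((Exp + 3) & 0x7) ^ 4;
  return (int(Sign) << 7) | (Exp << 4) | int(Mantissa);
}

int main() {
  assert(vfpF32Imm(1.0f) == 0x70);     // encodable: exp 0, mantissa 0
  assert(vfpF32Imm(0.1f) == -1);       // too many mantissa bits
}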
@@ -7933,7 +8790,7 @@ bool ARMTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
// Conservatively set memVT to the entire set of vectors stored.
unsigned NumElts = 0;
for (unsigned ArgI = 1, ArgE = I.getNumArgOperands(); ArgI < ArgE; ++ArgI) {
- const Type *ArgTy = I.getArgOperand(ArgI)->getType();
+ Type *ArgTy = I.getArgOperand(ArgI)->getType();
if (!ArgTy->isVectorTy())
break;
NumElts += getTargetData()->getTypeAllocSize(ArgTy) / 8;
diff --git a/lib/Target/ARM/ARMISelLowering.h b/lib/Target/ARM/ARMISelLowering.h
index 980fb40..5da9b27 100644
--- a/lib/Target/ARM/ARMISelLowering.h
+++ b/lib/Target/ARM/ARMISelLowering.h
@@ -71,6 +71,11 @@ namespace llvm {
SRA_FLAG, // V,Flag = sra_flag X -> sra X, 1 + save carry out.
RRX, // V = RRX X, Flag -> srl X, 1 + shift in carry flag.
+ ADDC, // Add with carry
+ ADDE, // Add using carry
+ SUBC, // Sub with carry
+ SUBE, // Sub using carry
+
VMOVRRD, // double to two gprs.
VMOVDRR, // Two gprs to double.
@@ -206,18 +211,22 @@ namespace llvm {
VST4_UPD,
VST2LN_UPD,
VST3LN_UPD,
- VST4LN_UPD
+ VST4LN_UPD,
+
+ // 64-bit atomic ops (value split into two registers)
+ ATOMADD64_DAG,
+ ATOMSUB64_DAG,
+ ATOMOR64_DAG,
+ ATOMXOR64_DAG,
+ ATOMAND64_DAG,
+ ATOMNAND64_DAG,
+ ATOMSWAP64_DAG,
+ ATOMCMPXCHG64_DAG
};
}
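These nodes carry 64-bit atomics with the value split across a lo/hi register pair; EmitAtomicBinary64 expands them into an LDREXD/STREXD retry loop. A rough C++ model of what that loop does for ATOMADD64_DAG, where load_exclusive64/store_exclusive64 are hypothetical stand-ins for the exclusive accesses (not real intrinsics):

#include <cstdint>

extern "C" uint64_t load_exclusive64(volatile uint64_t *Addr);  // LDREXD
extern "C" bool store_exclusive64(volatile uint64_t *Addr,      // STREXD;
                                  uint64_t Val);                // true on success

uint64_t atomicAdd64(volatile uint64_t *Addr, uint64_t Val) {
  uint64_t Old;
  do {
    Old = load_exclusive64(Addr);   // paired load of the lo/hi halves
    // The real expansion adds the halves with ADDS then ADC, which is
    // why EmitAtomicBinary64 takes a NeedsCarry flag.
  } while (!store_exclusive64(Addr, Old + Val));
  return Old;
}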
/// Define some predicates that are used for node matching.
namespace ARM {
- /// getVFPf32Imm / getVFPf64Imm - If the given fp immediate can be
- /// materialized with a VMOV.f32 / VMOV.f64 (i.e. fconsts / fconstd)
- /// instruction, returns its 8-bit integer representation. Otherwise,
- /// returns -1.
- int getVFPf32Imm(const APFloat &FPImm);
- int getVFPf64Imm(const APFloat &FPImm);
bool isBitFieldInvertedMask(unsigned v);
}
@@ -240,10 +249,16 @@ namespace llvm {
virtual const char *getTargetNodeName(unsigned Opcode) const;
+ /// getSetCCResultType - Return the value type to use for ISD::SETCC.
+ virtual EVT getSetCCResultType(EVT VT) const;
+
virtual MachineBasicBlock *
EmitInstrWithCustomInserter(MachineInstr *MI,
MachineBasicBlock *MBB) const;
+ virtual void
+ AdjustInstrPostInstrSelection(MachineInstr *MI, SDNode *Node) const;
+
SDValue PerformCMOVCombine(SDNode *N, SelectionDAG &DAG) const;
virtual SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const;
@@ -256,7 +271,7 @@ namespace llvm {
/// isLegalAddressingMode - Return true if the addressing mode represented
/// by AM is legal for this target, for a load/store of the specified type.
- virtual bool isLegalAddressingMode(const AddrMode &AM, const Type *Ty)const;
+ virtual bool isLegalAddressingMode(const AddrMode &AM, Type *Ty)const;
bool isLegalT2ScaledAddressingMode(const AddrMode &AM, EVT VT) const;
/// isLegalICmpImmediate - Return true if the specified immediate is legal
@@ -485,12 +500,28 @@ namespace llvm {
MachineBasicBlock *BB,
unsigned Size,
unsigned BinOpcode) const;
+ MachineBasicBlock *EmitAtomicBinary64(MachineInstr *MI,
+ MachineBasicBlock *BB,
+ unsigned Op1,
+ unsigned Op2,
+ bool NeedsCarry = false,
+ bool IsCmpxchg = false) const;
MachineBasicBlock * EmitAtomicBinaryMinMax(MachineInstr *MI,
MachineBasicBlock *BB,
unsigned Size,
bool signExtend,
ARMCC::CondCodes Cond) const;
+ void EmitBasePointerRecalculation(MachineInstr *MI, MachineBasicBlock *MBB,
+ MachineBasicBlock *DispatchBB) const;
+
+ void SetupEntryBlockForSjLj(MachineInstr *MI,
+ MachineBasicBlock *MBB,
+ MachineBasicBlock *DispatchBB, int FI) const;
+
+ MachineBasicBlock *EmitSjLjDispatchBlock(MachineInstr *MI,
+ MachineBasicBlock *MBB) const;
+
bool RemapAddSubWithFlags(MachineInstr *MI, MachineBasicBlock *BB) const;
};
diff --git a/lib/Target/ARM/ARMInstrFormats.td b/lib/Target/ARM/ARMInstrFormats.td
index 3ccf22f..7cbc911 100644
--- a/lib/Target/ARM/ARMInstrFormats.td
+++ b/lib/Target/ARM/ARMInstrFormats.td
@@ -25,7 +25,7 @@ def BrFrm : Format<2>;
def BrMiscFrm : Format<3>;
def DPFrm : Format<4>;
-def DPSoRegFrm : Format<5>;
+def DPSoRegRegFrm : Format<5>;
def LdFrm : Format<6>;
def StFrm : Format<7>;
@@ -68,6 +68,7 @@ def N3RegVShFrm : Format<38>;
def NVExtFrm : Format<39>;
def NVMulSLFrm : Format<40>;
def NVTBLFrm : Format<41>;
+def DPSoRegImmFrm : Format<42>;
// Misc flags.
@@ -130,39 +131,15 @@ def VFPNeonA8Domain : Domain<5>; // Instructions in VFP & Neon under A8
// ARM special operands.
//
-def CondCodeOperand : AsmOperandClass {
- let Name = "CondCode";
- let SuperClasses = [];
-}
-
-def CCOutOperand : AsmOperandClass {
- let Name = "CCOut";
- let SuperClasses = [];
-}
-
-def MemBarrierOptOperand : AsmOperandClass {
- let Name = "MemBarrierOpt";
- let SuperClasses = [];
- let ParserMethod = "tryParseMemBarrierOptOperand";
-}
-
-def ProcIFlagsOperand : AsmOperandClass {
- let Name = "ProcIFlags";
- let SuperClasses = [];
- let ParserMethod = "tryParseProcIFlagsOperand";
-}
-
-def MSRMaskOperand : AsmOperandClass {
- let Name = "MSRMask";
- let SuperClasses = [];
- let ParserMethod = "tryParseMSRMaskOperand";
-}
-
// ARM imod and iflag operands, used only by the CPS instruction.
def imod_op : Operand<i32> {
let PrintMethod = "printCPSIMod";
}
+def ProcIFlagsOperand : AsmOperandClass {
+ let Name = "ProcIFlags";
+ let ParserMethod = "parseProcIFlagsOperand";
+}
def iflags_op : Operand<i32> {
let PrintMethod = "printCPSIFlag";
let ParserMatchClass = ProcIFlagsOperand;
@@ -170,17 +147,21 @@ def iflags_op : Operand<i32> {
// ARM Predicate operand. Default to 14 = always (AL). Second part is CC
// register whose default is 0 (no register).
-def pred : PredicateOperand<OtherVT, (ops i32imm, CCR),
+def CondCodeOperand : AsmOperandClass { let Name = "CondCode"; }
+def pred : PredicateOperand<OtherVT, (ops i32imm, i32imm),
(ops (i32 14), (i32 zero_reg))> {
let PrintMethod = "printPredicateOperand";
let ParserMatchClass = CondCodeOperand;
+ let DecoderMethod = "DecodePredicateOperand";
}
// Conditional code result for instructions whose 's' bit is set, e.g. subs.
+def CCOutOperand : AsmOperandClass { let Name = "CCOut"; }
def cc_out : OptionalDefOperand<OtherVT, (ops CCR), (ops (i32 zero_reg))> {
let EncoderMethod = "getCCOutOpValue";
let PrintMethod = "printSBitModifierOperand";
let ParserMatchClass = CCOutOperand;
+ let DecoderMethod = "DecodeCCOutOperand";
}
// Same as cc_out except it defaults to setting CPSR.
@@ -188,16 +169,27 @@ def s_cc_out : OptionalDefOperand<OtherVT, (ops CCR), (ops (i32 CPSR))> {
let EncoderMethod = "getCCOutOpValue";
let PrintMethod = "printSBitModifierOperand";
let ParserMatchClass = CCOutOperand;
+ let DecoderMethod = "DecodeCCOutOperand";
}
// ARM special operands for disassembly only.
//
+def SetEndAsmOperand : AsmOperandClass {
+ let Name = "SetEndImm";
+ let ParserMethod = "parseSetEndImm";
+}
def setend_op : Operand<i32> {
let PrintMethod = "printSetendOperand";
+ let ParserMatchClass = SetEndAsmOperand;
}
+def MSRMaskOperand : AsmOperandClass {
+ let Name = "MSRMask";
+ let ParserMethod = "parseMSRMaskOperand";
+}
def msr_mask : Operand<i32> {
let PrintMethod = "printMSRMaskOperand";
+ let DecoderMethod = "DecodeMSRMask";
let ParserMatchClass = MSRMaskOperand;
}
@@ -211,21 +203,40 @@ def msr_mask : Operand<i32> {
// 64 64 - <imm> is encoded in imm6<5:0>
def shr_imm8 : Operand<i32> {
let EncoderMethod = "getShiftRight8Imm";
+ let DecoderMethod = "DecodeShiftRight8Imm";
}
def shr_imm16 : Operand<i32> {
let EncoderMethod = "getShiftRight16Imm";
+ let DecoderMethod = "DecodeShiftRight16Imm";
}
def shr_imm32 : Operand<i32> {
let EncoderMethod = "getShiftRight32Imm";
+ let DecoderMethod = "DecodeShiftRight32Imm";
}
def shr_imm64 : Operand<i32> {
let EncoderMethod = "getShiftRight64Imm";
+ let DecoderMethod = "DecodeShiftRight64Imm";
}
//===----------------------------------------------------------------------===//
+// ARM Assembler alias templates.
+//
+class ARMInstAlias<string Asm, dag Result, bit Emit = 0b1>
+ : InstAlias<Asm, Result, Emit>, Requires<[IsARM]>;
+class tInstAlias<string Asm, dag Result, bit Emit = 0b1>
+ : InstAlias<Asm, Result, Emit>, Requires<[IsThumb]>;
+class t2InstAlias<string Asm, dag Result, bit Emit = 0b1>
+ : InstAlias<Asm, Result, Emit>, Requires<[IsThumb2]>;
+class VFP2InstAlias<string Asm, dag Result, bit Emit = 0b1>
+ : InstAlias<Asm, Result, Emit>, Requires<[HasVFP2]>;
+class VFP3InstAlias<string Asm, dag Result, bit Emit = 0b1>
+ : InstAlias<Asm, Result, Emit>, Requires<[HasVFP3]>;
+
+//===----------------------------------------------------------------------===//
// ARM Instruction templates.
//
+
class InstTemplate<AddrMode am, int sz, IndexMode im,
Format f, Domain d, string cstr, InstrItinClass itin>
: Instruction {
@@ -240,17 +251,22 @@ class InstTemplate<AddrMode am, int sz, IndexMode im,
Domain D = d;
bit isUnaryDataProc = 0;
bit canXformTo16Bit = 0;
+ // The instruction is a 16-bit flag-setting Thumb instruction. Used
+ // by the parser to determine whether to require the 'S' suffix on the
+ // mnemonic (when not in an IT block) or preclude it (when in an IT block).
+ bit thumbArithFlagSetting = 0;
// If this is a pseudo instruction, mark it isCodeGenOnly.
let isCodeGenOnly = !eq(!cast<string>(f), "Pseudo");
- // The layout of TSFlags should be kept in sync with ARMBaseInstrInfo.h.
+ // The layout of TSFlags should be kept in sync with ARMBaseInfo.h.
let TSFlags{4-0} = AM.Value;
let TSFlags{6-5} = IndexModeBits;
let TSFlags{12-7} = Form;
let TSFlags{13} = isUnaryDataProc;
let TSFlags{14} = canXformTo16Bit;
let TSFlags{17-15} = D.Value;
+ let TSFlags{18} = thumbArithFlagSetting;
let Constraints = cstr;
let Itinerary = itin;
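On the C++ side, each TSFlags field is recovered with a shift and mask at the positions assigned above. A minimal sketch for the new bit (the real accessors and enums live alongside the layout in ARMBaseInfo.h):

static inline bool isThumbArithFlagSetting(uint64_t TSFlags) {
  return (TSFlags >> 18) & 1;   // TSFlags{18} = thumbArithFlagSetting
}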
@@ -262,13 +278,17 @@ class Encoding {
class InstARM<AddrMode am, int sz, IndexMode im,
Format f, Domain d, string cstr, InstrItinClass itin>
- : InstTemplate<am, sz, im, f, d, cstr, itin>, Encoding;
+ : InstTemplate<am, sz, im, f, d, cstr, itin>, Encoding {
+ let DecoderNamespace = "ARM";
+}
// This Encoding-less class is used by Thumb1 to specify the encoding bits later
// on by adding flavors to specific instructions.
class InstThumb<AddrMode am, int sz, IndexMode im,
Format f, Domain d, string cstr, InstrItinClass itin>
- : InstTemplate<am, sz, im, f, d, cstr, itin>;
+ : InstTemplate<am, sz, im, f, d, cstr, itin> {
+ let DecoderNamespace = "Thumb";
+}
class PseudoInst<dag oops, dag iops, InstrItinClass itin, list<dag> pattern>
: InstTemplate<AddrModeNone, 0, IndexModeNone, Pseudo,
@@ -426,11 +446,11 @@ class AIldrex<bits<2> opcod, dag oops, dag iops, InstrItinClass itin,
: I<oops, iops, AddrModeNone, 4, IndexModeNone, LdStExFrm, itin,
opc, asm, "", pattern> {
bits<4> Rt;
- bits<4> Rn;
+ bits<4> addr;
let Inst{27-23} = 0b00011;
let Inst{22-21} = opcod;
let Inst{20} = 1;
- let Inst{19-16} = Rn;
+ let Inst{19-16} = addr;
let Inst{15-12} = Rt;
let Inst{11-0} = 0b111110011111;
}
@@ -450,14 +470,14 @@ class AIstrex<bits<2> opcod, dag oops, dag iops, InstrItinClass itin,
let Inst{3-0} = Rt;
}
class AIswp<bit b, dag oops, dag iops, string opc, list<dag> pattern>
- : AI<oops, iops, MiscFrm, NoItinerary, opc, "\t$Rt, $Rt2, [$Rn]", pattern> {
+ : AI<oops, iops, MiscFrm, NoItinerary, opc, "\t$Rt, $Rt2, $addr", pattern> {
bits<4> Rt;
bits<4> Rt2;
- bits<4> Rn;
+ bits<4> addr;
let Inst{27-23} = 0b00010;
let Inst{22} = b;
let Inst{21-20} = 0b00;
- let Inst{19-16} = Rn;
+ let Inst{19-16} = addr;
let Inst{15-12} = Rt;
let Inst{11-4} = 0b00001001;
let Inst{3-0} = Rt2;
@@ -515,22 +535,41 @@ class AI2ldstidx<bit isLd, bit isByte, bit isPre, dag oops, dag iops,
let Inst{20} = isLd; // L bit
let Inst{15-12} = Rt;
}
-class AI2stridx<bit isByte, bit isPre, dag oops, dag iops,
+class AI2stridx_reg<bit isByte, bit isPre, dag oops, dag iops,
+ IndexMode im, Format f, InstrItinClass itin, string opc,
+ string asm, string cstr, list<dag> pattern>
+ : AI2ldstidx<0, isByte, isPre, oops, iops, im, f, itin, opc, asm, cstr,
+ pattern> {
+ // AM2 store w/ two operands: (GPR, am2offset)
+ // {12} isAdd
+ // {11-0} imm12/Rm
+ bits<14> offset;
+ bits<4> Rn;
+ let Inst{25} = 1;
+ let Inst{23} = offset{12};
+ let Inst{19-16} = Rn;
+ let Inst{11-5} = offset{11-5};
+ let Inst{4} = 0;
+ let Inst{3-0} = offset{3-0};
+}
+
+class AI2stridx_imm<bit isByte, bit isPre, dag oops, dag iops,
IndexMode im, Format f, InstrItinClass itin, string opc,
string asm, string cstr, list<dag> pattern>
: AI2ldstidx<0, isByte, isPre, oops, iops, im, f, itin, opc, asm, cstr,
pattern> {
// AM2 store w/ two operands: (GPR, am2offset)
- // {13} 1 == Rm, 0 == imm12
// {12} isAdd
// {11-0} imm12/Rm
bits<14> offset;
bits<4> Rn;
- let Inst{25} = offset{13};
+ let Inst{25} = 0;
let Inst{23} = offset{12};
let Inst{19-16} = Rn;
let Inst{11-0} = offset{11-0};
}
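Both classes consume a 14-bit offset operand whose layout the comments spell out: bit {12} is the add/subtract flag and {11-0} holds either imm12 or Rm plus shift. A packing sketch for the immediate form (illustrative, not the in-tree encoder):

unsigned encodeAM2OffsetImm(int ByteOffset) {
  bool IsAdd = ByteOffset >= 0;
  unsigned Imm12 = unsigned(IsAdd ? ByteOffset : -ByteOffset);
  return (unsigned(IsAdd) << 12) | (Imm12 & 0xfff);
}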
+
+
// FIXME: Merge with the above class when addrmode2 gets used for STR, STRB
// but for now use this class for STRT and STRBT.
class AI2stridxT<bit isByte, bit isPre, dag oops, dag iops,
@@ -568,9 +607,11 @@ class AI3ld<bits<4> op, bit op20, dag oops, dag iops, Format f,
let Inst{11-8} = addr{7-4}; // imm7_4/zero
let Inst{7-4} = op;
let Inst{3-0} = addr{3-0}; // imm3_0/Rm
+
+ let DecoderMethod = "DecodeAddrMode3Instruction";
}
-class AI3ldstidx<bits<4> op, bit op20, bit isLd, bit isPre, dag oops, dag iops,
+class AI3ldstidx<bits<4> op, bit op20, bit isPre, dag oops, dag iops,
IndexMode im, Format f, InstrItinClass itin, string opc,
string asm, string cstr, list<dag> pattern>
: I<oops, iops, AddrMode3, 4, im, f, itin,
@@ -586,48 +627,24 @@ class AI3ldstidx<bits<4> op, bit op20, bit isLd, bit isPre, dag oops, dag iops,
// FIXME: Merge with the above class when addrmode2 gets used for LDR, LDRB
// but for now use this class for LDRSBT, LDRHT, LDSHT.
-class AI3ldstidxT<bits<4> op, bit op20, bit isLd, bit isPre, dag oops, dag iops,
+class AI3ldstidxT<bits<4> op, bit isLoad, dag oops, dag iops,
IndexMode im, Format f, InstrItinClass itin, string opc,
string asm, string cstr, list<dag> pattern>
- : I<oops, iops, AddrMode3, 4, im, f, itin,
- opc, asm, cstr, pattern> {
+ : I<oops, iops, AddrMode3, 4, im, f, itin, opc, asm, cstr, pattern> {
// {13} 1 == imm8, 0 == Rm
// {12-9} Rn
// {8} isAdd
// {7-4} imm7_4/zero
// {3-0} imm3_0/Rm
- bits<14> addr;
- bits<4> Rt;
- let Inst{27-25} = 0b000;
- let Inst{24} = isPre; // P bit
- let Inst{23} = addr{8}; // U bit
- let Inst{22} = addr{13}; // 1 == imm8, 0 == Rm
- let Inst{20} = op20; // L bit
- let Inst{19-16} = addr{12-9}; // Rn
- let Inst{15-12} = Rt; // Rt
- let Inst{11-8} = addr{7-4}; // imm7_4/zero
- let Inst{7-4} = op;
- let Inst{3-0} = addr{3-0}; // imm3_0/Rm
- let AsmMatchConverter = "CvtLdWriteBackRegAddrMode3";
-}
-
-class AI3stridx<bits<4> op, bit isByte, bit isPre, dag oops, dag iops,
- IndexMode im, Format f, InstrItinClass itin, string opc,
- string asm, string cstr, list<dag> pattern>
- : AI2ldstidx<0, isByte, isPre, oops, iops, im, f, itin, opc, asm, cstr,
- pattern> {
- // AM3 store w/ two operands: (GPR, am3offset)
- bits<14> offset;
+ bits<4> addr;
bits<4> Rt;
- bits<4> Rn;
let Inst{27-25} = 0b000;
- let Inst{23} = offset{8};
- let Inst{22} = offset{9};
- let Inst{19-16} = Rn;
+ let Inst{24} = 0; // P bit
+ let Inst{21} = 1;
+ let Inst{20} = isLoad; // L bit
+ let Inst{19-16} = addr; // Rn
let Inst{15-12} = Rt; // Rt
- let Inst{11-8} = offset{7-4}; // imm7_4/zero
let Inst{7-4} = op;
- let Inst{3-0} = offset{3-0}; // imm3_0/Rm
}
// stores
@@ -648,75 +665,7 @@ class AI3str<bits<4> op, dag oops, dag iops, Format f, InstrItinClass itin,
let Inst{11-8} = addr{7-4}; // imm7_4/zero
let Inst{7-4} = op;
let Inst{3-0} = addr{3-0}; // imm3_0/Rm
-}
-
-// Pre-indexed stores
-class AI3sthpr<dag oops, dag iops, Format f, InstrItinClass itin,
- string opc, string asm, string cstr, list<dag> pattern>
- : I<oops, iops, AddrMode3, 4, IndexModePre, f, itin,
- opc, asm, cstr, pattern> {
- let Inst{4} = 1;
- let Inst{5} = 1; // H bit
- let Inst{6} = 0; // S bit
- let Inst{7} = 1;
- let Inst{20} = 0; // L bit
- let Inst{21} = 1; // W bit
- let Inst{24} = 1; // P bit
- let Inst{27-25} = 0b000;
-}
-class AI3stdpr<dag oops, dag iops, Format f, InstrItinClass itin,
- string opc, string asm, string cstr, list<dag> pattern>
- : I<oops, iops, AddrMode3, 4, IndexModePre, f, itin,
- opc, asm, cstr, pattern> {
- let Inst{4} = 1;
- let Inst{5} = 1; // H bit
- let Inst{6} = 1; // S bit
- let Inst{7} = 1;
- let Inst{20} = 0; // L bit
- let Inst{21} = 1; // W bit
- let Inst{24} = 1; // P bit
- let Inst{27-25} = 0b000;
-}
-
-// Post-indexed stores
-class AI3sthpo<dag oops, dag iops, Format f, InstrItinClass itin,
- string opc, string asm, string cstr, list<dag> pattern>
- : I<oops, iops, AddrMode3, 4, IndexModePost, f, itin,
- opc, asm, cstr,pattern> {
- // {13} 1 == imm8, 0 == Rm
- // {12-9} Rn
- // {8} isAdd
- // {7-4} imm7_4/zero
- // {3-0} imm3_0/Rm
- bits<14> addr;
- bits<4> Rt;
- let Inst{3-0} = addr{3-0}; // imm3_0/Rm
- let Inst{4} = 1;
- let Inst{5} = 1; // H bit
- let Inst{6} = 0; // S bit
- let Inst{7} = 1;
- let Inst{11-8} = addr{7-4}; // imm7_4/zero
- let Inst{15-12} = Rt; // Rt
- let Inst{19-16} = addr{12-9}; // Rn
- let Inst{20} = 0; // L bit
- let Inst{21} = 0; // W bit
- let Inst{22} = addr{13}; // 1 == imm8, 0 == Rm
- let Inst{23} = addr{8}; // U bit
- let Inst{24} = 0; // P bit
- let Inst{27-25} = 0b000;
-}
-class AI3stdpo<dag oops, dag iops, Format f, InstrItinClass itin,
- string opc, string asm, string cstr, list<dag> pattern>
- : I<oops, iops, AddrMode3, 4, IndexModePost, f, itin,
- opc, asm, cstr, pattern> {
- let Inst{4} = 1;
- let Inst{5} = 1; // H bit
- let Inst{6} = 1; // S bit
- let Inst{7} = 1;
- let Inst{20} = 0; // L bit
- let Inst{21} = 0; // W bit
- let Inst{24} = 0; // P bit
- let Inst{27-25} = 0b000;
+ let DecoderMethod = "DecodeAddrMode3Instruction";
}
// addrmode4 instructions
@@ -843,6 +792,23 @@ class AMiscA1I<bits<8> opcod, bits<4> opc7_4, dag oops, dag iops,
}
// PKH instructions
+def PKHLSLAsmOperand : AsmOperandClass {
+ let Name = "PKHLSLImm";
+ let ParserMethod = "parsePKHLSLImm";
+}
+def pkh_lsl_amt: Operand<i32>, ImmLeaf<i32, [{ return Imm >= 0 && Imm < 32; }]>{
+ let PrintMethod = "printPKHLSLShiftImm";
+ let ParserMatchClass = PKHLSLAsmOperand;
+}
+def PKHASRAsmOperand : AsmOperandClass {
+ let Name = "PKHASRImm";
+ let ParserMethod = "parsePKHASRImm";
+}
+def pkh_asr_amt: Operand<i32>, ImmLeaf<i32, [{ return Imm > 0 && Imm <= 32; }]>{
+ let PrintMethod = "printPKHASRShiftImm";
+ let ParserMatchClass = PKHASRAsmOperand;
+}
+
class APKHI<bits<8> opcod, bit tb, dag oops, dag iops, InstrItinClass itin,
string opc, string asm, list<dag> pattern>
: I<oops, iops, AddrModeNone, 4, IndexModeNone, ArithMiscFrm, itin,
@@ -850,11 +816,11 @@ class APKHI<bits<8> opcod, bit tb, dag oops, dag iops, InstrItinClass itin,
bits<4> Rd;
bits<4> Rn;
bits<4> Rm;
- bits<8> sh;
+ bits<5> sh;
let Inst{27-20} = opcod;
let Inst{19-16} = Rn;
let Inst{15-12} = Rd;
- let Inst{11-7} = sh{7-3};
+ let Inst{11-7} = sh;
let Inst{6} = tb;
let Inst{5-4} = 0b01;
let Inst{3-0} = Rm;
@@ -949,7 +915,9 @@ class Thumb1sI<dag oops, dag iops, AddrMode am, int sz,
let InOperandList = !con(iops, (ins pred:$p));
let AsmString = !strconcat(opc, "${s}${p}", asm);
let Pattern = pattern;
+ let thumbArithFlagSetting = 1;
list<Predicate> Predicates = [IsThumb, IsThumb1Only];
+ let DecoderNamespace = "ThumbSBit";
}
class T1sI<dag oops, dag iops, InstrItinClass itin,
@@ -1071,6 +1039,7 @@ class Thumb2I<dag oops, dag iops, AddrMode am, int sz,
let AsmString = !strconcat(opc, "${p}", asm);
let Pattern = pattern;
list<Predicate> Predicates = [IsThumb2];
+ let DecoderNamespace = "Thumb2";
}
// Same as Thumb2I except it can optionally modify CPSR. Note it's modeled as an
@@ -1091,6 +1060,7 @@ class Thumb2sI<dag oops, dag iops, AddrMode am, int sz,
let AsmString = !strconcat(opc, "${s}${p}", asm);
let Pattern = pattern;
list<Predicate> Predicates = [IsThumb2];
+ let DecoderNamespace = "Thumb2";
}
// Special cases
@@ -1103,6 +1073,7 @@ class Thumb2XI<dag oops, dag iops, AddrMode am, int sz,
let AsmString = asm;
let Pattern = pattern;
list<Predicate> Predicates = [IsThumb2];
+ let DecoderNamespace = "Thumb2";
}
class ThumbXI<dag oops, dag iops, AddrMode am, int sz,
@@ -1114,6 +1085,7 @@ class ThumbXI<dag oops, dag iops, AddrMode am, int sz,
let AsmString = asm;
let Pattern = pattern;
list<Predicate> Predicates = [IsThumb, IsThumb1Only];
+ let DecoderNamespace = "Thumb";
}
class T2I<dag oops, dag iops, InstrItinClass itin,
@@ -1132,8 +1104,8 @@ class T2Ipc<dag oops, dag iops, InstrItinClass itin,
string opc, string asm, list<dag> pattern>
: Thumb2I<oops, iops, AddrModeT2_pc, 4, itin, opc, asm, "", pattern>;
class T2Ii8s4<bit P, bit W, bit isLoad, dag oops, dag iops, InstrItinClass itin,
- string opc, string asm, list<dag> pattern>
- : Thumb2I<oops, iops, AddrModeT2_i8s4, 4, itin, opc, asm, "",
+ string opc, string asm, string cstr, list<dag> pattern>
+ : Thumb2I<oops, iops, AddrModeT2_i8s4, 4, itin, opc, asm, cstr,
pattern> {
bits<4> Rt;
bits<4> Rt2;
@@ -1149,6 +1121,26 @@ class T2Ii8s4<bit P, bit W, bit isLoad, dag oops, dag iops, InstrItinClass itin,
let Inst{11-8} = Rt2{3-0};
let Inst{7-0} = addr{7-0};
}
+class T2Ii8s4post<bit P, bit W, bit isLoad, dag oops, dag iops,
+ InstrItinClass itin, string opc, string asm, string cstr,
+ list<dag> pattern>
+ : Thumb2I<oops, iops, AddrModeT2_i8s4, 4, itin, opc, asm, cstr,
+ pattern> {
+ bits<4> Rt;
+ bits<4> Rt2;
+ bits<4> addr;
+ bits<9> imm;
+ let Inst{31-25} = 0b1110100;
+ let Inst{24} = P;
+ let Inst{23} = imm{8};
+ let Inst{22} = 1;
+ let Inst{21} = W;
+ let Inst{20} = isLoad;
+ let Inst{19-16} = addr;
+ let Inst{15-12} = Rt{3-0};
+ let Inst{11-8} = Rt2{3-0};
+ let Inst{7-0} = imm{7-0};
+}
class T2sI<dag oops, dag iops, InstrItinClass itin,
string opc, string asm, list<dag> pattern>
@@ -1172,8 +1164,8 @@ class T2XIt<dag oops, dag iops, InstrItinClass itin,
string asm, string cstr, list<dag> pattern>
: Thumb2XI<oops, iops, AddrModeNone, 4, itin, asm, cstr, pattern>;
-// T2Iidxldst - Thumb2 indexed load / store instructions.
-class T2Iidxldst<bit signed, bits<2> opcod, bit load, bit pre,
+// T2Ipreldst - Thumb2 pre-indexed load / store instructions.
+class T2Ipreldst<bit signed, bits<2> opcod, bit load, bit pre,
dag oops, dag iops,
AddrMode am, IndexMode im, InstrItinClass itin,
string opc, string asm, string cstr, list<dag> pattern>
@@ -1183,25 +1175,60 @@ class T2Iidxldst<bit signed, bits<2> opcod, bit load, bit pre,
let AsmString = !strconcat(opc, "${p}", asm);
let Pattern = pattern;
list<Predicate> Predicates = [IsThumb2];
+ let DecoderNamespace = "Thumb2";
+
+ bits<4> Rt;
+ bits<13> addr;
let Inst{31-27} = 0b11111;
let Inst{26-25} = 0b00;
let Inst{24} = signed;
let Inst{23} = 0;
let Inst{22-21} = opcod;
let Inst{20} = load;
+ let Inst{19-16} = addr{12-9};
+ let Inst{15-12} = Rt{3-0};
let Inst{11} = 1;
// (P, W) = (1, 1) Pre-indexed or (0, 1) Post-indexed
let Inst{10} = pre; // The P bit.
+ let Inst{9} = addr{8}; // Sign bit
let Inst{8} = 1; // The W bit.
+ let Inst{7-0} = addr{7-0};
- bits<9> addr;
- let Inst{7-0} = addr{7-0};
- let Inst{9} = addr{8}; // Sign bit
+ let DecoderMethod = "DecodeT2LdStPre";
+}
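The 13-bit addr operand introduced above packs {12-9} Rn, {8} the add/subtract bit, and {7-0} imm8. A decoding sketch under that layout (names are illustrative):

struct T2PreIdxAddr { unsigned Rn; bool IsAdd; unsigned Imm8; };

T2PreIdxAddr decodeT2PreIdxAddr(unsigned Addr) {
  return { (Addr >> 9) & 0xf, ((Addr >> 8) & 1) != 0, Addr & 0xff };
}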
+
+// T2Ipostldst - Thumb2 post-indexed load / store instructions.
+class T2Ipostldst<bit signed, bits<2> opcod, bit load, bit pre,
+ dag oops, dag iops,
+ AddrMode am, IndexMode im, InstrItinClass itin,
+ string opc, string asm, string cstr, list<dag> pattern>
+ : InstARM<am, 4, im, ThumbFrm, GenericDomain, cstr, itin> {
+ let OutOperandList = oops;
+ let InOperandList = !con(iops, (ins pred:$p));
+ let AsmString = !strconcat(opc, "${p}", asm);
+ let Pattern = pattern;
+ list<Predicate> Predicates = [IsThumb2];
+ let DecoderNamespace = "Thumb2";
bits<4> Rt;
bits<4> Rn;
+ bits<9> offset;
+ let Inst{31-27} = 0b11111;
+ let Inst{26-25} = 0b00;
+ let Inst{24} = signed;
+ let Inst{23} = 0;
+ let Inst{22-21} = opcod;
+ let Inst{20} = load;
+ let Inst{19-16} = Rn;
let Inst{15-12} = Rt{3-0};
- let Inst{19-16} = Rn{3-0};
+ let Inst{11} = 1;
+ // (P, W) = (1, 1) Pre-indexed or (0, 1) Post-indexed
+ let Inst{10} = pre; // The P bit.
+ let Inst{9} = offset{8}; // Sign bit
+ let Inst{8} = 1; // The W bit.
+ let Inst{7-0} = offset{7-0};
+
+ let DecoderMethod = "DecodeT2LdStPre";
}
// Tv5Pat - Same as Pat<>, but requires V5T Thumb mode.
@@ -1242,6 +1269,7 @@ class VFPI<dag oops, dag iops, AddrMode am, int sz,
let AsmString = !strconcat(opc, "${p}", asm);
let Pattern = pattern;
let PostEncoderMethod = "VFPThumb2PostEncoder";
+ let DecoderNamespace = "VFP";
list<Predicate> Predicates = [HasVFP2];
}
@@ -1257,6 +1285,7 @@ class VFPXI<dag oops, dag iops, AddrMode am, int sz,
let AsmString = asm;
let Pattern = pattern;
let PostEncoderMethod = "VFPThumb2PostEncoder";
+ let DecoderNamespace = "VFP";
list<Predicate> Predicates = [HasVFP2];
}
@@ -1574,6 +1603,7 @@ class NeonI<dag oops, dag iops, AddrMode am, IndexMode im, Format f,
let AsmString = !strconcat(opc, "${p}", ".", dt, "\t", asm);
let Pattern = pattern;
list<Predicate> Predicates = [HasNEON];
+ let DecoderNamespace = "NEON";
}
// Same as NeonI except it does not have a "data type" specifier.
@@ -1586,6 +1616,7 @@ class NeonXI<dag oops, dag iops, AddrMode am, IndexMode im, Format f,
let AsmString = !strconcat(opc, "${p}", "\t", asm);
let Pattern = pattern;
list<Predicate> Predicates = [HasNEON];
+ let DecoderNamespace = "NEON";
}
class NLdSt<bit op23, bits<2> op21_20, bits<4> op11_8, bits<4> op7_4,
@@ -1600,6 +1631,7 @@ class NLdSt<bit op23, bits<2> op21_20, bits<4> op11_8, bits<4> op7_4,
let Inst{7-4} = op7_4;
let PostEncoderMethod = "NEONThumb2LoadStorePostEncoder";
+ let DecoderNamespace = "NEONLoadStore";
bits<5> Vd;
bits<6> Rn;
@@ -1643,6 +1675,7 @@ class NDataI<dag oops, dag iops, Format f, InstrItinClass itin,
pattern> {
let Inst{31-25} = 0b1111001;
let PostEncoderMethod = "NEONThumb2DataIPostEncoder";
+ let DecoderNamespace = "NEONData";
}
class NDataXI<dag oops, dag iops, Format f, InstrItinClass itin,
@@ -1651,6 +1684,7 @@ class NDataXI<dag oops, dag iops, Format f, InstrItinClass itin,
cstr, pattern> {
let Inst{31-25} = 0b1111001;
let PostEncoderMethod = "NEONThumb2DataIPostEncoder";
+ let DecoderNamespace = "NEONData";
}
// NEON "one register and a modified immediate" format.
@@ -1677,6 +1711,7 @@ class N1ModImm<bit op23, bits<3> op21_19, bits<4> op11_8, bit op7, bit op6,
let Inst{24} = SIMM{7};
let Inst{18-16} = SIMM{6-4};
let Inst{3-0} = SIMM{3-0};
+ let DecoderMethod = "DecodeNEONModImmInstruction";
}
// NEON 2 vector register format.
@@ -1874,6 +1909,7 @@ class NVLaneOp<bits<8> opcod1, bits<4> opcod2, bits<2> opcod3,
list<Predicate> Predicates = [HasNEON];
let PostEncoderMethod = "NEONThumb2DupPostEncoder";
+ let DecoderNamespace = "NEONDup";
bits<5> V;
bits<4> R;
@@ -1915,7 +1951,6 @@ class NVDupLane<bits<4> op19_16, bit op6, dag oops, dag iops,
bits<5> Vd;
bits<5> Vm;
- bits<4> lane;
let Inst{22} = Vd{4};
let Inst{15-12} = Vd{3-0};
diff --git a/lib/Target/ARM/ARMInstrInfo.cpp b/lib/Target/ARM/ARMInstrInfo.cpp
index adcbf18..48da03f 100644
--- a/lib/Target/ARM/ARMInstrInfo.cpp
+++ b/lib/Target/ARM/ARMInstrInfo.cpp
@@ -13,8 +13,8 @@
#include "ARMInstrInfo.h"
#include "ARM.h"
-#include "ARMAddressingModes.h"
#include "ARMMachineFunctionInfo.h"
+#include "MCTargetDesc/ARMAddressingModes.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/CodeGen/LiveVariables.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
@@ -30,14 +30,18 @@ ARMInstrInfo::ARMInstrInfo(const ARMSubtarget &STI)
unsigned ARMInstrInfo::getUnindexedOpcode(unsigned Opc) const {
switch (Opc) {
default: break;
- case ARM::LDR_PRE:
- case ARM::LDR_POST:
+ case ARM::LDR_PRE_IMM:
+ case ARM::LDR_PRE_REG:
+ case ARM::LDR_POST_IMM:
+ case ARM::LDR_POST_REG:
return ARM::LDRi12;
case ARM::LDRH_PRE:
case ARM::LDRH_POST:
return ARM::LDRH;
- case ARM::LDRB_PRE:
- case ARM::LDRB_POST:
+ case ARM::LDRB_PRE_IMM:
+ case ARM::LDRB_PRE_REG:
+ case ARM::LDRB_POST_IMM:
+ case ARM::LDRB_POST_REG:
return ARM::LDRBi12;
case ARM::LDRSH_PRE:
case ARM::LDRSH_POST:
@@ -45,14 +49,18 @@ unsigned ARMInstrInfo::getUnindexedOpcode(unsigned Opc) const {
case ARM::LDRSB_PRE:
case ARM::LDRSB_POST:
return ARM::LDRSB;
- case ARM::STR_PRE:
- case ARM::STR_POST:
+ case ARM::STR_PRE_IMM:
+ case ARM::STR_PRE_REG:
+ case ARM::STR_POST_IMM:
+ case ARM::STR_POST_REG:
return ARM::STRi12;
case ARM::STRH_PRE:
case ARM::STRH_POST:
return ARM::STRH;
- case ARM::STRB_PRE:
- case ARM::STRB_POST:
+ case ARM::STRB_PRE_IMM:
+ case ARM::STRB_PRE_REG:
+ case ARM::STRB_POST_IMM:
+ case ARM::STRB_POST_REG:
return ARM::STRBi12;
}
diff --git a/lib/Target/ARM/ARMInstrInfo.td b/lib/Target/ARM/ARMInstrInfo.td
index a42dd1a..2cf0f09 100644
--- a/lib/Target/ARM/ARMInstrInfo.td
+++ b/lib/Target/ARM/ARMInstrInfo.td
@@ -70,6 +70,18 @@ def SDT_ARMTCRET : SDTypeProfile<0, 1, [SDTCisPtrTy<0>]>;
def SDT_ARMBFI : SDTypeProfile<1, 3, [SDTCisVT<0, i32>, SDTCisVT<1, i32>,
SDTCisVT<2, i32>, SDTCisVT<3, i32>]>;
+def SDTBinaryArithWithFlags : SDTypeProfile<2, 2,
+ [SDTCisSameAs<0, 2>,
+ SDTCisSameAs<0, 3>,
+ SDTCisInt<0>, SDTCisVT<1, i32>]>;
+
+// SDTBinaryArithWithFlagsInOut - RES1, CPSR = op LHS, RHS, CPSR
+def SDTBinaryArithWithFlagsInOut : SDTypeProfile<2, 3,
+ [SDTCisSameAs<0, 2>,
+ SDTCisSameAs<0, 3>,
+ SDTCisInt<0>,
+ SDTCisVT<1, i32>,
+ SDTCisVT<4, i32>]>;
// Node definitions.
def ARMWrapper : SDNode<"ARMISD::Wrapper", SDTIntUnaryOp>;
def ARMWrapperDYN : SDNode<"ARMISD::WrapperDYN", SDTIntUnaryOp>;
@@ -120,6 +132,12 @@ def ARMsrl_flag : SDNode<"ARMISD::SRL_FLAG", SDTIntUnaryOp, [SDNPOutGlue]>;
def ARMsra_flag : SDNode<"ARMISD::SRA_FLAG", SDTIntUnaryOp, [SDNPOutGlue]>;
def ARMrrx : SDNode<"ARMISD::RRX" , SDTIntUnaryOp, [SDNPInGlue ]>;
+def ARMaddc : SDNode<"ARMISD::ADDC", SDTBinaryArithWithFlags,
+ [SDNPCommutative]>;
+def ARMsubc : SDNode<"ARMISD::SUBC", SDTBinaryArithWithFlags>;
+def ARMadde : SDNode<"ARMISD::ADDE", SDTBinaryArithWithFlagsInOut>;
+def ARMsube : SDNode<"ARMISD::SUBE", SDTBinaryArithWithFlagsInOut>;
+
def ARMthread_pointer: SDNode<"ARMISD::THREAD_POINTER", SDT_ARMThreadPointer>;
def ARMeh_sjlj_setjmp: SDNode<"ARMISD::EH_SJLJ_SETJMP",
SDT_ARMEH_SJLJ_Setjmp, [SDNPHasChain]>;
@@ -187,10 +205,16 @@ def IsThumb : Predicate<"Subtarget->isThumb()">,
def IsThumb1Only : Predicate<"Subtarget->isThumb1Only()">;
def IsThumb2 : Predicate<"Subtarget->isThumb2()">,
AssemblerPredicate<"ModeThumb,FeatureThumb2">;
+def IsMClass : Predicate<"Subtarget->isMClass()">,
+ AssemblerPredicate<"FeatureMClass">;
+def IsARClass : Predicate<"!Subtarget->isMClass()">,
+ AssemblerPredicate<"!FeatureMClass">;
def IsARM : Predicate<"!Subtarget->isThumb()">,
AssemblerPredicate<"!ModeThumb">;
def IsDarwin : Predicate<"Subtarget->isTargetDarwin()">;
def IsNotDarwin : Predicate<"!Subtarget->isTargetDarwin()">;
+def IsNaCl : Predicate<"Subtarget->isTargetNaCl()">,
+ AssemblerPredicate<"ModeNaCl">;
// FIXME: Eventually this will be just "hasV6T2Ops".
def UseMovt : Predicate<"Subtarget->useMovt()">;
@@ -263,24 +287,11 @@ def imm0_65535 : Operand<i32>, ImmLeaf<i32, [{
let ParserMatchClass = Imm0_65535AsmOperand;
}
+class BinOpWithFlagFrag<dag res> :
+ PatFrag<(ops node:$LHS, node:$RHS, node:$FLAG), res>;
class BinOpFrag<dag res> : PatFrag<(ops node:$LHS, node:$RHS), res>;
class UnOpFrag <dag res> : PatFrag<(ops node:$Src), res>;
-/// adde and sube predicates - True based on whether the carry flag output
-/// will be needed or not.
-def adde_dead_carry :
- PatFrag<(ops node:$LHS, node:$RHS), (adde node:$LHS, node:$RHS),
- [{return !N->hasAnyUseOfValue(1);}]>;
-def sube_dead_carry :
- PatFrag<(ops node:$LHS, node:$RHS), (sube node:$LHS, node:$RHS),
- [{return !N->hasAnyUseOfValue(1);}]>;
-def adde_live_carry :
- PatFrag<(ops node:$LHS, node:$RHS), (adde node:$LHS, node:$RHS),
- [{return N->hasAnyUseOfValue(1);}]>;
-def sube_live_carry :
- PatFrag<(ops node:$LHS, node:$RHS), (sube node:$LHS, node:$RHS),
- [{return N->hasAnyUseOfValue(1);}]>;
-
// An 'and' node with a single use.
def and_su : PatFrag<(ops node:$lhs, node:$rhs), (and node:$lhs, node:$rhs), [{
return N->hasOneUse();
@@ -315,6 +326,7 @@ def fsub_mlx : PatFrag<(ops node:$lhs, node:$rhs),(fsub node:$lhs, node:$rhs),[{
def brtarget : Operand<OtherVT> {
let EncoderMethod = "getBranchTargetOpValue";
let OperandType = "OPERAND_PCREL";
+ let DecoderMethod = "DecodeT2BROperand";
}
// FIXME: get rid of this one?
@@ -345,39 +357,35 @@ def bl_target : Operand<i32> {
let OperandType = "OPERAND_PCREL";
}
-
-// A list of registers separated by comma. Used by load/store multiple.
-def RegListAsmOperand : AsmOperandClass {
- let Name = "RegList";
- let SuperClasses = [];
-}
-
-def DPRRegListAsmOperand : AsmOperandClass {
- let Name = "DPRRegList";
- let SuperClasses = [];
-}
-
-def SPRRegListAsmOperand : AsmOperandClass {
- let Name = "SPRRegList";
- let SuperClasses = [];
+def blx_target : Operand<i32> {
+ // Encoded the same as branch targets.
+ let EncoderMethod = "getARMBLXTargetOpValue";
+ let OperandType = "OPERAND_PCREL";
}
+// A list of registers separated by comma. Used by load/store multiple.
+def RegListAsmOperand : AsmOperandClass { let Name = "RegList"; }
def reglist : Operand<i32> {
let EncoderMethod = "getRegisterListOpValue";
let ParserMatchClass = RegListAsmOperand;
let PrintMethod = "printRegisterList";
+ let DecoderMethod = "DecodeRegListOperand";
}
+def DPRRegListAsmOperand : AsmOperandClass { let Name = "DPRRegList"; }
def dpr_reglist : Operand<i32> {
let EncoderMethod = "getRegisterListOpValue";
let ParserMatchClass = DPRRegListAsmOperand;
let PrintMethod = "printRegisterList";
+ let DecoderMethod = "DecodeDPRRegListOperand";
}
+def SPRRegListAsmOperand : AsmOperandClass { let Name = "SPRRegList"; }
def spr_reglist : Operand<i32> {
let EncoderMethod = "getRegisterListOpValue";
let ParserMatchClass = SPRRegListAsmOperand;
let PrintMethod = "printRegisterList";
+ let DecoderMethod = "DecodeSPRRegListOperand";
}
// An operand for the CONSTPOOL_ENTRY pseudo-instruction.
@@ -397,56 +405,99 @@ def adrlabel : Operand<i32> {
def neon_vcvt_imm32 : Operand<i32> {
let EncoderMethod = "getNEONVcvtImm32OpValue";
+ let DecoderMethod = "DecodeVCVTImmOperand";
}
// rot_imm: An integer that encodes a rotate amount. Must be 8, 16, or 24.
-def rot_imm : Operand<i32>, ImmLeaf<i32, [{
- int32_t v = (int32_t)Imm;
- return v == 8 || v == 16 || v == 24; }]> {
- let EncoderMethod = "getRotImmOpValue";
+def rot_imm_XFORM: SDNodeXForm<imm, [{
+ switch (N->getZExtValue()){
+ default: assert(0);
+ case 0: return CurDAG->getTargetConstant(0, MVT::i32);
+ case 8: return CurDAG->getTargetConstant(1, MVT::i32);
+ case 16: return CurDAG->getTargetConstant(2, MVT::i32);
+ case 24: return CurDAG->getTargetConstant(3, MVT::i32);
+ }
+}]>;
+def RotImmAsmOperand : AsmOperandClass {
+ let Name = "RotImm";
+ let ParserMethod = "parseRotImm";
}
-
-def ShifterAsmOperand : AsmOperandClass {
- let Name = "Shifter";
- let SuperClasses = [];
+def rot_imm : Operand<i32>, PatLeaf<(i32 imm), [{
+ int32_t v = N->getZExtValue();
+ return v == 8 || v == 16 || v == 24; }],
+ rot_imm_XFORM> {
+ let PrintMethod = "printRotImmOperand";
+ let ParserMatchClass = RotImmAsmOperand;
}
// shift_imm: An integer that encodes a shift amount and the type of shift
-// (currently either asr or lsl) using the same encoding used for the
-// immediates in so_reg operands.
+// (asr or lsl). The 6-bit immediate encodes as:
+// {5} 0 ==> lsl
+// 1 ==> asr
+// {4-0} imm5 shift amount.
+// asr #32 is encoded as imm5 == 0.
+def ShifterImmAsmOperand : AsmOperandClass {
+ let Name = "ShifterImm";
+ let ParserMethod = "parseShifterImm";
+}
def shift_imm : Operand<i32> {
let PrintMethod = "printShiftImmOperand";
- let ParserMatchClass = ShifterAsmOperand;
+ let ParserMatchClass = ShifterImmAsmOperand;
}
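A sketch of the 6-bit packing the comment above describes (illustrative only; the actual encoding is done by the TableGen-driven encoder):

unsigned encodeShiftImm(bool IsASR, unsigned Amt) {
  if (IsASR && Amt == 32)
    Amt = 0;                        // asr #32 is encoded as imm5 == 0
  return (unsigned(IsASR) << 5) | (Amt & 0x1f);
}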
-def ShiftedRegAsmOperand : AsmOperandClass {
- let Name = "ShiftedReg";
+// shifter_operand operands: so_reg_reg, so_reg_imm, and so_imm.
+def ShiftedRegAsmOperand : AsmOperandClass { let Name = "RegShiftedReg"; }
+def so_reg_reg : Operand<i32>, // reg reg imm
+ ComplexPattern<i32, 3, "SelectRegShifterOperand",
+ [shl, srl, sra, rotr]> {
+ let EncoderMethod = "getSORegRegOpValue";
+ let PrintMethod = "printSORegRegOperand";
+ let DecoderMethod = "DecodeSORegRegOperand";
+ let ParserMatchClass = ShiftedRegAsmOperand;
+ let MIOperandInfo = (ops GPRnopc, GPRnopc, i32imm);
}
-// shifter_operand operands: so_reg and so_imm.
-def so_reg : Operand<i32>, // reg reg imm
- ComplexPattern<i32, 3, "SelectShifterOperandReg",
- [shl,srl,sra,rotr]> {
- let EncoderMethod = "getSORegOpValue";
- let PrintMethod = "printSORegOperand";
- let ParserMatchClass = ShiftedRegAsmOperand;
- let MIOperandInfo = (ops GPR, GPR, shift_imm);
+def ShiftedImmAsmOperand : AsmOperandClass { let Name = "RegShiftedImm"; }
+def so_reg_imm : Operand<i32>, // reg imm
+ ComplexPattern<i32, 2, "SelectImmShifterOperand",
+ [shl, srl, sra, rotr]> {
+ let EncoderMethod = "getSORegImmOpValue";
+ let PrintMethod = "printSORegImmOperand";
+ let DecoderMethod = "DecodeSORegImmOperand";
+ let ParserMatchClass = ShiftedImmAsmOperand;
+ let MIOperandInfo = (ops GPR, i32imm);
+}
+
+// FIXME: Does this need to be distinct from so_reg?
+def shift_so_reg_reg : Operand<i32>, // reg reg imm
+ ComplexPattern<i32, 3, "SelectShiftRegShifterOperand",
+ [shl,srl,sra,rotr]> {
+ let EncoderMethod = "getSORegRegOpValue";
+ let PrintMethod = "printSORegRegOperand";
+ let DecoderMethod = "DecodeSORegRegOperand";
+ let MIOperandInfo = (ops GPR, GPR, i32imm);
}
+
// FIXME: Does this need to be distinct from so_reg?
-def shift_so_reg : Operand<i32>, // reg reg imm
- ComplexPattern<i32, 3, "SelectShiftShifterOperandReg",
+def shift_so_reg_imm : Operand<i32>, // reg reg imm
+ ComplexPattern<i32, 2, "SelectShiftImmShifterOperand",
[shl,srl,sra,rotr]> {
- let EncoderMethod = "getSORegOpValue";
- let PrintMethod = "printSORegOperand";
- let MIOperandInfo = (ops GPR, GPR, shift_imm);
+ let EncoderMethod = "getSORegImmOpValue";
+ let PrintMethod = "printSORegImmOperand";
+ let DecoderMethod = "DecodeSORegImmOperand";
+ let MIOperandInfo = (ops GPR, i32imm);
}
+
// so_imm - Match a 32-bit shifter_operand immediate operand, which is an
// 8-bit immediate rotated by an arbitrary number of bits.
+def SOImmAsmOperand: AsmOperandClass { let Name = "ARMSOImm"; }
def so_imm : Operand<i32>, ImmLeaf<i32, [{
return ARM_AM::getSOImmVal(Imm) != -1;
}]> {
let EncoderMethod = "getSOImmOpValue";
+ let ParserMatchClass = SOImmAsmOperand;
+ let DecoderMethod = "DecodeSOImmOperand";
}
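The predicate above leans on ARM_AM::getSOImmVal; the underlying criterion is that the value be an 8-bit immediate rotated right by an even amount. A brute-force restatement of that test (a sketch, not the in-tree implementation):

#include <cstdint>

bool isSOImmValue(uint32_t V) {
  for (unsigned Rot = 0; Rot < 32; Rot += 2) {
    // Rotate left by Rot to undo a rotate-right-by-Rot encoding.
    uint32_t Unrotated = (V << Rot) | (V >> ((32 - Rot) & 31));
    if (Unrotated <= 0xff)
      return true;
  }
  return false;
}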
// Break so_imm's up into two pieces. This handles immediates with up to 16
@@ -464,7 +515,7 @@ def arm_i32imm : PatLeaf<(imm), [{
return ARM_AM::isSOImmTwoPartVal((unsigned)N->getZExtValue());
}]>;
-/// imm0_7 predicate - Immediate in the range [0,31].
+/// imm0_7 predicate - Immediate in the range [0,7].
def Imm0_7AsmOperand: AsmOperandClass { let Name = "Imm0_7"; }
def imm0_7 : Operand<i32>, ImmLeaf<i32, [{
return Imm >= 0 && Imm < 8;
@@ -472,7 +523,7 @@ def imm0_7 : Operand<i32>, ImmLeaf<i32, [{
let ParserMatchClass = Imm0_7AsmOperand;
}
-/// imm0_15 predicate - Immediate in the range [0,31].
+/// imm0_15 predicate - Immediate in the range [0,15].
def Imm0_15AsmOperand: AsmOperandClass { let Name = "Imm0_15"; }
def imm0_15 : Operand<i32>, ImmLeaf<i32, [{
return Imm >= 0 && Imm < 16;
@@ -481,68 +532,83 @@ def imm0_15 : Operand<i32>, ImmLeaf<i32, [{
}
/// imm0_31 predicate - True if the 32-bit immediate is in the range [0,31].
+def Imm0_31AsmOperand: AsmOperandClass { let Name = "Imm0_31"; }
def imm0_31 : Operand<i32>, ImmLeaf<i32, [{
return Imm >= 0 && Imm < 32;
-}]>;
-
-/// imm0_31_m1 - Matches and prints like imm0_31, but encodes as 'value - 1'.
-def imm0_31_m1 : Operand<i32>, ImmLeaf<i32, [{
- return Imm >= 0 && Imm < 32;
}]> {
- let EncoderMethod = "getImmMinusOneOpValue";
+ let ParserMatchClass = Imm0_31AsmOperand;
+}
+
+/// imm0_255 predicate - Immediate in the range [0,255].
+def Imm0_255AsmOperand : AsmOperandClass { let Name = "Imm0_255"; }
+def imm0_255 : Operand<i32>, ImmLeaf<i32, [{ return Imm >= 0 && Imm < 256; }]> {
+ let ParserMatchClass = Imm0_255AsmOperand;
}
-// i32imm_hilo16 - For movt/movw - sets the MC Encoder method.
-// The imm is split into imm{15-12}, imm{11-0}
+// imm0_65535_expr - For movt/movw - 16-bit immediate that can also reference
+// a relocatable expression.
//
-def i32imm_hilo16 : Operand<i32> {
+// FIXME: This really needs a Thumb version separate from the ARM version.
+// While the range is the same, and can thus use the same match class,
+// the encoding is different so it should have a different encoder method.
+def Imm0_65535ExprAsmOperand: AsmOperandClass { let Name = "Imm0_65535Expr"; }
+def imm0_65535_expr : Operand<i32> {
let EncoderMethod = "getHiLo16ImmOpValue";
+ let ParserMatchClass = Imm0_65535ExprAsmOperand;
}
+/// imm24b - True if the 32-bit immediate is encodable in 24 bits.
+def Imm24bitAsmOperand: AsmOperandClass { let Name = "Imm24bit"; }
+def imm24b : Operand<i32>, ImmLeaf<i32, [{
+ return Imm >= 0 && Imm <= 0xffffff;
+}]> {
+ let ParserMatchClass = Imm24bitAsmOperand;
+}
+
+
/// bf_inv_mask_imm predicate - An AND mask to clear an arbitrary width bitfield
/// e.g., 0xf000ffff
+def BitfieldAsmOperand : AsmOperandClass {
+ let Name = "Bitfield";
+ let ParserMethod = "parseBitfield";
+}
def bf_inv_mask_imm : Operand<i32>,
PatLeaf<(imm), [{
return ARM::isBitFieldInvertedMask(N->getZExtValue());
}] > {
let EncoderMethod = "getBitfieldInvertedMaskOpValue";
let PrintMethod = "printBitfieldInvMaskImmOperand";
+ let DecoderMethod = "DecodeBitfieldMaskOperand";
+ let ParserMatchClass = BitfieldAsmOperand;
}
-/// lsb_pos_imm - position of the lsb bit, used by BFI4p and t2BFI4p
-def lsb_pos_imm : Operand<i32>, ImmLeaf<i32, [{
- return isInt<5>(Imm);
+def imm1_32_XFORM: SDNodeXForm<imm, [{
+ return CurDAG->getTargetConstant((int)N->getZExtValue() - 1, MVT::i32);
}]>;
-
-/// width_imm - number of bits to be copied, used by BFI4p and t2BFI4p
-def width_imm : Operand<i32>, ImmLeaf<i32, [{
- return Imm > 0 && Imm <= 32;
-}] > {
- let EncoderMethod = "getMsbOpValue";
-}
-
-def ssat_imm : Operand<i32>, ImmLeaf<i32, [{
- return Imm > 0 && Imm <= 32;
-}]> {
- let EncoderMethod = "getSsatBitPosValue";
+def Imm1_32AsmOperand: AsmOperandClass { let Name = "Imm1_32"; }
+def imm1_32 : Operand<i32>, PatLeaf<(imm), [{
+ uint64_t Imm = N->getZExtValue();
+ return Imm > 0 && Imm <= 32;
+ }],
+ imm1_32_XFORM> {
+ let PrintMethod = "printImmPlusOneOperand";
+ let ParserMatchClass = Imm1_32AsmOperand;
+}
+
+def imm1_16_XFORM: SDNodeXForm<imm, [{
+ return CurDAG->getTargetConstant((int)N->getZExtValue() - 1, MVT::i32);
+}]>;
+def Imm1_16AsmOperand: AsmOperandClass { let Name = "Imm1_16"; }
+def imm1_16 : Operand<i32>, PatLeaf<(imm), [{ return Imm > 0 && Imm <= 16; }],
+ imm1_16_XFORM> {
+ let PrintMethod = "printImmPlusOneOperand";
+ let ParserMatchClass = Imm1_16AsmOperand;
}
// Define ARM specific addressing modes.
-
-def MemMode2AsmOperand : AsmOperandClass {
- let Name = "MemMode2";
- let SuperClasses = [];
- let ParserMethod = "tryParseMemMode2Operand";
-}
-
-def MemMode3AsmOperand : AsmOperandClass {
- let Name = "MemMode3";
- let SuperClasses = [];
- let ParserMethod = "tryParseMemMode3Operand";
-}
-
// addrmode_imm12 := reg +/- imm12
//
+def MemImm12OffsetAsmOperand : AsmOperandClass { let Name = "MemImm12Offset"; }
def addrmode_imm12 : Operand<i32>,
ComplexPattern<i32, 2, "SelectAddrModeImm12", []> {
// 12-bit immediate operand. Note that instructions using this encode
@@ -551,53 +617,129 @@ def addrmode_imm12 : Operand<i32>,
let EncoderMethod = "getAddrModeImm12OpValue";
let PrintMethod = "printAddrModeImm12Operand";
+ let DecoderMethod = "DecodeAddrModeImm12Operand";
+ let ParserMatchClass = MemImm12OffsetAsmOperand;
let MIOperandInfo = (ops GPR:$base, i32imm:$offsimm);
}
// ldst_so_reg := reg +/- reg shop imm
//
+def MemRegOffsetAsmOperand : AsmOperandClass { let Name = "MemRegOffset"; }
def ldst_so_reg : Operand<i32>,
ComplexPattern<i32, 3, "SelectLdStSOReg", []> {
let EncoderMethod = "getLdStSORegOpValue";
// FIXME: Simplify the printer
let PrintMethod = "printAddrMode2Operand";
- let MIOperandInfo = (ops GPR:$base, GPR:$offsreg, i32imm:$offsimm);
+ let DecoderMethod = "DecodeSORegMemOperand";
+ let ParserMatchClass = MemRegOffsetAsmOperand;
+ let MIOperandInfo = (ops GPR:$base, GPRnopc:$offsreg, i32imm:$shift);
+}
+
+// postidx_imm8 := +/- [0,255]
+//
+// 9-bit value:
+// {8} 1 if imm8 is non-negative, 0 otherwise.
+// {7-0} [0,255] imm8 value.
+def PostIdxImm8AsmOperand : AsmOperandClass { let Name = "PostIdxImm8"; }
+def postidx_imm8 : Operand<i32> {
+ let PrintMethod = "printPostIdxImm8Operand";
+ let ParserMatchClass = PostIdxImm8AsmOperand;
+ let MIOperandInfo = (ops i32imm);
}
+// postidx_imm8s4 := +/- [0,1020]
+//
+// 9-bit value:
+// {8} 1 if imm8 is non-negative, 0 otherwise.
+// {7-0} [0,255] imm8 value, scaled by 4.
+def PostIdxImm8s4AsmOperand : AsmOperandClass { let Name = "PostIdxImm8s4"; }
+def postidx_imm8s4 : Operand<i32> {
+ let PrintMethod = "printPostIdxImm8s4Operand";
+ let ParserMatchClass = PostIdxImm8s4AsmOperand;
+ let MIOperandInfo = (ops i32imm);
+}
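Per the layout comments, both post-index immediates put the add/subtract flag in bit {8}; the s4 variant additionally scales a word-aligned byte offset down by 4. A packing sketch (illustrative only):

unsigned encodePostIdxImm8s4(int ByteOffset) {
  bool IsAdd = ByteOffset >= 0;
  unsigned Imm8 = unsigned(IsAdd ? ByteOffset : -ByteOffset) / 4;
  return (unsigned(IsAdd) << 8) | (Imm8 & 0xff);
}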
+
+
+// postidx_reg := +/- reg
+//
+def PostIdxRegAsmOperand : AsmOperandClass {
+ let Name = "PostIdxReg";
+ let ParserMethod = "parsePostIdxReg";
+}
+def postidx_reg : Operand<i32> {
+ let EncoderMethod = "getPostIdxRegOpValue";
+ let DecoderMethod = "DecodePostIdxReg";
+ let PrintMethod = "printPostIdxRegOperand";
+ let ParserMatchClass = PostIdxRegAsmOperand;
+ let MIOperandInfo = (ops GPR, i32imm);
+}
+
+
// addrmode2 := reg +/- imm12
// := reg +/- reg shop imm
//
+// FIXME: addrmode2 should be refactored the rest of the way to always
+// use explicit imm vs. reg versions above (addrmode_imm12 and ldst_so_reg).
+def AddrMode2AsmOperand : AsmOperandClass { let Name = "AddrMode2"; }
def addrmode2 : Operand<i32>,
ComplexPattern<i32, 3, "SelectAddrMode2", []> {
let EncoderMethod = "getAddrMode2OpValue";
let PrintMethod = "printAddrMode2Operand";
- let ParserMatchClass = MemMode2AsmOperand;
+ let ParserMatchClass = AddrMode2AsmOperand;
let MIOperandInfo = (ops GPR:$base, GPR:$offsreg, i32imm:$offsimm);
}
-def am2offset : Operand<i32>,
- ComplexPattern<i32, 2, "SelectAddrMode2Offset",
+def PostIdxRegShiftedAsmOperand : AsmOperandClass {
+ let Name = "PostIdxRegShifted";
+ let ParserMethod = "parsePostIdxReg";
+}
+def am2offset_reg : Operand<i32>,
+ ComplexPattern<i32, 2, "SelectAddrMode2OffsetReg",
+ [], [SDNPWantRoot]> {
+ let EncoderMethod = "getAddrMode2OffsetOpValue";
+ let PrintMethod = "printAddrMode2OffsetOperand";
+ // When using this for assembly, it's always as a post-index offset.
+ let ParserMatchClass = PostIdxRegShiftedAsmOperand;
+ let MIOperandInfo = (ops GPR, i32imm);
+}
+
+// FIXME: am2offset_imm should only need the immediate, not the GPR. Having
+// the GPR is purely vestigial at this point.
+def AM2OffsetImmAsmOperand : AsmOperandClass { let Name = "AM2OffsetImm"; }
+def am2offset_imm : Operand<i32>,
+ ComplexPattern<i32, 2, "SelectAddrMode2OffsetImm",
[], [SDNPWantRoot]> {
let EncoderMethod = "getAddrMode2OffsetOpValue";
let PrintMethod = "printAddrMode2OffsetOperand";
+ let ParserMatchClass = AM2OffsetImmAsmOperand;
let MIOperandInfo = (ops GPR, i32imm);
}
+
// addrmode3 := reg +/- reg
// addrmode3 := reg +/- imm8
//
+// FIXME: split into imm vs. reg versions.
+def AddrMode3AsmOperand : AsmOperandClass { let Name = "AddrMode3"; }
def addrmode3 : Operand<i32>,
ComplexPattern<i32, 3, "SelectAddrMode3", []> {
let EncoderMethod = "getAddrMode3OpValue";
let PrintMethod = "printAddrMode3Operand";
- let ParserMatchClass = MemMode3AsmOperand;
+ let ParserMatchClass = AddrMode3AsmOperand;
let MIOperandInfo = (ops GPR:$base, GPR:$offsreg, i32imm:$offsimm);
}
+// FIXME: split into imm vs. reg versions.
+// FIXME: parser method to handle +/- register.
+def AM3OffsetAsmOperand : AsmOperandClass {
+ let Name = "AM3Offset";
+ let ParserMethod = "parseAM3Offset";
+}
def am3offset : Operand<i32>,
ComplexPattern<i32, 2, "SelectAddrMode3Offset",
[], [SDNPWantRoot]> {
let EncoderMethod = "getAddrMode3OffsetOpValue";
let PrintMethod = "printAddrMode3OffsetOperand";
+ let ParserMatchClass = AM3OffsetAsmOperand;
let MIOperandInfo = (ops GPR, i32imm);
}
@@ -608,28 +750,28 @@ def ldstm_mode : OptionalDefOperand<OtherVT, (ops i32), (ops (i32 1))> {
let PrintMethod = "printLdStmModeOperand";
}
-def MemMode5AsmOperand : AsmOperandClass {
- let Name = "MemMode5";
- let SuperClasses = [];
-}
-
// addrmode5 := reg +/- imm8*4
//
+def AddrMode5AsmOperand : AsmOperandClass { let Name = "AddrMode5"; }
def addrmode5 : Operand<i32>,
ComplexPattern<i32, 2, "SelectAddrMode5", []> {
let PrintMethod = "printAddrMode5Operand";
- let MIOperandInfo = (ops GPR:$base, i32imm);
- let ParserMatchClass = MemMode5AsmOperand;
let EncoderMethod = "getAddrMode5OpValue";
+ let DecoderMethod = "DecodeAddrMode5Operand";
+ let ParserMatchClass = AddrMode5AsmOperand;
+ let MIOperandInfo = (ops GPR:$base, i32imm);
}
// addrmode6 := reg with optional alignment
//
+def AddrMode6AsmOperand : AsmOperandClass { let Name = "AlignedMemory"; }
def addrmode6 : Operand<i32>,
ComplexPattern<i32, 2, "SelectAddrMode6", [], [SDNPWantParent]>{
let PrintMethod = "printAddrMode6Operand";
- let MIOperandInfo = (ops GPR:$addr, i32imm);
+ let MIOperandInfo = (ops GPR:$addr, i32imm:$align);
let EncoderMethod = "getAddrMode6AddressOpValue";
+ let DecoderMethod = "DecodeAddrMode6Operand";
+ let ParserMatchClass = AddrMode6AsmOperand;
}
def am6offset : Operand<i32>,
@@ -638,6 +780,7 @@ def am6offset : Operand<i32>,
let PrintMethod = "printAddrMode6OffsetOperand";
let MIOperandInfo = (ops GPR);
let EncoderMethod = "getAddrMode6OffsetOpValue";
+ let DecoderMethod = "DecodeGPRRegisterClass";
}
// Special version of addrmode6 to handle alignment encoding for VST1/VLD1
@@ -666,19 +809,15 @@ def addrmodepc : Operand<i32>,
let MIOperandInfo = (ops GPR, i32imm);
}
-def MemMode7AsmOperand : AsmOperandClass {
- let Name = "MemMode7";
- let SuperClasses = [];
-}
-
-// addrmode7 := reg
-// Used by load/store exclusive instructions. Useful to enable right assembly
-// parsing and printing. Not used for any codegen matching.
+// addr_offset_none := reg
//
-def addrmode7 : Operand<i32> {
+def MemNoOffsetAsmOperand : AsmOperandClass { let Name = "MemNoOffset"; }
+def addr_offset_none : Operand<i32>,
+ ComplexPattern<i32, 1, "SelectAddrOffsetNone", []> {
let PrintMethod = "printAddrMode7Operand";
- let MIOperandInfo = (ops GPR);
- let ParserMatchClass = MemMode7AsmOperand;
+ let DecoderMethod = "DecodeAddrMode7Operand";
+ let ParserMatchClass = MemNoOffsetAsmOperand;
+ let MIOperandInfo = (ops GPR:$base);
}
def nohash_imm : Operand<i32> {
@@ -687,25 +826,30 @@ def nohash_imm : Operand<i32> {
def CoprocNumAsmOperand : AsmOperandClass {
let Name = "CoprocNum";
- let SuperClasses = [];
- let ParserMethod = "tryParseCoprocNumOperand";
-}
-
-def CoprocRegAsmOperand : AsmOperandClass {
- let Name = "CoprocReg";
- let SuperClasses = [];
- let ParserMethod = "tryParseCoprocRegOperand";
+ let ParserMethod = "parseCoprocNumOperand";
}
-
def p_imm : Operand<i32> {
let PrintMethod = "printPImmediate";
let ParserMatchClass = CoprocNumAsmOperand;
+ let DecoderMethod = "DecodeCoprocessor";
}
+def CoprocRegAsmOperand : AsmOperandClass {
+ let Name = "CoprocReg";
+ let ParserMethod = "parseCoprocRegOperand";
+}
def c_imm : Operand<i32> {
let PrintMethod = "printCImmediate";
let ParserMatchClass = CoprocRegAsmOperand;
}
+def CoprocOptionAsmOperand : AsmOperandClass {
+ let Name = "CoprocOption";
+ let ParserMethod = "parseCoprocOptionOperand";
+}
+def coproc_option_imm : Operand<i32> {
+ let PrintMethod = "printCoprocOptionImm";
+ let ParserMatchClass = CoprocOptionAsmOperand;
+}
//===----------------------------------------------------------------------===//
@@ -748,16 +892,37 @@ multiclass AsI1_bin_irs<bits<4> opcod, string opc,
let Inst{11-4} = 0b00000000;
let Inst{3-0} = Rm;
}
- def rs : AsI1<opcod, (outs GPR:$Rd), (ins GPR:$Rn, so_reg:$shift), DPSoRegFrm,
+
+ def rsi : AsI1<opcod, (outs GPR:$Rd),
+ (ins GPR:$Rn, so_reg_imm:$shift), DPSoRegImmFrm,
+ iis, opc, "\t$Rd, $Rn, $shift",
+ [(set GPR:$Rd, (opnode GPR:$Rn, so_reg_imm:$shift))]> {
+ bits<4> Rd;
+ bits<4> Rn;
+ bits<12> shift;
+ let Inst{25} = 0;
+ let Inst{19-16} = Rn;
+ let Inst{15-12} = Rd;
+ let Inst{11-5} = shift{11-5};
+ let Inst{4} = 0;
+ let Inst{3-0} = shift{3-0};
+ }
+
+ def rsr : AsI1<opcod, (outs GPR:$Rd),
+ (ins GPR:$Rn, so_reg_reg:$shift), DPSoRegRegFrm,
iis, opc, "\t$Rd, $Rn, $shift",
- [(set GPR:$Rd, (opnode GPR:$Rn, so_reg:$shift))]> {
+ [(set GPR:$Rd, (opnode GPR:$Rn, so_reg_reg:$shift))]> {
bits<4> Rd;
bits<4> Rn;
bits<12> shift;
let Inst{25} = 0;
let Inst{19-16} = Rn;
let Inst{15-12} = Rd;
- let Inst{11-0} = shift;
+ let Inst{11-8} = shift{11-8};
+ let Inst{7} = 0;
+ let Inst{6-5} = shift{6-5};
+ let Inst{4} = 1;
+ let Inst{3-0} = shift{3-0};
}
// Assembly aliases for optional destination operand when it's the same
@@ -773,56 +938,172 @@ multiclass AsI1_bin_irs<bits<4> opcod, string opc,
cc_out:$s)>,
Requires<[IsARM]>;
def : InstAlias<!strconcat(opc, "${s}${p} $Rdn, $shift"),
- (!cast<Instruction>(!strconcat(baseOpc, "rs")) GPR:$Rdn, GPR:$Rdn,
- so_reg:$shift, pred:$p,
+ (!cast<Instruction>(!strconcat(baseOpc, "rsi")) GPR:$Rdn, GPR:$Rdn,
+ so_reg_imm:$shift, pred:$p,
cc_out:$s)>,
Requires<[IsARM]>;
+ def : InstAlias<!strconcat(opc, "${s}${p} $Rdn, $shift"),
+ (!cast<Instruction>(!strconcat(baseOpc, "rsr")) GPR:$Rdn, GPR:$Rdn,
+ so_reg_reg:$shift, pred:$p,
+ cc_out:$s)>,
+ Requires<[IsARM]>;
+
}
-/// AI1_bin_s_irs - Similar to AsI1_bin_irs except it sets the 's' bit so the
-/// instruction modifies the CPSR register.
-let isCodeGenOnly = 1, Defs = [CPSR] in {
-multiclass AI1_bin_s_irs<bits<4> opcod, string opc,
+/// AsI1_rbin_irs - Same as AsI1_bin_irs except the order of the operands is
+/// reversed. The 'rr' form is only defined for the disassembler; for codegen
+/// it is equivalent to the AsI1_bin_irs counterpart.
+multiclass AsI1_rbin_irs<bits<4> opcod, string opc,
InstrItinClass iii, InstrItinClass iir, InstrItinClass iis,
- PatFrag opnode, bit Commutable = 0> {
- def ri : AI1<opcod, (outs GPR:$Rd), (ins GPR:$Rn, so_imm:$imm), DPFrm,
+ PatFrag opnode, string baseOpc, bit Commutable = 0> {
+ // The register-immediate version is re-materializable. This is useful
+ // in particular for taking the address of a local.
+ let isReMaterializable = 1 in {
+ def ri : AsI1<opcod, (outs GPR:$Rd), (ins GPR:$Rn, so_imm:$imm), DPFrm,
iii, opc, "\t$Rd, $Rn, $imm",
- [(set GPR:$Rd, (opnode GPR:$Rn, so_imm:$imm))]> {
+ [(set GPR:$Rd, (opnode so_imm:$imm, GPR:$Rn))]> {
bits<4> Rd;
bits<4> Rn;
bits<12> imm;
let Inst{25} = 1;
- let Inst{20} = 1;
let Inst{19-16} = Rn;
let Inst{15-12} = Rd;
let Inst{11-0} = imm;
}
- def rr : AI1<opcod, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm), DPFrm,
+ }
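+  // (Rematerialization lets the register allocator simply recompute the
+  // reg+imm result when it is needed, rather than spilling and reloading it.)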
+ def rr : AsI1<opcod, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm), DPFrm,
iir, opc, "\t$Rd, $Rn, $Rm",
- [(set GPR:$Rd, (opnode GPR:$Rn, GPR:$Rm))]> {
+ [/* pattern left blank */]> {
bits<4> Rd;
bits<4> Rn;
bits<4> Rm;
- let isCommutable = Commutable;
+ let Inst{11-4} = 0b00000000;
+ let Inst{25} = 0;
+ let Inst{3-0} = Rm;
+ let Inst{15-12} = Rd;
+ let Inst{19-16} = Rn;
+ }
+
+ def rsi : AsI1<opcod, (outs GPR:$Rd),
+ (ins GPR:$Rn, so_reg_imm:$shift), DPSoRegImmFrm,
+ iis, opc, "\t$Rd, $Rn, $shift",
+ [(set GPR:$Rd, (opnode so_reg_imm:$shift, GPR:$Rn))]> {
+ bits<4> Rd;
+ bits<4> Rn;
+ bits<12> shift;
let Inst{25} = 0;
- let Inst{20} = 1;
let Inst{19-16} = Rn;
let Inst{15-12} = Rd;
- let Inst{11-4} = 0b00000000;
- let Inst{3-0} = Rm;
+ let Inst{11-5} = shift{11-5};
+ let Inst{4} = 0;
+ let Inst{3-0} = shift{3-0};
}
- def rs : AI1<opcod, (outs GPR:$Rd), (ins GPR:$Rn, so_reg:$shift), DPSoRegFrm,
+
+ def rsr : AsI1<opcod, (outs GPR:$Rd),
+ (ins GPR:$Rn, so_reg_reg:$shift), DPSoRegRegFrm,
iis, opc, "\t$Rd, $Rn, $shift",
- [(set GPR:$Rd, (opnode GPR:$Rn, so_reg:$shift))]> {
+ [(set GPR:$Rd, (opnode so_reg_reg:$shift, GPR:$Rn))]> {
bits<4> Rd;
bits<4> Rn;
bits<12> shift;
let Inst{25} = 0;
- let Inst{20} = 1;
let Inst{19-16} = Rn;
let Inst{15-12} = Rd;
- let Inst{11-0} = shift;
+ let Inst{11-8} = shift{11-8};
+ let Inst{7} = 0;
+ let Inst{6-5} = shift{6-5};
+ let Inst{4} = 1;
+ let Inst{3-0} = shift{3-0};
}
+
+ // Assembly aliases for optional destination operand when it's the same
+ // as the source operand.
+ def : InstAlias<!strconcat(opc, "${s}${p} $Rdn, $imm"),
+ (!cast<Instruction>(!strconcat(baseOpc, "ri")) GPR:$Rdn, GPR:$Rdn,
+ so_imm:$imm, pred:$p,
+ cc_out:$s)>,
+ Requires<[IsARM]>;
+ def : InstAlias<!strconcat(opc, "${s}${p} $Rdn, $Rm"),
+ (!cast<Instruction>(!strconcat(baseOpc, "rr")) GPR:$Rdn, GPR:$Rdn,
+ GPR:$Rm, pred:$p,
+ cc_out:$s)>,
+ Requires<[IsARM]>;
+ def : InstAlias<!strconcat(opc, "${s}${p} $Rdn, $shift"),
+ (!cast<Instruction>(!strconcat(baseOpc, "rsi")) GPR:$Rdn, GPR:$Rdn,
+ so_reg_imm:$shift, pred:$p,
+ cc_out:$s)>,
+ Requires<[IsARM]>;
+ def : InstAlias<!strconcat(opc, "${s}${p} $Rdn, $shift"),
+ (!cast<Instruction>(!strconcat(baseOpc, "rsr")) GPR:$Rdn, GPR:$Rdn,
+ so_reg_reg:$shift, pred:$p,
+ cc_out:$s)>,
+ Requires<[IsARM]>;
+
+}
+
+/// AsI1_rbin_s_is - Same as AsI1_rbin_irs except it sets the 's' bit by default.
+///
+/// These opcodes will be converted to the real non-S opcodes by
+/// AdjustInstrPostInstrSelection after giving them an optional CPSR operand.
+let hasPostISelHook = 1, isCodeGenOnly = 1, isPseudo = 1, Defs = [CPSR] in {
+multiclass AsI1_rbin_s_is<bits<4> opcod, string opc,
+ InstrItinClass iii, InstrItinClass iir, InstrItinClass iis,
+ PatFrag opnode, bit Commutable = 0> {
+ def ri : AsI1<opcod, (outs GPR:$Rd), (ins GPR:$Rn, so_imm:$imm), DPFrm,
+ iii, opc, "\t$Rd, $Rn, $imm",
+ [(set GPR:$Rd, CPSR, (opnode so_imm:$imm, GPR:$Rn))]>;
+
+ def rr : AsI1<opcod, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm), DPFrm,
+ iir, opc, "\t$Rd, $Rn, $Rm",
+ [/* pattern left blank */]>;
+
+ def rsi : AsI1<opcod, (outs GPR:$Rd),
+ (ins GPR:$Rn, so_reg_imm:$shift), DPSoRegImmFrm,
+ iis, opc, "\t$Rd, $Rn, $shift",
+ [(set GPR:$Rd, CPSR, (opnode so_reg_imm:$shift, GPR:$Rn))]>;
+
+ def rsr : AsI1<opcod, (outs GPR:$Rd),
+ (ins GPR:$Rn, so_reg_reg:$shift), DPSoRegRegFrm,
+ iis, opc, "\t$Rd, $Rn, $shift",
+ [(set GPR:$Rd, CPSR, (opnode so_reg_reg:$shift, GPR:$Rn))]> {
+ bits<4> Rd;
+ bits<4> Rn;
+ bits<12> shift;
+ let Inst{25} = 0;
+ let Inst{19-16} = Rn;
+ let Inst{15-12} = Rd;
+ let Inst{11-8} = shift{11-8};
+ let Inst{7} = 0;
+ let Inst{6-5} = shift{6-5};
+ let Inst{4} = 1;
+ let Inst{3-0} = shift{3-0};
+ }
+}
+}
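+
+// Illustrative sketch (opcode names assumed from the rest of the backend,
+// not defined in this hunk): after ISel, AdjustInstrPostInstrSelection
+// rewrites an s-setting pseudo such as SUBSri to the plain SUBri opcode and
+// binds its optional cc_out operand to CPSR.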
+
+/// AsI1_bin_s_irs - Same as AsI1_bin_irs except it sets the 's' bit by default.
+///
+/// These opcodes will be converted to the real non-S opcodes by
+/// AdjustInstrPostInstrSelection after giving them an optional CPSR operand.
+let hasPostISelHook = 1, isCodeGenOnly = 1, isPseudo = 1, Defs = [CPSR] in {
+multiclass AsI1_bin_s_irs<bits<4> opcod, string opc,
+ InstrItinClass iii, InstrItinClass iir, InstrItinClass iis,
+ PatFrag opnode, bit Commutable = 0> {
+ def ri : AsI1<opcod, (outs GPR:$Rd), (ins GPR:$Rn, so_imm:$imm), DPFrm,
+ iii, opc, "\t$Rd, $Rn, $imm",
+ [(set GPR:$Rd, CPSR, (opnode GPR:$Rn, so_imm:$imm))]>;
+ def rr : AsI1<opcod, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm), DPFrm,
+ iir, opc, "\t$Rd, $Rn, $Rm",
+ [(set GPR:$Rd, CPSR, (opnode GPR:$Rn, GPR:$Rm))]>;
+ def rsi : AsI1<opcod, (outs GPR:$Rd),
+ (ins GPR:$Rn, so_reg_imm:$shift), DPSoRegImmFrm,
+ iis, opc, "\t$Rd, $Rn, $shift",
+ [(set GPR:$Rd, CPSR, (opnode GPR:$Rn, so_reg_imm:$shift))]>;
+
+ def rsr : AsI1<opcod, (outs GPR:$Rd),
+ (ins GPR:$Rn, so_reg_reg:$shift), DPSoRegRegFrm,
+ iis, opc, "\t$Rd, $Rn, $shift",
+ [(set GPR:$Rd, CPSR, (opnode GPR:$Rn, so_reg_reg:$shift))]>;
}
}
@@ -857,128 +1138,190 @@ multiclass AI1_cmp_irs<bits<4> opcod, string opc,
let Inst{11-4} = 0b00000000;
let Inst{3-0} = Rm;
}
- def rs : AI1<opcod, (outs), (ins GPR:$Rn, so_reg:$shift), DPSoRegFrm, iis,
+ def rsi : AI1<opcod, (outs),
+ (ins GPR:$Rn, so_reg_imm:$shift), DPSoRegImmFrm, iis,
+ opc, "\t$Rn, $shift",
+ [(opnode GPR:$Rn, so_reg_imm:$shift)]> {
+ bits<4> Rn;
+ bits<12> shift;
+ let Inst{25} = 0;
+ let Inst{20} = 1;
+ let Inst{19-16} = Rn;
+ let Inst{15-12} = 0b0000;
+ let Inst{11-5} = shift{11-5};
+ let Inst{4} = 0;
+ let Inst{3-0} = shift{3-0};
+ }
+ def rsr : AI1<opcod, (outs),
+ (ins GPR:$Rn, so_reg_reg:$shift), DPSoRegRegFrm, iis,
opc, "\t$Rn, $shift",
- [(opnode GPR:$Rn, so_reg:$shift)]> {
+ [(opnode GPR:$Rn, so_reg_reg:$shift)]> {
bits<4> Rn;
bits<12> shift;
let Inst{25} = 0;
let Inst{20} = 1;
let Inst{19-16} = Rn;
let Inst{15-12} = 0b0000;
- let Inst{11-0} = shift;
+ let Inst{11-8} = shift{11-8};
+ let Inst{7} = 0;
+ let Inst{6-5} = shift{6-5};
+ let Inst{4} = 1;
+ let Inst{3-0} = shift{3-0};
}
+
}
}
/// AI_ext_rrot - A unary operation with two forms: one whose operand is a
/// register and one whose operand is a register rotated by 8/16/24.
/// FIXME: Remove the 'r' variant. Its rot_imm is zero.
-multiclass AI_ext_rrot<bits<8> opcod, string opc, PatFrag opnode> {
- def r : AExtI<opcod, (outs GPR:$Rd), (ins GPR:$Rm),
- IIC_iEXTr, opc, "\t$Rd, $Rm",
- [(set GPR:$Rd, (opnode GPR:$Rm))]>,
- Requires<[IsARM, HasV6]> {
- bits<4> Rd;
- bits<4> Rm;
- let Inst{19-16} = 0b1111;
- let Inst{15-12} = Rd;
- let Inst{11-10} = 0b00;
- let Inst{3-0} = Rm;
- }
- def r_rot : AExtI<opcod, (outs GPR:$Rd), (ins GPR:$Rm, rot_imm:$rot),
- IIC_iEXTr, opc, "\t$Rd, $Rm, ror $rot",
- [(set GPR:$Rd, (opnode (rotr GPR:$Rm, rot_imm:$rot)))]>,
- Requires<[IsARM, HasV6]> {
- bits<4> Rd;
- bits<4> Rm;
- bits<2> rot;
- let Inst{19-16} = 0b1111;
- let Inst{15-12} = Rd;
- let Inst{11-10} = rot;
- let Inst{3-0} = Rm;
- }
+class AI_ext_rrot<bits<8> opcod, string opc, PatFrag opnode>
+ : AExtI<opcod, (outs GPRnopc:$Rd), (ins GPRnopc:$Rm, rot_imm:$rot),
+ IIC_iEXTr, opc, "\t$Rd, $Rm$rot",
+ [(set GPRnopc:$Rd, (opnode (rotr GPRnopc:$Rm, rot_imm:$rot)))]>,
+ Requires<[IsARM, HasV6]> {
+ bits<4> Rd;
+ bits<4> Rm;
+ bits<2> rot;
+ let Inst{19-16} = 0b1111;
+ let Inst{15-12} = Rd;
+ let Inst{11-10} = rot;
+ let Inst{3-0} = Rm;
}
-multiclass AI_ext_rrot_np<bits<8> opcod, string opc> {
- def r : AExtI<opcod, (outs GPR:$Rd), (ins GPR:$Rm),
- IIC_iEXTr, opc, "\t$Rd, $Rm",
- [/* For disassembly only; pattern left blank */]>,
- Requires<[IsARM, HasV6]> {
- let Inst{19-16} = 0b1111;
- let Inst{11-10} = 0b00;
- }
- def r_rot : AExtI<opcod, (outs GPR:$Rd), (ins GPR:$Rm, rot_imm:$rot),
- IIC_iEXTr, opc, "\t$Rd, $Rm, ror $rot",
- [/* For disassembly only; pattern left blank */]>,
- Requires<[IsARM, HasV6]> {
- bits<2> rot;
- let Inst{19-16} = 0b1111;
- let Inst{11-10} = rot;
- }
+class AI_ext_rrot_np<bits<8> opcod, string opc>
+ : AExtI<opcod, (outs GPRnopc:$Rd), (ins GPRnopc:$Rm, rot_imm:$rot),
+ IIC_iEXTr, opc, "\t$Rd, $Rm$rot", []>,
+ Requires<[IsARM, HasV6]> {
+ bits<2> rot;
+ let Inst{19-16} = 0b1111;
+ let Inst{11-10} = rot;
}
/// AI_exta_rrot - A binary operation with two forms: one whose operand is a
/// register and one whose operand is a register rotated by 8/16/24.
-multiclass AI_exta_rrot<bits<8> opcod, string opc, PatFrag opnode> {
- def rr : AExtI<opcod, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm),
- IIC_iEXTAr, opc, "\t$Rd, $Rn, $Rm",
- [(set GPR:$Rd, (opnode GPR:$Rn, GPR:$Rm))]>,
- Requires<[IsARM, HasV6]> {
+class AI_exta_rrot<bits<8> opcod, string opc, PatFrag opnode>
+ : AExtI<opcod, (outs GPRnopc:$Rd), (ins GPR:$Rn, GPRnopc:$Rm, rot_imm:$rot),
+ IIC_iEXTAr, opc, "\t$Rd, $Rn, $Rm$rot",
+ [(set GPRnopc:$Rd, (opnode GPR:$Rn,
+ (rotr GPRnopc:$Rm, rot_imm:$rot)))]>,
+ Requires<[IsARM, HasV6]> {
+ bits<4> Rd;
+ bits<4> Rm;
+ bits<4> Rn;
+ bits<2> rot;
+ let Inst{19-16} = Rn;
+ let Inst{15-12} = Rd;
+ let Inst{11-10} = rot;
+ let Inst{9-4} = 0b000111;
+ let Inst{3-0} = Rm;
+}
+
+class AI_exta_rrot_np<bits<8> opcod, string opc>
+ : AExtI<opcod, (outs GPRnopc:$Rd), (ins GPR:$Rn, GPRnopc:$Rm, rot_imm:$rot),
+ IIC_iEXTAr, opc, "\t$Rd, $Rn, $Rm$rot", []>,
+ Requires<[IsARM, HasV6]> {
+ bits<4> Rn;
+ bits<2> rot;
+ let Inst{19-16} = Rn;
+ let Inst{11-10} = rot;
+}
+
+/// AI1_adde_sube_irs - Define instructions and patterns for adde and sube.
+multiclass AI1_adde_sube_irs<bits<4> opcod, string opc, PatFrag opnode,
+ string baseOpc, bit Commutable = 0> {
+ let hasPostISelHook = 1, Defs = [CPSR], Uses = [CPSR] in {
+ def ri : AsI1<opcod, (outs GPR:$Rd), (ins GPR:$Rn, so_imm:$imm),
+ DPFrm, IIC_iALUi, opc, "\t$Rd, $Rn, $imm",
+ [(set GPR:$Rd, CPSR, (opnode GPR:$Rn, so_imm:$imm, CPSR))]>,
+ Requires<[IsARM]> {
bits<4> Rd;
- bits<4> Rm;
bits<4> Rn;
- let Inst{19-16} = Rn;
+ bits<12> imm;
+ let Inst{25} = 1;
let Inst{15-12} = Rd;
- let Inst{11-10} = 0b00;
- let Inst{9-4} = 0b000111;
- let Inst{3-0} = Rm;
- }
- def rr_rot : AExtI<opcod, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm,
- rot_imm:$rot),
- IIC_iEXTAr, opc, "\t$Rd, $Rn, $Rm, ror $rot",
- [(set GPR:$Rd, (opnode GPR:$Rn,
- (rotr GPR:$Rm, rot_imm:$rot)))]>,
- Requires<[IsARM, HasV6]> {
+ let Inst{19-16} = Rn;
+ let Inst{11-0} = imm;
+ }
+ def rr : AsI1<opcod, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm),
+ DPFrm, IIC_iALUr, opc, "\t$Rd, $Rn, $Rm",
+ [(set GPR:$Rd, CPSR, (opnode GPR:$Rn, GPR:$Rm, CPSR))]>,
+ Requires<[IsARM]> {
bits<4> Rd;
+ bits<4> Rn;
bits<4> Rm;
+ let Inst{11-4} = 0b00000000;
+ let Inst{25} = 0;
+ let isCommutable = Commutable;
+ let Inst{3-0} = Rm;
+ let Inst{15-12} = Rd;
+ let Inst{19-16} = Rn;
+ }
+ def rsi : AsI1<opcod, (outs GPR:$Rd),
+ (ins GPR:$Rn, so_reg_imm:$shift),
+ DPSoRegImmFrm, IIC_iALUsr, opc, "\t$Rd, $Rn, $shift",
+ [(set GPR:$Rd, CPSR, (opnode GPR:$Rn, so_reg_imm:$shift, CPSR))]>,
+ Requires<[IsARM]> {
+ bits<4> Rd;
bits<4> Rn;
- bits<2> rot;
+ bits<12> shift;
+ let Inst{25} = 0;
let Inst{19-16} = Rn;
let Inst{15-12} = Rd;
- let Inst{11-10} = rot;
- let Inst{9-4} = 0b000111;
- let Inst{3-0} = Rm;
+ let Inst{11-5} = shift{11-5};
+ let Inst{4} = 0;
+ let Inst{3-0} = shift{3-0};
}
-}
-
-// For disassembly only.
-multiclass AI_exta_rrot_np<bits<8> opcod, string opc> {
- def rr : AExtI<opcod, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm),
- IIC_iEXTAr, opc, "\t$Rd, $Rn, $Rm",
- [/* For disassembly only; pattern left blank */]>,
- Requires<[IsARM, HasV6]> {
- let Inst{11-10} = 0b00;
- }
- def rr_rot : AExtI<opcod, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm,
- rot_imm:$rot),
- IIC_iEXTAr, opc, "\t$Rd, $Rn, $Rm, ror $rot",
- [/* For disassembly only; pattern left blank */]>,
- Requires<[IsARM, HasV6]> {
+ def rsr : AsI1<opcod, (outs GPR:$Rd),
+ (ins GPR:$Rn, so_reg_reg:$shift),
+ DPSoRegRegFrm, IIC_iALUsr, opc, "\t$Rd, $Rn, $shift",
+ [(set GPR:$Rd, CPSR, (opnode GPR:$Rn, so_reg_reg:$shift, CPSR))]>,
+ Requires<[IsARM]> {
+ bits<4> Rd;
bits<4> Rn;
- bits<2> rot;
+ bits<12> shift;
+ let Inst{25} = 0;
let Inst{19-16} = Rn;
- let Inst{11-10} = rot;
+ let Inst{15-12} = Rd;
+ let Inst{11-8} = shift{11-8};
+ let Inst{7} = 0;
+ let Inst{6-5} = shift{6-5};
+ let Inst{4} = 1;
+ let Inst{3-0} = shift{3-0};
+ }
}
+
+ // Assembly aliases for optional destination operand when it's the same
+ // as the source operand.
+ def : InstAlias<!strconcat(opc, "${s}${p} $Rdn, $imm"),
+ (!cast<Instruction>(!strconcat(baseOpc, "ri")) GPR:$Rdn, GPR:$Rdn,
+ so_imm:$imm, pred:$p,
+ cc_out:$s)>,
+ Requires<[IsARM]>;
+ def : InstAlias<!strconcat(opc, "${s}${p} $Rdn, $Rm"),
+ (!cast<Instruction>(!strconcat(baseOpc, "rr")) GPR:$Rdn, GPR:$Rdn,
+ GPR:$Rm, pred:$p,
+ cc_out:$s)>,
+ Requires<[IsARM]>;
+ def : InstAlias<!strconcat(opc, "${s}${p} $Rdn, $shift"),
+ (!cast<Instruction>(!strconcat(baseOpc, "rsi")) GPR:$Rdn, GPR:$Rdn,
+ so_reg_imm:$shift, pred:$p,
+ cc_out:$s)>,
+ Requires<[IsARM]>;
+ def : InstAlias<!strconcat(opc, "${s}${p} $Rdn, $shift"),
+ (!cast<Instruction>(!strconcat(baseOpc, "rsr")) GPR:$Rdn, GPR:$Rdn,
+ so_reg_reg:$shift, pred:$p,
+ cc_out:$s)>,
+ Requires<[IsARM]>;
}
-/// AI1_adde_sube_irs - Define instructions and patterns for adde and sube.
-multiclass AI1_adde_sube_irs<bits<4> opcod, string opc, PatFrag opnode,
- string baseOpc, bit Commutable = 0> {
- let Uses = [CPSR] in {
+/// AI1_rsc_irs - Define instructions and patterns for rsc.
+multiclass AI1_rsc_irs<bits<4> opcod, string opc, PatFrag opnode,
+ string baseOpc> {
+ let hasPostISelHook = 1, Defs = [CPSR], Uses = [CPSR] in {
def ri : AsI1<opcod, (outs GPR:$Rd), (ins GPR:$Rn, so_imm:$imm),
DPFrm, IIC_iALUi, opc, "\t$Rd, $Rn, $imm",
- [(set GPR:$Rd, (opnode GPR:$Rn, so_imm:$imm))]>,
+ [(set GPR:$Rd, CPSR, (opnode so_imm:$imm, GPR:$Rn, CPSR))]>,
Requires<[IsARM]> {
bits<4> Rd;
bits<4> Rn;
@@ -990,31 +1333,48 @@ multiclass AI1_adde_sube_irs<bits<4> opcod, string opc, PatFrag opnode,
}
def rr : AsI1<opcod, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm),
DPFrm, IIC_iALUr, opc, "\t$Rd, $Rn, $Rm",
- [(set GPR:$Rd, (opnode GPR:$Rn, GPR:$Rm))]>,
- Requires<[IsARM]> {
+ [/* pattern left blank */]> {
bits<4> Rd;
bits<4> Rn;
bits<4> Rm;
let Inst{11-4} = 0b00000000;
let Inst{25} = 0;
- let isCommutable = Commutable;
let Inst{3-0} = Rm;
let Inst{15-12} = Rd;
let Inst{19-16} = Rn;
}
- def rs : AsI1<opcod, (outs GPR:$Rd), (ins GPR:$Rn, so_reg:$shift),
- DPSoRegFrm, IIC_iALUsr, opc, "\t$Rd, $Rn, $shift",
- [(set GPR:$Rd, (opnode GPR:$Rn, so_reg:$shift))]>,
+ def rsi : AsI1<opcod, (outs GPR:$Rd), (ins GPR:$Rn, so_reg_imm:$shift),
+ DPSoRegImmFrm, IIC_iALUsr, opc, "\t$Rd, $Rn, $shift",
+ [(set GPR:$Rd, CPSR, (opnode so_reg_imm:$shift, GPR:$Rn, CPSR))]>,
Requires<[IsARM]> {
bits<4> Rd;
bits<4> Rn;
bits<12> shift;
let Inst{25} = 0;
- let Inst{11-0} = shift;
+ let Inst{19-16} = Rn;
let Inst{15-12} = Rd;
+ let Inst{11-5} = shift{11-5};
+ let Inst{4} = 0;
+ let Inst{3-0} = shift{3-0};
+ }
+ def rsr : AsI1<opcod, (outs GPR:$Rd), (ins GPR:$Rn, so_reg_reg:$shift),
+ DPSoRegRegFrm, IIC_iALUsr, opc, "\t$Rd, $Rn, $shift",
+ [(set GPR:$Rd, CPSR, (opnode so_reg_reg:$shift, GPR:$Rn, CPSR))]>,
+ Requires<[IsARM]> {
+ bits<4> Rd;
+ bits<4> Rn;
+ bits<12> shift;
+ let Inst{25} = 0;
let Inst{19-16} = Rn;
+ let Inst{15-12} = Rd;
+ let Inst{11-8} = shift{11-8};
+ let Inst{7} = 0;
+ let Inst{6-5} = shift{6-5};
+ let Inst{4} = 1;
+ let Inst{3-0} = shift{3-0};
}
}
+
// Assembly aliases for optional destination operand when it's the same
// as the source operand.
def : InstAlias<!strconcat(opc, "${s}${p} $Rdn, $imm"),
@@ -1028,28 +1388,15 @@ multiclass AI1_adde_sube_irs<bits<4> opcod, string opc, PatFrag opnode,
cc_out:$s)>,
Requires<[IsARM]>;
def : InstAlias<!strconcat(opc, "${s}${p} $Rdn, $shift"),
- (!cast<Instruction>(!strconcat(baseOpc, "rs")) GPR:$Rdn, GPR:$Rdn,
- so_reg:$shift, pred:$p,
+ (!cast<Instruction>(!strconcat(baseOpc, "rsi")) GPR:$Rdn, GPR:$Rdn,
+ so_reg_imm:$shift, pred:$p,
+ cc_out:$s)>,
+ Requires<[IsARM]>;
+ def : InstAlias<!strconcat(opc, "${s}${p} $Rdn, $shift"),
+ (!cast<Instruction>(!strconcat(baseOpc, "rsr")) GPR:$Rdn, GPR:$Rdn,
+ so_reg_reg:$shift, pred:$p,
cc_out:$s)>,
Requires<[IsARM]>;
-}
-
-// Carry setting variants
-// NOTE: CPSR def omitted because it will be handled by the custom inserter.
-let usesCustomInserter = 1 in {
-multiclass AI1_adde_sube_s_irs<PatFrag opnode, bit Commutable = 0> {
- def ri : ARMPseudoInst<(outs GPR:$Rd), (ins GPR:$Rn, so_imm:$imm),
- 4, IIC_iALUi,
- [(set GPR:$Rd, (opnode GPR:$Rn, so_imm:$imm))]>;
- def rr : ARMPseudoInst<(outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm),
- 4, IIC_iALUr,
- [(set GPR:$Rd, (opnode GPR:$Rn, GPR:$Rm))]> {
- let isCommutable = Commutable;
- }
- def rs : ARMPseudoInst<(outs GPR:$Rd), (ins GPR:$Rn, so_reg:$shift),
- 4, IIC_iALUsr,
- [(set GPR:$Rd, (opnode GPR:$Rn, so_reg:$shift))]>;
-}
}
let canFoldAsLoad = 1, isReMaterializable = 1 in {
@@ -1082,6 +1429,37 @@ multiclass AI_ldr1<bit isByte, string opc, InstrItinClass iii,
}
}
+let canFoldAsLoad = 1, isReMaterializable = 1 in {
+multiclass AI_ldr1nopc<bit isByte, string opc, InstrItinClass iii,
+ InstrItinClass iir, PatFrag opnode> {
+ // Note: We use the complex addrmode_imm12 rather than just an input
+ // GPR and a constrained immediate so that we can use this to match
+ // frame index references and avoid matching constant pool references.
+ def i12: AI2ldst<0b010, 1, isByte, (outs GPRnopc:$Rt), (ins addrmode_imm12:$addr),
+ AddrMode_i12, LdFrm, iii, opc, "\t$Rt, $addr",
+ [(set GPRnopc:$Rt, (opnode addrmode_imm12:$addr))]> {
+ bits<4> Rt;
+ bits<17> addr;
+ let Inst{23} = addr{12}; // U (add = ('U' == 1))
+ let Inst{19-16} = addr{16-13}; // Rn
+ let Inst{15-12} = Rt;
+ let Inst{11-0} = addr{11-0}; // imm12
+ }
+ def rs : AI2ldst<0b011, 1, isByte, (outs GPRnopc:$Rt), (ins ldst_so_reg:$shift),
+ AddrModeNone, LdFrm, iir, opc, "\t$Rt, $shift",
+ [(set GPRnopc:$Rt, (opnode ldst_so_reg:$shift))]> {
+ bits<4> Rt;
+ bits<17> shift;
+ let shift{4} = 0; // Inst{4} = 0
+ let Inst{23} = shift{12}; // U (add = ('U' == 1))
+ let Inst{19-16} = shift{16-13}; // Rn
+ let Inst{15-12} = Rt;
+ let Inst{11-0} = shift{11-0};
+ }
+}
+}
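+
+// e.g. a byte load of a stack local can be matched through addrmode_imm12,
+// letting the FrameIndex fold into the address (registers illustrative):
+//   ldrb r0, [sp, #12]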
+
+
multiclass AI_str1<bit isByte, string opc, InstrItinClass iii,
InstrItinClass iir, PatFrag opnode> {
// Note: We use the complex addrmode_imm12 rather than just an input
@@ -1110,6 +1488,37 @@ multiclass AI_str1<bit isByte, string opc, InstrItinClass iii,
let Inst{11-0} = shift{11-0};
}
}
+
+multiclass AI_str1nopc<bit isByte, string opc, InstrItinClass iii,
+ InstrItinClass iir, PatFrag opnode> {
+ // Note: We use the complex addrmode_imm12 rather than just an input
+ // GPR and a constrained immediate so that we can use this to match
+ // frame index references and avoid matching constant pool references.
+ def i12 : AI2ldst<0b010, 0, isByte, (outs),
+ (ins GPRnopc:$Rt, addrmode_imm12:$addr),
+ AddrMode_i12, StFrm, iii, opc, "\t$Rt, $addr",
+ [(opnode GPRnopc:$Rt, addrmode_imm12:$addr)]> {
+ bits<4> Rt;
+ bits<17> addr;
+ let Inst{23} = addr{12}; // U (add = ('U' == 1))
+ let Inst{19-16} = addr{16-13}; // Rn
+ let Inst{15-12} = Rt;
+ let Inst{11-0} = addr{11-0}; // imm12
+ }
+ def rs : AI2ldst<0b011, 0, isByte, (outs), (ins GPRnopc:$Rt, ldst_so_reg:$shift),
+ AddrModeNone, StFrm, iir, opc, "\t$Rt, $shift",
+ [(opnode GPRnopc:$Rt, ldst_so_reg:$shift)]> {
+ bits<4> Rt;
+ bits<17> shift;
+ let shift{4} = 0; // Inst{4} = 0
+ let Inst{23} = shift{12}; // U (add = ('U' == 1))
+ let Inst{19-16} = shift{16-13}; // Rn
+ let Inst{15-12} = Rt;
+ let Inst{11-0} = shift{11-0};
+ }
+}
+
+
//===----------------------------------------------------------------------===//
// Instructions
//===----------------------------------------------------------------------===//
@@ -1140,42 +1549,66 @@ PseudoInst<(outs), (ins i32imm:$amt, pred:$p), NoItinerary,
[(ARMcallseq_start timm:$amt)]>;
}
-def NOP : AI<(outs), (ins), MiscFrm, NoItinerary, "nop", "",
- [/* For disassembly only; pattern left blank */]>,
+// Atomic pseudo-insts which will be lowered to ldrexd/strexd loops.
+// (These pseudos use hand-written selection code.)
+let usesCustomInserter = 1, Defs = [CPSR], mayLoad = 1, mayStore = 1 in {
+def ATOMOR6432 : PseudoInst<(outs GPR:$dst1, GPR:$dst2),
+ (ins GPR:$addr, GPR:$src1, GPR:$src2),
+ NoItinerary, []>;
+def ATOMXOR6432 : PseudoInst<(outs GPR:$dst1, GPR:$dst2),
+ (ins GPR:$addr, GPR:$src1, GPR:$src2),
+ NoItinerary, []>;
+def ATOMADD6432 : PseudoInst<(outs GPR:$dst1, GPR:$dst2),
+ (ins GPR:$addr, GPR:$src1, GPR:$src2),
+ NoItinerary, []>;
+def ATOMSUB6432 : PseudoInst<(outs GPR:$dst1, GPR:$dst2),
+ (ins GPR:$addr, GPR:$src1, GPR:$src2),
+ NoItinerary, []>;
+def ATOMNAND6432 : PseudoInst<(outs GPR:$dst1, GPR:$dst2),
+ (ins GPR:$addr, GPR:$src1, GPR:$src2),
+ NoItinerary, []>;
+def ATOMAND6432 : PseudoInst<(outs GPR:$dst1, GPR:$dst2),
+ (ins GPR:$addr, GPR:$src1, GPR:$src2),
+ NoItinerary, []>;
+def ATOMSWAP6432 : PseudoInst<(outs GPR:$dst1, GPR:$dst2),
+ (ins GPR:$addr, GPR:$src1, GPR:$src2),
+ NoItinerary, []>;
+def ATOMCMPXCHG6432 : PseudoInst<(outs GPR:$dst1, GPR:$dst2),
+ (ins GPR:$addr, GPR:$cmp1, GPR:$cmp2,
+ GPR:$set1, GPR:$set2),
+ NoItinerary, []>;
+}
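+
+// A sketch of the loop each pseudo is lowered to (the real expansion is
+// emitted by the custom inserter; register choices are illustrative):
+//   loop:
+//     ldrexd r2, r3, [r0]      @ load-exclusive the 64-bit value
+//     <op>   r2, r3, r4, r5    @ apply the operation to the pair
+//     strexd r6, r2, r3, [r0]  @ attempt the store-exclusive
+//     cmp    r6, #0
+//     bne    loop              @ retry if the exclusive store failed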
+
+def NOP : AI<(outs), (ins), MiscFrm, NoItinerary, "nop", "", []>,
Requires<[IsARM, HasV6T2]> {
let Inst{27-16} = 0b001100100000;
let Inst{15-8} = 0b11110000;
let Inst{7-0} = 0b00000000;
}
-def YIELD : AI<(outs), (ins), MiscFrm, NoItinerary, "yield", "",
- [/* For disassembly only; pattern left blank */]>,
+def YIELD : AI<(outs), (ins), MiscFrm, NoItinerary, "yield", "", []>,
Requires<[IsARM, HasV6T2]> {
let Inst{27-16} = 0b001100100000;
let Inst{15-8} = 0b11110000;
let Inst{7-0} = 0b00000001;
}
-def WFE : AI<(outs), (ins), MiscFrm, NoItinerary, "wfe", "",
- [/* For disassembly only; pattern left blank */]>,
+def WFE : AI<(outs), (ins), MiscFrm, NoItinerary, "wfe", "", []>,
Requires<[IsARM, HasV6T2]> {
let Inst{27-16} = 0b001100100000;
let Inst{15-8} = 0b11110000;
let Inst{7-0} = 0b00000010;
}
-def WFI : AI<(outs), (ins), MiscFrm, NoItinerary, "wfi", "",
- [/* For disassembly only; pattern left blank */]>,
+def WFI : AI<(outs), (ins), MiscFrm, NoItinerary, "wfi", "", []>,
Requires<[IsARM, HasV6T2]> {
let Inst{27-16} = 0b001100100000;
let Inst{15-8} = 0b11110000;
let Inst{7-0} = 0b00000011;
}
-def SEL : AI<(outs GPR:$dst), (ins GPR:$a, GPR:$b), DPFrm, NoItinerary, "sel",
- "\t$dst, $a, $b",
- [/* For disassembly only; pattern left blank */]>,
- Requires<[IsARM, HasV6]> {
+def SEL : AI<(outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm), DPFrm, NoItinerary, "sel",
+ "\t$Rd, $Rn, $Rm", []>, Requires<[IsARM, HasV6]> {
bits<4> Rd;
bits<4> Rn;
bits<4> Rm;
@@ -1188,8 +1621,7 @@ def SEL : AI<(outs GPR:$dst), (ins GPR:$a, GPR:$b), DPFrm, NoItinerary, "sel",
}
def SEV : AI<(outs), (ins), MiscFrm, NoItinerary, "sev", "",
- [/* For disassembly only; pattern left blank */]>,
- Requires<[IsARM, HasV6T2]> {
+ []>, Requires<[IsARM, HasV6T2]> {
let Inst{27-16} = 0b001100100000;
let Inst{15-8} = 0b11110000;
let Inst{7-0} = 0b00000100;
@@ -1206,14 +1638,11 @@ def BKPT : AI<(outs), (ins imm0_65535:$val), MiscFrm, NoItinerary,
let Inst{7-4} = 0b0111;
}
-// Change Processor State is a system instruction -- for disassembly and
-// parsing only.
-// FIXME: Since the asm parser has currently no clean way to handle optional
-// operands, create 3 versions of the same instruction. Once there's a clean
-// framework to represent optional operands, change this behavior.
+// Change Processor State
+// FIXME: We should use InstAlias to handle the optional operands.
class CPS<dag iops, string asm_ops>
: AXI<(outs), iops, MiscFrm, NoItinerary, !strconcat("cps", asm_ops),
- [/* For disassembly only; pattern left blank */]>, Requires<[IsARM]> {
+ []>, Requires<[IsARM]> {
bits<2> imod;
bits<3> iflags;
bits<5> mode;
@@ -1229,17 +1658,18 @@ class CPS<dag iops, string asm_ops>
let Inst{4-0} = mode;
}
+let DecoderMethod = "DecodeCPSInstruction" in {
let M = 1 in
- def CPS3p : CPS<(ins imod_op:$imod, iflags_op:$iflags, i32imm:$mode),
+ def CPS3p : CPS<(ins imod_op:$imod, iflags_op:$iflags, imm0_31:$mode),
"$imod\t$iflags, $mode">;
let mode = 0, M = 0 in
def CPS2p : CPS<(ins imod_op:$imod, iflags_op:$iflags), "$imod\t$iflags">;
let imod = 0, iflags = 0, M = 1 in
- def CPS1p : CPS<(ins i32imm:$mode), "\t$mode">;
+ def CPS1p : CPS<(ins imm0_31:$mode), "\t$mode">;
+}
// Preload signals the memory system of possible future data/instruction access.
-// These are for disassembly only.
multiclass APreLoad<bits<1> read, bits<1> data, string opc> {
def i12 : AXI<(outs), (ins addrmode_imm12:$addr), MiscFrm, IIC_Preload,
@@ -1271,6 +1701,7 @@ multiclass APreLoad<bits<1> read, bits<1> data, string opc> {
let Inst{19-16} = shift{16-13}; // Rn
let Inst{15-12} = 0b1111;
let Inst{11-0} = shift{11-0};
+ let Inst{4} = 0;
}
}
@@ -1278,10 +1709,8 @@ defm PLD : APreLoad<1, 1, "pld">, Requires<[IsARM]>;
defm PLDW : APreLoad<0, 1, "pldw">, Requires<[IsARM,HasV7,HasMP]>;
defm PLI : APreLoad<1, 0, "pli">, Requires<[IsARM,HasV7]>;
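+
+// Usage examples (standard ARM assembly syntax):
+//   pld  [r1, #128]   @ preload data, for read
+//   pldw [r1, #128]   @ preload data, for write (v7, MP extensions)
+//   pli  [r1, #64]    @ preload instruction stream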
-def SETEND : AXI<(outs),(ins setend_op:$end), MiscFrm, NoItinerary,
- "setend\t$end",
- [/* For disassembly only; pattern left blank */]>,
- Requires<[IsARM]> {
+def SETEND : AXI<(outs), (ins setend_op:$end), MiscFrm, NoItinerary,
+ "setend\t$end", []>, Requires<[IsARM]> {
bits<1> end;
let Inst{31-10} = 0b1111000100000001000000;
let Inst{9} = end;
@@ -1351,14 +1780,17 @@ let neverHasSideEffects = 1, isReMaterializable = 1 in
// the instruction. The {24-21} opcode bits are set by the fixup, as we don't
// know until then which form of the instruction will be used.
def ADR : AI1<{0,?,?,0}, (outs GPR:$Rd), (ins adrlabel:$label),
- MiscFrm, IIC_iALUi, "adr", "\t$Rd, #$label", []> {
+ MiscFrm, IIC_iALUi, "adr", "\t$Rd, $label", []> {
bits<4> Rd;
- bits<12> label;
+ bits<14> label;
let Inst{27-25} = 0b001;
+ let Inst{24} = 0;
+ let Inst{23-22} = label{13-12};
+ let Inst{21} = 0;
let Inst{20} = 0;
let Inst{19-16} = 0b1111;
let Inst{15-12} = Rd;
- let Inst{11-0} = label;
+ let Inst{11-0} = label{11-0};
}
def LEApcrel : ARMPseudoInst<(outs GPR:$Rd), (ins i32imm:$label, pred:$p),
4, IIC_iALUi, []>;
@@ -1424,6 +1856,7 @@ let isCall = 1,
let Inst{31-28} = 0b1110;
bits<24> func;
let Inst{23-0} = func;
+ let DecoderMethod = "DecodeBranchImmInstruction";
}
def BL_pred : ABI<0b1011, (outs), (ins bl_target:$func, variable_ops),
@@ -1432,6 +1865,7 @@ let isCall = 1,
Requires<[IsARM, IsNotDarwin]> {
bits<24> func;
let Inst{23-0} = func;
+ let DecoderMethod = "DecodeBranchImmInstruction";
}
// ARMv5T and above
@@ -1516,6 +1950,7 @@ let isBranch = 1, isTerminator = 1 in {
[/*(ARMbrcond bb:$target, imm:$cc, CCR:$ccr)*/]> {
bits<24> target;
let Inst{23-0} = target;
+ let DecoderMethod = "DecodeBranchImmInstruction";
}
let isBarrier = 1 in {
@@ -1549,9 +1984,9 @@ let isBranch = 1, isTerminator = 1 in {
}
-// BLX (immediate) -- for disassembly only
-def BLXi : AXI<(outs), (ins br_target:$target), BrMiscFrm, NoItinerary,
- "blx\t$target", [/* pattern left blank */]>,
+// BLX (immediate)
+def BLXi : AXI<(outs), (ins blx_target:$target), BrMiscFrm, NoItinerary,
+ "blx\t$target", []>,
Requires<[IsARM, HasV5T]> {
let Inst{31-25} = 0b1111101;
bits<25> target;
@@ -1614,64 +2049,100 @@ let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in {
}
}
-
-
-
-
-// Secure Monitor Call is a system instruction -- for disassembly only
-def SMC : ABI<0b0001, (outs), (ins i32imm:$opt), NoItinerary, "smc", "\t$opt",
- [/* For disassembly only; pattern left blank */]> {
+// Secure Monitor Call is a system instruction.
+def SMC : ABI<0b0001, (outs), (ins imm0_15:$opt), NoItinerary, "smc", "\t$opt",
+ []> {
bits<4> opt;
let Inst{23-4} = 0b01100000000000000111;
let Inst{3-0} = opt;
}
-// Supervisor Call (Software Interrupt) -- for disassembly only
+// Supervisor Call (Software Interrupt)
let isCall = 1, Uses = [SP] in {
-def SVC : ABI<0b1111, (outs), (ins i32imm:$svc), IIC_Br, "svc", "\t$svc",
- [/* For disassembly only; pattern left blank */]> {
+def SVC : ABI<0b1111, (outs), (ins imm24b:$svc), IIC_Br, "svc", "\t$svc", []> {
bits<24> svc;
let Inst{23-0} = svc;
}
}
-// Store Return State is a system instruction -- for disassembly only
-let isCodeGenOnly = 1 in { // FIXME: This should not use submode!
-def SRSW : ABXI<{1,0,0,?}, (outs), (ins ldstm_mode:$amode, i32imm:$mode),
- NoItinerary, "srs${amode}\tsp!, $mode",
- [/* For disassembly only; pattern left blank */]> {
+// Store Return State
+class SRSI<bit wb, string asm>
+ : XI<(outs), (ins imm0_31:$mode), AddrModeNone, 4, IndexModeNone, BrFrm,
+ NoItinerary, asm, "", []> {
+ bits<5> mode;
let Inst{31-28} = 0b1111;
- let Inst{22-20} = 0b110; // W = 1
- let Inst{19-8} = 0xd05;
- let Inst{7-5} = 0b000;
+ let Inst{27-25} = 0b100;
+ let Inst{22} = 1;
+ let Inst{21} = wb;
+ let Inst{20} = 0;
+ let Inst{19-16} = 0b1101; // SP
+ let Inst{15-5} = 0b00000101000;
+ let Inst{4-0} = mode;
}
-def SRS : ABXI<{1,0,0,?}, (outs), (ins ldstm_mode:$amode, i32imm:$mode),
- NoItinerary, "srs${amode}\tsp, $mode",
- [/* For disassembly only; pattern left blank */]> {
- let Inst{31-28} = 0b1111;
- let Inst{22-20} = 0b100; // W = 0
- let Inst{19-8} = 0xd05;
- let Inst{7-5} = 0b000;
+def SRSDA : SRSI<0, "srsda\tsp, $mode"> {
+ let Inst{24-23} = 0;
+}
+def SRSDA_UPD : SRSI<1, "srsda\tsp!, $mode"> {
+ let Inst{24-23} = 0;
+}
+def SRSDB : SRSI<0, "srsdb\tsp, $mode"> {
+ let Inst{24-23} = 0b10;
+}
+def SRSDB_UPD : SRSI<1, "srsdb\tsp!, $mode"> {
+ let Inst{24-23} = 0b10;
+}
+def SRSIA : SRSI<0, "srsia\tsp, $mode"> {
+ let Inst{24-23} = 0b01;
+}
+def SRSIA_UPD : SRSI<1, "srsia\tsp!, $mode"> {
+ let Inst{24-23} = 0b01;
+}
+def SRSIB : SRSI<0, "srsib\tsp, $mode"> {
+ let Inst{24-23} = 0b11;
+}
+def SRSIB_UPD : SRSI<1, "srsib\tsp!, $mode"> {
+ let Inst{24-23} = 0b11;
}
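+
+// e.g. (standard ARM syntax; mode 19 = Supervisor):
+//   srsdb sp!, #19   @ store return state, decrement-before, with writeback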
-// Return From Exception is a system instruction -- for disassembly only
-def RFEW : ABXI<{1,0,0,?}, (outs), (ins ldstm_mode:$amode, GPR:$base),
- NoItinerary, "rfe${amode}\t$base!",
- [/* For disassembly only; pattern left blank */]> {
+// Return From Exception
+class RFEI<bit wb, string asm>
+ : XI<(outs), (ins GPR:$Rn), AddrModeNone, 4, IndexModeNone, BrFrm,
+ NoItinerary, asm, "", []> {
+ bits<4> Rn;
let Inst{31-28} = 0b1111;
- let Inst{22-20} = 0b011; // W = 1
- let Inst{15-0} = 0x0a00;
+ let Inst{27-25} = 0b100;
+ let Inst{22} = 0;
+ let Inst{21} = wb;
+ let Inst{20} = 1;
+ let Inst{19-16} = Rn;
+ let Inst{15-0} = 0xa00;
}
-def RFE : ABXI<{1,0,0,?}, (outs), (ins ldstm_mode:$amode, GPR:$base),
- NoItinerary, "rfe${amode}\t$base",
- [/* For disassembly only; pattern left blank */]> {
- let Inst{31-28} = 0b1111;
- let Inst{22-20} = 0b001; // W = 0
- let Inst{15-0} = 0x0a00;
+def RFEDA : RFEI<0, "rfeda\t$Rn"> {
+ let Inst{24-23} = 0;
+}
+def RFEDA_UPD : RFEI<1, "rfeda\t$Rn!"> {
+ let Inst{24-23} = 0;
+}
+def RFEDB : RFEI<0, "rfedb\t$Rn"> {
+ let Inst{24-23} = 0b10;
+}
+def RFEDB_UPD : RFEI<1, "rfedb\t$Rn!"> {
+ let Inst{24-23} = 0b10;
+}
+def RFEIA : RFEI<0, "rfeia\t$Rn"> {
+ let Inst{24-23} = 0b01;
+}
+def RFEIA_UPD : RFEI<1, "rfeia\t$Rn!"> {
+ let Inst{24-23} = 0b01;
+}
+def RFEIB : RFEI<0, "rfeib\t$Rn"> {
+ let Inst{24-23} = 0b11;
+}
+def RFEIB_UPD : RFEI<1, "rfeib\t$Rn!"> {
+ let Inst{24-23} = 0b11;
}
-} // isCodeGenOnly = 1
//===----------------------------------------------------------------------===//
// Load / store Instructions.
@@ -1682,16 +2153,16 @@ def RFE : ABXI<{1,0,0,?}, (outs), (ins ldstm_mode:$amode, GPR:$base),
defm LDR : AI_ldr1<0, "ldr", IIC_iLoad_r, IIC_iLoad_si,
UnOpFrag<(load node:$Src)>>;
-defm LDRB : AI_ldr1<1, "ldrb", IIC_iLoad_bh_r, IIC_iLoad_bh_si,
+defm LDRB : AI_ldr1nopc<1, "ldrb", IIC_iLoad_bh_r, IIC_iLoad_bh_si,
UnOpFrag<(zextloadi8 node:$Src)>>;
defm STR : AI_str1<0, "str", IIC_iStore_r, IIC_iStore_si,
BinOpFrag<(store node:$LHS, node:$RHS)>>;
-defm STRB : AI_str1<1, "strb", IIC_iStore_bh_r, IIC_iStore_bh_si,
+defm STRB : AI_str1nopc<1, "strb", IIC_iStore_bh_r, IIC_iStore_bh_si,
BinOpFrag<(truncstorei8 node:$LHS, node:$RHS)>>;
// Special LDR for loads from non-pc-relative constpools.
let canFoldAsLoad = 1, mayLoad = 1, neverHasSideEffects = 1,
- isReMaterializable = 1 in
+ isReMaterializable = 1, isCodeGenOnly = 1 in
def LDRcp : AI2ldst<0b010, 1, 0, (outs GPR:$Rt), (ins addrmode_imm12:$addr),
AddrMode_i12, LdFrm, IIC_iLoad_r, "ldr", "\t$Rt, $addr",
[]> {
@@ -1727,34 +2198,65 @@ def LDRD : AI3ld<0b1101, 0, (outs GPR:$Rd, GPR:$dst2),
// Indexed loads
multiclass AI2_ldridx<bit isByte, string opc, InstrItinClass itin> {
- def _PRE : AI2ldstidx<1, isByte, 1, (outs GPR:$Rt, GPR:$Rn_wb),
- (ins addrmode2:$addr), IndexModePre, LdFrm, itin,
+ def _PRE_IMM : AI2ldstidx<1, isByte, 1, (outs GPR:$Rt, GPR:$Rn_wb),
+ (ins addrmode_imm12:$addr), IndexModePre, LdFrm, itin,
opc, "\t$Rt, $addr!", "$addr.base = $Rn_wb", []> {
- // {17-14} Rn
- // {13} 1 == Rm, 0 == imm12
- // {12} isAdd
- // {11-0} imm12/Rm
- bits<18> addr;
- let Inst{25} = addr{13};
+ bits<17> addr;
+ let Inst{25} = 0;
+ let Inst{23} = addr{12};
+ let Inst{19-16} = addr{16-13};
+ let Inst{11-0} = addr{11-0};
+ let DecoderMethod = "DecodeLDRPreImm";
+ let AsmMatchConverter = "cvtLdWriteBackRegAddrModeImm12";
+ }
+
+ def _PRE_REG : AI2ldstidx<1, isByte, 1, (outs GPR:$Rt, GPR:$Rn_wb),
+ (ins ldst_so_reg:$addr), IndexModePre, LdFrm, itin,
+ opc, "\t$Rt, $addr!", "$addr.base = $Rn_wb", []> {
+ bits<17> addr;
+ let Inst{25} = 1;
let Inst{23} = addr{12};
- let Inst{19-16} = addr{17-14};
+ let Inst{19-16} = addr{16-13};
let Inst{11-0} = addr{11-0};
- let AsmMatchConverter = "CvtLdWriteBackRegAddrMode2";
+ let Inst{4} = 0;
+ let DecoderMethod = "DecodeLDRPreReg";
+ let AsmMatchConverter = "cvtLdWriteBackRegAddrMode2";
}
- def _POST : AI2ldstidx<1, isByte, 0, (outs GPR:$Rt, GPR:$Rn_wb),
- (ins GPR:$Rn, am2offset:$offset),
+
+ def _POST_REG : AI2ldstidx<1, isByte, 0, (outs GPR:$Rt, GPR:$Rn_wb),
+ (ins addr_offset_none:$addr, am2offset_reg:$offset),
+ IndexModePost, LdFrm, itin,
+ opc, "\t$Rt, $addr, $offset",
+ "$addr.base = $Rn_wb", []> {
+ // {12} isAdd
+ // {11-0} imm12/Rm
+ bits<14> offset;
+ bits<4> addr;
+ let Inst{25} = 1;
+ let Inst{23} = offset{12};
+ let Inst{19-16} = addr;
+ let Inst{11-0} = offset{11-0};
+
+ let DecoderMethod = "DecodeAddrMode2IdxInstruction";
+ }
+
+ def _POST_IMM : AI2ldstidx<1, isByte, 0, (outs GPR:$Rt, GPR:$Rn_wb),
+ (ins addr_offset_none:$addr, am2offset_imm:$offset),
IndexModePost, LdFrm, itin,
- opc, "\t$Rt, [$Rn], $offset", "$Rn = $Rn_wb", []> {
- // {13} 1 == Rm, 0 == imm12
+ opc, "\t$Rt, $addr, $offset",
+ "$addr.base = $Rn_wb", []> {
// {12} isAdd
// {11-0} imm12/Rm
bits<14> offset;
- bits<4> Rn;
- let Inst{25} = offset{13};
+ bits<4> addr;
+ let Inst{25} = 0;
let Inst{23} = offset{12};
- let Inst{19-16} = Rn;
+ let Inst{19-16} = addr;
let Inst{11-0} = offset{11-0};
+
+ let DecoderMethod = "DecodeAddrMode2IdxInstruction";
}
+
}
let mayLoad = 1, neverHasSideEffects = 1 in {
@@ -1762,8 +2264,8 @@ defm LDR : AI2_ldridx<0, "ldr", IIC_iLoad_ru>;
defm LDRB : AI2_ldridx<1, "ldrb", IIC_iLoad_bh_ru>;
}
-multiclass AI3_ldridx<bits<4> op, bit op20, string opc, InstrItinClass itin> {
- def _PRE : AI3ldstidx<op, op20, 1, 1, (outs GPR:$Rt, GPR:$Rn_wb),
+multiclass AI3_ldridx<bits<4> op, string opc, InstrItinClass itin> {
+ def _PRE : AI3ldstidx<op, 1, 1, (outs GPR:$Rt, GPR:$Rn_wb),
(ins addrmode3:$addr), IndexModePre,
LdMiscFrm, itin,
opc, "\t$Rt, $addr!", "$addr.base = $Rn_wb", []> {
@@ -1773,27 +2275,31 @@ multiclass AI3_ldridx<bits<4> op, bit op20, string opc, InstrItinClass itin> {
let Inst{19-16} = addr{12-9}; // Rn
let Inst{11-8} = addr{7-4}; // imm7_4/zero
let Inst{3-0} = addr{3-0}; // imm3_0/Rm
+ let AsmMatchConverter = "cvtLdWriteBackRegAddrMode3";
+ let DecoderMethod = "DecodeAddrMode3Instruction";
}
- def _POST : AI3ldstidx<op, op20, 1, 0, (outs GPR:$Rt, GPR:$Rn_wb),
- (ins GPR:$Rn, am3offset:$offset), IndexModePost,
- LdMiscFrm, itin,
- opc, "\t$Rt, [$Rn], $offset", "$Rn = $Rn_wb", []> {
+ def _POST : AI3ldstidx<op, 1, 0, (outs GPR:$Rt, GPR:$Rn_wb),
+ (ins addr_offset_none:$addr, am3offset:$offset),
+ IndexModePost, LdMiscFrm, itin,
+ opc, "\t$Rt, $addr, $offset", "$addr.base = $Rn_wb",
+ []> {
bits<10> offset;
- bits<4> Rn;
+ bits<4> addr;
let Inst{23} = offset{8}; // U bit
let Inst{22} = offset{9}; // 1 == imm8, 0 == Rm
- let Inst{19-16} = Rn;
+ let Inst{19-16} = addr;
let Inst{11-8} = offset{7-4}; // imm7_4/zero
let Inst{3-0} = offset{3-0}; // imm3_0/Rm
+ let DecoderMethod = "DecodeAddrMode3Instruction";
}
}
let mayLoad = 1, neverHasSideEffects = 1 in {
-defm LDRH : AI3_ldridx<0b1011, 1, "ldrh", IIC_iLoad_bh_ru>;
-defm LDRSH : AI3_ldridx<0b1111, 1, "ldrsh", IIC_iLoad_bh_ru>;
-defm LDRSB : AI3_ldridx<0b1101, 1, "ldrsb", IIC_iLoad_bh_ru>;
+defm LDRH : AI3_ldridx<0b1011, "ldrh", IIC_iLoad_bh_ru>;
+defm LDRSH : AI3_ldridx<0b1111, "ldrsh", IIC_iLoad_bh_ru>;
+defm LDRSB : AI3_ldridx<0b1101, "ldrsb", IIC_iLoad_bh_ru>;
let hasExtraDefRegAllocReq = 1 in {
-def LDRD_PRE : AI3ldstidx<0b1101, 0, 1, 1, (outs GPR:$Rt, GPR:$Rt2, GPR:$Rn_wb),
+def LDRD_PRE : AI3ldstidx<0b1101, 0, 1, (outs GPR:$Rt, GPR:$Rt2, GPR:$Rn_wb),
(ins addrmode3:$addr), IndexModePre,
LdMiscFrm, IIC_iLoad_d_ru,
"ldrd", "\t$Rt, $Rt2, $addr!",
@@ -1804,70 +2310,128 @@ def LDRD_PRE : AI3ldstidx<0b1101, 0, 1, 1, (outs GPR:$Rt, GPR:$Rt2, GPR:$Rn_wb),
let Inst{19-16} = addr{12-9}; // Rn
let Inst{11-8} = addr{7-4}; // imm7_4/zero
let Inst{3-0} = addr{3-0}; // imm3_0/Rm
+ let DecoderMethod = "DecodeAddrMode3Instruction";
+ let AsmMatchConverter = "cvtLdrdPre";
}
-def LDRD_POST: AI3ldstidx<0b1101, 0, 1, 0, (outs GPR:$Rt, GPR:$Rt2, GPR:$Rn_wb),
- (ins GPR:$Rn, am3offset:$offset), IndexModePost,
- LdMiscFrm, IIC_iLoad_d_ru,
- "ldrd", "\t$Rt, $Rt2, [$Rn], $offset",
- "$Rn = $Rn_wb", []> {
+def LDRD_POST: AI3ldstidx<0b1101, 0, 0, (outs GPR:$Rt, GPR:$Rt2, GPR:$Rn_wb),
+ (ins addr_offset_none:$addr, am3offset:$offset),
+ IndexModePost, LdMiscFrm, IIC_iLoad_d_ru,
+ "ldrd", "\t$Rt, $Rt2, $addr, $offset",
+ "$addr.base = $Rn_wb", []> {
bits<10> offset;
- bits<4> Rn;
+ bits<4> addr;
let Inst{23} = offset{8}; // U bit
let Inst{22} = offset{9}; // 1 == imm8, 0 == Rm
- let Inst{19-16} = Rn;
+ let Inst{19-16} = addr;
let Inst{11-8} = offset{7-4}; // imm7_4/zero
let Inst{3-0} = offset{3-0}; // imm3_0/Rm
+ let DecoderMethod = "DecodeAddrMode3Instruction";
}
} // hasExtraDefRegAllocReq = 1
} // mayLoad = 1, neverHasSideEffects = 1
-// LDRT, LDRBT, LDRSBT, LDRHT, LDRSHT are for disassembly only.
+// LDRT, LDRBT, LDRSBT, LDRHT, LDRSHT (unprivileged loads).
let mayLoad = 1, neverHasSideEffects = 1 in {
-def LDRT : AI2ldstidx<1, 0, 0, (outs GPR:$Rt, GPR:$base_wb),
- (ins addrmode2:$addr), IndexModePost, LdFrm, IIC_iLoad_ru,
- "ldrt", "\t$Rt, $addr", "$addr.base = $base_wb", []> {
- // {17-14} Rn
- // {13} 1 == Rm, 0 == imm12
+def LDRT_POST_REG : AI2ldstidx<1, 0, 0, (outs GPR:$Rt, GPR:$Rn_wb),
+ (ins addr_offset_none:$addr, am2offset_reg:$offset),
+ IndexModePost, LdFrm, IIC_iLoad_ru,
+ "ldrt", "\t$Rt, $addr, $offset",
+ "$addr.base = $Rn_wb", []> {
// {12} isAdd
// {11-0} imm12/Rm
- bits<18> addr;
- let Inst{25} = addr{13};
- let Inst{23} = addr{12};
+ bits<14> offset;
+ bits<4> addr;
+ let Inst{25} = 1;
+ let Inst{23} = offset{12};
let Inst{21} = 1; // overwrite
- let Inst{19-16} = addr{17-14};
- let Inst{11-0} = addr{11-0};
- let AsmMatchConverter = "CvtLdWriteBackRegAddrMode2";
-}
-def LDRBT : AI2ldstidx<1, 1, 0, (outs GPR:$Rt, GPR:$base_wb),
- (ins addrmode2:$addr), IndexModePost, LdFrm, IIC_iLoad_bh_ru,
- "ldrbt", "\t$Rt, $addr", "$addr.base = $base_wb", []> {
- // {17-14} Rn
- // {13} 1 == Rm, 0 == imm12
+ let Inst{19-16} = addr;
+ let Inst{11-5} = offset{11-5};
+ let Inst{4} = 0;
+ let Inst{3-0} = offset{3-0};
+ let DecoderMethod = "DecodeAddrMode2IdxInstruction";
+}
+
+def LDRT_POST_IMM : AI2ldstidx<1, 0, 0, (outs GPR:$Rt, GPR:$Rn_wb),
+ (ins addr_offset_none:$addr, am2offset_imm:$offset),
+ IndexModePost, LdFrm, IIC_iLoad_ru,
+ "ldrt", "\t$Rt, $addr, $offset",
+ "$addr.base = $Rn_wb", []> {
// {12} isAdd
// {11-0} imm12/Rm
- bits<18> addr;
- let Inst{25} = addr{13};
- let Inst{23} = addr{12};
- let Inst{21} = 1; // overwrite
- let Inst{19-16} = addr{17-14};
- let Inst{11-0} = addr{11-0};
- let AsmMatchConverter = "CvtLdWriteBackRegAddrMode2";
-}
-def LDRSBT : AI3ldstidxT<0b1101, 1, 1, 0, (outs GPR:$Rt, GPR:$base_wb),
- (ins addrmode3:$addr), IndexModePost, LdMiscFrm, IIC_iLoad_bh_ru,
- "ldrsbt", "\t$Rt, $addr", "$addr.base = $base_wb", []> {
+ bits<14> offset;
+ bits<4> addr;
+ let Inst{25} = 0;
+ let Inst{23} = offset{12};
let Inst{21} = 1; // overwrite
+ let Inst{19-16} = addr;
+ let Inst{11-0} = offset{11-0};
+ let DecoderMethod = "DecodeAddrMode2IdxInstruction";
}
-def LDRHT : AI3ldstidxT<0b1011, 1, 1, 0, (outs GPR:$Rt, GPR:$base_wb),
- (ins addrmode3:$addr), IndexModePost, LdMiscFrm, IIC_iLoad_bh_ru,
- "ldrht", "\t$Rt, $addr", "$addr.base = $base_wb", []> {
+
+def LDRBT_POST_REG : AI2ldstidx<1, 1, 0, (outs GPR:$Rt, GPR:$Rn_wb),
+ (ins addr_offset_none:$addr, am2offset_reg:$offset),
+ IndexModePost, LdFrm, IIC_iLoad_bh_ru,
+ "ldrbt", "\t$Rt, $addr, $offset",
+ "$addr.base = $Rn_wb", []> {
+ // {12} isAdd
+ // {11-0} imm12/Rm
+ bits<14> offset;
+ bits<4> addr;
+ let Inst{25} = 1;
+ let Inst{23} = offset{12};
let Inst{21} = 1; // overwrite
-}
-def LDRSHT : AI3ldstidxT<0b1111, 1, 1, 0, (outs GPR:$Rt, GPR:$base_wb),
- (ins addrmode3:$addr), IndexModePost, LdMiscFrm, IIC_iLoad_bh_ru,
- "ldrsht", "\t$Rt, $addr", "$addr.base = $base_wb", []> {
+ let Inst{19-16} = addr;
+ let Inst{11-5} = offset{11-5};
+ let Inst{4} = 0;
+ let Inst{3-0} = offset{3-0};
+ let DecoderMethod = "DecodeAddrMode2IdxInstruction";
+}
+
+def LDRBT_POST_IMM : AI2ldstidx<1, 1, 0, (outs GPR:$Rt, GPR:$Rn_wb),
+ (ins addr_offset_none:$addr, am2offset_imm:$offset),
+ IndexModePost, LdFrm, IIC_iLoad_bh_ru,
+ "ldrbt", "\t$Rt, $addr, $offset",
+ "$addr.base = $Rn_wb", []> {
+ // {12} isAdd
+ // {11-0} imm12/Rm
+ bits<14> offset;
+ bits<4> addr;
+ let Inst{25} = 0;
+ let Inst{23} = offset{12};
let Inst{21} = 1; // overwrite
+ let Inst{19-16} = addr;
+ let Inst{11-0} = offset{11-0};
+ let DecoderMethod = "DecodeAddrMode2IdxInstruction";
+}
+
+multiclass AI3ldrT<bits<4> op, string opc> {
+ def i : AI3ldstidxT<op, 1, (outs GPR:$Rt, GPR:$base_wb),
+ (ins addr_offset_none:$addr, postidx_imm8:$offset),
+ IndexModePost, LdMiscFrm, IIC_iLoad_bh_ru, opc,
+ "\t$Rt, $addr, $offset", "$addr.base = $base_wb", []> {
+ bits<9> offset;
+ let Inst{23} = offset{8};
+ let Inst{22} = 1;
+ let Inst{11-8} = offset{7-4};
+ let Inst{3-0} = offset{3-0};
+ let AsmMatchConverter = "cvtLdExtTWriteBackImm";
+ }
+ def r : AI3ldstidxT<op, 1, (outs GPR:$Rt, GPR:$base_wb),
+ (ins addr_offset_none:$addr, postidx_reg:$Rm),
+ IndexModePost, LdMiscFrm, IIC_iLoad_bh_ru, opc,
+ "\t$Rt, $addr, $Rm", "$addr.base = $base_wb", []> {
+ bits<5> Rm;
+ let Inst{23} = Rm{4};
+ let Inst{22} = 0;
+ let Inst{11-8} = 0;
+ let Inst{3-0} = Rm{3-0};
+ let AsmMatchConverter = "cvtLdExtTWriteBackReg";
+ }
}
+
+defm LDRSBT : AI3ldrT<0b1101, "ldrsbt">;
+defm LDRHT : AI3ldrT<0b1011, "ldrht">;
+defm LDRSHT : AI3ldrT<0b1111, "ldrsht">;
}
// Store
@@ -1881,98 +2445,302 @@ def STRH : AI3str<0b1011, (outs), (ins GPR:$Rt, addrmode3:$addr), StMiscFrm,
let mayStore = 1, neverHasSideEffects = 1, hasExtraSrcRegAllocReq = 1 in
def STRD : AI3str<0b1111, (outs), (ins GPR:$Rt, GPR:$src2, addrmode3:$addr),
StMiscFrm, IIC_iStore_d_r,
- "strd", "\t$Rt, $src2, $addr", []>, Requires<[IsARM, HasV5TE]>;
+ "strd", "\t$Rt, $src2, $addr", []>,
+ Requires<[IsARM, HasV5TE]> {
+ let Inst{21} = 0;
+}
// Indexed stores
-def STR_PRE : AI2stridx<0, 1, (outs GPR:$Rn_wb),
- (ins GPR:$Rt, GPR:$Rn, am2offset:$offset),
- IndexModePre, StFrm, IIC_iStore_ru,
- "str", "\t$Rt, [$Rn, $offset]!",
- "$Rn = $Rn_wb,@earlyclobber $Rn_wb",
- [(set GPR:$Rn_wb,
- (pre_store GPR:$Rt, GPR:$Rn, am2offset:$offset))]>;
-
-def STR_POST : AI2stridx<0, 0, (outs GPR:$Rn_wb),
- (ins GPR:$Rt, GPR:$Rn, am2offset:$offset),
- IndexModePost, StFrm, IIC_iStore_ru,
- "str", "\t$Rt, [$Rn], $offset",
- "$Rn = $Rn_wb,@earlyclobber $Rn_wb",
- [(set GPR:$Rn_wb,
- (post_store GPR:$Rt, GPR:$Rn, am2offset:$offset))]>;
-
-def STRB_PRE : AI2stridx<1, 1, (outs GPR:$Rn_wb),
- (ins GPR:$Rt, GPR:$Rn, am2offset:$offset),
- IndexModePre, StFrm, IIC_iStore_bh_ru,
- "strb", "\t$Rt, [$Rn, $offset]!",
- "$Rn = $Rn_wb,@earlyclobber $Rn_wb",
- [(set GPR:$Rn_wb, (pre_truncsti8 GPR:$Rt,
- GPR:$Rn, am2offset:$offset))]>;
-def STRB_POST: AI2stridx<1, 0, (outs GPR:$Rn_wb),
- (ins GPR:$Rt, GPR:$Rn, am2offset:$offset),
- IndexModePost, StFrm, IIC_iStore_bh_ru,
- "strb", "\t$Rt, [$Rn], $offset",
- "$Rn = $Rn_wb,@earlyclobber $Rn_wb",
- [(set GPR:$Rn_wb, (post_truncsti8 GPR:$Rt,
- GPR:$Rn, am2offset:$offset))]>;
-
-def STRH_PRE : AI3stridx<0b1011, 0, 1, (outs GPR:$Rn_wb),
- (ins GPR:$Rt, GPR:$Rn, am3offset:$offset),
- IndexModePre, StMiscFrm, IIC_iStore_ru,
- "strh", "\t$Rt, [$Rn, $offset]!",
- "$Rn = $Rn_wb,@earlyclobber $Rn_wb",
- [(set GPR:$Rn_wb,
- (pre_truncsti16 GPR:$Rt, GPR:$Rn, am3offset:$offset))]>;
-
-def STRH_POST: AI3stridx<0b1011, 0, 0, (outs GPR:$Rn_wb),
- (ins GPR:$Rt, GPR:$Rn, am3offset:$offset),
- IndexModePost, StMiscFrm, IIC_iStore_bh_ru,
- "strh", "\t$Rt, [$Rn], $offset",
- "$Rn = $Rn_wb,@earlyclobber $Rn_wb",
- [(set GPR:$Rn_wb, (post_truncsti16 GPR:$Rt,
- GPR:$Rn, am3offset:$offset))]>;
-
-// For disassembly only
+multiclass AI2_stridx<bit isByte, string opc, InstrItinClass itin> {
+ def _PRE_IMM : AI2ldstidx<0, isByte, 1, (outs GPR:$Rn_wb),
+ (ins GPR:$Rt, addrmode_imm12:$addr), IndexModePre,
+ StFrm, itin,
+ opc, "\t$Rt, $addr!", "$addr.base = $Rn_wb", []> {
+ bits<17> addr;
+ let Inst{25} = 0;
+ let Inst{23} = addr{12}; // U (add = ('U' == 1))
+ let Inst{19-16} = addr{16-13}; // Rn
+ let Inst{11-0} = addr{11-0}; // imm12
+ let AsmMatchConverter = "cvtStWriteBackRegAddrModeImm12";
+ let DecoderMethod = "DecodeSTRPreImm";
+ }
+
+ def _PRE_REG : AI2ldstidx<0, isByte, 1, (outs GPR:$Rn_wb),
+ (ins GPR:$Rt, ldst_so_reg:$addr),
+ IndexModePre, StFrm, itin,
+ opc, "\t$Rt, $addr!", "$addr.base = $Rn_wb", []> {
+ bits<17> addr;
+ let Inst{25} = 1;
+ let Inst{23} = addr{12}; // U (add = ('U' == 1))
+ let Inst{19-16} = addr{16-13}; // Rn
+ let Inst{11-0} = addr{11-0};
+ let Inst{4} = 0; // Inst{4} = 0
+ let AsmMatchConverter = "cvtStWriteBackRegAddrMode2";
+ let DecoderMethod = "DecodeSTRPreReg";
+ }
+ def _POST_REG : AI2ldstidx<0, isByte, 0, (outs GPR:$Rn_wb),
+ (ins GPR:$Rt, addr_offset_none:$addr, am2offset_reg:$offset),
+ IndexModePost, StFrm, itin,
+ opc, "\t$Rt, $addr, $offset",
+ "$addr.base = $Rn_wb", []> {
+ // {12} isAdd
+ // {11-0} imm12/Rm
+ bits<14> offset;
+ bits<4> addr;
+ let Inst{25} = 1;
+ let Inst{23} = offset{12};
+ let Inst{19-16} = addr;
+ let Inst{11-0} = offset{11-0};
+
+ let DecoderMethod = "DecodeAddrMode2IdxInstruction";
+ }
+
+ def _POST_IMM : AI2ldstidx<0, isByte, 0, (outs GPR:$Rn_wb),
+ (ins GPR:$Rt, addr_offset_none:$addr, am2offset_imm:$offset),
+ IndexModePost, StFrm, itin,
+ opc, "\t$Rt, $addr, $offset",
+ "$addr.base = $Rn_wb", []> {
+ // {12} isAdd
+ // {11-0} imm12/Rm
+ bits<14> offset;
+ bits<4> addr;
+ let Inst{25} = 0;
+ let Inst{23} = offset{12};
+ let Inst{19-16} = addr;
+ let Inst{11-0} = offset{11-0};
+
+ let DecoderMethod = "DecodeAddrMode2IdxInstruction";
+ }
+}
+
+let mayStore = 1, neverHasSideEffects = 1 in {
+defm STR : AI2_stridx<0, "str", IIC_iStore_ru>;
+defm STRB : AI2_stridx<1, "strb", IIC_iStore_bh_ru>;
+}
+
+def : ARMPat<(post_store GPR:$Rt, addr_offset_none:$addr,
+ am2offset_reg:$offset),
+ (STR_POST_REG GPR:$Rt, addr_offset_none:$addr,
+ am2offset_reg:$offset)>;
+def : ARMPat<(post_store GPR:$Rt, addr_offset_none:$addr,
+ am2offset_imm:$offset),
+ (STR_POST_IMM GPR:$Rt, addr_offset_none:$addr,
+ am2offset_imm:$offset)>;
+def : ARMPat<(post_truncsti8 GPR:$Rt, addr_offset_none:$addr,
+ am2offset_reg:$offset),
+ (STRB_POST_REG GPR:$Rt, addr_offset_none:$addr,
+ am2offset_reg:$offset)>;
+def : ARMPat<(post_truncsti8 GPR:$Rt, addr_offset_none:$addr,
+ am2offset_imm:$offset),
+ (STRB_POST_IMM GPR:$Rt, addr_offset_none:$addr,
+ am2offset_imm:$offset)>;
+
+// Pseudo-instructions for pattern matching the pre-indexed stores. We can't
+// put the patterns on the instruction definitions directly as ISel wants
+// the address base and offset to be separate operands, not the single
+// complex operand with which the instructions themselves are represented.
+// The pseudos map between the two.
+let usesCustomInserter = 1,
+ Constraints = "$Rn = $Rn_wb,@earlyclobber $Rn_wb" in {
+def STRi_preidx: ARMPseudoInst<(outs GPR:$Rn_wb),
+ (ins GPR:$Rt, GPR:$Rn, am2offset_imm:$offset, pred:$p),
+ 4, IIC_iStore_ru,
+ [(set GPR:$Rn_wb,
+ (pre_store GPR:$Rt, GPR:$Rn, am2offset_imm:$offset))]>;
+def STRr_preidx: ARMPseudoInst<(outs GPR:$Rn_wb),
+ (ins GPR:$Rt, GPR:$Rn, am2offset_reg:$offset, pred:$p),
+ 4, IIC_iStore_ru,
+ [(set GPR:$Rn_wb,
+ (pre_store GPR:$Rt, GPR:$Rn, am2offset_reg:$offset))]>;
+def STRBi_preidx: ARMPseudoInst<(outs GPR:$Rn_wb),
+ (ins GPR:$Rt, GPR:$Rn, am2offset_imm:$offset, pred:$p),
+ 4, IIC_iStore_ru,
+ [(set GPR:$Rn_wb,
+ (pre_truncsti8 GPR:$Rt, GPR:$Rn, am2offset_imm:$offset))]>;
+def STRBr_preidx: ARMPseudoInst<(outs GPR:$Rn_wb),
+ (ins GPR:$Rt, GPR:$Rn, am2offset_reg:$offset, pred:$p),
+ 4, IIC_iStore_ru,
+ [(set GPR:$Rn_wb,
+ (pre_truncsti8 GPR:$Rt, GPR:$Rn, am2offset_reg:$offset))]>;
+def STRH_preidx: ARMPseudoInst<(outs GPR:$Rn_wb),
+ (ins GPR:$Rt, GPR:$Rn, am3offset:$offset, pred:$p),
+ 4, IIC_iStore_ru,
+ [(set GPR:$Rn_wb,
+ (pre_truncsti16 GPR:$Rt, GPR:$Rn, am3offset:$offset))]>;
+}
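+
+// e.g. (a sketch): a pre_store node with separate base and am2offset_imm
+// operands selects STRi_preidx; the custom inserter then rewrites it to the
+// real pre-indexed store (STR_PRE_IMM above), whose single complex operand
+// re-combines the base and offset.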
+
+
+
+def STRH_PRE : AI3ldstidx<0b1011, 0, 1, (outs GPR:$Rn_wb),
+ (ins GPR:$Rt, addrmode3:$addr), IndexModePre,
+ StMiscFrm, IIC_iStore_bh_ru,
+ "strh", "\t$Rt, $addr!", "$addr.base = $Rn_wb", []> {
+ bits<14> addr;
+ let Inst{23} = addr{8}; // U bit
+ let Inst{22} = addr{13}; // 1 == imm8, 0 == Rm
+ let Inst{19-16} = addr{12-9}; // Rn
+ let Inst{11-8} = addr{7-4}; // imm7_4/zero
+ let Inst{3-0} = addr{3-0}; // imm3_0/Rm
+ let AsmMatchConverter = "cvtStWriteBackRegAddrMode3";
+ let DecoderMethod = "DecodeAddrMode3Instruction";
+}
+
+def STRH_POST : AI3ldstidx<0b1011, 0, 0, (outs GPR:$Rn_wb),
+ (ins GPR:$Rt, addr_offset_none:$addr, am3offset:$offset),
+ IndexModePost, StMiscFrm, IIC_iStore_bh_ru,
+ "strh", "\t$Rt, $addr, $offset", "$addr.base = $Rn_wb",
+ [(set GPR:$Rn_wb, (post_truncsti16 GPR:$Rt,
+ addr_offset_none:$addr,
+ am3offset:$offset))]> {
+ bits<10> offset;
+ bits<4> addr;
+ let Inst{23} = offset{8}; // U bit
+ let Inst{22} = offset{9}; // 1 == imm8, 0 == Rm
+ let Inst{19-16} = addr;
+ let Inst{11-8} = offset{7-4}; // imm7_4/zero
+ let Inst{3-0} = offset{3-0}; // imm3_0/Rm
+ let DecoderMethod = "DecodeAddrMode3Instruction";
+}
+
let mayStore = 1, neverHasSideEffects = 1, hasExtraSrcRegAllocReq = 1 in {
-def STRD_PRE : AI3stdpr<(outs GPR:$base_wb),
- (ins GPR:$src1, GPR:$src2, GPR:$base, am3offset:$offset),
- StMiscFrm, IIC_iStore_d_ru,
- "strd", "\t$src1, $src2, [$base, $offset]!",
- "$base = $base_wb", []>;
-
-// For disassembly only
-def STRD_POST: AI3stdpo<(outs GPR:$base_wb),
- (ins GPR:$src1, GPR:$src2, GPR:$base, am3offset:$offset),
- StMiscFrm, IIC_iStore_d_ru,
- "strd", "\t$src1, $src2, [$base], $offset",
- "$base = $base_wb", []>;
+def STRD_PRE : AI3ldstidx<0b1111, 0, 1, (outs GPR:$Rn_wb),
+ (ins GPR:$Rt, GPR:$Rt2, addrmode3:$addr),
+ IndexModePre, StMiscFrm, IIC_iStore_d_ru,
+ "strd", "\t$Rt, $Rt2, $addr!",
+ "$addr.base = $Rn_wb", []> {
+ bits<14> addr;
+ let Inst{23} = addr{8}; // U bit
+ let Inst{22} = addr{13}; // 1 == imm8, 0 == Rm
+ let Inst{19-16} = addr{12-9}; // Rn
+ let Inst{11-8} = addr{7-4}; // imm7_4/zero
+ let Inst{3-0} = addr{3-0}; // imm3_0/Rm
+ let DecoderMethod = "DecodeAddrMode3Instruction";
+ let AsmMatchConverter = "cvtStrdPre";
+}
+
+def STRD_POST: AI3ldstidx<0b1111, 0, 0, (outs GPR:$Rn_wb),
+ (ins GPR:$Rt, GPR:$Rt2, addr_offset_none:$addr,
+ am3offset:$offset),
+ IndexModePost, StMiscFrm, IIC_iStore_d_ru,
+ "strd", "\t$Rt, $Rt2, $addr, $offset",
+ "$addr.base = $Rn_wb", []> {
+ bits<10> offset;
+ bits<4> addr;
+ let Inst{23} = offset{8}; // U bit
+ let Inst{22} = offset{9}; // 1 == imm8, 0 == Rm
+ let Inst{19-16} = addr;
+ let Inst{11-8} = offset{7-4}; // imm7_4/zero
+ let Inst{3-0} = offset{3-0}; // imm3_0/Rm
+ let DecoderMethod = "DecodeAddrMode3Instruction";
+}
} // mayStore = 1, neverHasSideEffects = 1, hasExtraSrcRegAllocReq = 1
-// STRT, STRBT, and STRHT are for disassembly only.
+// STRT, STRBT, and STRHT: unprivileged (user-mode) store variants.
-def STRT : AI2stridxT<0, 0, (outs GPR:$Rn_wb), (ins GPR:$Rt, addrmode2:$addr),
- IndexModePost, StFrm, IIC_iStore_ru,
- "strt", "\t$Rt, $addr", "$addr.base = $Rn_wb",
- [/* For disassembly only; pattern left blank */]> {
+def STRBT_POST_REG : AI2ldstidx<0, 1, 0, (outs GPR:$Rn_wb),
+ (ins GPR:$Rt, addr_offset_none:$addr, am2offset_reg:$offset),
+ IndexModePost, StFrm, IIC_iStore_bh_ru,
+ "strbt", "\t$Rt, $addr, $offset",
+ "$addr.base = $Rn_wb", []> {
+ // {12} isAdd
+ // {11-0} imm12/Rm
+ bits<14> offset;
+ bits<4> addr;
+ let Inst{25} = 1;
+ let Inst{23} = offset{12};
let Inst{21} = 1; // overwrite
- let AsmMatchConverter = "CvtStWriteBackRegAddrMode2";
+ let Inst{19-16} = addr;
+ let Inst{11-5} = offset{11-5};
+ let Inst{4} = 0;
+ let Inst{3-0} = offset{3-0};
+ let DecoderMethod = "DecodeAddrMode2IdxInstruction";
+}
+
+def STRBT_POST_IMM : AI2ldstidx<0, 1, 0, (outs GPR:$Rn_wb),
+ (ins GPR:$Rt, addr_offset_none:$addr, am2offset_imm:$offset),
+ IndexModePost, StFrm, IIC_iStore_bh_ru,
+ "strbt", "\t$Rt, $addr, $offset",
+ "$addr.base = $Rn_wb", []> {
+ // {12} isAdd
+ // {11-0} imm12/Rm
+ bits<14> offset;
+ bits<4> addr;
+ let Inst{25} = 0;
+ let Inst{23} = offset{12};
+ let Inst{21} = 1; // overwrite
+ let Inst{19-16} = addr;
+ let Inst{11-0} = offset{11-0};
+ let DecoderMethod = "DecodeAddrMode2IdxInstruction";
}
-def STRBT : AI2stridxT<1, 0, (outs GPR:$Rn_wb), (ins GPR:$Rt, addrmode2:$addr),
- IndexModePost, StFrm, IIC_iStore_bh_ru,
- "strbt", "\t$Rt, $addr", "$addr.base = $Rn_wb",
- [/* For disassembly only; pattern left blank */]> {
+let mayStore = 1, neverHasSideEffects = 1 in {
+def STRT_POST_REG : AI2ldstidx<0, 0, 0, (outs GPR:$Rn_wb),
+ (ins GPR:$Rt, addr_offset_none:$addr, am2offset_reg:$offset),
+ IndexModePost, StFrm, IIC_iStore_ru,
+ "strt", "\t$Rt, $addr, $offset",
+ "$addr.base = $Rn_wb", []> {
+ // {12} isAdd
+ // {11-0} imm12/Rm
+ bits<14> offset;
+ bits<4> addr;
+ let Inst{25} = 1;
+ let Inst{23} = offset{12};
+ let Inst{21} = 1; // overwrite
+ let Inst{19-16} = addr;
+ let Inst{11-5} = offset{11-5};
+ let Inst{4} = 0;
+ let Inst{3-0} = offset{3-0};
+ let DecoderMethod = "DecodeAddrMode2IdxInstruction";
+}
+
+def STRT_POST_IMM : AI2ldstidx<0, 0, 0, (outs GPR:$Rn_wb),
+ (ins GPR:$Rt, addr_offset_none:$addr, am2offset_imm:$offset),
+ IndexModePost, StFrm, IIC_iStore_ru,
+ "strt", "\t$Rt, $addr, $offset",
+ "$addr.base = $Rn_wb", []> {
+ // {12} isAdd
+ // {11-0} imm12/Rm
+ bits<14> offset;
+ bits<4> addr;
+ let Inst{25} = 0;
+ let Inst{23} = offset{12};
let Inst{21} = 1; // overwrite
- let AsmMatchConverter = "CvtStWriteBackRegAddrMode2";
+ let Inst{19-16} = addr;
+ let Inst{11-0} = offset{11-0};
+ let DecoderMethod = "DecodeAddrMode2IdxInstruction";
+}
}
-def STRHT: AI3sthpo<(outs GPR:$base_wb), (ins GPR:$Rt, addrmode3:$addr),
- StMiscFrm, IIC_iStore_bh_ru,
- "strht", "\t$Rt, $addr", "$addr.base = $base_wb",
- [/* For disassembly only; pattern left blank */]> {
- let Inst{21} = 1; // overwrite
- let AsmMatchConverter = "CvtStWriteBackRegAddrMode3";
+
+multiclass AI3strT<bits<4> op, string opc> {
+ def i : AI3ldstidxT<op, 0, (outs GPR:$base_wb),
+ (ins GPR:$Rt, addr_offset_none:$addr, postidx_imm8:$offset),
+ IndexModePost, StMiscFrm, IIC_iStore_bh_ru, opc,
+ "\t$Rt, $addr, $offset", "$addr.base = $base_wb", []> {
+ bits<9> offset;
+ let Inst{23} = offset{8};
+ let Inst{22} = 1;
+ let Inst{11-8} = offset{7-4};
+ let Inst{3-0} = offset{3-0};
+ let AsmMatchConverter = "cvtStExtTWriteBackImm";
+ }
+ def r : AI3ldstidxT<op, 0, (outs GPR:$base_wb),
+ (ins GPR:$Rt, addr_offset_none:$addr, postidx_reg:$Rm),
+ IndexModePost, StMiscFrm, IIC_iStore_bh_ru, opc,
+ "\t$Rt, $addr, $Rm", "$addr.base = $base_wb", []> {
+ bits<5> Rm;
+ let Inst{23} = Rm{4};
+ let Inst{22} = 0;
+ let Inst{11-8} = 0;
+ let Inst{3-0} = Rm{3-0};
+ let AsmMatchConverter = "cvtStExtTWriteBackReg";
+ }
}
+
+defm STRHT : AI3strT<0b1011, "strht">;
+
+
//===----------------------------------------------------------------------===//
// Load / store multiple Instructions.
//
@@ -1996,6 +2764,8 @@ multiclass arm_ldst_mult<string asm, bit L_bit, Format f,
let Inst{24-23} = 0b01; // Increment After
let Inst{21} = 1; // Writeback
let Inst{20} = L_bit;
+
+ let DecoderMethod = "DecodeMemMultipleWritebackInstruction";
}
def DA :
AXI4<(outs), (ins GPR:$Rn, pred:$p, reglist:$regs, variable_ops),
@@ -2012,6 +2782,8 @@ multiclass arm_ldst_mult<string asm, bit L_bit, Format f,
let Inst{24-23} = 0b00; // Decrement After
let Inst{21} = 1; // Writeback
let Inst{20} = L_bit;
+
+ let DecoderMethod = "DecodeMemMultipleWritebackInstruction";
}
def DB :
AXI4<(outs), (ins GPR:$Rn, pred:$p, reglist:$regs, variable_ops),
@@ -2028,6 +2800,8 @@ multiclass arm_ldst_mult<string asm, bit L_bit, Format f,
let Inst{24-23} = 0b10; // Decrement Before
let Inst{21} = 1; // Writeback
let Inst{20} = L_bit;
+
+ let DecoderMethod = "DecodeMemMultipleWritebackInstruction";
}
def IB :
AXI4<(outs), (ins GPR:$Rn, pred:$p, reglist:$regs, variable_ops),
@@ -2044,6 +2818,8 @@ multiclass arm_ldst_mult<string asm, bit L_bit, Format f,
let Inst{24-23} = 0b11; // Increment Before
let Inst{21} = 1; // Writeback
let Inst{20} = L_bit;
+
+ let DecoderMethod = "DecodeMemMultipleWritebackInstruction";
}
}
@@ -2084,6 +2860,9 @@ def MOVr : AsI1<0b1101, (outs GPR:$Rd), (ins GPR:$Rm), DPFrm, IIC_iMOVr,
let Inst{15-12} = Rd;
}
+def : ARMInstAlias<"movs${p} $Rd, $Rm",
+ (MOVr GPR:$Rd, GPR:$Rm, pred:$p, CPSR)>;
+
// A version for the smaller set of tail call registers.
let neverHasSideEffects = 1 in
def MOVr_TC : AsI1<0b1101, (outs tcGPR:$Rd), (ins tcGPR:$Rm), DPFrm,
@@ -2097,15 +2876,33 @@ def MOVr_TC : AsI1<0b1101, (outs tcGPR:$Rd), (ins tcGPR:$Rm), DPFrm,
let Inst{15-12} = Rd;
}
-def MOVs : AsI1<0b1101, (outs GPR:$Rd), (ins shift_so_reg:$src),
- DPSoRegFrm, IIC_iMOVsr,
- "mov", "\t$Rd, $src", [(set GPR:$Rd, shift_so_reg:$src)]>,
+def MOVsr : AsI1<0b1101, (outs GPRnopc:$Rd), (ins shift_so_reg_reg:$src),
+ DPSoRegRegFrm, IIC_iMOVsr,
+ "mov", "\t$Rd, $src",
+ [(set GPRnopc:$Rd, shift_so_reg_reg:$src)]>, UnaryDP {
+ bits<4> Rd;
+ bits<12> src;
+ let Inst{15-12} = Rd;
+ let Inst{19-16} = 0b0000;
+ let Inst{11-8} = src{11-8};
+ let Inst{7} = 0;
+ let Inst{6-5} = src{6-5};
+ let Inst{4} = 1;
+ let Inst{3-0} = src{3-0};
+ let Inst{25} = 0;
+}
+
+def MOVsi : AsI1<0b1101, (outs GPR:$Rd), (ins shift_so_reg_imm:$src),
+ DPSoRegImmFrm, IIC_iMOVsr,
+ "mov", "\t$Rd, $src", [(set GPR:$Rd, shift_so_reg_imm:$src)]>,
UnaryDP {
bits<4> Rd;
bits<12> src;
let Inst{15-12} = Rd;
let Inst{19-16} = 0b0000;
- let Inst{11-0} = src;
+ let Inst{11-5} = src{11-5};
+ let Inst{4} = 0;
+ let Inst{3-0} = src{3-0};
let Inst{25} = 0;
}
@@ -2121,7 +2918,7 @@ def MOVi : AsI1<0b1101, (outs GPR:$Rd), (ins so_imm:$imm), DPFrm, IIC_iMOVi,
}
let isReMaterializable = 1, isAsCheapAsAMove = 1, isMoveImm = 1 in
-def MOVi16 : AI1<0b1000, (outs GPR:$Rd), (ins i32imm_hilo16:$imm),
+def MOVi16 : AI1<0b1000, (outs GPR:$Rd), (ins imm0_65535_expr:$imm),
DPFrm, IIC_iMOVi,
"movw", "\t$Rd, $imm",
[(set GPR:$Rd, imm0_65535:$imm)]>,
@@ -2133,16 +2930,22 @@ def MOVi16 : AI1<0b1000, (outs GPR:$Rd), (ins i32imm_hilo16:$imm),
let Inst{19-16} = imm{15-12};
let Inst{20} = 0;
let Inst{25} = 1;
+ let DecoderMethod = "DecodeArmMOVTWInstruction";
}
+def : InstAlias<"mov${p} $Rd, $imm",
+ (MOVi16 GPR:$Rd, imm0_65535_expr:$imm, pred:$p)>,
+ Requires<[IsARM]>;
+
def MOVi16_ga_pcrel : PseudoInst<(outs GPR:$Rd),
(ins i32imm:$addr, pclabel:$id), IIC_iMOVi, []>;
let Constraints = "$src = $Rd" in {
-def MOVTi16 : AI1<0b1010, (outs GPR:$Rd), (ins GPR:$src, i32imm_hilo16:$imm),
+def MOVTi16 : AI1<0b1010, (outs GPRnopc:$Rd),
+ (ins GPR:$src, imm0_65535_expr:$imm),
DPFrm, IIC_iMOVi,
"movt", "\t$Rd, $imm",
- [(set GPR:$Rd,
+ [(set GPRnopc:$Rd,
(or (and GPR:$src, 0xffff),
lo16AllZero:$imm))]>, UnaryDP,
Requires<[IsARM, HasV6T2]> {
@@ -2153,6 +2956,7 @@ def MOVTi16 : AI1<0b1010, (outs GPR:$Rd), (ins GPR:$src, i32imm_hilo16:$imm),
let Inst{19-16} = imm{15-12};
let Inst{20} = 0;
let Inst{25} = 1;
+ let DecoderMethod = "DecodeArmMOVTWInstruction";
}
def MOVTi16_ga_pcrel : PseudoInst<(outs GPR:$Rd),
@@ -2186,30 +2990,28 @@ def MOVsra_flag : PseudoInst<(outs GPR:$dst), (ins GPR:$src), IIC_iMOVsi,
// Sign extenders
-defm SXTB : AI_ext_rrot<0b01101010,
+def SXTB : AI_ext_rrot<0b01101010,
"sxtb", UnOpFrag<(sext_inreg node:$Src, i8)>>;
-defm SXTH : AI_ext_rrot<0b01101011,
+def SXTH : AI_ext_rrot<0b01101011,
"sxth", UnOpFrag<(sext_inreg node:$Src, i16)>>;
-defm SXTAB : AI_exta_rrot<0b01101010,
+def SXTAB : AI_exta_rrot<0b01101010,
"sxtab", BinOpFrag<(add node:$LHS, (sext_inreg node:$RHS, i8))>>;
-defm SXTAH : AI_exta_rrot<0b01101011,
+def SXTAH : AI_exta_rrot<0b01101011,
"sxtah", BinOpFrag<(add node:$LHS, (sext_inreg node:$RHS,i16))>>;
-// For disassembly only
-defm SXTB16 : AI_ext_rrot_np<0b01101000, "sxtb16">;
+def SXTB16 : AI_ext_rrot_np<0b01101000, "sxtb16">;
-// For disassembly only
-defm SXTAB16 : AI_exta_rrot_np<0b01101000, "sxtab16">;
+def SXTAB16 : AI_exta_rrot_np<0b01101000, "sxtab16">;
// Zero extenders
let AddedComplexity = 16 in {
-defm UXTB : AI_ext_rrot<0b01101110,
+def UXTB : AI_ext_rrot<0b01101110,
"uxtb" , UnOpFrag<(and node:$Src, 0x000000FF)>>;
-defm UXTH : AI_ext_rrot<0b01101111,
+def UXTH : AI_ext_rrot<0b01101111,
"uxth" , UnOpFrag<(and node:$Src, 0x0000FFFF)>>;
-defm UXTB16 : AI_ext_rrot<0b01101100,
+def UXTB16 : AI_ext_rrot<0b01101100,
"uxtb16", UnOpFrag<(and node:$Src, 0x00FF00FF)>>;
// FIXME: This pattern incorrectly assumes the shl operator is a rotate.
@@ -2217,23 +3019,22 @@ defm UXTB16 : AI_ext_rrot<0b01101100,
// instead so we can include a check for masking back in the upper
// eight bits of the source into the lower eight bits of the result.
//def : ARMV6Pat<(and (shl GPR:$Src, (i32 8)), 0xFF00FF),
-// (UXTB16r_rot GPR:$Src, 24)>;
+// (UXTB16r_rot GPR:$Src, 3)>;
def : ARMV6Pat<(and (srl GPR:$Src, (i32 8)), 0xFF00FF),
- (UXTB16r_rot GPR:$Src, 8)>;
+ (UXTB16 GPR:$Src, 1)>;
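
The rotate operand changing from 8 to 1 reflects the operand now counting in byte units (1 == ror #8). The equivalence the rewritten pattern relies on can be checked in plain C++; the helper names here are illustrative:

    #include <cassert>
    #include <cstdint>

    static uint32_t ror32(uint32_t x, unsigned n) {      // rotate right
      return n ? (x >> n) | (x << (32 - n)) : x;
    }
    static uint32_t uxtb16(uint32_t x, unsigned rot) {   // rot counts bytes
      return ror32(x, 8 * rot) & 0x00FF00FFu;
    }

    int main() {
      // (and (srl x, 8), 0xFF00FF) == UXTB16 x, ror #8: the byte rotated
      // into the top lane is discarded by the 0x00FF00FF mask anyway.
      uint32_t x = 0xDEADBEEFu;
      assert(((x >> 8) & 0x00FF00FFu) == uxtb16(x, 1));
    }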
-defm UXTAB : AI_exta_rrot<0b01101110, "uxtab",
+def UXTAB : AI_exta_rrot<0b01101110, "uxtab",
BinOpFrag<(add node:$LHS, (and node:$RHS, 0x00FF))>>;
-defm UXTAH : AI_exta_rrot<0b01101111, "uxtah",
+def UXTAH : AI_exta_rrot<0b01101111, "uxtah",
BinOpFrag<(add node:$LHS, (and node:$RHS, 0xFFFF))>>;
}
// This isn't safe in general, the add is two 16-bit units, not a 32-bit add.
-// For disassembly only
-defm UXTAB16 : AI_exta_rrot_np<0b01101100, "uxtab16">;
+def UXTAB16 : AI_exta_rrot_np<0b01101100, "uxtab16">;
-def SBFX : I<(outs GPR:$Rd),
- (ins GPR:$Rn, imm0_31:$lsb, imm0_31_m1:$width),
+def SBFX : I<(outs GPRnopc:$Rd),
+ (ins GPRnopc:$Rn, imm0_31:$lsb, imm1_32:$width),
AddrMode1, 4, IndexModeNone, DPFrm, IIC_iUNAsi,
"sbfx", "\t$Rd, $Rn, $lsb, $width", "", []>,
Requires<[IsARM, HasV6T2]> {
@@ -2250,7 +3051,7 @@ def SBFX : I<(outs GPR:$Rd),
}
def UBFX : I<(outs GPR:$Rd),
- (ins GPR:$Rn, imm0_31:$lsb, imm0_31_m1:$width),
+ (ins GPR:$Rn, imm0_31:$lsb, imm1_32:$width),
AddrMode1, 4, IndexModeNone, DPFrm, IIC_iUNAsi,
"ubfx", "\t$Rd, $Rn, $lsb, $width", "", []>,
Requires<[IsARM, HasV6T2]> {
@@ -2278,148 +3079,58 @@ defm SUB : AsI1_bin_irs<0b0010, "sub",
BinOpFrag<(sub node:$LHS, node:$RHS)>, "SUB">;
// ADD and SUB with 's' bit set.
-defm ADDS : AI1_bin_s_irs<0b0100, "adds",
+//
+// Currently, t2ADDS/t2SUBS are pseudo opcodes that exist only in the
+// selection DAG. They are "lowered" to real t2ADD/t2SUB opcodes by
+// AdjustInstrPostInstrSelection where we determine whether or not to
+// set the "s" bit based on CPSR liveness.
+//
+// FIXME: Eliminate t2ADDS/t2SUBS pseudo opcodes after adding tablegen
+// support for an optional CPSR definition that corresponds to the DAG
+// node's second value. We can then eliminate the implicit def of CPSR.
+defm ADDS : AsI1_bin_s_irs<0b0100, "add",
IIC_iALUi, IIC_iALUr, IIC_iALUsr,
- BinOpFrag<(addc node:$LHS, node:$RHS)>, 1>;
-defm SUBS : AI1_bin_s_irs<0b0010, "subs",
+ BinOpFrag<(ARMaddc node:$LHS, node:$RHS)>, 1>;
+defm SUBS : AsI1_bin_s_irs<0b0010, "sub",
IIC_iALUi, IIC_iALUr, IIC_iALUsr,
- BinOpFrag<(subc node:$LHS, node:$RHS)>>;
+ BinOpFrag<(ARMsubc node:$LHS, node:$RHS)>>;
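
A minimal C++ model of what the ARMaddc/ARMsubc nodes described in the comment above produce: a value plus a flags result, where the flags result is what the post-isel adjustment keeps or drops depending on CPSR liveness. This is a conceptual sketch, not the LLVM pass:

    #include <cstdint>
    #include <utility>

    // ARMaddc-style node: two results, the 32-bit sum and the carry-out.
    // If no later node consumes the flag, the "s" bit can be dropped.
    static std::pair<uint32_t, bool> addc(uint32_t a, uint32_t b) {
      uint32_t sum = a + b;
      return {sum, sum < a};            // carry out of bit 31
    }

    // ARMsubc: ARM sets C to NOT borrow on subtraction.
    static std::pair<uint32_t, bool> subc(uint32_t a, uint32_t b) {
      return {a - b, a >= b};           // carry == !borrow
    }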
defm ADC : AI1_adde_sube_irs<0b0101, "adc",
- BinOpFrag<(adde_dead_carry node:$LHS, node:$RHS)>,
+ BinOpWithFlagFrag<(ARMadde node:$LHS, node:$RHS, node:$FLAG)>,
"ADC", 1>;
defm SBC : AI1_adde_sube_irs<0b0110, "sbc",
- BinOpFrag<(sube_dead_carry node:$LHS, node:$RHS)>,
+ BinOpWithFlagFrag<(ARMsube node:$LHS, node:$RHS, node:$FLAG)>,
"SBC">;
-// ADC and SUBC with 's' bit set.
-let usesCustomInserter = 1 in {
-defm ADCS : AI1_adde_sube_s_irs<
- BinOpFrag<(adde_live_carry node:$LHS, node:$RHS)>, 1>;
-defm SBCS : AI1_adde_sube_s_irs<
- BinOpFrag<(sube_live_carry node:$LHS, node:$RHS) >>;
-}
-
-def RSBri : AsI1<0b0011, (outs GPR:$Rd), (ins GPR:$Rn, so_imm:$imm), DPFrm,
- IIC_iALUi, "rsb", "\t$Rd, $Rn, $imm",
- [(set GPR:$Rd, (sub so_imm:$imm, GPR:$Rn))]> {
- bits<4> Rd;
- bits<4> Rn;
- bits<12> imm;
- let Inst{25} = 1;
- let Inst{15-12} = Rd;
- let Inst{19-16} = Rn;
- let Inst{11-0} = imm;
-}
-
-// The reg/reg form is only defined for the disassembler; for codegen it is
-// equivalent to SUBrr.
-def RSBrr : AsI1<0b0011, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm), DPFrm,
- IIC_iALUr, "rsb", "\t$Rd, $Rn, $Rm",
- [/* For disassembly only; pattern left blank */]> {
- bits<4> Rd;
- bits<4> Rn;
- bits<4> Rm;
- let Inst{11-4} = 0b00000000;
- let Inst{25} = 0;
- let Inst{3-0} = Rm;
- let Inst{15-12} = Rd;
- let Inst{19-16} = Rn;
-}
-
-def RSBrs : AsI1<0b0011, (outs GPR:$Rd), (ins GPR:$Rn, so_reg:$shift),
- DPSoRegFrm, IIC_iALUsr, "rsb", "\t$Rd, $Rn, $shift",
- [(set GPR:$Rd, (sub so_reg:$shift, GPR:$Rn))]> {
- bits<4> Rd;
- bits<4> Rn;
- bits<12> shift;
- let Inst{25} = 0;
- let Inst{11-0} = shift;
- let Inst{15-12} = Rd;
- let Inst{19-16} = Rn;
-}
+defm RSB : AsI1_rbin_irs <0b0011, "rsb",
+ IIC_iALUi, IIC_iALUr, IIC_iALUsr,
+ BinOpFrag<(sub node:$LHS, node:$RHS)>, "RSB">;
-// RSB with 's' bit set.
-// NOTE: CPSR def omitted because it will be handled by the custom inserter.
-let usesCustomInserter = 1 in {
-def RSBSri : ARMPseudoInst<(outs GPR:$Rd), (ins GPR:$Rn, so_imm:$imm),
- 4, IIC_iALUi,
- [(set GPR:$Rd, (subc so_imm:$imm, GPR:$Rn))]>;
-def RSBSrr : ARMPseudoInst<(outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm),
- 4, IIC_iALUr,
- [/* For disassembly only; pattern left blank */]>;
-def RSBSrs : ARMPseudoInst<(outs GPR:$Rd), (ins GPR:$Rn, so_reg:$shift),
- 4, IIC_iALUsr,
- [(set GPR:$Rd, (subc so_reg:$shift, GPR:$Rn))]>;
-}
-
-let Uses = [CPSR] in {
-def RSCri : AsI1<0b0111, (outs GPR:$Rd), (ins GPR:$Rn, so_imm:$imm),
- DPFrm, IIC_iALUi, "rsc", "\t$Rd, $Rn, $imm",
- [(set GPR:$Rd, (sube_dead_carry so_imm:$imm, GPR:$Rn))]>,
- Requires<[IsARM]> {
- bits<4> Rd;
- bits<4> Rn;
- bits<12> imm;
- let Inst{25} = 1;
- let Inst{15-12} = Rd;
- let Inst{19-16} = Rn;
- let Inst{11-0} = imm;
-}
-// The reg/reg form is only defined for the disassembler; for codegen it is
-// equivalent to SUBrr.
-def RSCrr : AsI1<0b0111, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm),
- DPFrm, IIC_iALUr, "rsc", "\t$Rd, $Rn, $Rm",
- [/* For disassembly only; pattern left blank */]> {
- bits<4> Rd;
- bits<4> Rn;
- bits<4> Rm;
- let Inst{11-4} = 0b00000000;
- let Inst{25} = 0;
- let Inst{3-0} = Rm;
- let Inst{15-12} = Rd;
- let Inst{19-16} = Rn;
-}
-def RSCrs : AsI1<0b0111, (outs GPR:$Rd), (ins GPR:$Rn, so_reg:$shift),
- DPSoRegFrm, IIC_iALUsr, "rsc", "\t$Rd, $Rn, $shift",
- [(set GPR:$Rd, (sube_dead_carry so_reg:$shift, GPR:$Rn))]>,
- Requires<[IsARM]> {
- bits<4> Rd;
- bits<4> Rn;
- bits<12> shift;
- let Inst{25} = 0;
- let Inst{11-0} = shift;
- let Inst{15-12} = Rd;
- let Inst{19-16} = Rn;
-}
-}
+// FIXME: Eliminate them if we can write def : Pat patterns which defines
+// CPSR and the implicit def of CPSR is not needed.
+defm RSBS : AsI1_rbin_s_is<0b0011, "rsb",
+ IIC_iALUi, IIC_iALUr, IIC_iALUsr,
+ BinOpFrag<(ARMsubc node:$LHS, node:$RHS)>>;
-// NOTE: CPSR def omitted because it will be handled by the custom inserter.
-let usesCustomInserter = 1, Uses = [CPSR] in {
-def RSCSri : ARMPseudoInst<(outs GPR:$Rd), (ins GPR:$Rn, so_imm:$imm),
- 4, IIC_iALUi,
- [(set GPR:$Rd, (sube_dead_carry so_imm:$imm, GPR:$Rn))]>;
-def RSCSrs : ARMPseudoInst<(outs GPR:$Rd), (ins GPR:$Rn, so_reg:$shift),
- 4, IIC_iALUsr,
- [(set GPR:$Rd, (sube_dead_carry so_reg:$shift, GPR:$Rn))]>;
-}
+defm RSC : AI1_rsc_irs<0b0111, "rsc",
+ BinOpWithFlagFrag<(ARMsube node:$LHS, node:$RHS, node:$FLAG)>,
+ "RSC">;
// (sub X, imm) gets canonicalized to (add X, -imm). Match this form.
// The assume-no-carry-in form uses the negation of the input since add/sub
// assume opposite meanings of the carry flag (i.e., carry == !borrow).
// See the definition of AddWithCarry() in the ARM ARM A2.2.1 for the gory
// details.
-def : ARMPat<(add GPR:$src, so_imm_neg:$imm),
- (SUBri GPR:$src, so_imm_neg:$imm)>;
-def : ARMPat<(addc GPR:$src, so_imm_neg:$imm),
- (SUBSri GPR:$src, so_imm_neg:$imm)>;
+def : ARMPat<(add GPR:$src, so_imm_neg:$imm),
+ (SUBri GPR:$src, so_imm_neg:$imm)>;
+def : ARMPat<(ARMaddc GPR:$src, so_imm_neg:$imm),
+ (SUBSri GPR:$src, so_imm_neg:$imm)>;
+
// The with-carry-in form matches bitwise not instead of the negation.
// Effectively, the inverse interpretation of the carry flag already accounts
// for part of the negation.
-def : ARMPat<(adde_dead_carry GPR:$src, so_imm_not:$imm),
- (SBCri GPR:$src, so_imm_not:$imm)>;
-def : ARMPat<(adde_live_carry GPR:$src, so_imm_not:$imm),
- (SBCSri GPR:$src, so_imm_not:$imm)>;
+def : ARMPat<(ARMadde GPR:$src, so_imm_not:$imm, CPSR),
+ (SBCri GPR:$src, so_imm_not:$imm)>;
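
The "bitwise not instead of negation" remark can be checked directly: ARM's SBC computes Rn + NOT(operand) + C, so feeding the complemented immediate to an add-with-carry is exactly a subtract of the original immediate. A small C++ check:

    #include <cassert>
    #include <cstdint>

    static uint32_t adde(uint32_t a, uint32_t b, bool c) { return a + b + c; }

    int main() {
      uint32_t a = 100, imm = 7;
      for (bool c : {false, true})
        // a + ~imm + c == a - imm - (1 - c), i.e. SBC a, imm with carry c.
        assert(adde(a, ~imm, c) == a - imm - (c ? 0u : 1u));
    }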
// Note: These are implemented in C++ code, because they have to generate
// ADD/SUBrs instructions, which use a complex pattern that a xform function
@@ -2427,12 +3138,13 @@ def : ARMPat<(adde_live_carry GPR:$src, so_imm_not:$imm),
// (mul X, 2^n+1) -> (add (X << n), X)
// (mul X, 2^n-1) -> (rsb X, (X << n))
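
The two identities the C++ selection code relies on, verified in plain C++ (unsigned wraparound preserves both):

    #include <cassert>
    #include <cstdint>

    int main() {
      uint32_t x = 12345u;
      for (unsigned n = 1; n < 31; ++n) {
        assert(x * ((1u << n) + 1) == x + (x << n));   // add (X << n), X
        assert(x * ((1u << n) - 1) == (x << n) - x);   // rsb X, (X << n)
      }
    }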
-// ARM Arithmetic Instruction -- for disassembly only
+// ARM Arithmetic Instruction
// GPR:$dst = GPR:$a op GPR:$b
class AAI<bits<8> op27_20, bits<8> op11_4, string opc,
- list<dag> pattern = [/* For disassembly only; pattern left blank */],
- dag iops = (ins GPR:$Rn, GPR:$Rm), string asm = "\t$Rd, $Rn, $Rm">
- : AI<(outs GPR:$Rd), iops, DPFrm, IIC_iALUr, opc, asm, pattern> {
+ list<dag> pattern = [],
+ dag iops = (ins GPRnopc:$Rn, GPRnopc:$Rm),
+ string asm = "\t$Rd, $Rn, $Rm">
+ : AI<(outs GPRnopc:$Rd), iops, DPFrm, IIC_iALUr, opc, asm, pattern> {
bits<4> Rn;
bits<4> Rd;
bits<4> Rm;
@@ -2443,17 +3155,19 @@ class AAI<bits<8> op27_20, bits<8> op11_4, string opc,
let Inst{3-0} = Rm;
}
-// Saturating add/subtract -- for disassembly only
+// Saturating add/subtract
def QADD : AAI<0b00010000, 0b00000101, "qadd",
- [(set GPR:$Rd, (int_arm_qadd GPR:$Rm, GPR:$Rn))],
- (ins GPR:$Rm, GPR:$Rn), "\t$Rd, $Rm, $Rn">;
+ [(set GPRnopc:$Rd, (int_arm_qadd GPRnopc:$Rm, GPRnopc:$Rn))],
+ (ins GPRnopc:$Rm, GPRnopc:$Rn), "\t$Rd, $Rm, $Rn">;
def QSUB : AAI<0b00010010, 0b00000101, "qsub",
- [(set GPR:$Rd, (int_arm_qsub GPR:$Rm, GPR:$Rn))],
- (ins GPR:$Rm, GPR:$Rn), "\t$Rd, $Rm, $Rn">;
-def QDADD : AAI<0b00010100, 0b00000101, "qdadd", [], (ins GPR:$Rm, GPR:$Rn),
+ [(set GPRnopc:$Rd, (int_arm_qsub GPRnopc:$Rm, GPRnopc:$Rn))],
+ (ins GPRnopc:$Rm, GPRnopc:$Rn), "\t$Rd, $Rm, $Rn">;
+def QDADD : AAI<0b00010100, 0b00000101, "qdadd", [],
+ (ins GPRnopc:$Rm, GPRnopc:$Rn),
"\t$Rd, $Rm, $Rn">;
-def QDSUB : AAI<0b00010110, 0b00000101, "qdsub", [], (ins GPR:$Rm, GPR:$Rn),
+def QDSUB : AAI<0b00010110, 0b00000101, "qdsub", [],
+ (ins GPRnopc:$Rm, GPRnopc:$Rn),
"\t$Rd, $Rm, $Rn">;
def QADD16 : AAI<0b01100010, 0b11110001, "qadd16">;
@@ -2469,7 +3183,7 @@ def UQSAX : AAI<0b01100110, 0b11110101, "uqsax">;
def UQSUB16 : AAI<0b01100110, 0b11110111, "uqsub16">;
def UQSUB8 : AAI<0b01100110, 0b11111111, "uqsub8">;
-// Signed/Unsigned add/subtract -- for disassembly only
+// Signed/Unsigned add/subtract
def SASX : AAI<0b01100001, 0b11110011, "sasx">;
def SADD16 : AAI<0b01100001, 0b11110001, "sadd16">;
@@ -2484,7 +3198,7 @@ def USAX : AAI<0b01100101, 0b11110101, "usax">;
def USUB16 : AAI<0b01100101, 0b11110111, "usub16">;
def USUB8 : AAI<0b01100101, 0b11111111, "usub8">;
-// Signed/Unsigned halving add/subtract -- for disassembly only
+// Signed/Unsigned halving add/subtract
def SHASX : AAI<0b01100011, 0b11110011, "shasx">;
def SHADD16 : AAI<0b01100011, 0b11110001, "shadd16">;
@@ -2499,7 +3213,7 @@ def UHSAX : AAI<0b01100111, 0b11110101, "uhsax">;
def UHSUB16 : AAI<0b01100111, 0b11110111, "uhsub16">;
def UHSUB8 : AAI<0b01100111, 0b11111111, "uhsub8">;
-// Unsigned Sum of Absolute Differences [and Accumulate] -- for disassembly only
+// Unsigned Sum of Absolute Differences [and Accumulate].
def USAD8 : AI<(outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm),
MulFrm /* for convenience */, NoItinerary, "usad8",
@@ -2531,11 +3245,11 @@ def USADA8 : AI<(outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm, GPR:$Ra),
let Inst{3-0} = Rn;
}
-// Signed/Unsigned saturate -- for disassembly only
+// Signed/Unsigned saturate
-def SSAT : AI<(outs GPR:$Rd), (ins ssat_imm:$sat_imm, GPR:$a, shift_imm:$sh),
- SatFrm, NoItinerary, "ssat", "\t$Rd, $sat_imm, $a$sh",
- [/* For disassembly only; pattern left blank */]> {
+def SSAT : AI<(outs GPRnopc:$Rd),
+ (ins imm1_32:$sat_imm, GPRnopc:$Rn, shift_imm:$sh),
+ SatFrm, NoItinerary, "ssat", "\t$Rd, $sat_imm, $Rn$sh", []> {
bits<4> Rd;
bits<5> sat_imm;
bits<4> Rn;
@@ -2544,14 +3258,14 @@ def SSAT : AI<(outs GPR:$Rd), (ins ssat_imm:$sat_imm, GPR:$a, shift_imm:$sh),
let Inst{5-4} = 0b01;
let Inst{20-16} = sat_imm;
let Inst{15-12} = Rd;
- let Inst{11-7} = sh{7-3};
- let Inst{6} = sh{0};
+ let Inst{11-7} = sh{4-0};
+ let Inst{6} = sh{5};
let Inst{3-0} = Rn;
}
-def SSAT16 : AI<(outs GPR:$Rd), (ins ssat_imm:$sat_imm, GPR:$Rn), SatFrm,
- NoItinerary, "ssat16", "\t$Rd, $sat_imm, $Rn",
- [/* For disassembly only; pattern left blank */]> {
+def SSAT16 : AI<(outs GPRnopc:$Rd),
+ (ins imm1_16:$sat_imm, GPRnopc:$Rn), SatFrm,
+ NoItinerary, "ssat16", "\t$Rd, $sat_imm, $Rn", []> {
bits<4> Rd;
bits<4> sat_imm;
bits<4> Rn;
@@ -2562,9 +3276,9 @@ def SSAT16 : AI<(outs GPR:$Rd), (ins ssat_imm:$sat_imm, GPR:$Rn), SatFrm,
let Inst{3-0} = Rn;
}
-def USAT : AI<(outs GPR:$Rd), (ins i32imm:$sat_imm, GPR:$a, shift_imm:$sh),
- SatFrm, NoItinerary, "usat", "\t$Rd, $sat_imm, $a$sh",
- [/* For disassembly only; pattern left blank */]> {
+def USAT : AI<(outs GPRnopc:$Rd),
+ (ins imm0_31:$sat_imm, GPRnopc:$Rn, shift_imm:$sh),
+ SatFrm, NoItinerary, "usat", "\t$Rd, $sat_imm, $Rn$sh", []> {
bits<4> Rd;
bits<5> sat_imm;
bits<4> Rn;
@@ -2572,15 +3286,15 @@ def USAT : AI<(outs GPR:$Rd), (ins i32imm:$sat_imm, GPR:$a, shift_imm:$sh),
let Inst{27-21} = 0b0110111;
let Inst{5-4} = 0b01;
let Inst{15-12} = Rd;
- let Inst{11-7} = sh{7-3};
- let Inst{6} = sh{0};
+ let Inst{11-7} = sh{4-0};
+ let Inst{6} = sh{5};
let Inst{20-16} = sat_imm;
let Inst{3-0} = Rn;
}
-def USAT16 : AI<(outs GPR:$Rd), (ins i32imm:$sat_imm, GPR:$a), SatFrm,
- NoItinerary, "usat16", "\t$Rd, $sat_imm, $a",
- [/* For disassembly only; pattern left blank */]> {
+def USAT16 : AI<(outs GPRnopc:$Rd),
+ (ins imm0_15:$sat_imm, GPRnopc:$Rn), SatFrm,
+ NoItinerary, "usat16", "\t$Rd, $sat_imm, $Rn", []> {
bits<4> Rd;
bits<4> sat_imm;
bits<4> Rn;
@@ -2591,8 +3305,10 @@ def USAT16 : AI<(outs GPR:$Rd), (ins i32imm:$sat_imm, GPR:$a), SatFrm,
let Inst{3-0} = Rn;
}
-def : ARMV6Pat<(int_arm_ssat GPR:$a, imm:$pos), (SSAT imm:$pos, GPR:$a, 0)>;
-def : ARMV6Pat<(int_arm_usat GPR:$a, imm:$pos), (USAT imm:$pos, GPR:$a, 0)>;
+def : ARMV6Pat<(int_arm_ssat GPRnopc:$a, imm:$pos),
+ (SSAT imm:$pos, GPRnopc:$a, 0)>;
+def : ARMV6Pat<(int_arm_usat GPRnopc:$a, imm:$pos),
+ (USAT imm:$pos, GPRnopc:$a, 0)>;
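
The new imm1_32/imm0_31 operand ranges match the saturation widths. A sketch of the SSAT/USAT semantics, assuming the straightforward clamp reading of the ARM ARM:

    #include <algorithm>
    #include <cstdint>

    // SSAT n: clamp to the signed n-bit range [-2^(n-1), 2^(n-1)-1], n = 1..32.
    static int32_t ssat(int32_t x, unsigned n) {
      int64_t hi = (1ll << (n - 1)) - 1, lo = -hi - 1;
      return (int32_t)std::min<int64_t>(std::max<int64_t>(x, lo), hi);
    }
    // USAT n: clamp to the unsigned n-bit range [0, 2^n - 1], n = 0..31.
    static uint32_t usat(int32_t x, unsigned n) {
      int64_t hi = (1ll << n) - 1;
      return (uint32_t)std::min<int64_t>(std::max<int64_t>(x, 0), hi);
    }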
//===----------------------------------------------------------------------===//
// Bitwise Instructions.
@@ -2611,6 +3327,10 @@ defm BIC : AsI1_bin_irs<0b1110, "bic",
IIC_iBITi, IIC_iBITr, IIC_iBITsr,
BinOpFrag<(and node:$LHS, (not node:$RHS))>, "BIC">;
+// FIXME: bf_inv_mask_imm should be two operands, the lsb and the msb, just
+// like in the actual instruction encoding. The complexity of mapping the mask
+// to the lsb/msb pair should be handled by ISel, not encapsulated in the
+// instruction description.
def BFC : I<(outs GPR:$Rd), (ins GPR:$src, bf_inv_mask_imm:$imm),
AddrMode1, 4, IndexModeNone, DPFrm, IIC_iUNAsi,
"bfc", "\t$Rd, $imm", "$src = $Rd",
@@ -2622,16 +3342,16 @@ def BFC : I<(outs GPR:$Rd), (ins GPR:$src, bf_inv_mask_imm:$imm),
let Inst{6-0} = 0b0011111;
let Inst{15-12} = Rd;
let Inst{11-7} = imm{4-0}; // lsb
- let Inst{20-16} = imm{9-5}; // width
+ let Inst{20-16} = imm{9-5}; // msb
}
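
What the FIXME asks ISel to own, sketched in C++: recovering the lsb/msb pair from an inverted mask whose clear bits form one contiguous run (contiguity is assumed, matching what bf_inv_mask_imm accepts):

    #include <cstdint>

    static void maskToLsbMsb(uint32_t invMask, unsigned &lsb, unsigned &msb) {
      uint32_t run = ~invMask;           // the bits BFC/BFI operate on
      lsb = __builtin_ctz(run);          // index of the lowest set bit
      msb = 31 - __builtin_clz(run);     // index of the highest set bit
    }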
// A8.6.18 BFI - Bitfield insert (Encoding A1)
-def BFI : I<(outs GPR:$Rd), (ins GPR:$src, GPR:$Rn, bf_inv_mask_imm:$imm),
- AddrMode1, 4, IndexModeNone, DPFrm, IIC_iUNAsi,
- "bfi", "\t$Rd, $Rn, $imm", "$src = $Rd",
- [(set GPR:$Rd, (ARMbfi GPR:$src, GPR:$Rn,
- bf_inv_mask_imm:$imm))]>,
- Requires<[IsARM, HasV6T2]> {
+def BFI:I<(outs GPRnopc:$Rd), (ins GPRnopc:$src, GPR:$Rn, bf_inv_mask_imm:$imm),
+ AddrMode1, 4, IndexModeNone, DPFrm, IIC_iUNAsi,
+ "bfi", "\t$Rd, $Rn, $imm", "$src = $Rd",
+ [(set GPRnopc:$Rd, (ARMbfi GPRnopc:$src, GPR:$Rn,
+ bf_inv_mask_imm:$imm))]>,
+ Requires<[IsARM, HasV6T2]> {
bits<4> Rd;
bits<4> Rn;
bits<10> imm;
@@ -2643,25 +3363,6 @@ def BFI : I<(outs GPR:$Rd), (ins GPR:$src, GPR:$Rn, bf_inv_mask_imm:$imm),
let Inst{3-0} = Rn;
}
-// GNU as only supports this form of bfi (w/ 4 arguments)
-let isAsmParserOnly = 1 in
-def BFI4p : I<(outs GPR:$Rd), (ins GPR:$src, GPR:$Rn,
- lsb_pos_imm:$lsb, width_imm:$width),
- AddrMode1, 4, IndexModeNone, DPFrm, IIC_iUNAsi,
- "bfi", "\t$Rd, $Rn, $lsb, $width", "$src = $Rd",
- []>, Requires<[IsARM, HasV6T2]> {
- bits<4> Rd;
- bits<4> Rn;
- bits<5> lsb;
- bits<5> width;
- let Inst{27-21} = 0b0111110;
- let Inst{6-4} = 0b001; // Rn: Inst{3-0} != 15
- let Inst{15-12} = Rd;
- let Inst{11-7} = lsb;
- let Inst{20-16} = width; // Custom encoder => lsb+width-1
- let Inst{3-0} = Rn;
-}
-
def MVNr : AsI1<0b1111, (outs GPR:$Rd), (ins GPR:$Rm), DPFrm, IIC_iMVNr,
"mvn", "\t$Rd, $Rm",
[(set GPR:$Rd, (not GPR:$Rm))]>, UnaryDP {
@@ -2673,15 +3374,31 @@ def MVNr : AsI1<0b1111, (outs GPR:$Rd), (ins GPR:$Rm), DPFrm, IIC_iMVNr,
let Inst{15-12} = Rd;
let Inst{3-0} = Rm;
}
-def MVNs : AsI1<0b1111, (outs GPR:$Rd), (ins so_reg:$shift), DPSoRegFrm,
- IIC_iMVNsr, "mvn", "\t$Rd, $shift",
- [(set GPR:$Rd, (not so_reg:$shift))]>, UnaryDP {
+def MVNsi : AsI1<0b1111, (outs GPR:$Rd), (ins so_reg_imm:$shift),
+ DPSoRegImmFrm, IIC_iMVNsr, "mvn", "\t$Rd, $shift",
+ [(set GPR:$Rd, (not so_reg_imm:$shift))]>, UnaryDP {
+ bits<4> Rd;
+ bits<12> shift;
+ let Inst{25} = 0;
+ let Inst{19-16} = 0b0000;
+ let Inst{15-12} = Rd;
+ let Inst{11-5} = shift{11-5};
+ let Inst{4} = 0;
+ let Inst{3-0} = shift{3-0};
+}
+def MVNsr : AsI1<0b1111, (outs GPR:$Rd), (ins so_reg_reg:$shift),
+ DPSoRegRegFrm, IIC_iMVNsr, "mvn", "\t$Rd, $shift",
+ [(set GPR:$Rd, (not so_reg_reg:$shift))]>, UnaryDP {
bits<4> Rd;
bits<12> shift;
let Inst{25} = 0;
let Inst{19-16} = 0b0000;
let Inst{15-12} = Rd;
- let Inst{11-0} = shift;
+ let Inst{11-8} = shift{11-8};
+ let Inst{7} = 0;
+ let Inst{6-5} = shift{6-5};
+ let Inst{4} = 1;
+ let Inst{3-0} = shift{3-0};
}
let isReMaterializable = 1, isAsCheapAsAMove = 1, isMoveImm = 1 in
def MVNi : AsI1<0b1111, (outs GPR:$Rd), (ins so_imm:$imm), DPFrm,
@@ -2820,8 +3537,8 @@ def UMAAL : AMul1I <0b0000010, (outs GPR:$RdLo, GPR:$RdHi),
bits<4> RdHi;
bits<4> Rm;
bits<4> Rn;
- let Inst{19-16} = RdLo;
- let Inst{15-12} = RdHi;
+ let Inst{19-16} = RdHi;
+ let Inst{15-12} = RdLo;
let Inst{11-8} = Rm;
let Inst{3-0} = Rn;
}
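
The field swap above matters because UMAAL's two outputs are asymmetric. Its semantics, per the ARM ARM:

    #include <cstdint>

    // UMAAL: RdHi:RdLo = Rn * Rm + RdLo + RdHi (all unsigned); RdHi lives
    // in encoding bits 19-16 and RdLo in bits 15-12, as fixed above.
    static void umaal(uint32_t &RdLo, uint32_t &RdHi, uint32_t Rn, uint32_t Rm) {
      uint64_t r = (uint64_t)Rn * Rm + RdLo + RdHi;
      RdLo = (uint32_t)r;
      RdHi = (uint32_t)(r >> 32);
    }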
@@ -2855,8 +3572,7 @@ def SMMUL : AMul2I <0b0111010, 0b0001, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm),
}
def SMMULR : AMul2I <0b0111010, 0b0011, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm),
- IIC_iMUL32, "smmulr", "\t$Rd, $Rn, $Rm",
- [/* For disassembly only; pattern left blank */]>,
+ IIC_iMUL32, "smmulr", "\t$Rd, $Rn, $Rm", []>,
Requires<[IsARM, HasV6]> {
let Inst{15-12} = 0b1111;
}
@@ -2869,8 +3585,7 @@ def SMMLA : AMul2Ia <0b0111010, 0b0001, (outs GPR:$Rd),
def SMMLAR : AMul2Ia <0b0111010, 0b0011, (outs GPR:$Rd),
(ins GPR:$Rn, GPR:$Rm, GPR:$Ra),
- IIC_iMAC32, "smmlar", "\t$Rd, $Rn, $Rm, $Ra",
- [/* For disassembly only; pattern left blank */]>,
+ IIC_iMAC32, "smmlar", "\t$Rd, $Rn, $Rm, $Ra", []>,
Requires<[IsARM, HasV6]>;
def SMMLS : AMul2Ia <0b0111010, 0b1101, (outs GPR:$Rd),
@@ -2881,8 +3596,7 @@ def SMMLS : AMul2Ia <0b0111010, 0b1101, (outs GPR:$Rd),
def SMMLSR : AMul2Ia <0b0111010, 0b1111, (outs GPR:$Rd),
(ins GPR:$Rn, GPR:$Rm, GPR:$Ra),
- IIC_iMAC32, "smmlsr", "\t$Rd, $Rn, $Rm, $Ra",
- [/* For disassembly only; pattern left blank */]>,
+ IIC_iMAC32, "smmlsr", "\t$Rd, $Rn, $Rm, $Ra", []>,
Requires<[IsARM, HasV6]>;
multiclass AI_smul<string opc, PatFrag opnode> {
@@ -2925,92 +3639,95 @@ multiclass AI_smul<string opc, PatFrag opnode> {
multiclass AI_smla<string opc, PatFrag opnode> {
- def BB : AMulxyIa<0b0001000, 0b00, (outs GPR:$Rd),
- (ins GPR:$Rn, GPR:$Rm, GPR:$Ra),
+ let DecoderMethod = "DecodeSMLAInstruction" in {
+ def BB : AMulxyIa<0b0001000, 0b00, (outs GPRnopc:$Rd),
+ (ins GPRnopc:$Rn, GPRnopc:$Rm, GPR:$Ra),
IIC_iMAC16, !strconcat(opc, "bb"), "\t$Rd, $Rn, $Rm, $Ra",
- [(set GPR:$Rd, (add GPR:$Ra,
- (opnode (sext_inreg GPR:$Rn, i16),
- (sext_inreg GPR:$Rm, i16))))]>,
+ [(set GPRnopc:$Rd, (add GPR:$Ra,
+ (opnode (sext_inreg GPRnopc:$Rn, i16),
+ (sext_inreg GPRnopc:$Rm, i16))))]>,
Requires<[IsARM, HasV5TE]>;
- def BT : AMulxyIa<0b0001000, 0b10, (outs GPR:$Rd),
- (ins GPR:$Rn, GPR:$Rm, GPR:$Ra),
+ def BT : AMulxyIa<0b0001000, 0b10, (outs GPRnopc:$Rd),
+ (ins GPRnopc:$Rn, GPRnopc:$Rm, GPR:$Ra),
IIC_iMAC16, !strconcat(opc, "bt"), "\t$Rd, $Rn, $Rm, $Ra",
- [(set GPR:$Rd, (add GPR:$Ra, (opnode (sext_inreg GPR:$Rn, i16),
- (sra GPR:$Rm, (i32 16)))))]>,
+ [(set GPRnopc:$Rd,
+ (add GPR:$Ra, (opnode (sext_inreg GPRnopc:$Rn, i16),
+ (sra GPRnopc:$Rm, (i32 16)))))]>,
Requires<[IsARM, HasV5TE]>;
- def TB : AMulxyIa<0b0001000, 0b01, (outs GPR:$Rd),
- (ins GPR:$Rn, GPR:$Rm, GPR:$Ra),
+ def TB : AMulxyIa<0b0001000, 0b01, (outs GPRnopc:$Rd),
+ (ins GPRnopc:$Rn, GPRnopc:$Rm, GPR:$Ra),
IIC_iMAC16, !strconcat(opc, "tb"), "\t$Rd, $Rn, $Rm, $Ra",
- [(set GPR:$Rd, (add GPR:$Ra, (opnode (sra GPR:$Rn, (i32 16)),
- (sext_inreg GPR:$Rm, i16))))]>,
+ [(set GPRnopc:$Rd,
+ (add GPR:$Ra, (opnode (sra GPRnopc:$Rn, (i32 16)),
+ (sext_inreg GPRnopc:$Rm, i16))))]>,
Requires<[IsARM, HasV5TE]>;
- def TT : AMulxyIa<0b0001000, 0b11, (outs GPR:$Rd),
- (ins GPR:$Rn, GPR:$Rm, GPR:$Ra),
+ def TT : AMulxyIa<0b0001000, 0b11, (outs GPRnopc:$Rd),
+ (ins GPRnopc:$Rn, GPRnopc:$Rm, GPR:$Ra),
IIC_iMAC16, !strconcat(opc, "tt"), "\t$Rd, $Rn, $Rm, $Ra",
- [(set GPR:$Rd, (add GPR:$Ra, (opnode (sra GPR:$Rn, (i32 16)),
- (sra GPR:$Rm, (i32 16)))))]>,
+ [(set GPRnopc:$Rd,
+ (add GPR:$Ra, (opnode (sra GPRnopc:$Rn, (i32 16)),
+ (sra GPRnopc:$Rm, (i32 16)))))]>,
Requires<[IsARM, HasV5TE]>;
- def WB : AMulxyIa<0b0001001, 0b00, (outs GPR:$Rd),
- (ins GPR:$Rn, GPR:$Rm, GPR:$Ra),
+ def WB : AMulxyIa<0b0001001, 0b00, (outs GPRnopc:$Rd),
+ (ins GPRnopc:$Rn, GPRnopc:$Rm, GPR:$Ra),
IIC_iMAC16, !strconcat(opc, "wb"), "\t$Rd, $Rn, $Rm, $Ra",
- [(set GPR:$Rd, (add GPR:$Ra, (sra (opnode GPR:$Rn,
- (sext_inreg GPR:$Rm, i16)), (i32 16))))]>,
+ [(set GPRnopc:$Rd,
+ (add GPR:$Ra, (sra (opnode GPRnopc:$Rn,
+ (sext_inreg GPRnopc:$Rm, i16)), (i32 16))))]>,
Requires<[IsARM, HasV5TE]>;
- def WT : AMulxyIa<0b0001001, 0b10, (outs GPR:$Rd),
- (ins GPR:$Rn, GPR:$Rm, GPR:$Ra),
+ def WT : AMulxyIa<0b0001001, 0b10, (outs GPRnopc:$Rd),
+ (ins GPRnopc:$Rn, GPRnopc:$Rm, GPR:$Ra),
IIC_iMAC16, !strconcat(opc, "wt"), "\t$Rd, $Rn, $Rm, $Ra",
- [(set GPR:$Rd, (add GPR:$Ra, (sra (opnode GPR:$Rn,
- (sra GPR:$Rm, (i32 16))), (i32 16))))]>,
+ [(set GPRnopc:$Rd,
+ (add GPR:$Ra, (sra (opnode GPRnopc:$Rn,
+ (sra GPRnopc:$Rm, (i32 16))), (i32 16))))]>,
Requires<[IsARM, HasV5TE]>;
+ }
}
defm SMUL : AI_smul<"smul", BinOpFrag<(mul node:$LHS, node:$RHS)>>;
defm SMLA : AI_smla<"smla", BinOpFrag<(mul node:$LHS, node:$RHS)>>;
-// Halfword multiply accumulate long: SMLAL<x><y> -- for disassembly only
-def SMLALBB : AMulxyI64<0b0001010, 0b00, (outs GPR:$RdLo, GPR:$RdHi),
- (ins GPR:$Rn, GPR:$Rm),
- IIC_iMAC64, "smlalbb", "\t$RdLo, $RdHi, $Rn, $Rm",
- [/* For disassembly only; pattern left blank */]>,
+// Halfword multiply accumulate long: SMLAL<x><y>.
+def SMLALBB : AMulxyI64<0b0001010, 0b00, (outs GPRnopc:$RdLo, GPRnopc:$RdHi),
+ (ins GPRnopc:$Rn, GPRnopc:$Rm),
+ IIC_iMAC64, "smlalbb", "\t$RdLo, $RdHi, $Rn, $Rm", []>,
Requires<[IsARM, HasV5TE]>;
-def SMLALBT : AMulxyI64<0b0001010, 0b10, (outs GPR:$RdLo, GPR:$RdHi),
- (ins GPR:$Rn, GPR:$Rm),
- IIC_iMAC64, "smlalbt", "\t$RdLo, $RdHi, $Rn, $Rm",
- [/* For disassembly only; pattern left blank */]>,
+def SMLALBT : AMulxyI64<0b0001010, 0b10, (outs GPRnopc:$RdLo, GPRnopc:$RdHi),
+ (ins GPRnopc:$Rn, GPRnopc:$Rm),
+ IIC_iMAC64, "smlalbt", "\t$RdLo, $RdHi, $Rn, $Rm", []>,
Requires<[IsARM, HasV5TE]>;
-def SMLALTB : AMulxyI64<0b0001010, 0b01, (outs GPR:$RdLo, GPR:$RdHi),
- (ins GPR:$Rn, GPR:$Rm),
- IIC_iMAC64, "smlaltb", "\t$RdLo, $RdHi, $Rn, $Rm",
- [/* For disassembly only; pattern left blank */]>,
+def SMLALTB : AMulxyI64<0b0001010, 0b01, (outs GPRnopc:$RdLo, GPRnopc:$RdHi),
+ (ins GPRnopc:$Rn, GPRnopc:$Rm),
+ IIC_iMAC64, "smlaltb", "\t$RdLo, $RdHi, $Rn, $Rm", []>,
Requires<[IsARM, HasV5TE]>;
-def SMLALTT : AMulxyI64<0b0001010, 0b11, (outs GPR:$RdLo, GPR:$RdHi),
- (ins GPR:$Rn, GPR:$Rm),
- IIC_iMAC64, "smlaltt", "\t$RdLo, $RdHi, $Rn, $Rm",
- [/* For disassembly only; pattern left blank */]>,
+def SMLALTT : AMulxyI64<0b0001010, 0b11, (outs GPRnopc:$RdLo, GPRnopc:$RdHi),
+ (ins GPRnopc:$Rn, GPRnopc:$Rm),
+ IIC_iMAC64, "smlaltt", "\t$RdLo, $RdHi, $Rn, $Rm", []>,
Requires<[IsARM, HasV5TE]>;
-// Helper class for AI_smld -- for disassembly only
+// Helper class for AI_smld.
class AMulDualIbase<bit long, bit sub, bit swap, dag oops, dag iops,
InstrItinClass itin, string opc, string asm>
: AI<oops, iops, MulFrm, itin, opc, asm, []>, Requires<[IsARM, HasV6]> {
bits<4> Rn;
bits<4> Rm;
- let Inst{4} = 1;
- let Inst{5} = swap;
- let Inst{6} = sub;
- let Inst{7} = 0;
- let Inst{21-20} = 0b00;
- let Inst{22} = long;
let Inst{27-23} = 0b01110;
+ let Inst{22} = long;
+ let Inst{21-20} = 0b00;
let Inst{11-8} = Rm;
+ let Inst{7} = 0;
+ let Inst{6} = sub;
+ let Inst{5} = swap;
+ let Inst{4} = 1;
let Inst{3-0} = Rn;
}
class AMulDualI<bit long, bit sub, bit swap, dag oops, dag iops,
@@ -3024,6 +3741,8 @@ class AMulDualIa<bit long, bit sub, bit swap, dag oops, dag iops,
InstrItinClass itin, string opc, string asm>
: AMulDualIbase<long, sub, swap, oops, iops, itin, opc, asm> {
bits<4> Ra;
+ bits<4> Rd;
+ let Inst{19-16} = Rd;
let Inst{15-12} = Ra;
}
class AMulDualI64<bit long, bit sub, bit swap, dag oops, dag iops,
@@ -3037,18 +3756,20 @@ class AMulDualI64<bit long, bit sub, bit swap, dag oops, dag iops,
multiclass AI_smld<bit sub, string opc> {
- def D : AMulDualIa<0, sub, 0, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm, GPR:$Ra),
+ def D : AMulDualIa<0, sub, 0, (outs GPRnopc:$Rd),
+ (ins GPRnopc:$Rn, GPRnopc:$Rm, GPR:$Ra),
NoItinerary, !strconcat(opc, "d"), "\t$Rd, $Rn, $Rm, $Ra">;
- def DX: AMulDualIa<0, sub, 1, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm, GPR:$Ra),
+ def DX: AMulDualIa<0, sub, 1, (outs GPRnopc:$Rd),
+ (ins GPRnopc:$Rn, GPRnopc:$Rm, GPR:$Ra),
NoItinerary, !strconcat(opc, "dx"), "\t$Rd, $Rn, $Rm, $Ra">;
- def LD: AMulDualI64<1, sub, 0, (outs GPR:$RdLo,GPR:$RdHi),
- (ins GPR:$Rn, GPR:$Rm), NoItinerary,
+ def LD: AMulDualI64<1, sub, 0, (outs GPRnopc:$RdLo, GPRnopc:$RdHi),
+ (ins GPRnopc:$Rn, GPRnopc:$Rm), NoItinerary,
!strconcat(opc, "ld"), "\t$RdLo, $RdHi, $Rn, $Rm">;
- def LDX : AMulDualI64<1, sub, 1, (outs GPR:$RdLo,GPR:$RdHi),
- (ins GPR:$Rn, GPR:$Rm), NoItinerary,
+ def LDX : AMulDualI64<1, sub, 1, (outs GPRnopc:$RdLo, GPRnopc:$RdHi),
+ (ins GPRnopc:$Rn, GPRnopc:$Rm), NoItinerary,
!strconcat(opc, "ldx"),"\t$RdLo, $RdHi, $Rn, $Rm">;
}
@@ -3058,10 +3779,10 @@ defm SMLS : AI_smld<1, "smls">;
multiclass AI_sdml<bit sub, string opc> {
- def D : AMulDualI<0, sub, 0, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm),
- NoItinerary, !strconcat(opc, "d"), "\t$Rd, $Rn, $Rm">;
- def DX : AMulDualI<0, sub, 1, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm),
- NoItinerary, !strconcat(opc, "dx"), "\t$Rd, $Rn, $Rm">;
+ def D:AMulDualI<0, sub, 0, (outs GPRnopc:$Rd), (ins GPRnopc:$Rn, GPRnopc:$Rm),
+ NoItinerary, !strconcat(opc, "d"), "\t$Rd, $Rn, $Rm">;
+ def DX:AMulDualI<0, sub, 1, (outs GPRnopc:$Rd),(ins GPRnopc:$Rn, GPRnopc:$Rm),
+ NoItinerary, !strconcat(opc, "dx"), "\t$Rd, $Rn, $Rm">;
}
defm SMUA : AI_sdml<0, "smua">;
@@ -3100,55 +3821,38 @@ def : ARMV6Pat<(or (sra (shl GPR:$Rm, (i32 24)), (i32 16)),
(and (srl GPR:$Rm, (i32 8)), 0xFF)),
(REVSH GPR:$Rm)>;
-def lsl_shift_imm : SDNodeXForm<imm, [{
- unsigned Sh = ARM_AM::getSORegOpc(ARM_AM::lsl, N->getZExtValue());
- return CurDAG->getTargetConstant(Sh, MVT::i32);
-}]>;
-
-def lsl_amt : ImmLeaf<i32, [{
- return Imm > 0 && Imm < 32;
-}], lsl_shift_imm>;
-
-def PKHBT : APKHI<0b01101000, 0, (outs GPR:$Rd),
- (ins GPR:$Rn, GPR:$Rm, shift_imm:$sh),
+def PKHBT : APKHI<0b01101000, 0, (outs GPRnopc:$Rd),
+ (ins GPRnopc:$Rn, GPRnopc:$Rm, pkh_lsl_amt:$sh),
IIC_iALUsi, "pkhbt", "\t$Rd, $Rn, $Rm$sh",
- [(set GPR:$Rd, (or (and GPR:$Rn, 0xFFFF),
- (and (shl GPR:$Rm, lsl_amt:$sh),
- 0xFFFF0000)))]>,
+ [(set GPRnopc:$Rd, (or (and GPRnopc:$Rn, 0xFFFF),
+ (and (shl GPRnopc:$Rm, pkh_lsl_amt:$sh),
+ 0xFFFF0000)))]>,
Requires<[IsARM, HasV6]>;
// Alternate cases for PKHBT where identities eliminate some nodes.
-def : ARMV6Pat<(or (and GPR:$Rn, 0xFFFF), (and GPR:$Rm, 0xFFFF0000)),
- (PKHBT GPR:$Rn, GPR:$Rm, 0)>;
-def : ARMV6Pat<(or (and GPR:$Rn, 0xFFFF), (shl GPR:$Rm, imm16_31:$sh)),
- (PKHBT GPR:$Rn, GPR:$Rm, (lsl_shift_imm imm16_31:$sh))>;
-
-def asr_shift_imm : SDNodeXForm<imm, [{
- unsigned Sh = ARM_AM::getSORegOpc(ARM_AM::asr, N->getZExtValue());
- return CurDAG->getTargetConstant(Sh, MVT::i32);
-}]>;
-
-def asr_amt : ImmLeaf<i32, [{
- return Imm > 0 && Imm <= 32;
-}], asr_shift_imm>;
+def : ARMV6Pat<(or (and GPRnopc:$Rn, 0xFFFF), (and GPRnopc:$Rm, 0xFFFF0000)),
+ (PKHBT GPRnopc:$Rn, GPRnopc:$Rm, 0)>;
+def : ARMV6Pat<(or (and GPRnopc:$Rn, 0xFFFF), (shl GPRnopc:$Rm, imm16_31:$sh)),
+ (PKHBT GPRnopc:$Rn, GPRnopc:$Rm, imm16_31:$sh)>;
// Note: Shifts of 1-15 bits will be transformed to srl instead of sra and
// will match the pattern below.
-def PKHTB : APKHI<0b01101000, 1, (outs GPR:$Rd),
- (ins GPR:$Rn, GPR:$Rm, shift_imm:$sh),
+def PKHTB : APKHI<0b01101000, 1, (outs GPRnopc:$Rd),
+ (ins GPRnopc:$Rn, GPRnopc:$Rm, pkh_asr_amt:$sh),
IIC_iBITsi, "pkhtb", "\t$Rd, $Rn, $Rm$sh",
- [(set GPR:$Rd, (or (and GPR:$Rn, 0xFFFF0000),
- (and (sra GPR:$Rm, asr_amt:$sh),
- 0xFFFF)))]>,
+ [(set GPRnopc:$Rd, (or (and GPRnopc:$Rn, 0xFFFF0000),
+ (and (sra GPRnopc:$Rm, pkh_asr_amt:$sh),
+ 0xFFFF)))]>,
Requires<[IsARM, HasV6]>;
// Alternate cases for PKHTB where identities eliminate some nodes. Note that
// a shift amount of 0 is *not legal* here, it is PKHBT instead.
-def : ARMV6Pat<(or (and GPR:$src1, 0xFFFF0000), (srl GPR:$src2, imm16_31:$sh)),
- (PKHTB GPR:$src1, GPR:$src2, (asr_shift_imm imm16_31:$sh))>;
-def : ARMV6Pat<(or (and GPR:$src1, 0xFFFF0000),
- (and (srl GPR:$src2, imm1_15:$sh), 0xFFFF)),
- (PKHTB GPR:$src1, GPR:$src2, (asr_shift_imm imm1_15:$sh))>;
+def : ARMV6Pat<(or (and GPRnopc:$src1, 0xFFFF0000),
+ (srl GPRnopc:$src2, imm16_31:$sh)),
+ (PKHTB GPRnopc:$src1, GPRnopc:$src2, imm16_31:$sh)>;
+def : ARMV6Pat<(or (and GPRnopc:$src1, 0xFFFF0000),
+ (and (srl GPRnopc:$src2, imm1_15:$sh), 0xFFFF)),
+ (PKHTB GPRnopc:$src1, GPRnopc:$src2, imm1_15:$sh)>;
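
A C++ model of the two pack-halfword forms the patterns above target (the architectural asr #32 case is omitted for simplicity):

    #include <cstdint>

    // PKHBT: bottom half from Rn, top half from (Rm << sh), sh = 0..31.
    static uint32_t pkhbt(uint32_t rn, uint32_t rm, unsigned sh) {
      return (rn & 0x0000FFFFu) | ((rm << sh) & 0xFFFF0000u);
    }
    // PKHTB: top half from Rn, bottom half from (Rm asr sh), sh = 1..31 here;
    // sh == 0 is not a valid PKHTB encoding, it assembles as PKHBT instead.
    static uint32_t pkhtb(uint32_t rn, uint32_t rm, unsigned sh) {
      uint32_t bottom = (uint32_t)((int32_t)rm >> sh) & 0x0000FFFFu;
      return (rn & 0xFFFF0000u) | bottom;
    }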
//===----------------------------------------------------------------------===//
// Comparison Instructions...
@@ -3163,8 +3867,10 @@ def : ARMPat<(ARMcmpZ GPR:$src, so_imm:$imm),
(CMPri GPR:$src, so_imm:$imm)>;
def : ARMPat<(ARMcmpZ GPR:$src, GPR:$rhs),
(CMPrr GPR:$src, GPR:$rhs)>;
-def : ARMPat<(ARMcmpZ GPR:$src, so_reg:$rhs),
- (CMPrs GPR:$src, so_reg:$rhs)>;
+def : ARMPat<(ARMcmpZ GPR:$src, so_reg_imm:$rhs),
+ (CMPrsi GPR:$src, so_reg_imm:$rhs)>;
+def : ARMPat<(ARMcmpZ GPR:$src, so_reg_reg:$rhs),
+ (CMPrsr GPR:$src, so_reg_reg:$rhs)>;
// FIXME: We have to be careful when using the CMN instruction and comparison
// with 0. One would expect these two pieces of code should give identical
@@ -3250,15 +3956,23 @@ def MOVCCr : ARMPseudoInst<(outs GPR:$Rd), (ins GPR:$false, GPR:$Rm, pred:$p),
4, IIC_iCMOVr,
[/*(set GPR:$Rd, (ARMcmov GPR:$false, GPR:$Rm, imm:$cc, CCR:$ccr))*/]>,
RegConstraint<"$false = $Rd">;
-def MOVCCs : ARMPseudoInst<(outs GPR:$Rd),
- (ins GPR:$false, so_reg:$shift, pred:$p),
+def MOVCCsi : ARMPseudoInst<(outs GPR:$Rd),
+ (ins GPR:$false, so_reg_imm:$shift, pred:$p),
+ 4, IIC_iCMOVsr,
+ [/*(set GPR:$Rd, (ARMcmov GPR:$false, so_reg_imm:$shift,
+ imm:$cc, CCR:$ccr))*/]>,
+ RegConstraint<"$false = $Rd">;
+def MOVCCsr : ARMPseudoInst<(outs GPR:$Rd),
+ (ins GPR:$false, so_reg_reg:$shift, pred:$p),
4, IIC_iCMOVsr,
- [/*(set GPR:$Rd, (ARMcmov GPR:$false, so_reg:$shift, imm:$cc, CCR:$ccr))*/]>,
+ [/*(set GPR:$Rd, (ARMcmov GPR:$false, so_reg_reg:$shift,
+ imm:$cc, CCR:$ccr))*/]>,
RegConstraint<"$false = $Rd">;
+
let isMoveImm = 1 in
def MOVCCi16 : ARMPseudoInst<(outs GPR:$Rd),
- (ins GPR:$false, i32imm_hilo16:$imm, pred:$p),
+ (ins GPR:$false, imm0_65535_expr:$imm, pred:$p),
4, IIC_iMOVi,
[]>,
RegConstraint<"$false = $Rd">, Requires<[IsARM, HasV6T2]>;
@@ -3288,9 +4002,14 @@ def MVNCCi : ARMPseudoInst<(outs GPR:$Rd),
// Atomic operations intrinsics
//
+def MemBarrierOptOperand : AsmOperandClass {
+ let Name = "MemBarrierOpt";
+ let ParserMethod = "parseMemBarrierOptOperand";
+}
def memb_opt : Operand<i32> {
let PrintMethod = "printMemBOption";
let ParserMatchClass = MemBarrierOptOperand;
+ let DecoderMethod = "DecodeMemBarrierOption";
}
// memory barriers protect the atomic sequences
@@ -3321,8 +4040,16 @@ def ISB : AInoP<(outs), (ins memb_opt:$opt), MiscFrm, NoItinerary,
let Inst{3-0} = opt;
}
+// Pseudo instruction that combines movs + predicated rsbmi
+// to implement integer ABS.
+let usesCustomInserter = 1, Defs = [CPSR] in {
+def ABS : ARMPseudoInst<
+ (outs GPR:$dst), (ins GPR:$src),
+ 8, NoItinerary, []>;
+}
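
What the custom inserter produces for ABS, modeled in C++; the sign test stands in for the "mi" predicate reading the N flag set by movs:

    #include <cstdint>

    static uint32_t abs_expansion(uint32_t rn) {
      uint32_t rd = rn;            // movs Rd, Rn  (sets N from the result)
      if ((int32_t)rd < 0)         // "mi" condition: N flag set
        rd = 0u - rd;              // rsbmi Rd, Rd, #0
      return rd;                   // note INT_MIN maps to itself, as on hardware
    }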
+
let usesCustomInserter = 1 in {
- let Uses = [CPSR] in {
+ let Defs = [CPSR] in {
def ATOMIC_LOAD_ADD_I8 : PseudoInst<
(outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary,
[(set GPR:$dst, (atomic_load_add_8 GPR:$ptr, GPR:$incr))]>;
@@ -3437,44 +4164,47 @@ let usesCustomInserter = 1 in {
}
let mayLoad = 1 in {
-def LDREXB : AIldrex<0b10, (outs GPR:$Rt), (ins addrmode7:$addr), NoItinerary,
+def LDREXB : AIldrex<0b10, (outs GPR:$Rt), (ins addr_offset_none:$addr),
+ NoItinerary,
"ldrexb", "\t$Rt, $addr", []>;
-def LDREXH : AIldrex<0b11, (outs GPR:$Rt), (ins addrmode7:$addr), NoItinerary,
- "ldrexh", "\t$Rt, $addr", []>;
-def LDREX : AIldrex<0b00, (outs GPR:$Rt), (ins addrmode7:$addr), NoItinerary,
- "ldrex", "\t$Rt, $addr", []>;
+def LDREXH : AIldrex<0b11, (outs GPR:$Rt), (ins addr_offset_none:$addr),
+ NoItinerary, "ldrexh", "\t$Rt, $addr", []>;
+def LDREX : AIldrex<0b00, (outs GPR:$Rt), (ins addr_offset_none:$addr),
+ NoItinerary, "ldrex", "\t$Rt, $addr", []>;
let hasExtraDefRegAllocReq = 1 in
- def LDREXD : AIldrex<0b01, (outs GPR:$Rt, GPR:$Rt2), (ins addrmode7:$addr),
- NoItinerary, "ldrexd", "\t$Rt, $Rt2, $addr", []>;
+def LDREXD: AIldrex<0b01, (outs GPR:$Rt, GPR:$Rt2),(ins addr_offset_none:$addr),
+ NoItinerary, "ldrexd", "\t$Rt, $Rt2, $addr", []> {
+ let DecoderMethod = "DecodeDoubleRegLoad";
+}
}
let mayStore = 1, Constraints = "@earlyclobber $Rd" in {
-def STREXB : AIstrex<0b10, (outs GPR:$Rd), (ins GPR:$Rt, addrmode7:$addr),
+def STREXB: AIstrex<0b10, (outs GPR:$Rd), (ins GPR:$Rt, addr_offset_none:$addr),
NoItinerary, "strexb", "\t$Rd, $Rt, $addr", []>;
-def STREXH : AIstrex<0b11, (outs GPR:$Rd), (ins GPR:$Rt, addrmode7:$addr),
+def STREXH: AIstrex<0b11, (outs GPR:$Rd), (ins GPR:$Rt, addr_offset_none:$addr),
NoItinerary, "strexh", "\t$Rd, $Rt, $addr", []>;
-def STREX : AIstrex<0b00, (outs GPR:$Rd), (ins GPR:$Rt, addrmode7:$addr),
+def STREX : AIstrex<0b00, (outs GPR:$Rd), (ins GPR:$Rt, addr_offset_none:$addr),
NoItinerary, "strex", "\t$Rd, $Rt, $addr", []>;
}
let hasExtraSrcRegAllocReq = 1, Constraints = "@earlyclobber $Rd" in
def STREXD : AIstrex<0b01, (outs GPR:$Rd),
- (ins GPR:$Rt, GPR:$Rt2, addrmode7:$addr),
- NoItinerary, "strexd", "\t$Rd, $Rt, $Rt2, $addr", []>;
+ (ins GPR:$Rt, GPR:$Rt2, addr_offset_none:$addr),
+ NoItinerary, "strexd", "\t$Rd, $Rt, $Rt2, $addr", []> {
+ let DecoderMethod = "DecodeDoubleRegStore";
+}
-// Clear-Exclusive is for disassembly only.
-def CLREX : AXI<(outs), (ins), MiscFrm, NoItinerary, "clrex",
- [/* For disassembly only; pattern left blank */]>,
+def CLREX : AXI<(outs), (ins), MiscFrm, NoItinerary, "clrex", []>,
Requires<[IsARM, HasV7]> {
let Inst{31-0} = 0b11110101011111111111000000011111;
}
-// SWP/SWPB are deprecated in V6/V7 and for disassembly only.
-let mayLoad = 1 in {
-def SWP : AIswp<0, (outs GPR:$Rt), (ins GPR:$Rt2, GPR:$Rn), "swp",
- [/* For disassembly only; pattern left blank */]>;
-def SWPB : AIswp<1, (outs GPR:$Rt), (ins GPR:$Rt2, GPR:$Rn), "swpb",
- [/* For disassembly only; pattern left blank */]>;
+// SWP/SWPB are deprecated in V6/V7.
+let mayLoad = 1, mayStore = 1 in {
+def SWP : AIswp<0, (outs GPR:$Rt), (ins GPR:$Rt2, addr_offset_none:$addr),
+ "swp", []>;
+def SWPB: AIswp<1, (outs GPR:$Rt), (ins GPR:$Rt2, addr_offset_none:$addr),
+ "swpb", []>;
}
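
SWP's data movement, for reference; on hardware the read and the write are a single atomic operation, which this sketch cannot express:

    #include <cstdint>

    // SWP Rt, Rt2, [addr]: Rt = [addr]; [addr] = Rt2 (atomic on hardware).
    static uint32_t swp(uint32_t *addr, uint32_t rt2) {
      uint32_t old = *addr;
      *addr = rt2;
      return old;   // becomes Rt
    }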
//===----------------------------------------------------------------------===//
@@ -3526,108 +4256,171 @@ def CDP2 : ABXI<0b1110, (outs), (ins p_imm:$cop, imm0_15:$opc1,
class ACI<dag oops, dag iops, string opc, string asm,
IndexMode im = IndexModeNone>
+ : I<oops, iops, AddrModeNone, 4, im, BrFrm, NoItinerary,
+ opc, asm, "", []> {
+ let Inst{27-25} = 0b110;
+}
+class ACInoP<dag oops, dag iops, string opc, string asm,
+ IndexMode im = IndexModeNone>
: InoP<oops, iops, AddrModeNone, 4, im, BrFrm, NoItinerary,
- opc, asm, "", [/* For disassembly only; pattern left blank */]> {
+ opc, asm, "", []> {
+ let Inst{31-28} = 0b1111;
let Inst{27-25} = 0b110;
}
-
-multiclass LdStCop<bits<4> op31_28, bit load, dag ops, string opc, string cond>{
-
- def _OFFSET : ACI<(outs),
- !con((ins nohash_imm:$cop, nohash_imm:$CRd, addrmode2:$addr), ops),
- !strconcat(opc, cond), "\tp$cop, cr$CRd, $addr"> {
- let Inst{31-28} = op31_28;
+multiclass LdStCop<bit load, bit Dbit, string asm> {
+ def _OFFSET : ACI<(outs), (ins p_imm:$cop, c_imm:$CRd, addrmode5:$addr),
+ asm, "\t$cop, $CRd, $addr"> {
+ bits<13> addr;
+ bits<4> cop;
+ bits<4> CRd;
let Inst{24} = 1; // P = 1
+ let Inst{23} = addr{8};
+ let Inst{22} = Dbit;
let Inst{21} = 0; // W = 0
- let Inst{22} = 0; // D = 0
let Inst{20} = load;
+ let Inst{19-16} = addr{12-9};
+ let Inst{15-12} = CRd;
+ let Inst{11-8} = cop;
+ let Inst{7-0} = addr{7-0};
+ let DecoderMethod = "DecodeCopMemInstruction";
}
-
- def _PRE : ACI<(outs),
- !con((ins nohash_imm:$cop, nohash_imm:$CRd, addrmode2:$addr), ops),
- !strconcat(opc, cond), "\tp$cop, cr$CRd, $addr!", IndexModePre> {
- let Inst{31-28} = op31_28;
+ def _PRE : ACI<(outs), (ins p_imm:$cop, c_imm:$CRd, addrmode5:$addr),
+ asm, "\t$cop, $CRd, $addr!", IndexModePre> {
+ bits<13> addr;
+ bits<4> cop;
+ bits<4> CRd;
let Inst{24} = 1; // P = 1
+ let Inst{23} = addr{8};
+ let Inst{22} = Dbit;
let Inst{21} = 1; // W = 1
- let Inst{22} = 0; // D = 0
let Inst{20} = load;
+ let Inst{19-16} = addr{12-9};
+ let Inst{15-12} = CRd;
+ let Inst{11-8} = cop;
+ let Inst{7-0} = addr{7-0};
+ let DecoderMethod = "DecodeCopMemInstruction";
}
-
- def _POST : ACI<(outs),
- !con((ins nohash_imm:$cop, nohash_imm:$CRd, addrmode2:$addr), ops),
- !strconcat(opc, cond), "\tp$cop, cr$CRd, $addr", IndexModePost> {
- let Inst{31-28} = op31_28;
+ def _POST: ACI<(outs), (ins p_imm:$cop, c_imm:$CRd, addr_offset_none:$addr,
+ postidx_imm8s4:$offset),
+ asm, "\t$cop, $CRd, $addr, $offset", IndexModePost> {
+ bits<9> offset;
+ bits<4> addr;
+ bits<4> cop;
+ bits<4> CRd;
let Inst{24} = 0; // P = 0
+ let Inst{23} = offset{8};
+ let Inst{22} = Dbit;
let Inst{21} = 1; // W = 1
- let Inst{22} = 0; // D = 0
let Inst{20} = load;
+ let Inst{19-16} = addr;
+ let Inst{15-12} = CRd;
+ let Inst{11-8} = cop;
+ let Inst{7-0} = offset{7-0};
+ let DecoderMethod = "DecodeCopMemInstruction";
}
-
def _OPTION : ACI<(outs),
- !con((ins nohash_imm:$cop,nohash_imm:$CRd,GPR:$base, nohash_imm:$option),
- ops),
- !strconcat(opc, cond), "\tp$cop, cr$CRd, [$base], \\{$option\\}"> {
- let Inst{31-28} = op31_28;
+ (ins p_imm:$cop, c_imm:$CRd, addr_offset_none:$addr,
+ coproc_option_imm:$option),
+ asm, "\t$cop, $CRd, $addr, $option"> {
+ bits<8> option;
+ bits<4> addr;
+ bits<4> cop;
+ bits<4> CRd;
let Inst{24} = 0; // P = 0
let Inst{23} = 1; // U = 1
+ let Inst{22} = Dbit;
let Inst{21} = 0; // W = 0
- let Inst{22} = 0; // D = 0
let Inst{20} = load;
+ let Inst{19-16} = addr;
+ let Inst{15-12} = CRd;
+ let Inst{11-8} = cop;
+ let Inst{7-0} = option;
+ let DecoderMethod = "DecodeCopMemInstruction";
}
-
- def L_OFFSET : ACI<(outs),
- !con((ins nohash_imm:$cop, nohash_imm:$CRd, addrmode2:$addr), ops),
- !strconcat(!strconcat(opc, "l"), cond), "\tp$cop, cr$CRd, $addr"> {
- let Inst{31-28} = op31_28;
+}
+multiclass LdSt2Cop<bit load, bit Dbit, string asm> {
+ def _OFFSET : ACInoP<(outs), (ins p_imm:$cop, c_imm:$CRd, addrmode5:$addr),
+ asm, "\t$cop, $CRd, $addr"> {
+ bits<13> addr;
+ bits<4> cop;
+ bits<4> CRd;
let Inst{24} = 1; // P = 1
+ let Inst{23} = addr{8};
+ let Inst{22} = Dbit;
let Inst{21} = 0; // W = 0
- let Inst{22} = 1; // D = 1
let Inst{20} = load;
+ let Inst{19-16} = addr{12-9};
+ let Inst{15-12} = CRd;
+ let Inst{11-8} = cop;
+ let Inst{7-0} = addr{7-0};
+ let DecoderMethod = "DecodeCopMemInstruction";
}
-
- def L_PRE : ACI<(outs),
- !con((ins nohash_imm:$cop, nohash_imm:$CRd, addrmode2:$addr), ops),
- !strconcat(!strconcat(opc, "l"), cond), "\tp$cop, cr$CRd, $addr!",
- IndexModePre> {
- let Inst{31-28} = op31_28;
+ def _PRE : ACInoP<(outs), (ins p_imm:$cop, c_imm:$CRd, addrmode5:$addr),
+ asm, "\t$cop, $CRd, $addr!", IndexModePre> {
+ bits<13> addr;
+ bits<4> cop;
+ bits<4> CRd;
let Inst{24} = 1; // P = 1
+ let Inst{23} = addr{8};
+ let Inst{22} = Dbit;
let Inst{21} = 1; // W = 1
- let Inst{22} = 1; // D = 1
let Inst{20} = load;
+ let Inst{19-16} = addr{12-9};
+ let Inst{15-12} = CRd;
+ let Inst{11-8} = cop;
+ let Inst{7-0} = addr{7-0};
+ let DecoderMethod = "DecodeCopMemInstruction";
}
-
- def L_POST : ACI<(outs),
- !con((ins nohash_imm:$cop, nohash_imm:$CRd, addrmode2:$addr), ops),
- !strconcat(!strconcat(opc, "l"), cond), "\tp$cop, cr$CRd, $addr",
- IndexModePost> {
- let Inst{31-28} = op31_28;
+ def _POST: ACInoP<(outs), (ins p_imm:$cop, c_imm:$CRd, addr_offset_none:$addr,
+ postidx_imm8s4:$offset),
+ asm, "\t$cop, $CRd, $addr, $offset", IndexModePost> {
+ bits<9> offset;
+ bits<4> addr;
+ bits<4> cop;
+ bits<4> CRd;
let Inst{24} = 0; // P = 0
+ let Inst{23} = offset{8};
+ let Inst{22} = Dbit;
let Inst{21} = 1; // W = 1
- let Inst{22} = 1; // D = 1
let Inst{20} = load;
+ let Inst{19-16} = addr;
+ let Inst{15-12} = CRd;
+ let Inst{11-8} = cop;
+ let Inst{7-0} = offset{7-0};
+ let DecoderMethod = "DecodeCopMemInstruction";
}
-
- def L_OPTION : ACI<(outs),
- !con((ins nohash_imm:$cop, nohash_imm:$CRd,GPR:$base,nohash_imm:$option),
- ops),
- !strconcat(!strconcat(opc, "l"), cond),
- "\tp$cop, cr$CRd, [$base], \\{$option\\}"> {
- let Inst{31-28} = op31_28;
+ def _OPTION : ACInoP<(outs),
+ (ins p_imm:$cop, c_imm:$CRd, addr_offset_none:$addr,
+ coproc_option_imm:$option),
+ asm, "\t$cop, $CRd, $addr, $option"> {
+ bits<8> option;
+ bits<4> addr;
+ bits<4> cop;
+ bits<4> CRd;
let Inst{24} = 0; // P = 0
let Inst{23} = 1; // U = 1
+ let Inst{22} = Dbit;
let Inst{21} = 0; // W = 0
- let Inst{22} = 1; // D = 1
let Inst{20} = load;
+ let Inst{19-16} = addr;
+ let Inst{15-12} = CRd;
+ let Inst{11-8} = cop;
+ let Inst{7-0} = option;
+ let DecoderMethod = "DecodeCopMemInstruction";
}
}
-defm LDC : LdStCop<{?,?,?,?}, 1, (ins pred:$p), "ldc", "${p}">;
-defm LDC2 : LdStCop<0b1111, 1, (ins), "ldc2", "">;
-defm STC : LdStCop<{?,?,?,?}, 0, (ins pred:$p), "stc", "${p}">;
-defm STC2 : LdStCop<0b1111, 0, (ins), "stc2", "">;
+defm LDC : LdStCop <1, 0, "ldc">;
+defm LDCL : LdStCop <1, 1, "ldcl">;
+defm STC : LdStCop <0, 0, "stc">;
+defm STCL : LdStCop <0, 1, "stcl">;
+defm LDC2 : LdSt2Cop<1, 0, "ldc2">;
+defm LDC2L : LdSt2Cop<1, 1, "ldc2l">;
+defm STC2 : LdSt2Cop<0, 0, "stc2">;
+defm STC2L : LdSt2Cop<0, 1, "stc2l">;
//===----------------------------------------------------------------------===//
-// Move between coprocessor and ARM core register -- for disassembly only
+// Move between coprocessor and ARM core register.
//
class MovRCopro<string opc, bit direction, dag oops, dag iops,
@@ -3660,8 +4453,8 @@ def MCR : MovRCopro<"mcr", 0 /* from ARM core register to coprocessor */,
imm:$CRm, imm:$opc2)]>;
def MRC : MovRCopro<"mrc", 1 /* from coprocessor to ARM core register */,
(outs GPR:$Rt),
- (ins p_imm:$cop, i32imm:$opc1, c_imm:$CRn, c_imm:$CRm,
- i32imm:$opc2), []>;
+ (ins p_imm:$cop, imm0_7:$opc1, c_imm:$CRn, c_imm:$CRm,
+ imm0_7:$opc2), []>;
def : ARMPat<(int_arm_mrc imm:$cop, imm:$opc1, imm:$CRn, imm:$CRm, imm:$opc2),
(MRC imm:$cop, imm:$opc1, imm:$CRn, imm:$CRm, imm:$opc2)>;
@@ -3697,15 +4490,14 @@ def MCR2 : MovRCopro2<"mcr2", 0 /* from ARM core register to coprocessor */,
imm:$CRm, imm:$opc2)]>;
def MRC2 : MovRCopro2<"mrc2", 1 /* from coprocessor to ARM core register */,
(outs GPR:$Rt),
- (ins p_imm:$cop, i32imm:$opc1, c_imm:$CRn, c_imm:$CRm,
- i32imm:$opc2), []>;
+ (ins p_imm:$cop, imm0_7:$opc1, c_imm:$CRn, c_imm:$CRm,
+ imm0_7:$opc2), []>;
def : ARMV5TPat<(int_arm_mrc2 imm:$cop, imm:$opc1, imm:$CRn,
imm:$CRm, imm:$opc2),
(MRC2 imm:$cop, imm:$opc1, imm:$CRn, imm:$CRm, imm:$opc2)>;
-class MovRRCopro<string opc, bit direction,
- list<dag> pattern = [/* For disassembly only */]>
+class MovRRCopro<string opc, bit direction, list<dag> pattern = []>
: ABI<0b1100, (outs), (ins p_imm:$cop, imm0_15:$opc1,
GPR:$Rt, GPR:$Rt2, c_imm:$CRm),
NoItinerary, opc, "\t$cop, $opc1, $Rt, $Rt2, $CRm", pattern> {
@@ -3730,8 +4522,7 @@ def MCRR : MovRRCopro<"mcrr", 0 /* from ARM core register to coprocessor */,
imm:$CRm)]>;
def MRRC : MovRRCopro<"mrrc", 1 /* from coprocessor to ARM core register */>;
-class MovRRCopro2<string opc, bit direction,
- list<dag> pattern = [/* For disassembly only */]>
+class MovRRCopro2<string opc, bit direction, list<dag> pattern = []>
: ABXI<0b1100, (outs), (ins p_imm:$cop, imm0_15:$opc1,
GPR:$Rt, GPR:$Rt2, c_imm:$CRm), NoItinerary,
!strconcat(opc, "\t$cop, $opc1, $Rt, $Rt2, $CRm"), pattern> {
@@ -3758,20 +4549,22 @@ def MCRR2 : MovRRCopro2<"mcrr2", 0 /* from ARM core register to coprocessor */,
def MRRC2 : MovRRCopro2<"mrrc2", 1 /* from coprocessor to ARM core register */>;
//===----------------------------------------------------------------------===//
-// Move between special register and ARM core register -- for disassembly only
+// Move between special register and ARM core register
//
// Move to ARM core register from Special Register
-def MRS : ABI<0b0001, (outs GPR:$Rd), (ins), NoItinerary, "mrs", "\t$Rd, cpsr",
- [/* For disassembly only; pattern left blank */]> {
+def MRS : ABI<0b0001, (outs GPR:$Rd), (ins), NoItinerary,
+ "mrs", "\t$Rd, apsr", []> {
bits<4> Rd;
let Inst{23-16} = 0b00001111;
let Inst{15-12} = Rd;
let Inst{7-4} = 0b0000;
}
-def MRSsys : ABI<0b0001, (outs GPR:$Rd), (ins), NoItinerary,"mrs","\t$Rd, spsr",
- [/* For disassembly only; pattern left blank */]> {
+def : InstAlias<"mrs${p} $Rd, cpsr", (MRS GPR:$Rd, pred:$p)>, Requires<[IsARM]>;
+
+def MRSsys : ABI<0b0001, (outs GPR:$Rd), (ins), NoItinerary,
+ "mrs", "\t$Rd, spsr", []> {
bits<4> Rd;
let Inst{23-16} = 0b01001111;
let Inst{15-12} = Rd;
@@ -3785,8 +4578,7 @@ def MRSsys : ABI<0b0001, (outs GPR:$Rd), (ins), NoItinerary,"mrs","\t$Rd, spsr",
// operand contains the special register (R Bit) in bit 4 and bits 3-0 contain
// the mask with the fields to be accessed in the special register.
def MSR : ABI<0b0001, (outs), (ins msr_mask:$mask, GPR:$Rn), NoItinerary,
- "msr", "\t$mask, $Rn",
- [/* For disassembly only; pattern left blank */]> {
+ "msr", "\t$mask, $Rn", []> {
bits<5> mask;
bits<4> Rn;
@@ -3800,8 +4592,7 @@ def MSR : ABI<0b0001, (outs), (ins msr_mask:$mask, GPR:$Rn), NoItinerary,
}
def MSRi : ABI<0b0011, (outs), (ins msr_mask:$mask, so_imm:$a), NoItinerary,
- "msr", "\t$mask, $a",
- [/* For disassembly only; pattern left blank */]> {
+ "msr", "\t$mask, $a", []> {
bits<5> mask;
bits<12> a;
@@ -4030,6 +4821,47 @@ def : ARMV5TEPat<(add GPR:$acc,
def : ARMPat<(ARMMemBarrierMCR GPR:$zero), (MCR 15, 0, GPR:$zero, 7, 10, 5)>,
Requires<[IsARM, HasV6]>;
+// SXT/UXT with no rotate
+let AddedComplexity = 16 in {
+def : ARMV6Pat<(and GPR:$Src, 0x000000FF), (UXTB GPR:$Src, 0)>;
+def : ARMV6Pat<(and GPR:$Src, 0x0000FFFF), (UXTH GPR:$Src, 0)>;
+def : ARMV6Pat<(and GPR:$Src, 0x00FF00FF), (UXTB16 GPR:$Src, 0)>;
+def : ARMV6Pat<(add GPR:$Rn, (and GPR:$Rm, 0x00FF)),
+ (UXTAB GPR:$Rn, GPR:$Rm, 0)>;
+def : ARMV6Pat<(add GPR:$Rn, (and GPR:$Rm, 0xFFFF)),
+ (UXTAH GPR:$Rn, GPR:$Rm, 0)>;
+}
+
+def : ARMV6Pat<(sext_inreg GPR:$Src, i8), (SXTB GPR:$Src, 0)>;
+def : ARMV6Pat<(sext_inreg GPR:$Src, i16), (SXTH GPR:$Src, 0)>;
+
+def : ARMV6Pat<(add GPR:$Rn, (sext_inreg GPRnopc:$Rm, i8)),
+ (SXTAB GPR:$Rn, GPRnopc:$Rm, 0)>;
+def : ARMV6Pat<(add GPR:$Rn, (sext_inreg GPRnopc:$Rm, i16)),
+ (SXTAH GPR:$Rn, GPRnopc:$Rm, 0)>;
+
+// Atomic load/store patterns
+def : ARMPat<(atomic_load_8 ldst_so_reg:$src),
+ (LDRBrs ldst_so_reg:$src)>;
+def : ARMPat<(atomic_load_8 addrmode_imm12:$src),
+ (LDRBi12 addrmode_imm12:$src)>;
+def : ARMPat<(atomic_load_16 addrmode3:$src),
+ (LDRH addrmode3:$src)>;
+def : ARMPat<(atomic_load_32 ldst_so_reg:$src),
+ (LDRrs ldst_so_reg:$src)>;
+def : ARMPat<(atomic_load_32 addrmode_imm12:$src),
+ (LDRi12 addrmode_imm12:$src)>;
+def : ARMPat<(atomic_store_8 ldst_so_reg:$ptr, GPR:$val),
+ (STRBrs GPR:$val, ldst_so_reg:$ptr)>;
+def : ARMPat<(atomic_store_8 addrmode_imm12:$ptr, GPR:$val),
+ (STRBi12 GPR:$val, addrmode_imm12:$ptr)>;
+def : ARMPat<(atomic_store_16 addrmode3:$ptr, GPR:$val),
+ (STRH GPR:$val, addrmode3:$ptr)>;
+def : ARMPat<(atomic_store_32 ldst_so_reg:$ptr, GPR:$val),
+ (STRrs GPR:$val, ldst_so_reg:$ptr)>;
+def : ARMPat<(atomic_store_32 addrmode_imm12:$ptr, GPR:$val),
+ (STRi12 GPR:$val, addrmode_imm12:$ptr)>;
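+// Note: these patterns map unordered/monotonic atomic loads and stores onto
+// the plain load/store instructions; aligned ARM accesses up to 32 bits are
+// single-copy atomic, and any required ordering comes from separate barriers.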
+
//===----------------------------------------------------------------------===//
// Thumb Support
@@ -4070,7 +4902,103 @@ def : MnemonicAlias<"swi", "svc">;
// Load / Store Multiple
def : MnemonicAlias<"ldmfd", "ldm">;
def : MnemonicAlias<"ldmia", "ldm">;
+def : MnemonicAlias<"ldmea", "ldmdb">;
def : MnemonicAlias<"stmfd", "stmdb">;
def : MnemonicAlias<"stmia", "stm">;
def : MnemonicAlias<"stmea", "stm">;
+// PKHBT/PKHTB with default shift amount. PKHTB is equivalent to PKHBT when the
+// shift amount is zero (i.e., unspecified).
+def : InstAlias<"pkhbt${p} $Rd, $Rn, $Rm",
+ (PKHBT GPRnopc:$Rd, GPRnopc:$Rn, GPRnopc:$Rm, 0, pred:$p)>,
+ Requires<[IsARM, HasV6]>;
+def : InstAlias<"pkhtb${p} $Rd, $Rn, $Rm",
+ (PKHBT GPRnopc:$Rd, GPRnopc:$Rn, GPRnopc:$Rm, 0, pred:$p)>,
+ Requires<[IsARM, HasV6]>;
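+// Illustrative: with the aliases above, "pkhtb r0, r1, r2" (no shift)
+// assembles to the same instruction as "pkhbt r0, r1, r2".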
+
+// PUSH/POP aliases for STM/LDM
+def : ARMInstAlias<"push${p} $regs", (STMDB_UPD SP, pred:$p, reglist:$regs)>;
+def : ARMInstAlias<"pop${p} $regs", (LDMIA_UPD SP, pred:$p, reglist:$regs)>;
+
+// SSAT/USAT optional shift operand.
+def : ARMInstAlias<"ssat${p} $Rd, $sat_imm, $Rn",
+ (SSAT GPRnopc:$Rd, imm1_32:$sat_imm, GPRnopc:$Rn, 0, pred:$p)>;
+def : ARMInstAlias<"usat${p} $Rd, $sat_imm, $Rn",
+ (USAT GPRnopc:$Rd, imm0_31:$sat_imm, GPRnopc:$Rn, 0, pred:$p)>;
+
+
+// Extend instruction optional rotate operand.
+def : ARMInstAlias<"sxtab${p} $Rd, $Rn, $Rm",
+ (SXTAB GPRnopc:$Rd, GPR:$Rn, GPRnopc:$Rm, 0, pred:$p)>;
+def : ARMInstAlias<"sxtah${p} $Rd, $Rn, $Rm",
+ (SXTAH GPRnopc:$Rd, GPR:$Rn, GPRnopc:$Rm, 0, pred:$p)>;
+def : ARMInstAlias<"sxtab16${p} $Rd, $Rn, $Rm",
+ (SXTAB16 GPRnopc:$Rd, GPR:$Rn, GPRnopc:$Rm, 0, pred:$p)>;
+def : ARMInstAlias<"sxtb${p} $Rd, $Rm",
+ (SXTB GPRnopc:$Rd, GPRnopc:$Rm, 0, pred:$p)>;
+def : ARMInstAlias<"sxtb16${p} $Rd, $Rm",
+ (SXTB16 GPRnopc:$Rd, GPRnopc:$Rm, 0, pred:$p)>;
+def : ARMInstAlias<"sxth${p} $Rd, $Rm",
+ (SXTH GPRnopc:$Rd, GPRnopc:$Rm, 0, pred:$p)>;
+
+def : ARMInstAlias<"uxtab${p} $Rd, $Rn, $Rm",
+ (UXTAB GPRnopc:$Rd, GPR:$Rn, GPRnopc:$Rm, 0, pred:$p)>;
+def : ARMInstAlias<"uxtah${p} $Rd, $Rn, $Rm",
+ (UXTAH GPRnopc:$Rd, GPR:$Rn, GPRnopc:$Rm, 0, pred:$p)>;
+def : ARMInstAlias<"uxtab16${p} $Rd, $Rn, $Rm",
+ (UXTAB16 GPRnopc:$Rd, GPR:$Rn, GPRnopc:$Rm, 0, pred:$p)>;
+def : ARMInstAlias<"uxtb${p} $Rd, $Rm",
+ (UXTB GPRnopc:$Rd, GPRnopc:$Rm, 0, pred:$p)>;
+def : ARMInstAlias<"uxtb16${p} $Rd, $Rm",
+ (UXTB16 GPRnopc:$Rd, GPRnopc:$Rm, 0, pred:$p)>;
+def : ARMInstAlias<"uxth${p} $Rd, $Rm",
+ (UXTH GPRnopc:$Rd, GPRnopc:$Rm, 0, pred:$p)>;
+
+
+// RFE aliases
+def : MnemonicAlias<"rfefa", "rfeda">;
+def : MnemonicAlias<"rfeea", "rfedb">;
+def : MnemonicAlias<"rfefd", "rfeia">;
+def : MnemonicAlias<"rfeed", "rfeib">;
+def : MnemonicAlias<"rfe", "rfeia">;
+
+// SRS aliases
+def : MnemonicAlias<"srsfa", "srsda">;
+def : MnemonicAlias<"srsea", "srsdb">;
+def : MnemonicAlias<"srsfd", "srsia">;
+def : MnemonicAlias<"srsed", "srsib">;
+def : MnemonicAlias<"srs", "srsia">;
+
+// QSAX == QSUBADDX
+def : MnemonicAlias<"qsubaddx", "qsax">;
+// SASX == SADDSUBX
+def : MnemonicAlias<"saddsubx", "sasx">;
+// SHASX == SHADDSUBX
+def : MnemonicAlias<"shaddsubx", "shasx">;
+// SHSAX == SHSUBADDX
+def : MnemonicAlias<"shsubaddx", "shsax">;
+// SSAX == SSUBADDX
+def : MnemonicAlias<"ssubaddx", "ssax">;
+// UASX == UADDSUBX
+def : MnemonicAlias<"uaddsubx", "uasx">;
+// UHASX == UHADDSUBX
+def : MnemonicAlias<"uhaddsubx", "uhasx">;
+// UHSAX == UHSUBADDX
+def : MnemonicAlias<"uhsubaddx", "uhsax">;
+// UQASX == UQADDSUBX
+def : MnemonicAlias<"uqaddsubx", "uqasx">;
+// UQSAX == UQSUBADDX
+def : MnemonicAlias<"uqsubaddx", "uqsax">;
+// USAX == USUBADDX
+def : MnemonicAlias<"usubaddx", "usax">;
+
+// LDRSBT/LDRHT/LDRSHT post-index offset is optional.
+// Note that the write-back output register is a dummy operand for MC (it's
+// only meaningful for codegen), so we just pass zero here.
+// FIXME: tblgen not cooperating with argument conversions.
+//def : InstAlias<"ldrsbt${p} $Rt, $addr",
+// (LDRSBTi GPR:$Rt, GPR:$Rt, addr_offset_none:$addr, 0, pred:$p)>;
+//def : InstAlias<"ldrht${p} $Rt, $addr",
+// (LDRHTi GPR:$Rt, GPR:$Rt, addr_offset_none:$addr, 0, pred:$p)>;
+//def : InstAlias<"ldrsht${p} $Rt, $addr",
+// (LDRSHTi GPR:$Rt, GPR:$Rt, addr_offset_none:$addr, 0, pred:$p)>;
diff --git a/lib/Target/ARM/ARMInstrNEON.td b/lib/Target/ARM/ARMInstrNEON.td
index 0df62f4..7aad186 100644
--- a/lib/Target/ARM/ARMInstrNEON.td
+++ b/lib/Target/ARM/ARMInstrNEON.td
@@ -11,6 +11,35 @@
//
//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// NEON-specific Operands.
+//===----------------------------------------------------------------------===//
+def VectorIndex8Operand : AsmOperandClass { let Name = "VectorIndex8"; }
+def VectorIndex16Operand : AsmOperandClass { let Name = "VectorIndex16"; }
+def VectorIndex32Operand : AsmOperandClass { let Name = "VectorIndex32"; }
+def VectorIndex8 : Operand<i32>, ImmLeaf<i32, [{
+ return ((uint64_t)Imm) < 8;
+}]> {
+ let ParserMatchClass = VectorIndex8Operand;
+ let PrintMethod = "printVectorIndex";
+ let MIOperandInfo = (ops i32imm);
+}
+def VectorIndex16 : Operand<i32>, ImmLeaf<i32, [{
+ return ((uint64_t)Imm) < 4;
+}]> {
+ let ParserMatchClass = VectorIndex16Operand;
+ let PrintMethod = "printVectorIndex";
+ let MIOperandInfo = (ops i32imm);
+}
+def VectorIndex32 : Operand<i32>, ImmLeaf<i32, [{
+ return ((uint64_t)Imm) < 2;
+}]> {
+ let ParserMatchClass = VectorIndex32Operand;
+ let PrintMethod = "printVectorIndex";
+ let MIOperandInfo = (ops i32imm);
+}
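+
+// The index bounds above reflect how many lanes fit in a 64-bit D register:
+// eight i8 lanes, four i16 lanes, or two i32 lanes.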
+
//===----------------------------------------------------------------------===//
// NEON-specific DAG Nodes.
//===----------------------------------------------------------------------===//
@@ -175,7 +204,8 @@ class VLDQQWBPseudo<InstrItinClass itin>
(ins addrmode6:$addr, am6offset:$offset), itin,
"$addr.addr = $wb">;
class VLDQQQQPseudo<InstrItinClass itin>
- : PseudoNLdSt<(outs QQQQPR:$dst), (ins addrmode6:$addr, QQQQPR:$src),itin,"">;
+ : PseudoNLdSt<(outs QQQQPR:$dst), (ins addrmode6:$addr, QQQQPR:$src),itin,
+ "$src = $dst">;
class VLDQQQQWBPseudo<InstrItinClass itin>
: PseudoNLdSt<(outs QQQQPR:$dst, GPR:$wb),
(ins addrmode6:$addr, am6offset:$offset, QQQQPR:$src), itin,
@@ -190,6 +220,7 @@ class VLD1D<bits<4> op7_4, string Dt>
"vld1", Dt, "\\{$Vd\\}, $Rn", "", []> {
let Rm = 0b1111;
let Inst{4} = Rn{4};
+ let DecoderMethod = "DecodeVLDInstruction";
}
class VLD1Q<bits<4> op7_4, string Dt>
: NLdSt<0,0b10,0b1010,op7_4, (outs DPR:$Vd, DPR:$dst2),
@@ -197,6 +228,7 @@ class VLD1Q<bits<4> op7_4, string Dt>
"vld1", Dt, "\\{$Vd, $dst2\\}, $Rn", "", []> {
let Rm = 0b1111;
let Inst{5-4} = Rn{5-4};
+ let DecoderMethod = "DecodeVLDInstruction";
}
def VLD1d8 : VLD1D<{0,0,0,?}, "8">;
@@ -221,6 +253,7 @@ class VLD1DWB<bits<4> op7_4, string Dt>
"vld1", Dt, "\\{$Vd\\}, $Rn$Rm",
"$Rn.addr = $wb", []> {
let Inst{4} = Rn{4};
+ let DecoderMethod = "DecodeVLDInstruction";
}
class VLD1QWB<bits<4> op7_4, string Dt>
: NLdSt<0,0b10,0b1010,op7_4, (outs DPR:$Vd, DPR:$dst2, GPR:$wb),
@@ -228,6 +261,7 @@ class VLD1QWB<bits<4> op7_4, string Dt>
"vld1", Dt, "\\{$Vd, $dst2\\}, $Rn$Rm",
"$Rn.addr = $wb", []> {
let Inst{5-4} = Rn{5-4};
+ let DecoderMethod = "DecodeVLDInstruction";
}
def VLD1d8_UPD : VLD1DWB<{0,0,0,?}, "8">;
@@ -252,12 +286,14 @@ class VLD1D3<bits<4> op7_4, string Dt>
"\\{$Vd, $dst2, $dst3\\}, $Rn", "", []> {
let Rm = 0b1111;
let Inst{4} = Rn{4};
+ let DecoderMethod = "DecodeVLDInstruction";
}
class VLD1D3WB<bits<4> op7_4, string Dt>
: NLdSt<0,0b10,0b0110,op7_4, (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, GPR:$wb),
(ins addrmode6:$Rn, am6offset:$Rm), IIC_VLD1x3u, "vld1", Dt,
"\\{$Vd, $dst2, $dst3\\}, $Rn$Rm", "$Rn.addr = $wb", []> {
let Inst{4} = Rn{4};
+ let DecoderMethod = "DecodeVLDInstruction";
}
def VLD1d8T : VLD1D3<{0,0,0,?}, "8">;
@@ -280,6 +316,7 @@ class VLD1D4<bits<4> op7_4, string Dt>
"\\{$Vd, $dst2, $dst3, $dst4\\}, $Rn", "", []> {
let Rm = 0b1111;
let Inst{5-4} = Rn{5-4};
+ let DecoderMethod = "DecodeVLDInstruction";
}
class VLD1D4WB<bits<4> op7_4, string Dt>
: NLdSt<0,0b10,0b0010,op7_4,
@@ -288,6 +325,7 @@ class VLD1D4WB<bits<4> op7_4, string Dt>
"\\{$Vd, $dst2, $dst3, $dst4\\}, $Rn$Rm", "$Rn.addr = $wb",
[]> {
let Inst{5-4} = Rn{5-4};
+ let DecoderMethod = "DecodeVLDInstruction";
}
def VLD1d8Q : VLD1D4<{0,0,?,?}, "8">;
@@ -310,6 +348,7 @@ class VLD2D<bits<4> op11_8, bits<4> op7_4, string Dt>
"vld2", Dt, "\\{$Vd, $dst2\\}, $Rn", "", []> {
let Rm = 0b1111;
let Inst{5-4} = Rn{5-4};
+ let DecoderMethod = "DecodeVLDInstruction";
}
class VLD2Q<bits<4> op7_4, string Dt>
: NLdSt<0, 0b10, 0b0011, op7_4,
@@ -318,6 +357,7 @@ class VLD2Q<bits<4> op7_4, string Dt>
"vld2", Dt, "\\{$Vd, $dst2, $dst3, $dst4\\}, $Rn", "", []> {
let Rm = 0b1111;
let Inst{5-4} = Rn{5-4};
+ let DecoderMethod = "DecodeVLDInstruction";
}
def VLD2d8 : VLD2D<0b1000, {0,0,?,?}, "8">;
@@ -343,6 +383,7 @@ class VLD2DWB<bits<4> op11_8, bits<4> op7_4, string Dt>
"vld2", Dt, "\\{$Vd, $dst2\\}, $Rn$Rm",
"$Rn.addr = $wb", []> {
let Inst{5-4} = Rn{5-4};
+ let DecoderMethod = "DecodeVLDInstruction";
}
class VLD2QWB<bits<4> op7_4, string Dt>
: NLdSt<0, 0b10, 0b0011, op7_4,
@@ -351,6 +392,7 @@ class VLD2QWB<bits<4> op7_4, string Dt>
"vld2", Dt, "\\{$Vd, $dst2, $dst3, $dst4\\}, $Rn$Rm",
"$Rn.addr = $wb", []> {
let Inst{5-4} = Rn{5-4};
+ let DecoderMethod = "DecodeVLDInstruction";
}
def VLD2d8_UPD : VLD2DWB<0b1000, {0,0,?,?}, "8">;
@@ -384,6 +426,7 @@ class VLD3D<bits<4> op11_8, bits<4> op7_4, string Dt>
"vld3", Dt, "\\{$Vd, $dst2, $dst3\\}, $Rn", "", []> {
let Rm = 0b1111;
let Inst{4} = Rn{4};
+ let DecoderMethod = "DecodeVLDInstruction";
}
def VLD3d8 : VLD3D<0b0100, {0,0,0,?}, "8">;
@@ -402,6 +445,7 @@ class VLD3DWB<bits<4> op11_8, bits<4> op7_4, string Dt>
"vld3", Dt, "\\{$Vd, $dst2, $dst3\\}, $Rn$Rm",
"$Rn.addr = $wb", []> {
let Inst{4} = Rn{4};
+ let DecoderMethod = "DecodeVLDInstruction";
}
def VLD3d8_UPD : VLD3DWB<0b0100, {0,0,0,?}, "8">;
@@ -441,6 +485,7 @@ class VLD4D<bits<4> op11_8, bits<4> op7_4, string Dt>
"vld4", Dt, "\\{$Vd, $dst2, $dst3, $dst4\\}, $Rn", "", []> {
let Rm = 0b1111;
let Inst{5-4} = Rn{5-4};
+ let DecoderMethod = "DecodeVLDInstruction";
}
def VLD4d8 : VLD4D<0b0000, {0,0,?,?}, "8">;
@@ -459,6 +504,7 @@ class VLD4DWB<bits<4> op11_8, bits<4> op7_4, string Dt>
"vld4", Dt, "\\{$Vd, $dst2, $dst3, $dst4\\}, $Rn$Rm",
"$Rn.addr = $wb", []> {
let Inst{5-4} = Rn{5-4};
+ let DecoderMethod = "DecodeVLDInstruction";
}
def VLD4d8_UPD : VLD4DWB<0b0000, {0,0,?,?}, "8">;
@@ -530,6 +576,7 @@ class VLD1LN<bits<4> op11_8, bits<4> op7_4, string Dt, ValueType Ty,
(i32 (LoadOp addrmode6:$Rn)),
imm:$lane))]> {
let Rm = 0b1111;
+ let DecoderMethod = "DecodeVLD1LN";
}
class VLD1LN32<bits<4> op11_8, bits<4> op7_4, string Dt, ValueType Ty,
PatFrag LoadOp>
@@ -541,6 +588,7 @@ class VLD1LN32<bits<4> op11_8, bits<4> op7_4, string Dt, ValueType Ty,
(i32 (LoadOp addrmode6oneL32:$Rn)),
imm:$lane))]> {
let Rm = 0b1111;
+ let DecoderMethod = "DecodeVLD1LN";
}
class VLD1QLNPseudo<ValueType Ty, PatFrag LoadOp> : VLDQLNPseudo<IIC_VLD1ln> {
let Pattern = [(set QPR:$dst, (vector_insert (Ty QPR:$src),
@@ -580,7 +628,9 @@ class VLD1LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
(ins addrmode6:$Rn, am6offset:$Rm,
DPR:$src, nohash_imm:$lane), IIC_VLD1lnu, "vld1", Dt,
"\\{$Vd[$lane]\\}, $Rn$Rm",
- "$src = $Vd, $Rn.addr = $wb", []>;
+ "$src = $Vd, $Rn.addr = $wb", []> {
+ let DecoderMethod = "DecodeVLD1LN";
+}
def VLD1LNd8_UPD : VLD1LNWB<0b0000, {?,?,?,0}, "8"> {
let Inst{7-5} = lane{2-0};
@@ -607,6 +657,7 @@ class VLD2LN<bits<4> op11_8, bits<4> op7_4, string Dt>
"$src1 = $Vd, $src2 = $dst2", []> {
let Rm = 0b1111;
let Inst{4} = Rn{4};
+ let DecoderMethod = "DecodeVLD2LN";
}
def VLD2LNd8 : VLD2LN<0b0001, {?,?,?,?}, "8"> {
@@ -642,6 +693,7 @@ class VLD2LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
"\\{$Vd[$lane], $dst2[$lane]\\}, $Rn$Rm",
"$src1 = $Vd, $src2 = $dst2, $Rn.addr = $wb", []> {
let Inst{4} = Rn{4};
+ let DecoderMethod = "DecodeVLD2LN";
}
def VLD2LNd8_UPD : VLD2LNWB<0b0001, {?,?,?,?}, "8"> {
@@ -676,6 +728,7 @@ class VLD3LN<bits<4> op11_8, bits<4> op7_4, string Dt>
"\\{$Vd[$lane], $dst2[$lane], $dst3[$lane]\\}, $Rn",
"$src1 = $Vd, $src2 = $dst2, $src3 = $dst3", []> {
let Rm = 0b1111;
+ let DecoderMethod = "DecodeVLD3LN";
}
def VLD3LNd8 : VLD3LN<0b0010, {?,?,?,0}, "8"> {
@@ -712,7 +765,9 @@ class VLD3LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
IIC_VLD3lnu, "vld3", Dt,
"\\{$Vd[$lane], $dst2[$lane], $dst3[$lane]\\}, $Rn$Rm",
"$src1 = $Vd, $src2 = $dst2, $src3 = $dst3, $Rn.addr = $wb",
- []>;
+ []> {
+ let DecoderMethod = "DecodeVLD3LN";
+}
def VLD3LNd8_UPD : VLD3LNWB<0b0010, {?,?,?,0}, "8"> {
let Inst{7-5} = lane{2-0};
@@ -748,6 +803,7 @@ class VLD4LN<bits<4> op11_8, bits<4> op7_4, string Dt>
"$src1 = $Vd, $src2 = $dst2, $src3 = $dst3, $src4 = $dst4", []> {
let Rm = 0b1111;
let Inst{4} = Rn{4};
+ let DecoderMethod = "DecodeVLD4LN";
}
def VLD4LNd8 : VLD4LN<0b0011, {?,?,?,?}, "8"> {
@@ -788,6 +844,7 @@ class VLD4LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
"$src1 = $Vd, $src2 = $dst2, $src3 = $dst3, $src4 = $dst4, $Rn.addr = $wb",
[]> {
let Inst{4} = Rn{4};
+ let DecoderMethod = "DecodeVLD4LN" ;
}
def VLD4LNd8_UPD : VLD4LNWB<0b0011, {?,?,?,?}, "8"> {
@@ -825,6 +882,7 @@ class VLD1DUP<bits<4> op7_4, string Dt, ValueType Ty, PatFrag LoadOp>
[(set DPR:$Vd, (Ty (NEONvdup (i32 (LoadOp addrmode6dup:$Rn)))))]> {
let Rm = 0b1111;
let Inst{4} = Rn{4};
+ let DecoderMethod = "DecodeVLD1DupInstruction";
}
class VLD1QDUPPseudo<ValueType Ty, PatFrag LoadOp> : VLDQPseudo<IIC_VLD1dup> {
let Pattern = [(set QPR:$dst,
@@ -852,6 +910,7 @@ class VLD1QDUP<bits<4> op7_4, string Dt>
"vld1", Dt, "\\{$Vd[], $dst2[]\\}, $Rn", "", []> {
let Rm = 0b1111;
let Inst{4} = Rn{4};
+ let DecoderMethod = "DecodeVLD1DupInstruction";
}
def VLD1DUPq8 : VLD1QDUP<{0,0,1,0}, "8">;
@@ -864,12 +923,14 @@ class VLD1DUPWB<bits<4> op7_4, string Dt>
(ins addrmode6dup:$Rn, am6offset:$Rm), IIC_VLD1dupu,
"vld1", Dt, "\\{$Vd[]\\}, $Rn$Rm", "$Rn.addr = $wb", []> {
let Inst{4} = Rn{4};
+ let DecoderMethod = "DecodeVLD1DupInstruction";
}
class VLD1QDUPWB<bits<4> op7_4, string Dt>
: NLdSt<1, 0b10, 0b1100, op7_4, (outs DPR:$Vd, DPR:$dst2, GPR:$wb),
(ins addrmode6dup:$Rn, am6offset:$Rm), IIC_VLD1dupu,
"vld1", Dt, "\\{$Vd[], $dst2[]\\}, $Rn$Rm", "$Rn.addr = $wb", []> {
let Inst{4} = Rn{4};
+ let DecoderMethod = "DecodeVLD1DupInstruction";
}
def VLD1DUPd8_UPD : VLD1DUPWB<{0,0,0,0}, "8">;
@@ -891,6 +952,7 @@ class VLD2DUP<bits<4> op7_4, string Dt>
"vld2", Dt, "\\{$Vd[], $dst2[]\\}, $Rn", "", []> {
let Rm = 0b1111;
let Inst{4} = Rn{4};
+ let DecoderMethod = "DecodeVLD2DupInstruction";
}
def VLD2DUPd8 : VLD2DUP<{0,0,0,?}, "8">;
@@ -912,6 +974,7 @@ class VLD2DUPWB<bits<4> op7_4, string Dt>
(ins addrmode6dup:$Rn, am6offset:$Rm), IIC_VLD2dupu,
"vld2", Dt, "\\{$Vd[], $dst2[]\\}, $Rn$Rm", "$Rn.addr = $wb", []> {
let Inst{4} = Rn{4};
+ let DecoderMethod = "DecodeVLD2DupInstruction";
}
def VLD2DUPd8_UPD : VLD2DUPWB<{0,0,0,0}, "8">;
@@ -932,7 +995,8 @@ class VLD3DUP<bits<4> op7_4, string Dt>
(ins addrmode6dup:$Rn), IIC_VLD3dup,
"vld3", Dt, "\\{$Vd[], $dst2[], $dst3[]\\}, $Rn", "", []> {
let Rm = 0b1111;
- let Inst{4} = Rn{4};
+ let Inst{4} = 0;
+ let DecoderMethod = "DecodeVLD3DupInstruction";
}
def VLD3DUPd8 : VLD3DUP<{0,0,0,?}, "8">;
@@ -954,7 +1018,8 @@ class VLD3DUPWB<bits<4> op7_4, string Dt>
(ins addrmode6dup:$Rn, am6offset:$Rm), IIC_VLD3dupu,
"vld3", Dt, "\\{$Vd[], $dst2[], $dst3[]\\}, $Rn$Rm",
"$Rn.addr = $wb", []> {
- let Inst{4} = Rn{4};
+ let Inst{4} = 0;
+ let DecoderMethod = "DecodeVLD3DupInstruction";
}
def VLD3DUPd8_UPD : VLD3DUPWB<{0,0,0,0}, "8">;
@@ -977,6 +1042,7 @@ class VLD4DUP<bits<4> op7_4, string Dt>
"vld4", Dt, "\\{$Vd[], $dst2[], $dst3[], $dst4[]\\}, $Rn", "", []> {
let Rm = 0b1111;
let Inst{4} = Rn{4};
+ let DecoderMethod = "DecodeVLD4DupInstruction";
}
def VLD4DUPd8 : VLD4DUP<{0,0,0,?}, "8">;
@@ -1000,6 +1066,7 @@ class VLD4DUPWB<bits<4> op7_4, string Dt>
"vld4", Dt, "\\{$Vd[], $dst2[], $dst3[], $dst4[]\\}, $Rn$Rm",
"$Rn.addr = $wb", []> {
let Inst{4} = Rn{4};
+ let DecoderMethod = "DecodeVLD4DupInstruction";
}
def VLD4DUPd8_UPD : VLD4DUPWB<{0,0,0,0}, "8">;
@@ -1045,6 +1112,7 @@ class VST1D<bits<4> op7_4, string Dt>
IIC_VST1, "vst1", Dt, "\\{$Vd\\}, $Rn", "", []> {
let Rm = 0b1111;
let Inst{4} = Rn{4};
+ let DecoderMethod = "DecodeVSTInstruction";
}
class VST1Q<bits<4> op7_4, string Dt>
: NLdSt<0,0b00,0b1010,op7_4, (outs),
@@ -1052,6 +1120,7 @@ class VST1Q<bits<4> op7_4, string Dt>
"vst1", Dt, "\\{$Vd, $src2\\}, $Rn", "", []> {
let Rm = 0b1111;
let Inst{5-4} = Rn{5-4};
+ let DecoderMethod = "DecodeVSTInstruction";
}
def VST1d8 : VST1D<{0,0,0,?}, "8">;
@@ -1075,6 +1144,7 @@ class VST1DWB<bits<4> op7_4, string Dt>
(ins addrmode6:$Rn, am6offset:$Rm, DPR:$Vd), IIC_VST1u,
"vst1", Dt, "\\{$Vd\\}, $Rn$Rm", "$Rn.addr = $wb", []> {
let Inst{4} = Rn{4};
+ let DecoderMethod = "DecodeVSTInstruction";
}
class VST1QWB<bits<4> op7_4, string Dt>
: NLdSt<0, 0b00, 0b1010, op7_4, (outs GPR:$wb),
@@ -1082,6 +1152,7 @@ class VST1QWB<bits<4> op7_4, string Dt>
IIC_VST1x2u, "vst1", Dt, "\\{$Vd, $src2\\}, $Rn$Rm",
"$Rn.addr = $wb", []> {
let Inst{5-4} = Rn{5-4};
+ let DecoderMethod = "DecodeVSTInstruction";
}
def VST1d8_UPD : VST1DWB<{0,0,0,?}, "8">;
@@ -1106,6 +1177,7 @@ class VST1D3<bits<4> op7_4, string Dt>
IIC_VST1x3, "vst1", Dt, "\\{$Vd, $src2, $src3\\}, $Rn", "", []> {
let Rm = 0b1111;
let Inst{4} = Rn{4};
+ let DecoderMethod = "DecodeVSTInstruction";
}
class VST1D3WB<bits<4> op7_4, string Dt>
: NLdSt<0, 0b00, 0b0110, op7_4, (outs GPR:$wb),
@@ -1114,6 +1186,7 @@ class VST1D3WB<bits<4> op7_4, string Dt>
IIC_VST1x3u, "vst1", Dt, "\\{$Vd, $src2, $src3\\}, $Rn$Rm",
"$Rn.addr = $wb", []> {
let Inst{4} = Rn{4};
+ let DecoderMethod = "DecodeVSTInstruction";
}
def VST1d8T : VST1D3<{0,0,0,?}, "8">;
@@ -1137,6 +1210,7 @@ class VST1D4<bits<4> op7_4, string Dt>
[]> {
let Rm = 0b1111;
let Inst{5-4} = Rn{5-4};
+ let DecoderMethod = "DecodeVSTInstruction";
}
class VST1D4WB<bits<4> op7_4, string Dt>
: NLdSt<0, 0b00, 0b0010, op7_4, (outs GPR:$wb),
@@ -1145,6 +1219,7 @@ class VST1D4WB<bits<4> op7_4, string Dt>
"vst1", Dt, "\\{$Vd, $src2, $src3, $src4\\}, $Rn$Rm",
"$Rn.addr = $wb", []> {
let Inst{5-4} = Rn{5-4};
+ let DecoderMethod = "DecodeVSTInstruction";
}
def VST1d8Q : VST1D4<{0,0,?,?}, "8">;
@@ -1167,6 +1242,7 @@ class VST2D<bits<4> op11_8, bits<4> op7_4, string Dt>
IIC_VST2, "vst2", Dt, "\\{$Vd, $src2\\}, $Rn", "", []> {
let Rm = 0b1111;
let Inst{5-4} = Rn{5-4};
+ let DecoderMethod = "DecodeVSTInstruction";
}
class VST2Q<bits<4> op7_4, string Dt>
: NLdSt<0, 0b00, 0b0011, op7_4, (outs),
@@ -1175,6 +1251,7 @@ class VST2Q<bits<4> op7_4, string Dt>
"", []> {
let Rm = 0b1111;
let Inst{5-4} = Rn{5-4};
+ let DecoderMethod = "DecodeVSTInstruction";
}
def VST2d8 : VST2D<0b1000, {0,0,?,?}, "8">;
@@ -1200,6 +1277,7 @@ class VST2DWB<bits<4> op11_8, bits<4> op7_4, string Dt>
IIC_VST2u, "vst2", Dt, "\\{$Vd, $src2\\}, $Rn$Rm",
"$Rn.addr = $wb", []> {
let Inst{5-4} = Rn{5-4};
+ let DecoderMethod = "DecodeVSTInstruction";
}
class VST2QWB<bits<4> op7_4, string Dt>
: NLdSt<0, 0b00, 0b0011, op7_4, (outs GPR:$wb),
@@ -1208,6 +1286,7 @@ class VST2QWB<bits<4> op7_4, string Dt>
"vst2", Dt, "\\{$Vd, $src2, $src3, $src4\\}, $Rn$Rm",
"$Rn.addr = $wb", []> {
let Inst{5-4} = Rn{5-4};
+ let DecoderMethod = "DecodeVSTInstruction";
}
def VST2d8_UPD : VST2DWB<0b1000, {0,0,?,?}, "8">;
@@ -1241,6 +1320,7 @@ class VST3D<bits<4> op11_8, bits<4> op7_4, string Dt>
"vst3", Dt, "\\{$Vd, $src2, $src3\\}, $Rn", "", []> {
let Rm = 0b1111;
let Inst{4} = Rn{4};
+ let DecoderMethod = "DecodeVSTInstruction";
}
def VST3d8 : VST3D<0b0100, {0,0,0,?}, "8">;
@@ -1259,6 +1339,7 @@ class VST3DWB<bits<4> op11_8, bits<4> op7_4, string Dt>
"vst3", Dt, "\\{$Vd, $src2, $src3\\}, $Rn$Rm",
"$Rn.addr = $wb", []> {
let Inst{4} = Rn{4};
+ let DecoderMethod = "DecodeVSTInstruction";
}
def VST3d8_UPD : VST3DWB<0b0100, {0,0,0,?}, "8">;
@@ -1298,6 +1379,7 @@ class VST4D<bits<4> op11_8, bits<4> op7_4, string Dt>
"", []> {
let Rm = 0b1111;
let Inst{5-4} = Rn{5-4};
+ let DecoderMethod = "DecodeVSTInstruction";
}
def VST4d8 : VST4D<0b0000, {0,0,?,?}, "8">;
@@ -1316,6 +1398,7 @@ class VST4DWB<bits<4> op11_8, bits<4> op7_4, string Dt>
"vst4", Dt, "\\{$Vd, $src2, $src3, $src4\\}, $Rn$Rm",
"$Rn.addr = $wb", []> {
let Inst{5-4} = Rn{5-4};
+ let DecoderMethod = "DecodeVSTInstruction";
}
def VST4d8_UPD : VST4DWB<0b0000, {0,0,?,?}, "8">;
@@ -1381,6 +1464,7 @@ class VST1LN<bits<4> op11_8, bits<4> op7_4, string Dt, ValueType Ty,
IIC_VST1ln, "vst1", Dt, "\\{$Vd[$lane]\\}, $Rn", "",
[(StoreOp (ExtractOp (Ty DPR:$Vd), imm:$lane), addrmode6:$Rn)]> {
let Rm = 0b1111;
+ let DecoderMethod = "DecodeVST1LN";
}
class VST1LN32<bits<4> op11_8, bits<4> op7_4, string Dt, ValueType Ty,
PatFrag StoreOp, SDNode ExtractOp>
@@ -1389,6 +1473,7 @@ class VST1LN32<bits<4> op11_8, bits<4> op7_4, string Dt, ValueType Ty,
IIC_VST1ln, "vst1", Dt, "\\{$Vd[$lane]\\}, $Rn", "",
[(StoreOp (ExtractOp (Ty DPR:$Vd), imm:$lane), addrmode6oneL32:$Rn)]>{
let Rm = 0b1111;
+ let DecoderMethod = "DecodeVST1LN";
}
class VST1QLNPseudo<ValueType Ty, PatFrag StoreOp, SDNode ExtractOp>
: VSTQLNPseudo<IIC_VST1ln> {
@@ -1429,7 +1514,9 @@ class VST1LNWB<bits<4> op11_8, bits<4> op7_4, string Dt, ValueType Ty,
"\\{$Vd[$lane]\\}, $Rn$Rm",
"$Rn.addr = $wb",
[(set GPR:$wb, (StoreOp (ExtractOp (Ty DPR:$Vd), imm:$lane),
- addrmode6:$Rn, am6offset:$Rm))]>;
+ addrmode6:$Rn, am6offset:$Rm))]> {
+ let DecoderMethod = "DecodeVST1LN";
+}
class VST1QLNWBPseudo<ValueType Ty, PatFrag StoreOp, SDNode ExtractOp>
: VSTQLNWBPseudo<IIC_VST1lnu> {
let Pattern = [(set GPR:$wb, (StoreOp (ExtractOp (Ty QPR:$src), imm:$lane),
@@ -1465,6 +1552,7 @@ class VST2LN<bits<4> op11_8, bits<4> op7_4, string Dt>
"", []> {
let Rm = 0b1111;
let Inst{4} = Rn{4};
+ let DecoderMethod = "DecodeVST2LN";
}
def VST2LNd8 : VST2LN<0b0001, {?,?,?,?}, "8"> {
@@ -1502,6 +1590,7 @@ class VST2LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
"\\{$src1[$lane], $src2[$lane]\\}, $addr$offset",
"$addr.addr = $wb", []> {
let Inst{4} = Rn{4};
+ let DecoderMethod = "DecodeVST2LN";
}
def VST2LNd8_UPD : VST2LNWB<0b0001, {?,?,?,?}, "8"> {
@@ -1535,6 +1624,7 @@ class VST3LN<bits<4> op11_8, bits<4> op7_4, string Dt>
nohash_imm:$lane), IIC_VST3ln, "vst3", Dt,
"\\{$Vd[$lane], $src2[$lane], $src3[$lane]\\}, $Rn", "", []> {
let Rm = 0b1111;
+ let DecoderMethod = "DecodeVST3LN";
}
def VST3LNd8 : VST3LN<0b0010, {?,?,?,0}, "8"> {
@@ -1569,7 +1659,9 @@ class VST3LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
DPR:$Vd, DPR:$src2, DPR:$src3, nohash_imm:$lane),
IIC_VST3lnu, "vst3", Dt,
"\\{$Vd[$lane], $src2[$lane], $src3[$lane]\\}, $Rn$Rm",
- "$Rn.addr = $wb", []>;
+ "$Rn.addr = $wb", []> {
+ let DecoderMethod = "DecodeVST3LN";
+}
def VST3LNd8_UPD : VST3LNWB<0b0010, {?,?,?,0}, "8"> {
let Inst{7-5} = lane{2-0};
@@ -1604,6 +1696,7 @@ class VST4LN<bits<4> op11_8, bits<4> op7_4, string Dt>
"", []> {
let Rm = 0b1111;
let Inst{4} = Rn{4};
+ let DecoderMethod = "DecodeVST4LN";
}
def VST4LNd8 : VST4LN<0b0011, {?,?,?,?}, "8"> {
@@ -1642,6 +1735,7 @@ class VST4LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
"\\{$Vd[$lane], $src2[$lane], $src3[$lane], $src4[$lane]\\}, $Rn$Rm",
"$Rn.addr = $wb", []> {
let Inst{4} = Rn{4};
+ let DecoderMethod = "DecodeVST4LN";
}
def VST4LNd8_UPD : VST4LNWB<0b0011, {?,?,?,?}, "8"> {
@@ -4039,6 +4133,7 @@ class N2VLShMax<bit op24, bit op23, bits<6> op21_16, bits<4> op11_8, bit op7,
: N2VLSh<op24, op23, op11_8, op7, op6, op4, OpcodeStr, Dt,
ResTy, OpTy, OpNode> {
let Inst{21-16} = op21_16;
+ let DecoderMethod = "DecodeVSHLMaxInstruction";
}
def VSHLLi8 : N2VLShMax<1, 1, 0b110010, 0b0011, 0, 0, 0, "vshll", "i8",
v8i16, v8i8, NEONvshlli>;
@@ -4219,16 +4314,6 @@ def : InstAlias<"vmov${p} $Vd, $Vm",
def : InstAlias<"vmov${p} $Vd, $Vm",
(VORRq QPR:$Vd, QPR:$Vm, QPR:$Vm, pred:$p)>;
-let neverHasSideEffects = 1 in {
-// Pseudo vector move instructions for QQ and QQQQ registers. This should
-// be expanded after register allocation is completed.
-def VMOVQQ : PseudoInst<(outs QQPR:$dst), (ins QQPR:$src),
- NoItinerary, []>;
-
-def VMOVQQQQ : PseudoInst<(outs QQQQPR:$dst), (ins QQQQPR:$src),
- NoItinerary, []>;
-} // neverHasSideEffects
-
// VMOV : Vector Move (Immediate)
let isReMaterializable = 1 in {
@@ -4462,36 +4547,42 @@ def : Pat<(v4f32 (NEONvdup (f32 (bitconvert GPR:$R)))), (VDUP32q GPR:$R)>;
// VDUP : Vector Duplicate Lane (from scalar to all elements)
class VDUPLND<bits<4> op19_16, string OpcodeStr, string Dt,
- ValueType Ty>
- : NVDupLane<op19_16, 0, (outs DPR:$Vd), (ins DPR:$Vm, nohash_imm:$lane),
- IIC_VMOVD, OpcodeStr, Dt, "$Vd, $Vm[$lane]",
+ ValueType Ty, Operand IdxTy>
+ : NVDupLane<op19_16, 0, (outs DPR:$Vd), (ins DPR:$Vm, IdxTy:$lane),
+ IIC_VMOVD, OpcodeStr, Dt, "$Vd, $Vm$lane",
[(set DPR:$Vd, (Ty (NEONvduplane (Ty DPR:$Vm), imm:$lane)))]>;
class VDUPLNQ<bits<4> op19_16, string OpcodeStr, string Dt,
- ValueType ResTy, ValueType OpTy>
- : NVDupLane<op19_16, 1, (outs QPR:$Vd), (ins DPR:$Vm, nohash_imm:$lane),
- IIC_VMOVQ, OpcodeStr, Dt, "$Vd, $Vm[$lane]",
+ ValueType ResTy, ValueType OpTy, Operand IdxTy>
+ : NVDupLane<op19_16, 1, (outs QPR:$Vd), (ins DPR:$Vm, IdxTy:$lane),
+ IIC_VMOVQ, OpcodeStr, Dt, "$Vd, $Vm$lane",
[(set QPR:$Vd, (ResTy (NEONvduplane (OpTy DPR:$Vm),
- imm:$lane)))]>;
+ VectorIndex32:$lane)))]>;
// Inst{19-16} is partially specified depending on the element size.
-def VDUPLN8d : VDUPLND<{?,?,?,1}, "vdup", "8", v8i8> {
+def VDUPLN8d : VDUPLND<{?,?,?,1}, "vdup", "8", v8i8, VectorIndex8> {
+ bits<3> lane;
let Inst{19-17} = lane{2-0};
}
-def VDUPLN16d : VDUPLND<{?,?,1,0}, "vdup", "16", v4i16> {
+def VDUPLN16d : VDUPLND<{?,?,1,0}, "vdup", "16", v4i16, VectorIndex16> {
+ bits<2> lane;
let Inst{19-18} = lane{1-0};
}
-def VDUPLN32d : VDUPLND<{?,1,0,0}, "vdup", "32", v2i32> {
+def VDUPLN32d : VDUPLND<{?,1,0,0}, "vdup", "32", v2i32, VectorIndex32> {
+ bits<1> lane;
let Inst{19} = lane{0};
}
-def VDUPLN8q : VDUPLNQ<{?,?,?,1}, "vdup", "8", v16i8, v8i8> {
+def VDUPLN8q : VDUPLNQ<{?,?,?,1}, "vdup", "8", v16i8, v8i8, VectorIndex8> {
+ bits<3> lane;
let Inst{19-17} = lane{2-0};
}
-def VDUPLN16q : VDUPLNQ<{?,?,1,0}, "vdup", "16", v8i16, v4i16> {
+def VDUPLN16q : VDUPLNQ<{?,?,1,0}, "vdup", "16", v8i16, v4i16, VectorIndex16> {
+ bits<2> lane;
let Inst{19-18} = lane{1-0};
}
-def VDUPLN32q : VDUPLNQ<{?,1,0,0}, "vdup", "32", v4i32, v2i32> {
+def VDUPLN32q : VDUPLNQ<{?,1,0,0}, "vdup", "32", v4i32, v2i32, VectorIndex32> {
+ bits<1> lane;
let Inst{19} = lane{0};
}
@@ -4753,6 +4844,7 @@ def VZIPq32 : N2VQShuffle<0b10, 0b00011, IIC_VPERMQ3, "vzip", "32">;
// Vector Table Lookup and Table Extension.
// VTBL : Vector Table Lookup
+let DecoderMethod = "DecodeTBLInstruction" in {
def VTBL1
: N3V<1,1,0b11,0b1000,0,0, (outs DPR:$Vd),
(ins DPR:$Vn, DPR:$Vm), NVTBLFrm, IIC_VTB1,
@@ -4815,6 +4907,7 @@ def VTBX3Pseudo
def VTBX4Pseudo
: PseudoNeonI<(outs DPR:$dst), (ins DPR:$orig, QQPR:$tbl, DPR:$src),
IIC_VTBX4, "$orig = $dst", []>;
+} // DecoderMethod = "DecodeTBLInstruction"
//===----------------------------------------------------------------------===//
// NEON instructions for single-precision FP math
diff --git a/lib/Target/ARM/ARMInstrThumb.td b/lib/Target/ARM/ARMInstrThumb.td
index bfe83ec..cedb547 100644
--- a/lib/Target/ARM/ARMInstrThumb.td
+++ b/lib/Target/ARM/ARMInstrThumb.td
@@ -19,6 +19,19 @@ def ARMtcall : SDNode<"ARMISD::tCALL", SDT_ARMcall,
[SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
SDNPVariadic]>;
+def imm_sr_XFORM: SDNodeXForm<imm, [{
+ unsigned Imm = N->getZExtValue();
+ return CurDAG->getTargetConstant((Imm == 32 ? 0 : Imm), MVT::i32);
+}]>;
+def ThumbSRImmAsmOperand: AsmOperandClass { let Name = "ImmThumbSR"; }
+def imm_sr : Operand<i32>, PatLeaf<(imm), [{
+ uint64_t Imm = N->getZExtValue();
+ return Imm > 0 && Imm <= 32;
+}], imm_sr_XFORM> {
+ let PrintMethod = "printThumbSRImm";
+ let ParserMatchClass = ThumbSRImmAsmOperand;
+}
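+// Illustrative: "asr r0, r1, #32" is valid Thumb syntax; the XFORM above
+// folds the 32 into the zero immediate field used by the encoding.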
+
def imm_neg_XFORM : SDNodeXForm<imm, [{
return CurDAG->getTargetConstant(-(int)N->getZExtValue(), MVT::i32);
}]>;
@@ -30,10 +43,6 @@ def imm0_7_neg : PatLeaf<(i32 imm), [{
return (uint32_t)-N->getZExtValue() < 8;
}], imm_neg_XFORM>;
-def imm0_255_asmoperand : AsmOperandClass { let Name = "Imm0_255"; }
-def imm0_255 : Operand<i32>, ImmLeaf<i32, [{ return Imm >= 0 && Imm < 256; }]> {
- let ParserMatchClass = imm0_255_asmoperand;
-}
def imm0_255_comp : PatLeaf<(i32 imm), [{
return ~((uint32_t)N->getZExtValue()) < 256;
}]>;
@@ -69,8 +78,17 @@ def t_adrlabel : Operand<i32> {
}
// Scaled 4 immediate.
-def t_imm_s4 : Operand<i32> {
+def t_imm0_1020s4_asmoperand: AsmOperandClass { let Name = "Imm0_1020s4"; }
+def t_imm0_1020s4 : Operand<i32> {
+ let PrintMethod = "printThumbS4ImmOperand";
+ let ParserMatchClass = t_imm0_1020s4_asmoperand;
+ let OperandType = "OPERAND_IMMEDIATE";
+}
+
+def t_imm0_508s4_asmoperand: AsmOperandClass { let Name = "Imm0_508s4"; }
+def t_imm0_508s4 : Operand<i32> {
let PrintMethod = "printThumbS4ImmOperand";
+ let ParserMatchClass = t_imm0_508s4_asmoperand;
let OperandType = "OPERAND_IMMEDIATE";
}
@@ -79,113 +97,129 @@ def t_imm_s4 : Operand<i32> {
let OperandType = "OPERAND_PCREL" in {
def t_brtarget : Operand<OtherVT> {
let EncoderMethod = "getThumbBRTargetOpValue";
+ let DecoderMethod = "DecodeThumbBROperand";
}
def t_bcctarget : Operand<i32> {
let EncoderMethod = "getThumbBCCTargetOpValue";
+ let DecoderMethod = "DecodeThumbBCCTargetOperand";
}
def t_cbtarget : Operand<i32> {
let EncoderMethod = "getThumbCBTargetOpValue";
+ let DecoderMethod = "DecodeThumbCmpBROperand";
}
def t_bltarget : Operand<i32> {
let EncoderMethod = "getThumbBLTargetOpValue";
+ let DecoderMethod = "DecodeThumbBLTargetOperand";
}
def t_blxtarget : Operand<i32> {
let EncoderMethod = "getThumbBLXTargetOpValue";
+ let DecoderMethod = "DecodeThumbBLXOffset";
}
}
-def MemModeRegThumbAsmOperand : AsmOperandClass {
- let Name = "MemModeRegThumb";
- let SuperClasses = [];
-}
-
-def MemModeImmThumbAsmOperand : AsmOperandClass {
- let Name = "MemModeImmThumb";
- let SuperClasses = [];
-}
-
// t_addrmode_rr := reg + reg
//
+def t_addrmode_rr_asm_operand : AsmOperandClass { let Name = "MemThumbRR"; }
def t_addrmode_rr : Operand<i32>,
ComplexPattern<i32, 2, "SelectThumbAddrModeRR", []> {
let EncoderMethod = "getThumbAddrModeRegRegOpValue";
let PrintMethod = "printThumbAddrModeRROperand";
+ let DecoderMethod = "DecodeThumbAddrModeRR";
+ let ParserMatchClass = t_addrmode_rr_asm_operand;
let MIOperandInfo = (ops tGPR:$base, tGPR:$offsreg);
}
// t_addrmode_rrs := reg + reg
//
+// We use separate scaled versions because the Select* functions need
+// to explicitly check for a matching constant and return false here so that
+// the reg+imm forms will match instead. This is a horrible way to do that,
+// as it forces tight coupling between the methods, but it's how selectiondag
+// currently works.
def t_addrmode_rrs1 : Operand<i32>,
ComplexPattern<i32, 2, "SelectThumbAddrModeRI5S1", []> {
let EncoderMethod = "getThumbAddrModeRegRegOpValue";
let PrintMethod = "printThumbAddrModeRROperand";
+ let DecoderMethod = "DecodeThumbAddrModeRR";
+ let ParserMatchClass = t_addrmode_rr_asm_operand;
let MIOperandInfo = (ops tGPR:$base, tGPR:$offsreg);
- let ParserMatchClass = MemModeRegThumbAsmOperand;
}
def t_addrmode_rrs2 : Operand<i32>,
ComplexPattern<i32, 2, "SelectThumbAddrModeRI5S2", []> {
let EncoderMethod = "getThumbAddrModeRegRegOpValue";
+ let DecoderMethod = "DecodeThumbAddrModeRR";
let PrintMethod = "printThumbAddrModeRROperand";
+ let ParserMatchClass = t_addrmode_rr_asm_operand;
let MIOperandInfo = (ops tGPR:$base, tGPR:$offsreg);
- let ParserMatchClass = MemModeRegThumbAsmOperand;
}
def t_addrmode_rrs4 : Operand<i32>,
ComplexPattern<i32, 2, "SelectThumbAddrModeRI5S4", []> {
let EncoderMethod = "getThumbAddrModeRegRegOpValue";
+ let DecoderMethod = "DecodeThumbAddrModeRR";
let PrintMethod = "printThumbAddrModeRROperand";
+ let ParserMatchClass = t_addrmode_rr_asm_operand;
let MIOperandInfo = (ops tGPR:$base, tGPR:$offsreg);
- let ParserMatchClass = MemModeRegThumbAsmOperand;
}
// t_addrmode_is4 := reg + imm5 * 4
//
+def t_addrmode_is4_asm_operand : AsmOperandClass { let Name = "MemThumbRIs4"; }
def t_addrmode_is4 : Operand<i32>,
ComplexPattern<i32, 2, "SelectThumbAddrModeImm5S4", []> {
let EncoderMethod = "getAddrModeISOpValue";
+ let DecoderMethod = "DecodeThumbAddrModeIS";
let PrintMethod = "printThumbAddrModeImm5S4Operand";
+ let ParserMatchClass = t_addrmode_is4_asm_operand;
let MIOperandInfo = (ops tGPR:$base, i32imm:$offsimm);
- let ParserMatchClass = MemModeImmThumbAsmOperand;
}
// t_addrmode_is2 := reg + imm5 * 2
//
+def t_addrmode_is2_asm_operand : AsmOperandClass { let Name = "MemThumbRIs2"; }
def t_addrmode_is2 : Operand<i32>,
ComplexPattern<i32, 2, "SelectThumbAddrModeImm5S2", []> {
let EncoderMethod = "getAddrModeISOpValue";
+ let DecoderMethod = "DecodeThumbAddrModeIS";
let PrintMethod = "printThumbAddrModeImm5S2Operand";
+ let ParserMatchClass = t_addrmode_is2_asm_operand;
let MIOperandInfo = (ops tGPR:$base, i32imm:$offsimm);
- let ParserMatchClass = MemModeImmThumbAsmOperand;
}
// t_addrmode_is1 := reg + imm5
//
+def t_addrmode_is1_asm_operand : AsmOperandClass { let Name = "MemThumbRIs1"; }
def t_addrmode_is1 : Operand<i32>,
ComplexPattern<i32, 2, "SelectThumbAddrModeImm5S1", []> {
let EncoderMethod = "getAddrModeISOpValue";
+ let DecoderMethod = "DecodeThumbAddrModeIS";
let PrintMethod = "printThumbAddrModeImm5S1Operand";
+ let ParserMatchClass = t_addrmode_is1_asm_operand;
let MIOperandInfo = (ops tGPR:$base, i32imm:$offsimm);
- let ParserMatchClass = MemModeImmThumbAsmOperand;
}
// t_addrmode_sp := sp + imm8 * 4
//
+// FIXME: This really shouldn't have an explicit SP operand at all. It should
+// be implicit, just like in the instruction encoding itself.
+def t_addrmode_sp_asm_operand : AsmOperandClass { let Name = "MemThumbSPI"; }
def t_addrmode_sp : Operand<i32>,
ComplexPattern<i32, 2, "SelectThumbAddrModeSP", []> {
let EncoderMethod = "getAddrModeThumbSPOpValue";
+ let DecoderMethod = "DecodeThumbAddrModeSP";
let PrintMethod = "printThumbAddrModeSPOperand";
+ let ParserMatchClass = t_addrmode_sp_asm_operand;
let MIOperandInfo = (ops GPR:$base, i32imm:$offsimm);
- let ParserMatchClass = MemModeImmThumbAsmOperand;
}
// t_addrmode_pc := <label> => pc + imm8 * 4
//
def t_addrmode_pc : Operand<i32> {
let EncoderMethod = "getAddrModePCOpValue";
- let ParserMatchClass = MemModeImmThumbAsmOperand;
+ let DecoderMethod = "DecodeThumbAddrModePC";
}
//===----------------------------------------------------------------------===//
@@ -207,68 +241,52 @@ def tADJCALLSTACKDOWN :
Requires<[IsThumb, IsThumb1Only]>;
}
-// T1Disassembly - A simple class to make encoding some disassembly patterns
-// easier and less verbose.
-class T1Disassembly<bits<2> op1, bits<8> op2>
+class T1SystemEncoding<bits<8> opc>
: T1Encoding<0b101111> {
- let Inst{9-8} = op1;
- let Inst{7-0} = op2;
+ let Inst{9-8} = 0b11;
+ let Inst{7-0} = opc;
}
-def tNOP : T1pI<(outs), (ins), NoItinerary, "nop", "",
- [/* For disassembly only; pattern left blank */]>,
- T1Disassembly<0b11, 0x00>; // A8.6.110
+def tNOP : T1pI<(outs), (ins), NoItinerary, "nop", "", []>,
+ T1SystemEncoding<0x00>, // A8.6.110
+ Requires<[IsThumb2]>;
-def tYIELD : T1pI<(outs), (ins), NoItinerary, "yield", "",
- [/* For disassembly only; pattern left blank */]>,
- T1Disassembly<0b11, 0x10>; // A8.6.410
+def tYIELD : T1pI<(outs), (ins), NoItinerary, "yield", "", []>,
+ T1SystemEncoding<0x10>; // A8.6.410
-def tWFE : T1pI<(outs), (ins), NoItinerary, "wfe", "",
- [/* For disassembly only; pattern left blank */]>,
- T1Disassembly<0b11, 0x20>; // A8.6.408
+def tWFE : T1pI<(outs), (ins), NoItinerary, "wfe", "", []>,
+ T1SystemEncoding<0x20>; // A8.6.408
-def tWFI : T1pI<(outs), (ins), NoItinerary, "wfi", "",
- [/* For disassembly only; pattern left blank */]>,
- T1Disassembly<0b11, 0x30>; // A8.6.409
+def tWFI : T1pI<(outs), (ins), NoItinerary, "wfi", "", []>,
+ T1SystemEncoding<0x30>; // A8.6.409
-def tSEV : T1pI<(outs), (ins), NoItinerary, "sev", "",
- [/* For disassembly only; pattern left blank */]>,
- T1Disassembly<0b11, 0x40>; // A8.6.157
+def tSEV : T1pI<(outs), (ins), NoItinerary, "sev", "", []>,
+ T1SystemEncoding<0x40>; // A8.6.157
-// The i32imm operand $val can be used by a debugger to store more information
+// The imm operand $val can be used by a debugger to store more information
// about the breakpoint.
-def tBKPT : T1I<(outs), (ins i32imm:$val), NoItinerary, "bkpt\t$val",
- [/* For disassembly only; pattern left blank */]>,
- T1Disassembly<0b10, {?,?,?,?,?,?,?,?}> {
+def tBKPT : T1I<(outs), (ins imm0_255:$val), NoItinerary, "bkpt\t$val",
+ []>,
+ T1Encoding<0b101111> {
+ let Inst{9-8} = 0b10;
// A8.6.22
bits<8> val;
let Inst{7-0} = val;
}
-def tSETENDBE : T1I<(outs), (ins), NoItinerary, "setend\tbe",
- [/* For disassembly only; pattern left blank */]>,
- T1Encoding<0b101101> {
- // A8.6.156
- let Inst{9-5} = 0b10010;
- let Inst{4} = 1;
- let Inst{3} = 1; // Big-Endian
- let Inst{2-0} = 0b000;
-}
-
-def tSETENDLE : T1I<(outs), (ins), NoItinerary, "setend\tle",
- [/* For disassembly only; pattern left blank */]>,
- T1Encoding<0b101101> {
+def tSETEND : T1I<(outs), (ins setend_op:$end), NoItinerary, "setend\t$end",
+ []>, T1Encoding<0b101101> {
+ bits<1> end;
// A8.6.156
let Inst{9-5} = 0b10010;
let Inst{4} = 1;
- let Inst{3} = 0; // Little-Endian
+ let Inst{3} = end;
let Inst{2-0} = 0b000;
}
// Change Processor State is a system instruction -- for disassembly only.
def tCPS : T1I<(outs), (ins imod_op:$imod, iflags_op:$iflags),
- NoItinerary, "cps$imod $iflags",
- [/* For disassembly only; pattern left blank */]>,
+ NoItinerary, "cps$imod $iflags", []>,
T1Misc<0b0110011> {
// A8.6.38 & B6.1.1
bit imod;
@@ -277,6 +295,7 @@ def tCPS : T1I<(outs), (ins imod_op:$imod, iflags_op:$iflags),
let Inst{4} = imod;
let Inst{3} = 0;
let Inst{2-0} = iflags;
+ let DecoderMethod = "DecodeThumbCPS";
}
// For both thumb1 and thumb2.
@@ -290,70 +309,70 @@ def tPICADD : TIt<(outs GPR:$dst), (ins GPR:$lhs, pclabel:$cp), IIC_iALUr, "",
let Inst{2-0} = dst;
}
-// PC relative add (ADR).
-def tADDrPCi : T1I<(outs tGPR:$dst), (ins t_imm_s4:$rhs), IIC_iALUi,
- "add\t$dst, pc, $rhs", []>,
- T1Encoding<{1,0,1,0,0,?}> {
- // A6.2 & A8.6.10
- bits<3> dst;
- bits<8> rhs;
- let Inst{10-8} = dst;
- let Inst{7-0} = rhs;
-}
-
// ADD <Rd>, sp, #<imm8>
-// This is rematerializable, which is particularly useful for taking the
-// address of locals.
-let isReMaterializable = 1 in
-def tADDrSPi : T1I<(outs tGPR:$dst), (ins GPR:$sp, t_imm_s4:$rhs), IIC_iALUi,
- "add\t$dst, $sp, $rhs", []>,
+// FIXME: This should not be marked as having side effects, and it should be
+// rematerializable. Clearing the side effect bit causes miscompilations,
+// probably because the instruction can be moved around.
+def tADDrSPi : T1pI<(outs tGPR:$dst), (ins GPRsp:$sp, t_imm0_1020s4:$imm),
+ IIC_iALUi, "add", "\t$dst, $sp, $imm", []>,
T1Encoding<{1,0,1,0,1,?}> {
// A6.2 & A8.6.8
bits<3> dst;
- bits<8> rhs;
+ bits<8> imm;
let Inst{10-8} = dst;
- let Inst{7-0} = rhs;
+ let Inst{7-0} = imm;
+ let DecoderMethod = "DecodeThumbAddSpecialReg";
}
// ADD sp, sp, #<imm7>
-def tADDspi : TIt<(outs GPR:$dst), (ins GPR:$lhs, t_imm_s4:$rhs), IIC_iALUi,
- "add\t$dst, $rhs", []>,
+def tADDspi : T1pIt<(outs GPRsp:$Rdn), (ins GPRsp:$Rn, t_imm0_508s4:$imm),
+ IIC_iALUi, "add", "\t$Rdn, $imm", []>,
T1Misc<{0,0,0,0,0,?,?}> {
// A6.2.5 & A8.6.8
- bits<7> rhs;
- let Inst{6-0} = rhs;
+ bits<7> imm;
+ let Inst{6-0} = imm;
+ let DecoderMethod = "DecodeThumbAddSPImm";
}
// SUB sp, sp, #<imm7>
// FIXME: The encoding and the ASM string don't match up.
-def tSUBspi : TIt<(outs GPR:$dst), (ins GPR:$lhs, t_imm_s4:$rhs), IIC_iALUi,
- "sub\t$dst, $rhs", []>,
+def tSUBspi : T1pIt<(outs GPRsp:$Rdn), (ins GPRsp:$Rn, t_imm0_508s4:$imm),
+ IIC_iALUi, "sub", "\t$Rdn, $imm", []>,
T1Misc<{0,0,0,0,1,?,?}> {
// A6.2.5 & A8.6.214
- bits<7> rhs;
- let Inst{6-0} = rhs;
+ bits<7> imm;
+ let Inst{6-0} = imm;
+ let DecoderMethod = "DecodeThumbAddSPImm";
}
+// Can optionally specify SP as a three operand instruction.
+def : tInstAlias<"add${p} sp, sp, $imm",
+ (tADDspi SP, t_imm0_508s4:$imm, pred:$p)>;
+def : tInstAlias<"sub${p} sp, sp, $imm",
+ (tSUBspi SP, t_imm0_508s4:$imm, pred:$p)>;
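+// E.g. "add sp, sp, #8" and the two-operand form "add sp, #8" assemble to
+// the same tADDspi encoding.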
+
// ADD <Rm>, sp
-def tADDrSP : TIt<(outs GPR:$dst), (ins GPR:$lhs, GPR:$rhs), IIC_iALUr,
- "add\t$dst, $rhs", []>,
+def tADDrSP : T1pIt<(outs GPR:$Rdn), (ins GPR:$Rn, GPRsp:$sp), IIC_iALUr,
+ "add", "\t$Rdn, $sp, $Rn", []>,
T1Special<{0,0,?,?}> {
// A8.6.9 Encoding T1
- bits<4> dst;
- let Inst{7} = dst{3};
+ bits<4> Rdn;
+ let Inst{7} = Rdn{3};
let Inst{6-3} = 0b1101;
- let Inst{2-0} = dst{2-0};
+ let Inst{2-0} = Rdn{2-0};
+ let DecoderMethod = "DecodeThumbAddSPReg";
}
// ADD sp, <Rm>
-def tADDspr : TIt<(outs GPR:$dst), (ins GPR:$lhs, GPR:$rhs), IIC_iALUr,
- "add\t$dst, $rhs", []>,
+def tADDspr : T1pIt<(outs GPRsp:$Rdn), (ins GPRsp:$Rn, GPR:$Rm), IIC_iALUr,
+ "add", "\t$Rdn, $Rm", []>,
T1Special<{0,0,?,?}> {
// A8.6.9 Encoding T2
- bits<4> dst;
+ bits<4> Rm;
let Inst{7} = 1;
- let Inst{6-3} = dst;
+ let Inst{6-3} = Rm;
let Inst{2-0} = 0b101;
+ let DecoderMethod = "DecodeThumbAddSPReg";
}
//===----------------------------------------------------------------------===//
@@ -390,11 +409,12 @@ let isCall = 1,
Uses = [SP] in {
// Also used for Thumb2
def tBL : TIx2<0b11110, 0b11, 1,
- (outs), (ins t_bltarget:$func, variable_ops), IIC_Br,
- "bl\t$func",
+ (outs), (ins pred:$p, t_bltarget:$func, variable_ops), IIC_Br,
+ "bl${p}\t$func",
[(ARMtcall tglobaladdr:$func)]>,
Requires<[IsThumb, IsNotDarwin]> {
- bits<21> func;
+ bits<22> func;
+ let Inst{26} = func{21};
let Inst{25-16} = func{20-11};
let Inst{13} = 1;
let Inst{11} = 1;
@@ -403,8 +423,8 @@ let isCall = 1,
// ARMv5T and above, also used for Thumb2
def tBLXi : TIx2<0b11110, 0b11, 0,
- (outs), (ins t_blxtarget:$func, variable_ops), IIC_Br,
- "blx\t$func",
+ (outs), (ins pred:$p, t_blxtarget:$func, variable_ops), IIC_Br,
+ "blx${p}\t$func",
[(ARMcall tglobaladdr:$func)]>,
Requires<[IsThumb, HasV5T, IsNotDarwin]> {
bits<21> func;
@@ -416,8 +436,8 @@ let isCall = 1,
}
// Also used for Thumb2
- def tBLXr : TI<(outs), (ins GPR:$func, variable_ops), IIC_Br,
- "blx\t$func",
+ def tBLXr : TI<(outs), (ins pred:$p, GPR:$func, variable_ops), IIC_Br,
+ "blx${p}\t$func",
[(ARMtcall GPR:$func)]>,
Requires<[IsThumb, HasV5T, IsNotDarwin]>,
T1Special<{1,1,1,?}> { // A6.2.3 & A8.6.24;
@@ -440,43 +460,22 @@ let isCall = 1,
Defs = [R0, R1, R2, R3, R9, R12, LR, QQQQ0, QQQQ2, QQQQ3, CPSR, FPSCR],
Uses = [R7, SP] in {
// Also used for Thumb2
- def tBLr9 : TIx2<0b11110, 0b11, 1,
- (outs), (ins pred:$p, t_bltarget:$func, variable_ops),
- IIC_Br, "bl${p}\t$func",
- [(ARMtcall tglobaladdr:$func)]>,
- Requires<[IsThumb, IsDarwin]> {
- bits<21> func;
- let Inst{25-16} = func{20-11};
- let Inst{13} = 1;
- let Inst{11} = 1;
- let Inst{10-0} = func{10-0};
- }
+ def tBLr9 : tPseudoExpand<(outs), (ins pred:$p, t_bltarget:$func, variable_ops),
+ 4, IIC_Br, [(ARMtcall tglobaladdr:$func)],
+ (tBL pred:$p, t_bltarget:$func)>,
+ Requires<[IsThumb, IsDarwin]>;
// ARMv5T and above, also used for Thumb2
- def tBLXi_r9 : TIx2<0b11110, 0b11, 0,
- (outs), (ins pred:$p, t_blxtarget:$func, variable_ops),
- IIC_Br, "blx${p}\t$func",
- [(ARMcall tglobaladdr:$func)]>,
- Requires<[IsThumb, HasV5T, IsDarwin]> {
- bits<21> func;
- let Inst{25-16} = func{20-11};
- let Inst{13} = 1;
- let Inst{11} = 1;
- let Inst{10-1} = func{10-1};
- let Inst{0} = 0; // func{0} is assumed zero
- }
+ def tBLXi_r9 : tPseudoExpand<(outs), (ins pred:$p, t_blxtarget:$func, variable_ops),
+ 4, IIC_Br, [(ARMcall tglobaladdr:$func)],
+ (tBLXi pred:$p, t_blxtarget:$func)>,
+ Requires<[IsThumb, HasV5T, IsDarwin]>;
// Also used for Thumb2
- def tBLXr_r9 : TI<(outs), (ins pred:$p, GPR:$func, variable_ops), IIC_Br,
- "blx${p}\t$func",
- [(ARMtcall GPR:$func)]>,
- Requires<[IsThumb, HasV5T, IsDarwin]>,
- T1Special<{1,1,1,?}> {
- // A6.2.3 & A8.6.24
- bits<4> func;
- let Inst{6-3} = func;
- let Inst{2-0} = 0b000;
- }
+ def tBLXr_r9 : tPseudoExpand<(outs), (ins pred:$p, GPR:$func, variable_ops),
+ 2, IIC_Br, [(ARMtcall GPR:$func)],
+ (tBLXr pred:$p, GPR:$func)>,
+ Requires<[IsThumb, HasV5T, IsDarwin]>;
// ARMv4T
def tBXr9_CALL : tPseudoInst<(outs), (ins tGPR:$func, variable_ops),
@@ -487,8 +486,8 @@ let isCall = 1,
let isBranch = 1, isTerminator = 1, isBarrier = 1 in {
let isPredicable = 1 in
- def tB : T1I<(outs), (ins t_brtarget:$target), IIC_Br,
- "b\t$target", [(br bb:$target)]>,
+ def tB : T1pI<(outs), (ins t_brtarget:$target), IIC_Br,
+ "b", "\t$target", [(br bb:$target)]>,
T1Encoding<{1,1,1,0,0,?}> {
bits<11> target;
let Inst{10-0} = target;
@@ -498,8 +497,8 @@ let isBranch = 1, isTerminator = 1, isBarrier = 1 in {
// Just a pseudo for a tBL instruction. Needed to let regalloc know about
// the clobber of LR.
let Defs = [LR] in
- def tBfar : tPseudoExpand<(outs), (ins t_bltarget:$target),
- 4, IIC_Br, [], (tBL t_bltarget:$target)>;
+ def tBfar : tPseudoExpand<(outs), (ins t_bltarget:$target, pred:$p),
+ 4, IIC_Br, [], (tBL pred:$p, t_bltarget:$target)>;
def tBR_JTr : tPseudoInst<(outs),
(ins tGPR:$target, i32imm:$jt, i32imm:$id),
@@ -522,31 +521,6 @@ let isBranch = 1, isTerminator = 1 in
let Inst{7-0} = target;
}
-// Compare and branch on zero / non-zero
-let isBranch = 1, isTerminator = 1 in {
- def tCBZ : T1I<(outs), (ins tGPR:$Rn, t_cbtarget:$target), IIC_Br,
- "cbz\t$Rn, $target", []>,
- T1Misc<{0,0,?,1,?,?,?}> {
- // A8.6.27
- bits<6> target;
- bits<3> Rn;
- let Inst{9} = target{5};
- let Inst{7-3} = target{4-0};
- let Inst{2-0} = Rn;
- }
-
- def tCBNZ : T1I<(outs), (ins tGPR:$cmp, t_cbtarget:$target), IIC_Br,
- "cbnz\t$cmp, $target", []>,
- T1Misc<{1,0,?,1,?,?,?}> {
- // A8.6.27
- bits<6> target;
- bits<3> Rn;
- let Inst{9} = target{5};
- let Inst{7-3} = target{4-0};
- let Inst{2-0} = Rn;
- }
-}
-
// Tail calls
let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in {
// Darwin versions.
@@ -562,9 +536,10 @@ let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in {
// Non-Darwin versions (the difference is R9).
let Defs = [R0, R1, R2, R3, R12, QQQQ0, QQQQ2, QQQQ3, PC],
Uses = [SP] in {
- def tTAILJMPdND : tPseudoExpand<(outs), (ins t_brtarget:$dst, variable_ops),
+ def tTAILJMPdND : tPseudoExpand<(outs),
+ (ins t_brtarget:$dst, pred:$p, variable_ops),
4, IIC_Br, [],
- (tB t_brtarget:$dst)>,
+ (tB t_brtarget:$dst, pred:$p)>,
Requires<[IsThumb, IsNotDarwin]>;
def tTAILJMPrND : tPseudoExpand<(outs), (ins tcGPR:$dst, variable_ops),
4, IIC_Br, [],
@@ -574,11 +549,11 @@ let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in {
}
-// A8.6.218 Supervisor Call (Software Interrupt) -- for disassembly only
+// A8.6.218 Supervisor Call (Software Interrupt)
// A8.6.16 B: Encoding T1
// If Inst{11-8} == 0b1111 then SEE SVC
let isCall = 1, Uses = [SP] in
-def tSVC : T1pI<(outs), (ins i32imm:$imm), IIC_Br,
+def tSVC : T1pI<(outs), (ins imm0_255:$imm), IIC_Br,
"svc", "\t$imm", []>, Encoding16 {
bits<8> imm;
let Inst{15-12} = 0b1101;
@@ -653,17 +628,17 @@ defm tLDRH : thumb_ld_rr_ri_enc<0b101, 0b1000, t_addrmode_rrs2,
let AddedComplexity = 10 in
def tLDRSB : // A8.6.80
- T1pILdStEncode<0b011, (outs tGPR:$dst), (ins t_addrmode_rr:$addr),
+ T1pILdStEncode<0b011, (outs tGPR:$Rt), (ins t_addrmode_rr:$addr),
AddrModeT1_1, IIC_iLoad_bh_r,
- "ldrsb", "\t$dst, $addr",
- [(set tGPR:$dst, (sextloadi8 t_addrmode_rr:$addr))]>;
+ "ldrsb", "\t$Rt, $addr",
+ [(set tGPR:$Rt, (sextloadi8 t_addrmode_rr:$addr))]>;
let AddedComplexity = 10 in
def tLDRSH : // A8.6.84
- T1pILdStEncode<0b111, (outs tGPR:$dst), (ins t_addrmode_rr:$addr),
+ T1pILdStEncode<0b111, (outs tGPR:$Rt), (ins t_addrmode_rr:$addr),
AddrModeT1_2, IIC_iLoad_bh_r,
- "ldrsh", "\t$dst, $addr",
- [(set tGPR:$dst, (sextloadi16 t_addrmode_rr:$addr))]>;
+ "ldrsh", "\t$Rt, $addr",
+ [(set tGPR:$Rt, (sextloadi16 t_addrmode_rr:$addr))]>;
let canFoldAsLoad = 1 in
def tLDRspi : T1pIs<(outs tGPR:$Rt), (ins t_addrmode_sp:$addr), IIC_iLoad_i,
@@ -678,7 +653,7 @@ def tLDRspi : T1pIs<(outs tGPR:$Rt), (ins t_addrmode_sp:$addr), IIC_iLoad_i,
// Load tconstpool
// FIXME: Use ldr.n to work around a Darwin assembler bug.
-let canFoldAsLoad = 1, isReMaterializable = 1 in
+let canFoldAsLoad = 1, isReMaterializable = 1, isCodeGenOnly = 1 in
def tLDRpci : T1pIs<(outs tGPR:$Rt), (ins t_addrmode_pc:$addr), IIC_iLoad_i,
"ldr", ".n\t$Rt, $addr",
[(set tGPR:$Rt, (load (ARMWrapper tconstpool:$addr)))]>,
@@ -736,42 +711,53 @@ def tSTRspi : T1pIs<(outs), (ins tGPR:$Rt, t_addrmode_sp:$addr), IIC_iStore_i,
// Load / store multiple Instructions.
//
-multiclass thumb_ldst_mult<string asm, InstrItinClass itin,
- InstrItinClass itin_upd, bits<6> T1Enc,
- bit L_bit> {
- def IA :
- T1I<(outs), (ins GPR:$Rn, pred:$p, reglist:$regs, variable_ops),
- itin, !strconcat(asm, "ia${p}\t$Rn, $regs"), []>,
- T1Encoding<T1Enc> {
- bits<3> Rn;
- bits<8> regs;
- let Inst{10-8} = Rn;
- let Inst{7-0} = regs;
- }
- def IA_UPD :
- T1It<(outs GPR:$wb), (ins GPR:$Rn, pred:$p, reglist:$regs, variable_ops),
- itin_upd, !strconcat(asm, "ia${p}\t$Rn!, $regs"), "$Rn = $wb", []>,
- T1Encoding<T1Enc> {
- bits<3> Rn;
- bits<8> regs;
- let Inst{10-8} = Rn;
- let Inst{7-0} = regs;
- }
-}
-
// These require base address to be written back or one of the loaded regs.
let neverHasSideEffects = 1 in {
let mayLoad = 1, hasExtraDefRegAllocReq = 1 in
-defm tLDM : thumb_ldst_mult<"ldm", IIC_iLoad_m, IIC_iLoad_mu,
- {1,1,0,0,1,?}, 1>;
-
+def tLDMIA : T1I<(outs), (ins tGPR:$Rn, pred:$p, reglist:$regs, variable_ops),
+ IIC_iLoad_m, "ldm${p}\t$Rn, $regs", []>, T1Encoding<{1,1,0,0,1,?}> {
+ bits<3> Rn;
+ bits<8> regs;
+ let Inst{10-8} = Rn;
+ let Inst{7-0} = regs;
+}
+
+// Writeback version is just a pseudo, as there's no encoding difference.
+// Writeback happens iff the base register is not in the destination register
+// list.
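+// E.g. "ldm r0, {r0, r2}" leaves the base register alone (r0 is reloaded),
+// while "ldm r0!, {r1, r2}" writes the updated address back to r0.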
+def tLDMIA_UPD :
+ InstTemplate<AddrModeNone, 0, IndexModeNone, Pseudo, GenericDomain,
+ "$Rn = $wb", IIC_iLoad_mu>,
+ PseudoInstExpansion<(tLDMIA tGPR:$Rn, pred:$p, reglist:$regs)> {
+ let Size = 2;
+ let OutOperandList = (outs GPR:$wb);
+ let InOperandList = (ins GPR:$Rn, pred:$p, reglist:$regs, variable_ops);
+ let Pattern = [];
+ let isCodeGenOnly = 1;
+ let isPseudo = 1;
+ list<Predicate> Predicates = [IsThumb];
+}
+
+// There is no non-writeback version of STM for Thumb.
let mayStore = 1, hasExtraSrcRegAllocReq = 1 in
-defm tSTM : thumb_ldst_mult<"stm", IIC_iStore_m, IIC_iStore_mu,
- {1,1,0,0,0,?}, 0>;
+def tSTMIA_UPD : Thumb1I<(outs GPR:$wb),
+ (ins tGPR:$Rn, pred:$p, reglist:$regs, variable_ops),
+ AddrModeNone, 2, IIC_iStore_mu,
+ "stm${p}\t$Rn!, $regs", "$Rn = $wb", []>,
+ T1Encoding<{1,1,0,0,0,?}> {
+ bits<3> Rn;
+ bits<8> regs;
+ let Inst{10-8} = Rn;
+ let Inst{7-0} = regs;
+}
} // neverHasSideEffects
+def : InstAlias<"ldm${p} $Rn!, $regs",
+ (tLDMIA tGPR:$Rn, pred:$p, reglist:$regs)>,
+ Requires<[IsThumb, IsThumb1Only]>;
+
let mayLoad = 1, Uses = [SP], Defs = [SP], hasExtraDefRegAllocReq = 1 in
def tPOP : T1I<(outs), (ins pred:$p, reglist:$regs, variable_ops),
IIC_iPop,
@@ -876,7 +862,7 @@ def tADC : // A8.6.2
// Add immediate
def tADDi3 : // A8.6.4 T1
- T1sIGenEncodeImm<0b01110, (outs tGPR:$Rd), (ins tGPR:$Rm, i32imm:$imm3),
+ T1sIGenEncodeImm<0b01110, (outs tGPR:$Rd), (ins tGPR:$Rm, imm0_7:$imm3),
IIC_iALUi,
"add", "\t$Rd, $Rm, $imm3",
[(set tGPR:$Rd, (add tGPR:$Rm, imm0_7:$imm3))]> {
@@ -885,8 +871,8 @@ def tADDi3 : // A8.6.4 T1
}
def tADDi8 : // A8.6.4 T2
- T1sItGenEncodeImm<{1,1,0,?,?}, (outs tGPR:$Rdn), (ins tGPR:$Rn, i32imm:$imm8),
- IIC_iALUi,
+ T1sItGenEncodeImm<{1,1,0,?,?}, (outs tGPR:$Rdn),
+ (ins tGPR:$Rn, imm0_255:$imm8), IIC_iALUi,
"add", "\t$Rdn, $imm8",
[(set tGPR:$Rdn, (add tGPR:$Rn, imm8_255:$imm8))]>;
@@ -920,10 +906,10 @@ def tAND : // A8.6.12
// ASR immediate
def tASRri : // A8.6.14
- T1sIGenEncodeImm<{0,1,0,?,?}, (outs tGPR:$Rd), (ins tGPR:$Rm, i32imm:$imm5),
+ T1sIGenEncodeImm<{0,1,0,?,?}, (outs tGPR:$Rd), (ins tGPR:$Rm, imm_sr:$imm5),
IIC_iMOVsi,
"asr", "\t$Rd, $Rm, $imm5",
- [(set tGPR:$Rd, (sra tGPR:$Rm, (i32 imm:$imm5)))]> {
+ [(set tGPR:$Rd, (sra tGPR:$Rm, (i32 imm_sr:$imm5)))]> {
bits<5> imm5;
let Inst{10-6} = imm5;
}
@@ -962,7 +948,7 @@ def tCMNz : // A8.6.33
// CMP immediate
let isCompare = 1, Defs = [CPSR] in {
-def tCMPi8 : T1pI<(outs), (ins tGPR:$Rn, i32imm:$imm8), IIC_iCMPi,
+def tCMPi8 : T1pI<(outs), (ins tGPR:$Rn, imm0_255:$imm8), IIC_iCMPi,
"cmp", "\t$Rn, $imm8",
[(ARMcmp tGPR:$Rn, imm0_255:$imm8)]>,
T1General<{1,0,1,?,?}> {
@@ -1003,7 +989,7 @@ def tEOR : // A8.6.45
// LSL immediate
def tLSLri : // A8.6.88
- T1sIGenEncodeImm<{0,0,0,?,?}, (outs tGPR:$Rd), (ins tGPR:$Rm, i32imm:$imm5),
+ T1sIGenEncodeImm<{0,0,0,?,?}, (outs tGPR:$Rd), (ins tGPR:$Rm, imm0_31:$imm5),
IIC_iMOVsi,
"lsl", "\t$Rd, $Rm, $imm5",
[(set tGPR:$Rd, (shl tGPR:$Rm, (i32 imm:$imm5)))]> {
@@ -1020,10 +1006,10 @@ def tLSLrr : // A8.6.89
// LSR immediate
def tLSRri : // A8.6.90
- T1sIGenEncodeImm<{0,0,1,?,?}, (outs tGPR:$Rd), (ins tGPR:$Rm, i32imm:$imm5),
+ T1sIGenEncodeImm<{0,0,1,?,?}, (outs tGPR:$Rd), (ins tGPR:$Rm, imm_sr:$imm5),
IIC_iMOVsi,
"lsr", "\t$Rd, $Rm, $imm5",
- [(set tGPR:$Rd, (srl tGPR:$Rm, (i32 imm:$imm5)))]> {
+ [(set tGPR:$Rd, (srl tGPR:$Rm, (i32 imm_sr:$imm5)))]> {
bits<5> imm5;
let Inst{10-6} = imm5;
}
@@ -1047,6 +1033,10 @@ def tMOVi8 : T1sI<(outs tGPR:$Rd), (ins imm0_255:$imm8), IIC_iMOVi,
let Inst{10-8} = Rd;
let Inst{7-0} = imm8;
}
+// Because we have an explicit tMOVSr below, we need an alias to handle
+// the immediate "movs" form here. Blech.
+def : tInstAlias <"movs $Rdn, $imm",
+ (tMOVi8 tGPR:$Rdn, CPSR, imm0_255:$imm, 14, 0)>;
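+// E.g. "movs r0, #4" matches the alias above and becomes a tMOVi8 with CPSR
+// as its cc_out operand.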
// A7-73: MOV(2) - mov setting flag.
@@ -1077,10 +1067,19 @@ def tMOVSr : T1I<(outs tGPR:$Rd), (ins tGPR:$Rm), IIC_iMOVr,
// Multiply register
let isCommutable = 1 in
def tMUL : // A8.6.105 T1
- T1sItDPEncode<0b1101, (outs tGPR:$Rdn), (ins tGPR:$Rn, tGPR:$Rm),
- IIC_iMUL32,
- "mul", "\t$Rdn, $Rm, $Rdn",
- [(set tGPR:$Rdn, (mul tGPR:$Rn, tGPR:$Rm))]>;
+ Thumb1sI<(outs tGPR:$Rd), (ins tGPR:$Rn, tGPR:$Rm), AddrModeNone, 2,
+ IIC_iMUL32, "mul", "\t$Rd, $Rn, $Rm", "$Rm = $Rd",
+ [(set tGPR:$Rd, (mul tGPR:$Rn, tGPR:$Rm))]>,
+ T1DataProcessing<0b1101> {
+ bits<3> Rd;
+ bits<3> Rn;
+ let Inst{5-3} = Rn;
+ let Inst{2-0} = Rd;
+ let AsmMatchConverter = "cvtThumbMultiply";
+}
+
+def : tInstAlias<"mul${s}${p} $Rdm, $Rn", (tMUL tGPR:$Rdm, s_cc_out:$s, tGPR:$Rn,
+ pred:$p)>;
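+// e.g. the two-operand "muls r0, r1" is equivalent to "muls r0, r1, r0".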
// Move inverse register
def tMVN : // A8.6.107
@@ -1132,6 +1131,9 @@ def tRSB : // A8.6.141
"rsb", "\t$Rd, $Rn, #0",
[(set tGPR:$Rd, (ineg tGPR:$Rn))]>;
+def : tInstAlias<"neg${s}${p} $Rd, $Rm",
+ (tRSB tGPR:$Rd, s_cc_out:$s, tGPR:$Rm, pred:$p)>;
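+// e.g. "negs r2, r3" assembles as "rsbs r2, r3, #0".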
+
// Subtract with carry register
let Uses = [CPSR] in
def tSBC : // A8.6.151
@@ -1142,7 +1144,7 @@ def tSBC : // A8.6.151
// Subtract immediate
def tSUBi3 : // A8.6.210 T1
- T1sIGenEncodeImm<0b01111, (outs tGPR:$Rd), (ins tGPR:$Rm, i32imm:$imm3),
+ T1sIGenEncodeImm<0b01111, (outs tGPR:$Rd), (ins tGPR:$Rm, imm0_7:$imm3),
IIC_iALUi,
"sub", "\t$Rd, $Rm, $imm3",
[(set tGPR:$Rd, (add tGPR:$Rm, imm0_7_neg:$imm3))]> {
@@ -1151,8 +1153,8 @@ def tSUBi3 : // A8.6.210 T1
}
def tSUBi8 : // A8.6.210 T2
- T1sItGenEncodeImm<{1,1,1,?,?}, (outs tGPR:$Rdn), (ins tGPR:$Rn, i32imm:$imm8),
- IIC_iALUi,
+ T1sItGenEncodeImm<{1,1,1,?,?}, (outs tGPR:$Rdn),
+ (ins tGPR:$Rn, imm0_255:$imm8), IIC_iALUi,
"sub", "\t$Rdn, $imm8",
[(set tGPR:$Rdn, (add tGPR:$Rn, imm8_255_neg:$imm8))]>;
@@ -1163,8 +1165,6 @@ def tSUBrr : // A8.6.212
"sub", "\t$Rd, $Rn, $Rm",
[(set tGPR:$Rd, (sub tGPR:$Rn, tGPR:$Rm))]>;
-// TODO: A7-96: STMIA - store multiple.
-
// Sign-extend byte
def tSXTB : // A8.6.222
T1pIMiscEncode<{0,0,1,0,0,1,?}, (outs tGPR:$Rd), (ins tGPR:$Rm),
@@ -1216,12 +1216,13 @@ let usesCustomInserter = 1 in // Expanded after instruction selection.
// assembler.
def tADR : T1I<(outs tGPR:$Rd), (ins t_adrlabel:$addr, pred:$p),
- IIC_iALUi, "adr{$p}\t$Rd, #$addr", []>,
+ IIC_iALUi, "adr{$p}\t$Rd, $addr", []>,
T1Encoding<{1,0,1,0,0,?}> {
bits<3> Rd;
bits<8> addr;
let Inst{10-8} = Rd;
let Inst{7-0} = addr;
+ let DecoderMethod = "DecodeThumbAddSpecialReg";
}
let neverHasSideEffects = 1, isReMaterializable = 1 in
@@ -1361,6 +1362,31 @@ def : T1Pat<(sextloadi16 t_addrmode_rrs2:$addr),
def : T1Pat<(sextloadi16 t_addrmode_is2:$addr),
(tASRri (tLSLri (tLDRHi t_addrmode_is2:$addr), 16), 16)>;
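+
+// Aligned Thumb1 loads and stores are single-copy atomic, so the atomic
+// nodes below map directly onto the ordinary instructions; any ordering
+// requirement is handled by separate barrier instructions.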
+def : T1Pat<(atomic_load_8 t_addrmode_is1:$src),
+ (tLDRBi t_addrmode_is1:$src)>;
+def : T1Pat<(atomic_load_8 t_addrmode_rrs1:$src),
+ (tLDRBr t_addrmode_rrs1:$src)>;
+def : T1Pat<(atomic_load_16 t_addrmode_is2:$src),
+ (tLDRHi t_addrmode_is2:$src)>;
+def : T1Pat<(atomic_load_16 t_addrmode_rrs2:$src),
+ (tLDRHr t_addrmode_rrs2:$src)>;
+def : T1Pat<(atomic_load_32 t_addrmode_is4:$src),
+ (tLDRi t_addrmode_is4:$src)>;
+def : T1Pat<(atomic_load_32 t_addrmode_rrs4:$src),
+ (tLDRr t_addrmode_rrs4:$src)>;
+def : T1Pat<(atomic_store_8 t_addrmode_is1:$ptr, tGPR:$val),
+ (tSTRBi tGPR:$val, t_addrmode_is1:$ptr)>;
+def : T1Pat<(atomic_store_8 t_addrmode_rrs1:$ptr, tGPR:$val),
+ (tSTRBr tGPR:$val, t_addrmode_rrs1:$ptr)>;
+def : T1Pat<(atomic_store_16 t_addrmode_is2:$ptr, tGPR:$val),
+ (tSTRHi tGPR:$val, t_addrmode_is2:$ptr)>;
+def : T1Pat<(atomic_store_16 t_addrmode_rrs2:$ptr, tGPR:$val),
+ (tSTRHr tGPR:$val, t_addrmode_rrs2:$ptr)>;
+def : T1Pat<(atomic_store_32 t_addrmode_is4:$ptr, tGPR:$val),
+ (tSTRi tGPR:$val, t_addrmode_is4:$ptr)>;
+def : T1Pat<(atomic_store_32 t_addrmode_rrs4:$ptr, tGPR:$val),
+ (tSTRr tGPR:$val, t_addrmode_rrs4:$ptr)>;
+
// Large immediate handling.
// Two piece imms.
@@ -1395,3 +1421,16 @@ let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1 in {
2, IIC_Br, [(brind GPR:$Rm)],
(tMOVr PC, GPR:$Rm, pred:$p)>;
}
+
+
+// In Thumb1, "nop" is encoded as a "mov r8, r8". Technically, the bf00
+// encoding is available on ARMv6K, but we don't differentiate that finely.
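+// ("mov r8, r8" is the canonical Thumb1 NOP, encoding 0x46c0.)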
+def : InstAlias<"nop", (tMOVr R8, R8, 14, 0)>,Requires<[IsThumb, IsThumb1Only]>;
+
+
+// For round-trip assembly/disassembly, we have to handle a CPS instruction
+// without any iflags. That's not, strictly speaking, valid syntax, but it's
+// a useful extension and assembles to defined behaviour (the insn does
+// nothing).
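+// For example, a bare "cpsie" (no iflags) is accepted and encodes an
+// all-zero iflags field.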
+def : tInstAlias<"cps$imod", (tCPS imod_op:$imod, 0)>;
+def : tInstAlias<"cps$imod", (tCPS imod_op:$imod, 0)>;
diff --git a/lib/Target/ARM/ARMInstrThumb2.td b/lib/Target/ARM/ARMInstrThumb2.td
index c2c6cbc..471ec29 100644
--- a/lib/Target/ARM/ARMInstrThumb2.td
+++ b/lib/Target/ARM/ARMInstrThumb2.td
@@ -12,13 +12,32 @@
//===----------------------------------------------------------------------===//
// IT block predicate field
+def it_pred_asmoperand : AsmOperandClass {
+ let Name = "ITCondCode";
+ let ParserMethod = "parseITCondCode";
+}
def it_pred : Operand<i32> {
let PrintMethod = "printMandatoryPredicateOperand";
+ let ParserMatchClass = it_pred_asmoperand;
}
// IT block condition mask
+def it_mask_asmoperand : AsmOperandClass { let Name = "ITMask"; }
def it_mask : Operand<i32> {
let PrintMethod = "printThumbITMask";
+ let ParserMatchClass = it_mask_asmoperand;
+}
+
+// t2_shift_imm: An integer that encodes a shift amount and the type of shift
+// (asr or lsl). The 6-bit immediate encodes as:
+//    {5}     0 ==> lsl
+//            1     asr
+//    {4-0}   imm5  shift amount.
+// asr #32 not allowed
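+// For example, "lsl #4" encodes as 0b000100 and "asr #4" as 0b100100.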
+def t2_shift_imm : Operand<i32> {
+ let PrintMethod = "printShiftImmOperand";
+ let ParserMatchClass = ShifterImmAsmOperand;
+ let DecoderMethod = "DecodeT2ShifterImmOperand";
}
// Shifted operands. No register controlled shifts for Thumb2.
@@ -28,6 +47,8 @@ def t2_so_reg : Operand<i32>, // reg imm
[shl,srl,sra,rotr]> {
let EncoderMethod = "getT2SORegOpValue";
let PrintMethod = "printT2SOOperand";
+ let DecoderMethod = "DecodeSORegImmOperand";
+ let ParserMatchClass = ShiftedImmAsmOperand;
let MIOperandInfo = (ops rGPR, i32imm);
}
@@ -50,6 +71,7 @@ def t2_so_imm : Operand<i32>, ImmLeaf<i32, [{
}]> {
let ParserMatchClass = t2_so_imm_asmoperand;
let EncoderMethod = "getT2SOImmOpValue";
+ let DecoderMethod = "DecodeT2SOImm";
}
// t2_so_imm_not - Match an immediate that is a complement
@@ -65,11 +87,6 @@ def t2_so_imm_neg : Operand<i32>,
return ARM_AM::getT2SOImmVal(-((uint32_t)N->getZExtValue())) != -1;
}], t2_so_imm_neg_XFORM>;
-/// imm1_31 predicate - True if the 32-bit immediate is in the range [1,31].
-def imm1_31 : ImmLeaf<i32, [{
- return (int32_t)Imm >= 1 && (int32_t)Imm < 32;
-}]>;
-
/// imm0_4095 predicate - True if the 32-bit immediate is in the range [0,4095].
def imm0_4095 : Operand<i32>,
ImmLeaf<i32, [{
@@ -96,17 +113,20 @@ def lo5AllOne : PatLeaf<(i32 imm), [{
// Define Thumb2 specific addressing modes.
// t2addrmode_imm12 := reg + imm12
+def t2addrmode_imm12_asmoperand : AsmOperandClass {let Name="MemUImm12Offset";}
def t2addrmode_imm12 : Operand<i32>,
ComplexPattern<i32, 2, "SelectT2AddrModeImm12", []> {
let PrintMethod = "printAddrModeImm12Operand";
let EncoderMethod = "getAddrModeImm12OpValue";
+ let DecoderMethod = "DecodeT2AddrModeImm12";
+ let ParserMatchClass = t2addrmode_imm12_asmoperand;
let MIOperandInfo = (ops GPR:$base, i32imm:$offsimm);
- let ParserMatchClass = MemMode5AsmOperand;
}
// t2ldrlabel := imm12
def t2ldrlabel : Operand<i32> {
let EncoderMethod = "getAddrModeImm12OpValue";
+ let PrintMethod = "printT2LdrLabelOperand";
}
@@ -116,13 +136,36 @@ def t2adrlabel : Operand<i32> {
}
+// t2addrmode_posimm8 := reg + imm8
+def MemPosImm8OffsetAsmOperand : AsmOperandClass {let Name="MemPosImm8Offset";}
+def t2addrmode_posimm8 : Operand<i32> {
+ let PrintMethod = "printT2AddrModeImm8Operand";
+ let EncoderMethod = "getT2AddrModeImm8OpValue";
+ let DecoderMethod = "DecodeT2AddrModeImm8";
+ let ParserMatchClass = MemPosImm8OffsetAsmOperand;
+ let MIOperandInfo = (ops GPR:$base, i32imm:$offsimm);
+}
+
+// t2addrmode_negimm8 := reg - imm8
+def MemNegImm8OffsetAsmOperand : AsmOperandClass {let Name="MemNegImm8Offset";}
+def t2addrmode_negimm8 : Operand<i32>,
+ ComplexPattern<i32, 2, "SelectT2AddrModeImm8", []> {
+ let PrintMethod = "printT2AddrModeImm8Operand";
+ let EncoderMethod = "getT2AddrModeImm8OpValue";
+ let DecoderMethod = "DecodeT2AddrModeImm8";
+ let ParserMatchClass = MemNegImm8OffsetAsmOperand;
+ let MIOperandInfo = (ops GPR:$base, i32imm:$offsimm);
+}
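+// The positive/negative split lets the assembler route "[Rn, #-imm]" to the
+// negimm8 encodings, while positive-only users (e.g. the unprivileged
+// LDRT/STRT family) take t2addrmode_posimm8.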
+
// t2addrmode_imm8 := reg +/- imm8
+def MemImm8OffsetAsmOperand : AsmOperandClass { let Name = "MemImm8Offset"; }
def t2addrmode_imm8 : Operand<i32>,
ComplexPattern<i32, 2, "SelectT2AddrModeImm8", []> {
let PrintMethod = "printT2AddrModeImm8Operand";
let EncoderMethod = "getT2AddrModeImm8OpValue";
+ let DecoderMethod = "DecodeT2AddrModeImm8";
+ let ParserMatchClass = MemImm8OffsetAsmOperand;
let MIOperandInfo = (ops GPR:$base, i32imm:$offsimm);
- let ParserMatchClass = MemMode5AsmOperand;
}
def t2am_imm8_offset : Operand<i32>,
@@ -130,38 +173,61 @@ def t2am_imm8_offset : Operand<i32>,
[], [SDNPWantRoot]> {
let PrintMethod = "printT2AddrModeImm8OffsetOperand";
let EncoderMethod = "getT2AddrModeImm8OffsetOpValue";
- let ParserMatchClass = MemMode5AsmOperand;
+ let DecoderMethod = "DecodeT2Imm8";
}
// t2addrmode_imm8s4 := reg +/- (imm8 << 2)
+def MemImm8s4OffsetAsmOperand : AsmOperandClass {let Name = "MemImm8s4Offset";}
def t2addrmode_imm8s4 : Operand<i32> {
let PrintMethod = "printT2AddrModeImm8s4Operand";
let EncoderMethod = "getT2AddrModeImm8s4OpValue";
+ let DecoderMethod = "DecodeT2AddrModeImm8s4";
+ let ParserMatchClass = MemImm8s4OffsetAsmOperand;
let MIOperandInfo = (ops GPR:$base, i32imm:$offsimm);
- let ParserMatchClass = MemMode5AsmOperand;
}
+def t2am_imm8s4_offset_asmoperand : AsmOperandClass { let Name = "Imm8s4"; }
def t2am_imm8s4_offset : Operand<i32> {
let PrintMethod = "printT2AddrModeImm8s4OffsetOperand";
+ let EncoderMethod = "getT2Imm8s4OpValue";
+ let DecoderMethod = "DecodeT2Imm8S4";
+}
+
+// t2addrmode_imm0_1020s4 := reg + (imm8 << 2)
+def MemImm0_1020s4OffsetAsmOperand : AsmOperandClass {
+ let Name = "MemImm0_1020s4Offset";
+}
+def t2addrmode_imm0_1020s4 : Operand<i32> {
+ let PrintMethod = "printT2AddrModeImm0_1020s4Operand";
+ let EncoderMethod = "getT2AddrModeImm0_1020s4OpValue";
+ let DecoderMethod = "DecodeT2AddrModeImm0_1020s4";
+ let ParserMatchClass = MemImm0_1020s4OffsetAsmOperand;
+ let MIOperandInfo = (ops GPRnopc:$base, i32imm:$offsimm);
}
// t2addrmode_so_reg := reg + (reg << imm2)
+def t2addrmode_so_reg_asmoperand : AsmOperandClass {let Name="T2MemRegOffset";}
def t2addrmode_so_reg : Operand<i32>,
ComplexPattern<i32, 3, "SelectT2AddrModeSoReg", []> {
let PrintMethod = "printT2AddrModeSoRegOperand";
let EncoderMethod = "getT2AddrModeSORegOpValue";
+ let DecoderMethod = "DecodeT2AddrModeSOReg";
+ let ParserMatchClass = t2addrmode_so_reg_asmoperand;
let MIOperandInfo = (ops GPR:$base, rGPR:$offsreg, i32imm:$offsimm);
- let ParserMatchClass = MemMode5AsmOperand;
}
-// t2addrmode_reg := reg
-// Used by load/store exclusive instructions. Useful to enable right assembly
-// parsing and printing. Not used for any codegen matching.
-//
-def t2addrmode_reg : Operand<i32> {
- let PrintMethod = "printAddrMode7Operand";
- let MIOperandInfo = (ops GPR);
- let ParserMatchClass = MemMode7AsmOperand;
+// Addresses for the TBB/TBH instructions.
+def addrmode_tbb_asmoperand : AsmOperandClass { let Name = "MemTBB"; }
+def addrmode_tbb : Operand<i32> {
+ let PrintMethod = "printAddrModeTBB";
+ let ParserMatchClass = addrmode_tbb_asmoperand;
+ let MIOperandInfo = (ops GPR:$Rn, rGPR:$Rm);
+}
+def addrmode_tbh_asmoperand : AsmOperandClass { let Name = "MemTBH"; }
+def addrmode_tbh : Operand<i32> {
+ let PrintMethod = "printAddrModeTBH";
+ let ParserMatchClass = addrmode_tbh_asmoperand;
+ let MIOperandInfo = (ops GPR:$Rn, rGPR:$Rm);
}
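+// These give the usual UAL syntax, e.g. "tbb [r0, r1]" and
+// "tbh [r0, r1, lsl #1]".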
//===----------------------------------------------------------------------===//
@@ -419,47 +485,6 @@ class T2MulLong<bits<3> opc22_20, bits<4> opc7_4,
}
-/// T2I_un_irs - Defines a set of (op reg, {so_imm|r|so_reg}) patterns for a
-/// unary operation that produces a value. These are predicable and can be
-/// changed to modify CPSR.
-multiclass T2I_un_irs<bits<4> opcod, string opc,
- InstrItinClass iii, InstrItinClass iir, InstrItinClass iis,
- PatFrag opnode, bit Cheap = 0, bit ReMat = 0> {
- // shifted imm
- def i : T2sOneRegImm<(outs rGPR:$Rd), (ins t2_so_imm:$imm), iii,
- opc, "\t$Rd, $imm",
- [(set rGPR:$Rd, (opnode t2_so_imm:$imm))]> {
- let isAsCheapAsAMove = Cheap;
- let isReMaterializable = ReMat;
- let Inst{31-27} = 0b11110;
- let Inst{25} = 0;
- let Inst{24-21} = opcod;
- let Inst{19-16} = 0b1111; // Rn
- let Inst{15} = 0;
- }
- // register
- def r : T2sTwoReg<(outs rGPR:$Rd), (ins rGPR:$Rm), iir,
- opc, ".w\t$Rd, $Rm",
- [(set rGPR:$Rd, (opnode rGPR:$Rm))]> {
- let Inst{31-27} = 0b11101;
- let Inst{26-25} = 0b01;
- let Inst{24-21} = opcod;
- let Inst{19-16} = 0b1111; // Rn
- let Inst{14-12} = 0b000; // imm3
- let Inst{7-6} = 0b00; // imm2
- let Inst{5-4} = 0b00; // type
- }
- // shifted register
- def s : T2sOneRegShiftedReg<(outs rGPR:$Rd), (ins t2_so_reg:$ShiftedRm), iis,
- opc, ".w\t$Rd, $ShiftedRm",
- [(set rGPR:$Rd, (opnode t2_so_reg:$ShiftedRm))]> {
- let Inst{31-27} = 0b11101;
- let Inst{26-25} = 0b01;
- let Inst{24-21} = opcod;
- let Inst{19-16} = 0b1111; // Rn
- }
-}
-
/// T2I_bin_irs - Defines a set of (op reg, {so_imm|r|so_reg}) patterns for a
/// binary operation that produces a value. These are predicable and can be
/// changed to modify CPSR.
@@ -500,21 +525,18 @@ multiclass T2I_bin_irs<bits<4> opcod, string opc,
}
// Assembly aliases for optional destination operand when it's the same
// as the source operand.
- def : InstAlias<!strconcat(opc, "${s}${p} $Rdn, $imm"),
+ def : t2InstAlias<!strconcat(opc, "${s}${p} $Rdn, $imm"),
(!cast<Instruction>(!strconcat(baseOpc, "ri")) rGPR:$Rdn, rGPR:$Rdn,
t2_so_imm:$imm, pred:$p,
- cc_out:$s)>,
- Requires<[IsThumb2]>;
- def : InstAlias<!strconcat(opc, "${s}${p}", wide, " $Rdn, $Rm"),
+ cc_out:$s)>;
+ def : t2InstAlias<!strconcat(opc, "${s}${p}", wide, " $Rdn, $Rm"),
(!cast<Instruction>(!strconcat(baseOpc, "rr")) rGPR:$Rdn, rGPR:$Rdn,
rGPR:$Rm, pred:$p,
- cc_out:$s)>,
- Requires<[IsThumb2]>;
- def : InstAlias<!strconcat(opc, "${s}${p}", wide, " $Rdn, $shift"),
+ cc_out:$s)>;
+ def : t2InstAlias<!strconcat(opc, "${s}${p}", wide, " $Rdn, $shift"),
(!cast<Instruction>(!strconcat(baseOpc, "rs")) rGPR:$Rdn, rGPR:$Rdn,
t2_so_reg:$shift, pred:$p,
- cc_out:$s)>,
- Requires<[IsThumb2]>;
+ cc_out:$s)>;
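+ // e.g. "ands r1, #3" is thus shorthand for "ands r1, r1, #3".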
}
/// T2I_bin_w_irs - Same as T2I_bin_irs except these operations need
@@ -522,7 +544,27 @@ multiclass T2I_bin_irs<bits<4> opcod, string opc,
multiclass T2I_bin_w_irs<bits<4> opcod, string opc,
InstrItinClass iii, InstrItinClass iir, InstrItinClass iis,
PatFrag opnode, string baseOpc, bit Commutable = 0> :
- T2I_bin_irs<opcod, opc, iii, iir, iis, opnode, baseOpc, Commutable, ".w">;
+ T2I_bin_irs<opcod, opc, iii, iir, iis, opnode, baseOpc, Commutable, ".w"> {
+ // Assembler aliases w/o the ".w" suffix.
+ def : t2InstAlias<!strconcat(opc, "${s}${p}", " $Rd, $Rn, $Rm"),
+ (!cast<Instruction>(!strconcat(baseOpc, "rr")) rGPR:$Rd, rGPR:$Rn,
+ rGPR:$Rm, pred:$p,
+ cc_out:$s)>;
+ def : t2InstAlias<!strconcat(opc, "${s}${p}", " $Rd, $Rn, $shift"),
+ (!cast<Instruction>(!strconcat(baseOpc, "rs")) rGPR:$Rd, rGPR:$Rn,
+ t2_so_reg:$shift, pred:$p,
+ cc_out:$s)>;
+
+ // and with the optional destination operand, too.
+ def : t2InstAlias<!strconcat(opc, "${s}${p}", " $Rdn, $Rm"),
+ (!cast<Instruction>(!strconcat(baseOpc, "rr")) rGPR:$Rdn, rGPR:$Rdn,
+ rGPR:$Rm, pred:$p,
+ cc_out:$s)>;
+ def : t2InstAlias<!strconcat(opc, "${s}${p}", " $Rdn, $shift"),
+ (!cast<Instruction>(!strconcat(baseOpc, "rs")) rGPR:$Rdn, rGPR:$Rdn,
+ t2_so_reg:$shift, pred:$p,
+ cc_out:$s)>;
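+
+ // e.g. plain "and r0, r1, r2" can now match the wide encoding without
+ // an explicit ".w".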
+}
/// T2I_rbin_is - Same as T2I_bin_irs except the order of operands is
/// reversed. The 'rr' form is only defined for the disassembler; for codegen
@@ -563,45 +605,28 @@ multiclass T2I_rbin_irs<bits<4> opcod, string opc, PatFrag opnode> {
/// T2I_bin_s_irs - Similar to T2I_bin_irs except it sets the 's' bit so the
/// instruction modifies the CPSR register.
-let isCodeGenOnly = 1, Defs = [CPSR] in {
+///
+/// These opcodes will be converted to the real non-S opcodes by
+/// AdjustInstrPostInstrSelection after giving them an optional CPSR operand.
+let hasPostISelHook = 1, isCodeGenOnly = 1, isPseudo = 1, Defs = [CPSR] in {
multiclass T2I_bin_s_irs<bits<4> opcod, string opc,
InstrItinClass iii, InstrItinClass iir, InstrItinClass iis,
PatFrag opnode, bit Commutable = 0> {
// shifted imm
- def ri : T2TwoRegImm<
+ def ri : T2sTwoRegImm<
(outs rGPR:$Rd), (ins GPR:$Rn, t2_so_imm:$imm), iii,
- !strconcat(opc, "s"), ".w\t$Rd, $Rn, $imm",
- [(set rGPR:$Rd, (opnode GPR:$Rn, t2_so_imm:$imm))]> {
- let Inst{31-27} = 0b11110;
- let Inst{25} = 0;
- let Inst{24-21} = opcod;
- let Inst{20} = 1; // The S bit.
- let Inst{15} = 0;
- }
+ opc, ".w\t$Rd, $Rn, $imm",
+ [(set rGPR:$Rd, CPSR, (opnode GPR:$Rn, t2_so_imm:$imm))]>;
// register
- def rr : T2ThreeReg<
+ def rr : T2sThreeReg<
(outs rGPR:$Rd), (ins GPR:$Rn, rGPR:$Rm), iir,
- !strconcat(opc, "s"), ".w\t$Rd, $Rn, $Rm",
- [(set rGPR:$Rd, (opnode GPR:$Rn, rGPR:$Rm))]> {
- let isCommutable = Commutable;
- let Inst{31-27} = 0b11101;
- let Inst{26-25} = 0b01;
- let Inst{24-21} = opcod;
- let Inst{20} = 1; // The S bit.
- let Inst{14-12} = 0b000; // imm3
- let Inst{7-6} = 0b00; // imm2
- let Inst{5-4} = 0b00; // type
- }
+ opc, ".w\t$Rd, $Rn, $Rm",
+ [(set rGPR:$Rd, CPSR, (opnode GPR:$Rn, rGPR:$Rm))]>;
// shifted register
- def rs : T2TwoRegShiftedReg<
+ def rs : T2sTwoRegShiftedReg<
(outs rGPR:$Rd), (ins GPR:$Rn, t2_so_reg:$ShiftedRm), iis,
- !strconcat(opc, "s"), ".w\t$Rd, $Rn, $ShiftedRm",
- [(set rGPR:$Rd, (opnode GPR:$Rn, t2_so_reg:$ShiftedRm))]> {
- let Inst{31-27} = 0b11101;
- let Inst{26-25} = 0b01;
- let Inst{24-21} = opcod;
- let Inst{20} = 1; // The S bit.
- }
+ opc, ".w\t$Rd, $Rn, $ShiftedRm",
+ [(set rGPR:$Rd, CPSR, (opnode GPR:$Rn, t2_so_reg:$ShiftedRm))]>;
}
}
@@ -614,9 +639,9 @@ multiclass T2I_bin_ii12rs<bits<3> op23_21, string opc, PatFrag opnode,
// in particular for taking the address of a local.
let isReMaterializable = 1 in {
def ri : T2sTwoRegImm<
- (outs rGPR:$Rd), (ins GPR:$Rn, t2_so_imm:$imm), IIC_iALUi,
- opc, ".w\t$Rd, $Rn, $imm",
- [(set rGPR:$Rd, (opnode GPR:$Rn, t2_so_imm:$imm))]> {
+ (outs GPRnopc:$Rd), (ins GPRnopc:$Rn, t2_so_imm:$imm), IIC_iALUi,
+ opc, ".w\t$Rd, $Rn, $imm",
+ [(set GPRnopc:$Rd, (opnode GPRnopc:$Rn, t2_so_imm:$imm))]> {
let Inst{31-27} = 0b11110;
let Inst{25} = 0;
let Inst{24} = 1;
@@ -626,9 +651,9 @@ multiclass T2I_bin_ii12rs<bits<3> op23_21, string opc, PatFrag opnode,
}
// 12-bit imm
def ri12 : T2I<
- (outs rGPR:$Rd), (ins GPR:$Rn, imm0_4095:$imm), IIC_iALUi,
+ (outs GPRnopc:$Rd), (ins GPR:$Rn, imm0_4095:$imm), IIC_iALUi,
!strconcat(opc, "w"), "\t$Rd, $Rn, $imm",
- [(set rGPR:$Rd, (opnode GPR:$Rn, imm0_4095:$imm))]> {
+ [(set GPRnopc:$Rd, (opnode GPR:$Rn, imm0_4095:$imm))]> {
bits<4> Rd;
bits<4> Rn;
bits<12> imm;
@@ -644,9 +669,9 @@ multiclass T2I_bin_ii12rs<bits<3> op23_21, string opc, PatFrag opnode,
let Inst{7-0} = imm{7-0};
}
// register
- def rr : T2sThreeReg<(outs rGPR:$Rd), (ins GPR:$Rn, rGPR:$Rm), IIC_iALUr,
- opc, ".w\t$Rd, $Rn, $Rm",
- [(set rGPR:$Rd, (opnode GPR:$Rn, rGPR:$Rm))]> {
+ def rr : T2sThreeReg<(outs GPRnopc:$Rd), (ins GPRnopc:$Rn, rGPR:$Rm),
+ IIC_iALUr, opc, ".w\t$Rd, $Rn, $Rm",
+ [(set GPRnopc:$Rd, (opnode GPRnopc:$Rn, rGPR:$Rm))]> {
let isCommutable = Commutable;
let Inst{31-27} = 0b11101;
let Inst{26-25} = 0b01;
@@ -658,9 +683,9 @@ multiclass T2I_bin_ii12rs<bits<3> op23_21, string opc, PatFrag opnode,
}
// shifted register
def rs : T2sTwoRegShiftedReg<
- (outs rGPR:$Rd), (ins GPR:$Rn, t2_so_reg:$ShiftedRm),
+ (outs GPRnopc:$Rd), (ins GPRnopc:$Rn, t2_so_reg:$ShiftedRm),
IIC_iALUsi, opc, ".w\t$Rd, $Rn, $ShiftedRm",
- [(set rGPR:$Rd, (opnode GPR:$Rn, t2_so_reg:$ShiftedRm))]> {
+ [(set GPRnopc:$Rd, (opnode GPRnopc:$Rn, t2_so_reg:$ShiftedRm))]> {
let Inst{31-27} = 0b11101;
let Inst{26-25} = 0b01;
let Inst{24} = 1;
@@ -671,13 +696,13 @@ multiclass T2I_bin_ii12rs<bits<3> op23_21, string opc, PatFrag opnode,
/// T2I_adde_sube_irs - Defines a set of (op reg, {so_imm|r|so_reg}) patterns
/// for a binary operation that produces a value and use the carry
/// bit. It's not predicable.
-let Uses = [CPSR] in {
+let Defs = [CPSR], Uses = [CPSR] in {
multiclass T2I_adde_sube_irs<bits<4> opcod, string opc, PatFrag opnode,
bit Commutable = 0> {
// shifted imm
def ri : T2sTwoRegImm<(outs rGPR:$Rd), (ins rGPR:$Rn, t2_so_imm:$imm),
IIC_iALUi, opc, "\t$Rd, $Rn, $imm",
- [(set rGPR:$Rd, (opnode rGPR:$Rn, t2_so_imm:$imm))]>,
+ [(set rGPR:$Rd, CPSR, (opnode rGPR:$Rn, t2_so_imm:$imm, CPSR))]>,
Requires<[IsThumb2]> {
let Inst{31-27} = 0b11110;
let Inst{25} = 0;
@@ -687,7 +712,7 @@ multiclass T2I_adde_sube_irs<bits<4> opcod, string opc, PatFrag opnode,
// register
def rr : T2sThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iALUr,
opc, ".w\t$Rd, $Rn, $Rm",
- [(set rGPR:$Rd, (opnode rGPR:$Rn, rGPR:$Rm))]>,
+ [(set rGPR:$Rd, CPSR, (opnode rGPR:$Rn, rGPR:$Rm, CPSR))]>,
Requires<[IsThumb2]> {
let isCommutable = Commutable;
let Inst{31-27} = 0b11101;
@@ -701,7 +726,7 @@ multiclass T2I_adde_sube_irs<bits<4> opcod, string opc, PatFrag opnode,
def rs : T2sTwoRegShiftedReg<
(outs rGPR:$Rd), (ins rGPR:$Rn, t2_so_reg:$ShiftedRm),
IIC_iALUsi, opc, ".w\t$Rd, $Rn, $ShiftedRm",
- [(set rGPR:$Rd, (opnode rGPR:$Rn, t2_so_reg:$ShiftedRm))]>,
+ [(set rGPR:$Rd, CPSR, (opnode rGPR:$Rn, t2_so_reg:$ShiftedRm, CPSR))]>,
Requires<[IsThumb2]> {
let Inst{31-27} = 0b11101;
let Inst{26-25} = 0b01;
@@ -710,64 +735,35 @@ multiclass T2I_adde_sube_irs<bits<4> opcod, string opc, PatFrag opnode,
}
}
-// Carry setting variants
-// NOTE: CPSR def omitted because it will be handled by the custom inserter.
-let usesCustomInserter = 1 in {
-multiclass T2I_adde_sube_s_irs<PatFrag opnode, bit Commutable = 0> {
- // shifted imm
- def ri : t2PseudoInst<(outs rGPR:$Rd), (ins rGPR:$Rn, t2_so_imm:$imm),
- 4, IIC_iALUi,
- [(set rGPR:$Rd, (opnode rGPR:$Rn, t2_so_imm:$imm))]>;
- // register
- def rr : t2PseudoInst<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm),
- 4, IIC_iALUr,
- [(set rGPR:$Rd, (opnode rGPR:$Rn, rGPR:$Rm))]> {
- let isCommutable = Commutable;
- }
- // shifted register
- def rs : t2PseudoInst<
- (outs rGPR:$Rd), (ins rGPR:$Rn, t2_so_reg:$ShiftedRm),
- 4, IIC_iALUsi,
- [(set rGPR:$Rd, (opnode rGPR:$Rn, t2_so_reg:$ShiftedRm))]>;
-}
-}
-
/// T2I_rbin_s_is - Same as T2I_rbin_irs except sets 's' bit and the register
/// version is not needed since this is only for codegen.
-let isCodeGenOnly = 1, Defs = [CPSR] in {
+///
+/// These opcodes will be converted to the real non-S opcodes by
+/// AdjustInstrPostInstrSelection after giving them an optional CPSR operand.
+let hasPostISelHook = 1, isCodeGenOnly = 1, isPseudo = 1, Defs = [CPSR] in {
multiclass T2I_rbin_s_is<bits<4> opcod, string opc, PatFrag opnode> {
// shifted imm
- def ri : T2TwoRegImm<
+ def ri : T2sTwoRegImm<
(outs rGPR:$Rd), (ins rGPR:$Rn, t2_so_imm:$imm), IIC_iALUi,
- !strconcat(opc, "s"), ".w\t$Rd, $Rn, $imm",
- [(set rGPR:$Rd, (opnode t2_so_imm:$imm, rGPR:$Rn))]> {
- let Inst{31-27} = 0b11110;
- let Inst{25} = 0;
- let Inst{24-21} = opcod;
- let Inst{20} = 1; // The S bit.
- let Inst{15} = 0;
- }
+ opc, ".w\t$Rd, $Rn, $imm",
+ [(set rGPR:$Rd, CPSR, (opnode t2_so_imm:$imm, rGPR:$Rn))]>;
// shifted register
- def rs : T2TwoRegShiftedReg<
+ def rs : T2sTwoRegShiftedReg<
(outs rGPR:$Rd), (ins rGPR:$Rn, t2_so_reg:$ShiftedRm),
- IIC_iALUsi, !strconcat(opc, "s"), "\t$Rd, $Rn, $ShiftedRm",
- [(set rGPR:$Rd, (opnode t2_so_reg:$ShiftedRm, rGPR:$Rn))]> {
- let Inst{31-27} = 0b11101;
- let Inst{26-25} = 0b01;
- let Inst{24-21} = opcod;
- let Inst{20} = 1; // The S bit.
- }
+ IIC_iALUsi, opc, "\t$Rd, $Rn, $ShiftedRm",
+ [(set rGPR:$Rd, CPSR, (opnode t2_so_reg:$ShiftedRm, rGPR:$Rn))]>;
}
}
/// T2I_sh_ir - Defines a set of (op reg, {so_imm|r}) patterns for a shift /
// rotate operation that produces a value.
-multiclass T2I_sh_ir<bits<2> opcod, string opc, PatFrag opnode> {
+multiclass T2I_sh_ir<bits<2> opcod, string opc, Operand ty, PatFrag opnode,
+ string baseOpc> {
// 5-bit imm
def ri : T2sTwoRegShiftImm<
- (outs rGPR:$Rd), (ins rGPR:$Rm, i32imm:$imm), IIC_iMOVsi,
+ (outs rGPR:$Rd), (ins rGPR:$Rm, ty:$imm), IIC_iMOVsi,
opc, ".w\t$Rd, $Rm, $imm",
- [(set rGPR:$Rd, (opnode rGPR:$Rm, imm1_31:$imm))]> {
+ [(set rGPR:$Rd, (opnode rGPR:$Rm, (i32 ty:$imm)))]> {
let Inst{31-27} = 0b11101;
let Inst{26-21} = 0b010010;
let Inst{19-16} = 0b1111; // Rn
@@ -784,20 +780,50 @@ multiclass T2I_sh_ir<bits<2> opcod, string opc, PatFrag opnode> {
let Inst{15-12} = 0b1111;
let Inst{7-4} = 0b0000;
}
+
+ // Optional destination register
+ def : t2InstAlias<!strconcat(opc, "${s}${p}", ".w $Rdn, $imm"),
+ (!cast<Instruction>(!strconcat(baseOpc, "ri")) rGPR:$Rdn, rGPR:$Rdn,
+ ty:$imm, pred:$p,
+ cc_out:$s)>;
+ def : t2InstAlias<!strconcat(opc, "${s}${p}", ".w $Rdn, $Rm"),
+ (!cast<Instruction>(!strconcat(baseOpc, "rr")) rGPR:$Rdn, rGPR:$Rdn,
+ rGPR:$Rm, pred:$p,
+ cc_out:$s)>;
+
+ // Assembler aliases w/o the ".w" suffix.
+ def : t2InstAlias<!strconcat(opc, "${s}${p}", " $Rd, $Rn, $imm"),
+ (!cast<Instruction>(!strconcat(baseOpc, "ri")) rGPR:$Rd, rGPR:$Rn,
+ ty:$imm, pred:$p,
+ cc_out:$s)>;
+ def : t2InstAlias<!strconcat(opc, "${s}${p}", " $Rd, $Rn, $Rm"),
+ (!cast<Instruction>(!strconcat(baseOpc, "rr")) rGPR:$Rd, rGPR:$Rn,
+ rGPR:$Rm, pred:$p,
+ cc_out:$s)>;
+
+ // and with the optional destination operand, too.
+ def : t2InstAlias<!strconcat(opc, "${s}${p}", " $Rdn, $imm"),
+ (!cast<Instruction>(!strconcat(baseOpc, "ri")) rGPR:$Rdn, rGPR:$Rdn,
+ ty:$imm, pred:$p,
+ cc_out:$s)>;
+ def : t2InstAlias<!strconcat(opc, "${s}${p}", " $Rdn, $Rm"),
+ (!cast<Instruction>(!strconcat(baseOpc, "rr")) rGPR:$Rdn, rGPR:$Rdn,
+ rGPR:$Rm, pred:$p,
+ cc_out:$s)>;
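+
+ // e.g. "lsls r0, #2" is accepted as shorthand for "lsls.w r0, r0, #2".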
}
/// T2I_cmp_irs - Defines a set of (op r, {so_imm|r|so_reg}) cmp / test
/// patterns. Similar to T2I_bin_irs except the instruction does not produce
/// an explicit result, only implicitly sets CPSR.
-let isCompare = 1, Defs = [CPSR] in {
multiclass T2I_cmp_irs<bits<4> opcod, string opc,
InstrItinClass iii, InstrItinClass iir, InstrItinClass iis,
- PatFrag opnode> {
+ PatFrag opnode, string baseOpc> {
+let isCompare = 1, Defs = [CPSR] in {
// shifted imm
def ri : T2OneRegCmpImm<
- (outs), (ins GPR:$Rn, t2_so_imm:$imm), iii,
+ (outs), (ins GPRnopc:$Rn, t2_so_imm:$imm), iii,
opc, ".w\t$Rn, $imm",
- [(opnode GPR:$Rn, t2_so_imm:$imm)]> {
+ [(opnode GPRnopc:$Rn, t2_so_imm:$imm)]> {
let Inst{31-27} = 0b11110;
let Inst{25} = 0;
let Inst{24-21} = opcod;
@@ -807,9 +833,9 @@ multiclass T2I_cmp_irs<bits<4> opcod, string opc,
}
// register
def rr : T2TwoRegCmp<
- (outs), (ins GPR:$lhs, rGPR:$rhs), iir,
- opc, ".w\t$lhs, $rhs",
- [(opnode GPR:$lhs, rGPR:$rhs)]> {
+ (outs), (ins GPRnopc:$Rn, rGPR:$Rm), iir,
+ opc, ".w\t$Rn, $Rm",
+ [(opnode GPRnopc:$Rn, rGPR:$Rm)]> {
let Inst{31-27} = 0b11101;
let Inst{26-25} = 0b01;
let Inst{24-21} = opcod;
@@ -821,9 +847,9 @@ multiclass T2I_cmp_irs<bits<4> opcod, string opc,
}
// shifted register
def rs : T2OneRegCmpShiftedReg<
- (outs), (ins GPR:$Rn, t2_so_reg:$ShiftedRm), iis,
+ (outs), (ins GPRnopc:$Rn, t2_so_reg:$ShiftedRm), iis,
opc, ".w\t$Rn, $ShiftedRm",
- [(opnode GPR:$Rn, t2_so_reg:$ShiftedRm)]> {
+ [(opnode GPRnopc:$Rn, t2_so_reg:$ShiftedRm)]> {
let Inst{31-27} = 0b11101;
let Inst{26-25} = 0b01;
let Inst{24-21} = opcod;
@@ -831,55 +857,60 @@ multiclass T2I_cmp_irs<bits<4> opcod, string opc,
let Inst{11-8} = 0b1111; // Rd
}
}
+
+ // Assembler aliases w/o the ".w" suffix.
+ // No alias here for 'rr' version as not all instantiations of this
+ // multiclass want one (CMP, in particular, does not).
+ def : t2InstAlias<!strconcat(opc, "${p}", " $Rn, $imm"),
+ (!cast<Instruction>(!strconcat(baseOpc, "ri")) GPRnopc:$Rn,
+ t2_so_imm:$imm, pred:$p)>;
+ def : t2InstAlias<!strconcat(opc, "${p}", " $Rn, $shift"),
+ (!cast<Instruction>(!strconcat(baseOpc, "rs")) GPRnopc:$Rn,
+ t2_so_reg:$shift,
+ pred:$p)>;
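+ // e.g. "cmp r0, r1, lsl #2" is accepted without an explicit ".w".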
}
/// T2I_ld - Defines a set of (op r, {imm12|imm8|so_reg}) load patterns.
multiclass T2I_ld<bit signed, bits<2> opcod, string opc,
- InstrItinClass iii, InstrItinClass iis, PatFrag opnode> {
- def i12 : T2Ii12<(outs GPR:$Rt), (ins t2addrmode_imm12:$addr), iii,
+ InstrItinClass iii, InstrItinClass iis, RegisterClass target,
+ PatFrag opnode> {
+ def i12 : T2Ii12<(outs target:$Rt), (ins t2addrmode_imm12:$addr), iii,
opc, ".w\t$Rt, $addr",
- [(set GPR:$Rt, (opnode t2addrmode_imm12:$addr))]> {
- let Inst{31-27} = 0b11111;
- let Inst{26-25} = 0b00;
+ [(set target:$Rt, (opnode t2addrmode_imm12:$addr))]> {
+ bits<4> Rt;
+ bits<17> addr;
+ let Inst{31-25} = 0b1111100;
let Inst{24} = signed;
let Inst{23} = 1;
let Inst{22-21} = opcod;
let Inst{20} = 1; // load
-
- bits<4> Rt;
- let Inst{15-12} = Rt;
-
- bits<17> addr;
- let addr{12} = 1; // add = TRUE
let Inst{19-16} = addr{16-13}; // Rn
- let Inst{23} = addr{12}; // U
+ let Inst{15-12} = Rt;
let Inst{11-0} = addr{11-0}; // imm
}
- def i8 : T2Ii8 <(outs GPR:$Rt), (ins t2addrmode_imm8:$addr), iii,
+ def i8 : T2Ii8 <(outs target:$Rt), (ins t2addrmode_negimm8:$addr), iii,
opc, "\t$Rt, $addr",
- [(set GPR:$Rt, (opnode t2addrmode_imm8:$addr))]> {
+ [(set target:$Rt, (opnode t2addrmode_negimm8:$addr))]> {
+ bits<4> Rt;
+ bits<13> addr;
let Inst{31-27} = 0b11111;
let Inst{26-25} = 0b00;
let Inst{24} = signed;
let Inst{23} = 0;
let Inst{22-21} = opcod;
let Inst{20} = 1; // load
+ let Inst{19-16} = addr{12-9}; // Rn
+ let Inst{15-12} = Rt;
let Inst{11} = 1;
// Offset: index==TRUE, wback==FALSE
let Inst{10} = 1; // The P bit.
- let Inst{8} = 0; // The W bit.
-
- bits<4> Rt;
- let Inst{15-12} = Rt;
-
- bits<13> addr;
- let Inst{19-16} = addr{12-9}; // Rn
let Inst{9} = addr{8}; // U
+ let Inst{8} = 0; // The W bit.
let Inst{7-0} = addr{7-0}; // imm
}
- def s : T2Iso <(outs GPR:$Rt), (ins t2addrmode_so_reg:$addr), iis,
+ def s : T2Iso <(outs target:$Rt), (ins t2addrmode_so_reg:$addr), iis,
opc, ".w\t$Rt, $addr",
- [(set GPR:$Rt, (opnode t2addrmode_so_reg:$addr))]> {
+ [(set target:$Rt, (opnode t2addrmode_so_reg:$addr))]> {
let Inst{31-27} = 0b11111;
let Inst{26-25} = 0b00;
let Inst{24} = signed;
@@ -895,12 +926,14 @@ multiclass T2I_ld<bit signed, bits<2> opcod, string opc,
let Inst{19-16} = addr{9-6}; // Rn
let Inst{3-0} = addr{5-2}; // Rm
let Inst{5-4} = addr{1-0}; // imm
+
+ let DecoderMethod = "DecodeT2LoadShift";
}
// FIXME: Is the pci variant actually needed?
- def pci : T2Ipc <(outs GPR:$Rt), (ins t2ldrlabel:$addr), iii,
+ def pci : T2Ipc <(outs target:$Rt), (ins t2ldrlabel:$addr), iii,
opc, ".w\t$Rt, $addr",
- [(set GPR:$Rt, (opnode (ARMWrapper tconstpool:$addr)))]> {
+ [(set target:$Rt, (opnode (ARMWrapper tconstpool:$addr)))]> {
let isReMaterializable = 1;
let Inst{31-27} = 0b11111;
let Inst{26-25} = 0b00;
@@ -918,10 +951,11 @@ multiclass T2I_ld<bit signed, bits<2> opcod, string opc,
/// T2I_st - Defines a set of (op r, {imm12|imm8|so_reg}) store patterns.
multiclass T2I_st<bits<2> opcod, string opc,
- InstrItinClass iii, InstrItinClass iis, PatFrag opnode> {
- def i12 : T2Ii12<(outs), (ins GPR:$Rt, t2addrmode_imm12:$addr), iii,
+ InstrItinClass iii, InstrItinClass iis, RegisterClass target,
+ PatFrag opnode> {
+ def i12 : T2Ii12<(outs), (ins target:$Rt, t2addrmode_imm12:$addr), iii,
opc, ".w\t$Rt, $addr",
- [(opnode GPR:$Rt, t2addrmode_imm12:$addr)]> {
+ [(opnode target:$Rt, t2addrmode_imm12:$addr)]> {
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0001;
let Inst{22-21} = opcod;
@@ -936,9 +970,9 @@ multiclass T2I_st<bits<2> opcod, string opc,
let Inst{23} = addr{12}; // U
let Inst{11-0} = addr{11-0}; // imm
}
- def i8 : T2Ii8 <(outs), (ins GPR:$Rt, t2addrmode_imm8:$addr), iii,
+ def i8 : T2Ii8 <(outs), (ins target:$Rt, t2addrmode_negimm8:$addr), iii,
opc, "\t$Rt, $addr",
- [(opnode GPR:$Rt, t2addrmode_imm8:$addr)]> {
+ [(opnode target:$Rt, t2addrmode_negimm8:$addr)]> {
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0000;
let Inst{22-21} = opcod;
@@ -956,9 +990,9 @@ multiclass T2I_st<bits<2> opcod, string opc,
let Inst{9} = addr{8}; // U
let Inst{7-0} = addr{7-0}; // imm
}
- def s : T2Iso <(outs), (ins GPR:$Rt, t2addrmode_so_reg:$addr), iis,
+ def s : T2Iso <(outs), (ins target:$Rt, t2addrmode_so_reg:$addr), iis,
opc, ".w\t$Rt, $addr",
- [(opnode GPR:$Rt, t2addrmode_so_reg:$addr)]> {
+ [(opnode target:$Rt, t2addrmode_so_reg:$addr)]> {
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0000;
let Inst{22-21} = opcod;
@@ -977,146 +1011,81 @@ multiclass T2I_st<bits<2> opcod, string opc,
/// T2I_ext_rrot - A unary operation with two forms: one whose operand is a
/// register and one whose operand is a register rotated by 8/16/24.
-multiclass T2I_ext_rrot<bits<3> opcod, string opc, PatFrag opnode> {
- def r : T2TwoReg<(outs rGPR:$Rd), (ins rGPR:$Rm), IIC_iEXTr,
- opc, ".w\t$Rd, $Rm",
- [(set rGPR:$Rd, (opnode rGPR:$Rm))]> {
- let Inst{31-27} = 0b11111;
- let Inst{26-23} = 0b0100;
- let Inst{22-20} = opcod;
- let Inst{19-16} = 0b1111; // Rn
- let Inst{15-12} = 0b1111;
- let Inst{7} = 1;
- let Inst{5-4} = 0b00; // rotate
- }
- def r_rot : T2TwoReg<(outs rGPR:$Rd), (ins rGPR:$Rm, rot_imm:$rot), IIC_iEXTr,
- opc, ".w\t$Rd, $Rm, ror $rot",
- [(set rGPR:$Rd, (opnode (rotr rGPR:$Rm, rot_imm:$rot)))]> {
- let Inst{31-27} = 0b11111;
- let Inst{26-23} = 0b0100;
- let Inst{22-20} = opcod;
- let Inst{19-16} = 0b1111; // Rn
- let Inst{15-12} = 0b1111;
- let Inst{7} = 1;
-
- bits<2> rot;
- let Inst{5-4} = rot{1-0}; // rotate
- }
+class T2I_ext_rrot<bits<3> opcod, string opc, PatFrag opnode>
+ : T2TwoReg<(outs rGPR:$Rd), (ins rGPR:$Rm, rot_imm:$rot), IIC_iEXTr,
+ opc, ".w\t$Rd, $Rm$rot",
+ [(set rGPR:$Rd, (opnode (rotr rGPR:$Rm, rot_imm:$rot)))]>,
+ Requires<[IsThumb2]> {
+ let Inst{31-27} = 0b11111;
+ let Inst{26-23} = 0b0100;
+ let Inst{22-20} = opcod;
+ let Inst{19-16} = 0b1111; // Rn
+ let Inst{15-12} = 0b1111;
+ let Inst{7} = 1;
+
+ bits<2> rot;
+ let Inst{5-4} = rot{1-0}; // rotate
}
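+// e.g. "sxtb r0, r1, ror #16" encodes rot{1-0} = 0b10 into Inst{5-4}.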
// UXTB16 - Requires T2ExtractPack, does not need the .w qualifier.
-multiclass T2I_ext_rrot_uxtb16<bits<3> opcod, string opc, PatFrag opnode> {
- def r : T2TwoReg<(outs rGPR:$Rd), (ins rGPR:$Rm), IIC_iEXTr,
- opc, "\t$Rd, $Rm",
- [(set rGPR:$Rd, (opnode rGPR:$Rm))]>,
- Requires<[HasT2ExtractPack, IsThumb2]> {
- let Inst{31-27} = 0b11111;
- let Inst{26-23} = 0b0100;
- let Inst{22-20} = opcod;
- let Inst{19-16} = 0b1111; // Rn
- let Inst{15-12} = 0b1111;
- let Inst{7} = 1;
- let Inst{5-4} = 0b00; // rotate
- }
- def r_rot : T2TwoReg<(outs rGPR:$dst), (ins rGPR:$Rm, rot_imm:$rot),
- IIC_iEXTr, opc, "\t$dst, $Rm, ror $rot",
- [(set rGPR:$dst, (opnode (rotr rGPR:$Rm, rot_imm:$rot)))]>,
- Requires<[HasT2ExtractPack, IsThumb2]> {
- let Inst{31-27} = 0b11111;
- let Inst{26-23} = 0b0100;
- let Inst{22-20} = opcod;
- let Inst{19-16} = 0b1111; // Rn
- let Inst{15-12} = 0b1111;
- let Inst{7} = 1;
-
- bits<2> rot;
- let Inst{5-4} = rot{1-0}; // rotate
- }
+class T2I_ext_rrot_uxtb16<bits<3> opcod, string opc, PatFrag opnode>
+ : T2TwoReg<(outs rGPR:$Rd), (ins rGPR:$Rm, rot_imm:$rot),
+ IIC_iEXTr, opc, "\t$Rd, $Rm$rot",
+ [(set rGPR:$Rd, (opnode (rotr rGPR:$Rm, rot_imm:$rot)))]>,
+ Requires<[HasT2ExtractPack, IsThumb2]> {
+ bits<2> rot;
+ let Inst{31-27} = 0b11111;
+ let Inst{26-23} = 0b0100;
+ let Inst{22-20} = opcod;
+ let Inst{19-16} = 0b1111; // Rn
+ let Inst{15-12} = 0b1111;
+ let Inst{7} = 1;
+ let Inst{5-4} = rot;
}
// SXTB16 - Requires T2ExtractPack, does not need the .w qualifier, no pattern
// supported yet.
-multiclass T2I_ext_rrot_sxtb16<bits<3> opcod, string opc> {
- def r : T2TwoReg<(outs rGPR:$Rd), (ins rGPR:$Rm), IIC_iEXTr,
- opc, "\t$Rd, $Rm", []>,
+class T2I_ext_rrot_sxtb16<bits<3> opcod, string opc>
+ : T2TwoReg<(outs rGPR:$Rd), (ins rGPR:$Rm, rot_imm:$rot), IIC_iEXTr,
+ opc, "\t$Rd, $Rm$rot", []>,
Requires<[IsThumb2, HasT2ExtractPack]> {
- let Inst{31-27} = 0b11111;
- let Inst{26-23} = 0b0100;
- let Inst{22-20} = opcod;
- let Inst{19-16} = 0b1111; // Rn
- let Inst{15-12} = 0b1111;
- let Inst{7} = 1;
- let Inst{5-4} = 0b00; // rotate
- }
- def r_rot : T2TwoReg<(outs rGPR:$Rd), (ins rGPR:$Rm, i32imm:$rot), IIC_iEXTr,
- opc, "\t$Rd, $Rm, ror $rot", []>,
- Requires<[IsThumb2, HasT2ExtractPack]> {
- let Inst{31-27} = 0b11111;
- let Inst{26-23} = 0b0100;
- let Inst{22-20} = opcod;
- let Inst{19-16} = 0b1111; // Rn
- let Inst{15-12} = 0b1111;
- let Inst{7} = 1;
-
- bits<2> rot;
- let Inst{5-4} = rot{1-0}; // rotate
- }
+ bits<2> rot;
+ let Inst{31-27} = 0b11111;
+ let Inst{26-23} = 0b0100;
+ let Inst{22-20} = opcod;
+ let Inst{19-16} = 0b1111; // Rn
+ let Inst{15-12} = 0b1111;
+ let Inst{7} = 1;
+ let Inst{5-4} = rot;
}
/// T2I_exta_rrot - A binary operation with two forms: one whose operand is a
/// register and one whose operand is a register rotated by 8/16/24.
-multiclass T2I_exta_rrot<bits<3> opcod, string opc, PatFrag opnode> {
- def rr : T2ThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iEXTAr,
- opc, "\t$Rd, $Rn, $Rm",
- [(set rGPR:$Rd, (opnode rGPR:$Rn, rGPR:$Rm))]>,
- Requires<[HasT2ExtractPack, IsThumb2]> {
- let Inst{31-27} = 0b11111;
- let Inst{26-23} = 0b0100;
- let Inst{22-20} = opcod;
- let Inst{15-12} = 0b1111;
- let Inst{7} = 1;
- let Inst{5-4} = 0b00; // rotate
- }
- def rr_rot : T2ThreeReg<(outs rGPR:$Rd),
- (ins rGPR:$Rn, rGPR:$Rm, rot_imm:$rot),
- IIC_iEXTAsr, opc, "\t$Rd, $Rn, $Rm, ror $rot",
- [(set rGPR:$Rd, (opnode rGPR:$Rn,
- (rotr rGPR:$Rm, rot_imm:$rot)))]>,
- Requires<[HasT2ExtractPack, IsThumb2]> {
- let Inst{31-27} = 0b11111;
- let Inst{26-23} = 0b0100;
- let Inst{22-20} = opcod;
- let Inst{15-12} = 0b1111;
- let Inst{7} = 1;
-
- bits<2> rot;
- let Inst{5-4} = rot{1-0}; // rotate
- }
+class T2I_exta_rrot<bits<3> opcod, string opc, PatFrag opnode>
+ : T2ThreeReg<(outs rGPR:$Rd),
+ (ins rGPR:$Rn, rGPR:$Rm, rot_imm:$rot),
+ IIC_iEXTAsr, opc, "\t$Rd, $Rn, $Rm$rot",
+ [(set rGPR:$Rd, (opnode rGPR:$Rn, (rotr rGPR:$Rm,rot_imm:$rot)))]>,
+ Requires<[HasT2ExtractPack, IsThumb2]> {
+ bits<2> rot;
+ let Inst{31-27} = 0b11111;
+ let Inst{26-23} = 0b0100;
+ let Inst{22-20} = opcod;
+ let Inst{15-12} = 0b1111;
+ let Inst{7} = 1;
+ let Inst{5-4} = rot;
}
-// DO variant - disassembly only, no pattern
-
-multiclass T2I_exta_rrot_DO<bits<3> opcod, string opc> {
- def rr : T2ThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iEXTAr,
- opc, "\t$Rd, $Rn, $Rm", []> {
- let Inst{31-27} = 0b11111;
- let Inst{26-23} = 0b0100;
- let Inst{22-20} = opcod;
- let Inst{15-12} = 0b1111;
- let Inst{7} = 1;
- let Inst{5-4} = 0b00; // rotate
- }
- def rr_rot :T2ThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm, i32imm:$rot),
- IIC_iEXTAsr, opc, "\t$Rd, $Rn, $Rm, ror $rot", []> {
- let Inst{31-27} = 0b11111;
- let Inst{26-23} = 0b0100;
- let Inst{22-20} = opcod;
- let Inst{15-12} = 0b1111;
- let Inst{7} = 1;
-
- bits<2> rot;
- let Inst{5-4} = rot{1-0}; // rotate
- }
+class T2I_exta_rrot_np<bits<3> opcod, string opc>
+ : T2ThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm,rot_imm:$rot),
+ IIC_iEXTAsr, opc, "\t$Rd, $Rn, $Rm$rot", []> {
+ bits<2> rot;
+ let Inst{31-27} = 0b11111;
+ let Inst{26-23} = 0b0100;
+ let Inst{22-20} = opcod;
+ let Inst{15-12} = 0b1111;
+ let Inst{7} = 1;
+ let Inst{5-4} = rot;
}
//===----------------------------------------------------------------------===//
@@ -1143,7 +1112,7 @@ class T2PCOneRegImm<dag oops, dag iops, InstrItinClass itin,
// assembler.
def t2ADR : T2PCOneRegImm<(outs rGPR:$Rd),
(ins t2adrlabel:$addr, pred:$p),
- IIC_iALUi, "adr{$p}.w\t$Rd, #$addr", []> {
+ IIC_iALUi, "adr{$p}.w\t$Rd, $addr", []> {
let Inst{31-27} = 0b11110;
let Inst{25-24} = 0b10;
// Inst{23:21} = '11' (add = FALSE) or '00' (add = TRUE)
@@ -1160,6 +1129,8 @@ def t2ADR : T2PCOneRegImm<(outs rGPR:$Rd),
let Inst{26} = addr{11};
let Inst{14-12} = addr{10-8};
let Inst{7-0} = addr{7-0};
+
+ let DecoderMethod = "DecodeT2Adr";
}
let neverHasSideEffects = 1, isReMaterializable = 1 in
@@ -1177,33 +1148,33 @@ def t2LEApcrelJT : t2PseudoInst<(outs rGPR:$Rd),
// Load
let canFoldAsLoad = 1, isReMaterializable = 1 in
-defm t2LDR : T2I_ld<0, 0b10, "ldr", IIC_iLoad_i, IIC_iLoad_si,
+defm t2LDR : T2I_ld<0, 0b10, "ldr", IIC_iLoad_i, IIC_iLoad_si, GPR,
UnOpFrag<(load node:$Src)>>;
// Loads with zero extension
defm t2LDRH : T2I_ld<0, 0b01, "ldrh", IIC_iLoad_bh_i, IIC_iLoad_bh_si,
- UnOpFrag<(zextloadi16 node:$Src)>>;
+ rGPR, UnOpFrag<(zextloadi16 node:$Src)>>;
defm t2LDRB : T2I_ld<0, 0b00, "ldrb", IIC_iLoad_bh_i, IIC_iLoad_bh_si,
- UnOpFrag<(zextloadi8 node:$Src)>>;
+ rGPR, UnOpFrag<(zextloadi8 node:$Src)>>;
// Loads with sign extension
defm t2LDRSH : T2I_ld<1, 0b01, "ldrsh", IIC_iLoad_bh_i, IIC_iLoad_bh_si,
- UnOpFrag<(sextloadi16 node:$Src)>>;
+ rGPR, UnOpFrag<(sextloadi16 node:$Src)>>;
defm t2LDRSB : T2I_ld<1, 0b00, "ldrsb", IIC_iLoad_bh_i, IIC_iLoad_bh_si,
- UnOpFrag<(sextloadi8 node:$Src)>>;
+ rGPR, UnOpFrag<(sextloadi8 node:$Src)>>;
let mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1 in {
// Load doubleword
def t2LDRDi8 : T2Ii8s4<1, 0, 1, (outs rGPR:$Rt, rGPR:$Rt2),
(ins t2addrmode_imm8s4:$addr),
- IIC_iLoad_d_i, "ldrd", "\t$Rt, $Rt2, $addr", []>;
+ IIC_iLoad_d_i, "ldrd", "\t$Rt, $Rt2, $addr", "", []>;
} // mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1
// zextload i1 -> zextload i8
def : T2Pat<(zextloadi1 t2addrmode_imm12:$addr),
(t2LDRBi12 t2addrmode_imm12:$addr)>;
-def : T2Pat<(zextloadi1 t2addrmode_imm8:$addr),
- (t2LDRBi8 t2addrmode_imm8:$addr)>;
+def : T2Pat<(zextloadi1 t2addrmode_negimm8:$addr),
+ (t2LDRBi8 t2addrmode_negimm8:$addr)>;
def : T2Pat<(zextloadi1 t2addrmode_so_reg:$addr),
(t2LDRBs t2addrmode_so_reg:$addr)>;
def : T2Pat<(zextloadi1 (ARMWrapper tconstpool:$addr)),
@@ -1214,8 +1185,8 @@ def : T2Pat<(zextloadi1 (ARMWrapper tconstpool:$addr)),
// earlier?
def : T2Pat<(extloadi1 t2addrmode_imm12:$addr),
(t2LDRBi12 t2addrmode_imm12:$addr)>;
-def : T2Pat<(extloadi1 t2addrmode_imm8:$addr),
- (t2LDRBi8 t2addrmode_imm8:$addr)>;
+def : T2Pat<(extloadi1 t2addrmode_negimm8:$addr),
+ (t2LDRBi8 t2addrmode_negimm8:$addr)>;
def : T2Pat<(extloadi1 t2addrmode_so_reg:$addr),
(t2LDRBs t2addrmode_so_reg:$addr)>;
def : T2Pat<(extloadi1 (ARMWrapper tconstpool:$addr)),
@@ -1223,8 +1194,8 @@ def : T2Pat<(extloadi1 (ARMWrapper tconstpool:$addr)),
def : T2Pat<(extloadi8 t2addrmode_imm12:$addr),
(t2LDRBi12 t2addrmode_imm12:$addr)>;
-def : T2Pat<(extloadi8 t2addrmode_imm8:$addr),
- (t2LDRBi8 t2addrmode_imm8:$addr)>;
+def : T2Pat<(extloadi8 t2addrmode_negimm8:$addr),
+ (t2LDRBi8 t2addrmode_negimm8:$addr)>;
def : T2Pat<(extloadi8 t2addrmode_so_reg:$addr),
(t2LDRBs t2addrmode_so_reg:$addr)>;
def : T2Pat<(extloadi8 (ARMWrapper tconstpool:$addr)),
@@ -1232,8 +1203,8 @@ def : T2Pat<(extloadi8 (ARMWrapper tconstpool:$addr)),
def : T2Pat<(extloadi16 t2addrmode_imm12:$addr),
(t2LDRHi12 t2addrmode_imm12:$addr)>;
-def : T2Pat<(extloadi16 t2addrmode_imm8:$addr),
- (t2LDRHi8 t2addrmode_imm8:$addr)>;
+def : T2Pat<(extloadi16 t2addrmode_negimm8:$addr),
+ (t2LDRHi8 t2addrmode_negimm8:$addr)>;
def : T2Pat<(extloadi16 t2addrmode_so_reg:$addr),
(t2LDRHs t2addrmode_so_reg:$addr)>;
def : T2Pat<(extloadi16 (ARMWrapper tconstpool:$addr)),
@@ -1247,83 +1218,86 @@ def : T2Pat<(extloadi16 (ARMWrapper tconstpool:$addr)),
// Indexed loads
let mayLoad = 1, neverHasSideEffects = 1 in {
-def t2LDR_PRE : T2Iidxldst<0, 0b10, 1, 1, (outs GPR:$Rt, GPR:$Rn),
+def t2LDR_PRE : T2Ipreldst<0, 0b10, 1, 1, (outs GPR:$Rt, GPR:$Rn_wb),
(ins t2addrmode_imm8:$addr),
AddrModeT2_i8, IndexModePre, IIC_iLoad_iu,
- "ldr", "\t$Rt, $addr!", "$addr.base = $Rn",
- []>;
+ "ldr", "\t$Rt, $addr!", "$addr.base = $Rn_wb",
+ []> {
+ let AsmMatchConverter = "cvtLdWriteBackRegT2AddrModeImm8";
+}
-def t2LDR_POST : T2Iidxldst<0, 0b10, 1, 0, (outs GPR:$Rt, GPR:$Rn),
- (ins GPR:$base, t2am_imm8_offset:$addr),
- AddrModeT2_i8, IndexModePost, IIC_iLoad_iu,
- "ldr", "\t$Rt, [$Rn], $addr", "$base = $Rn",
- []>;
+def t2LDR_POST : T2Ipostldst<0, 0b10, 1, 0, (outs GPR:$Rt, GPR:$Rn_wb),
+ (ins addr_offset_none:$Rn, t2am_imm8_offset:$offset),
+ AddrModeT2_i8, IndexModePost, IIC_iLoad_iu,
+ "ldr", "\t$Rt, $Rn$offset", "$Rn = $Rn_wb", []>;
-def t2LDRB_PRE : T2Iidxldst<0, 0b00, 1, 1, (outs GPR:$Rt, GPR:$Rn),
+def t2LDRB_PRE : T2Ipreldst<0, 0b00, 1, 1, (outs GPR:$Rt, GPR:$Rn_wb),
(ins t2addrmode_imm8:$addr),
AddrModeT2_i8, IndexModePre, IIC_iLoad_bh_iu,
- "ldrb", "\t$Rt, $addr!", "$addr.base = $Rn",
- []>;
-def t2LDRB_POST : T2Iidxldst<0, 0b00, 1, 0, (outs GPR:$Rt, GPR:$Rn),
- (ins GPR:$base, t2am_imm8_offset:$addr),
- AddrModeT2_i8, IndexModePost, IIC_iLoad_bh_iu,
- "ldrb", "\t$Rt, [$Rn], $addr", "$base = $Rn",
- []>;
-
-def t2LDRH_PRE : T2Iidxldst<0, 0b01, 1, 1, (outs GPR:$Rt, GPR:$Rn),
+ "ldrb", "\t$Rt, $addr!", "$addr.base = $Rn_wb",
+ []> {
+ let AsmMatchConverter = "cvtLdWriteBackRegT2AddrModeImm8";
+}
+def t2LDRB_POST : T2Ipostldst<0, 0b00, 1, 0, (outs GPR:$Rt, GPR:$Rn_wb),
+ (ins addr_offset_none:$Rn, t2am_imm8_offset:$offset),
+ AddrModeT2_i8, IndexModePost, IIC_iLoad_bh_iu,
+ "ldrb", "\t$Rt, $Rn$offset", "$Rn = $Rn_wb", []>;
+
+def t2LDRH_PRE : T2Ipreldst<0, 0b01, 1, 1, (outs GPR:$Rt, GPR:$Rn_wb),
(ins t2addrmode_imm8:$addr),
AddrModeT2_i8, IndexModePre, IIC_iLoad_bh_iu,
- "ldrh", "\t$Rt, $addr!", "$addr.base = $Rn",
- []>;
-def t2LDRH_POST : T2Iidxldst<0, 0b01, 1, 0, (outs GPR:$Rt, GPR:$Rn),
- (ins GPR:$base, t2am_imm8_offset:$addr),
- AddrModeT2_i8, IndexModePost, IIC_iLoad_bh_iu,
- "ldrh", "\t$Rt, [$Rn], $addr", "$base = $Rn",
- []>;
-
-def t2LDRSB_PRE : T2Iidxldst<1, 0b00, 1, 1, (outs GPR:$Rt, GPR:$Rn),
+ "ldrh", "\t$Rt, $addr!", "$addr.base = $Rn_wb",
+ []> {
+ let AsmMatchConverter = "cvtLdWriteBackRegT2AddrModeImm8";
+}
+def t2LDRH_POST : T2Ipostldst<0, 0b01, 1, 0, (outs GPR:$Rt, GPR:$Rn_wb),
+ (ins addr_offset_none:$Rn, t2am_imm8_offset:$offset),
+ AddrModeT2_i8, IndexModePost, IIC_iLoad_bh_iu,
+ "ldrh", "\t$Rt, $Rn$offset", "$Rn = $Rn_wb", []>;
+
+def t2LDRSB_PRE : T2Ipreldst<1, 0b00, 1, 1, (outs GPR:$Rt, GPR:$Rn_wb),
(ins t2addrmode_imm8:$addr),
AddrModeT2_i8, IndexModePre, IIC_iLoad_bh_iu,
- "ldrsb", "\t$Rt, $addr!", "$addr.base = $Rn",
- []>;
-def t2LDRSB_POST : T2Iidxldst<1, 0b00, 1, 0, (outs GPR:$Rt, GPR:$Rn),
- (ins GPR:$base, t2am_imm8_offset:$addr),
- AddrModeT2_i8, IndexModePost, IIC_iLoad_bh_iu,
- "ldrsb", "\t$Rt, [$Rn], $addr", "$base = $Rn",
- []>;
-
-def t2LDRSH_PRE : T2Iidxldst<1, 0b01, 1, 1, (outs GPR:$Rt, GPR:$Rn),
+ "ldrsb", "\t$Rt, $addr!", "$addr.base = $Rn_wb",
+ []> {
+ let AsmMatchConverter = "cvtLdWriteBackRegT2AddrModeImm8";
+}
+def t2LDRSB_POST : T2Ipostldst<1, 0b00, 1, 0, (outs GPR:$Rt, GPR:$Rn_wb),
+ (ins addr_offset_none:$Rn, t2am_imm8_offset:$offset),
+ AddrModeT2_i8, IndexModePost, IIC_iLoad_bh_iu,
+ "ldrsb", "\t$Rt, $Rn$offset", "$Rn = $Rn_wb", []>;
+
+def t2LDRSH_PRE : T2Ipreldst<1, 0b01, 1, 1, (outs GPR:$Rt, GPR:$Rn_wb),
(ins t2addrmode_imm8:$addr),
AddrModeT2_i8, IndexModePre, IIC_iLoad_bh_iu,
- "ldrsh", "\t$Rt, $addr!", "$addr.base = $Rn",
- []>;
-def t2LDRSH_POST : T2Iidxldst<1, 0b01, 1, 0, (outs GPR:$dst, GPR:$Rn),
- (ins GPR:$base, t2am_imm8_offset:$addr),
- AddrModeT2_i8, IndexModePost, IIC_iLoad_bh_iu,
- "ldrsh", "\t$dst, [$Rn], $addr", "$base = $Rn",
- []>;
+ "ldrsh", "\t$Rt, $addr!", "$addr.base = $Rn_wb",
+ []> {
+ let AsmMatchConverter = "cvtLdWriteBackRegT2AddrModeImm8";
+}
+def t2LDRSH_POST : T2Ipostldst<1, 0b01, 1, 0, (outs GPR:$Rt, GPR:$Rn_wb),
+ (ins addr_offset_none:$Rn, t2am_imm8_offset:$offset),
+ AddrModeT2_i8, IndexModePost, IIC_iLoad_bh_iu,
+ "ldrsh", "\t$Rt, $Rn$offset", "$Rn = $Rn_wb", []>;
} // mayLoad = 1, neverHasSideEffects = 1
-// LDRT, LDRBT, LDRHT, LDRSBT, LDRSHT all have offset mode (PUW=0b110) and are
-// for disassembly only.
+// LDRT, LDRBT, LDRHT, LDRSBT, LDRSHT all have offset mode (PUW=0b110).
// Ref: A8.6.57 LDR (immediate, Thumb) Encoding T4
class T2IldT<bit signed, bits<2> type, string opc, InstrItinClass ii>
- : T2Ii8<(outs rGPR:$Rt), (ins t2addrmode_imm8:$addr), ii, opc,
+ : T2Ii8<(outs rGPR:$Rt), (ins t2addrmode_posimm8:$addr), ii, opc,
"\t$Rt, $addr", []> {
+ bits<4> Rt;
+ bits<13> addr;
let Inst{31-27} = 0b11111;
let Inst{26-25} = 0b00;
let Inst{24} = signed;
let Inst{23} = 0;
let Inst{22-21} = type;
let Inst{20} = 1; // load
+ let Inst{19-16} = addr{12-9};
+ let Inst{15-12} = Rt;
let Inst{11} = 1;
let Inst{10-8} = 0b110; // PUW.
-
- bits<4> Rt;
- bits<13> addr;
- let Inst{15-12} = Rt;
- let Inst{19-16} = addr{12-9};
- let Inst{7-0} = addr{7-0};
+ let Inst{7-0} = addr{7-0};
}
def t2LDRT : T2IldT<0, 0b10, "ldrt", IIC_iLoad_i>;
@@ -1333,67 +1307,97 @@ def t2LDRSBT : T2IldT<1, 0b00, "ldrsbt", IIC_iLoad_bh_i>;
def t2LDRSHT : T2IldT<1, 0b01, "ldrsht", IIC_iLoad_bh_i>;
// Store
-defm t2STR :T2I_st<0b10,"str", IIC_iStore_i, IIC_iStore_si,
+defm t2STR :T2I_st<0b10,"str", IIC_iStore_i, IIC_iStore_si, GPR,
BinOpFrag<(store node:$LHS, node:$RHS)>>;
defm t2STRB:T2I_st<0b00,"strb", IIC_iStore_bh_i, IIC_iStore_bh_si,
- BinOpFrag<(truncstorei8 node:$LHS, node:$RHS)>>;
+ rGPR, BinOpFrag<(truncstorei8 node:$LHS, node:$RHS)>>;
defm t2STRH:T2I_st<0b01,"strh", IIC_iStore_bh_i, IIC_iStore_bh_si,
- BinOpFrag<(truncstorei16 node:$LHS, node:$RHS)>>;
+ rGPR, BinOpFrag<(truncstorei16 node:$LHS, node:$RHS)>>;
// Store doubleword
let mayLoad = 1, neverHasSideEffects = 1, hasExtraSrcRegAllocReq = 1 in
def t2STRDi8 : T2Ii8s4<1, 0, 0, (outs),
(ins GPR:$Rt, GPR:$Rt2, t2addrmode_imm8s4:$addr),
- IIC_iStore_d_r, "strd", "\t$Rt, $Rt2, $addr", []>;
+ IIC_iStore_d_r, "strd", "\t$Rt, $Rt2, $addr", "", []>;
// Indexed stores
-def t2STR_PRE : T2Iidxldst<0, 0b10, 0, 1, (outs GPR:$base_wb),
- (ins GPR:$Rt, GPR:$Rn, t2am_imm8_offset:$addr),
+def t2STR_PRE : T2Ipreldst<0, 0b10, 0, 1, (outs GPRnopc:$Rn_wb),
+ (ins rGPR:$Rt, t2addrmode_imm8:$addr),
AddrModeT2_i8, IndexModePre, IIC_iStore_iu,
- "str", "\t$Rt, [$Rn, $addr]!",
- "$Rn = $base_wb,@earlyclobber $base_wb",
- [(set GPR:$base_wb,
- (pre_store GPR:$Rt, GPR:$Rn, t2am_imm8_offset:$addr))]>;
-
-def t2STR_POST : T2Iidxldst<0, 0b10, 0, 0, (outs GPR:$base_wb),
- (ins GPR:$Rt, GPR:$Rn, t2am_imm8_offset:$addr),
- AddrModeT2_i8, IndexModePost, IIC_iStore_iu,
- "str", "\t$Rt, [$Rn], $addr",
- "$Rn = $base_wb,@earlyclobber $base_wb",
- [(set GPR:$base_wb,
- (post_store GPR:$Rt, GPR:$Rn, t2am_imm8_offset:$addr))]>;
-
-def t2STRH_PRE : T2Iidxldst<0, 0b01, 0, 1, (outs GPR:$base_wb),
- (ins GPR:$Rt, GPR:$Rn, t2am_imm8_offset:$addr),
+ "str", "\t$Rt, $addr!",
+ "$addr.base = $Rn_wb,@earlyclobber $Rn_wb", []> {
+ let AsmMatchConverter = "cvtStWriteBackRegT2AddrModeImm8";
+}
+def t2STRH_PRE : T2Ipreldst<0, 0b01, 0, 1, (outs GPRnopc:$Rn_wb),
+ (ins rGPR:$Rt, t2addrmode_imm8:$addr),
AddrModeT2_i8, IndexModePre, IIC_iStore_iu,
- "strh", "\t$Rt, [$Rn, $addr]!",
- "$Rn = $base_wb,@earlyclobber $base_wb",
- [(set GPR:$base_wb,
- (pre_truncsti16 GPR:$Rt, GPR:$Rn, t2am_imm8_offset:$addr))]>;
-
-def t2STRH_POST : T2Iidxldst<0, 0b01, 0, 0, (outs GPR:$base_wb),
- (ins GPR:$Rt, GPR:$Rn, t2am_imm8_offset:$addr),
- AddrModeT2_i8, IndexModePost, IIC_iStore_bh_iu,
- "strh", "\t$Rt, [$Rn], $addr",
- "$Rn = $base_wb,@earlyclobber $base_wb",
- [(set GPR:$base_wb,
- (post_truncsti16 GPR:$Rt, GPR:$Rn, t2am_imm8_offset:$addr))]>;
+ "strh", "\t$Rt, $addr!",
+ "$addr.base = $Rn_wb,@earlyclobber $Rn_wb", []> {
+ let AsmMatchConverter = "cvtStWriteBackRegT2AddrModeImm8";
+}
-def t2STRB_PRE : T2Iidxldst<0, 0b00, 0, 1, (outs GPR:$base_wb),
- (ins GPR:$Rt, GPR:$Rn, t2am_imm8_offset:$addr),
+def t2STRB_PRE : T2Ipreldst<0, 0b00, 0, 1, (outs GPRnopc:$Rn_wb),
+ (ins rGPR:$Rt, t2addrmode_imm8:$addr),
AddrModeT2_i8, IndexModePre, IIC_iStore_bh_iu,
- "strb", "\t$Rt, [$Rn, $addr]!",
- "$Rn = $base_wb,@earlyclobber $base_wb",
- [(set GPR:$base_wb,
- (pre_truncsti8 GPR:$Rt, GPR:$Rn, t2am_imm8_offset:$addr))]>;
+ "strb", "\t$Rt, $addr!",
+ "$addr.base = $Rn_wb,@earlyclobber $Rn_wb", []> {
+ let AsmMatchConverter = "cvtStWriteBackRegT2AddrModeImm8";
+}
-def t2STRB_POST : T2Iidxldst<0, 0b00, 0, 0, (outs GPR:$base_wb),
- (ins GPR:$Rt, GPR:$Rn, t2am_imm8_offset:$addr),
+def t2STR_POST : T2Ipostldst<0, 0b10, 0, 0, (outs GPRnopc:$Rn_wb),
+ (ins rGPR:$Rt, addr_offset_none:$Rn,
+ t2am_imm8_offset:$offset),
+ AddrModeT2_i8, IndexModePost, IIC_iStore_iu,
+ "str", "\t$Rt, $Rn$offset",
+ "$Rn = $Rn_wb,@earlyclobber $Rn_wb",
+ [(set GPRnopc:$Rn_wb,
+ (post_store rGPR:$Rt, addr_offset_none:$Rn,
+ t2am_imm8_offset:$offset))]>;
+
+def t2STRH_POST : T2Ipostldst<0, 0b01, 0, 0, (outs GPRnopc:$Rn_wb),
+ (ins rGPR:$Rt, addr_offset_none:$Rn,
+ t2am_imm8_offset:$offset),
AddrModeT2_i8, IndexModePost, IIC_iStore_bh_iu,
- "strb", "\t$Rt, [$Rn], $addr",
- "$Rn = $base_wb,@earlyclobber $base_wb",
- [(set GPR:$base_wb,
- (post_truncsti8 GPR:$Rt, GPR:$Rn, t2am_imm8_offset:$addr))]>;
+ "strh", "\t$Rt, $Rn$offset",
+ "$Rn = $Rn_wb,@earlyclobber $Rn_wb",
+ [(set GPRnopc:$Rn_wb,
+ (post_truncsti16 rGPR:$Rt, addr_offset_none:$Rn,
+ t2am_imm8_offset:$offset))]>;
+
+def t2STRB_POST : T2Ipostldst<0, 0b00, 0, 0, (outs GPRnopc:$Rn_wb),
+ (ins rGPR:$Rt, addr_offset_none:$Rn,
+ t2am_imm8_offset:$offset),
+ AddrModeT2_i8, IndexModePost, IIC_iStore_bh_iu,
+ "strb", "\t$Rt, $Rn$offset",
+ "$Rn = $Rn_wb,@earlyclobber $Rn_wb",
+ [(set GPRnopc:$Rn_wb,
+ (post_truncsti8 rGPR:$Rt, addr_offset_none:$Rn,
+ t2am_imm8_offset:$offset))]>;
+
+// Pseudo-instructions for pattern matching the pre-indexed stores. We can't
+// put the patterns on the instruction definitions directly as ISel wants
+// the address base and offset to be separate operands, not a single
+// complex operand the way we represent the instructions themselves. The
+// pseudos map between the two.
+let usesCustomInserter = 1,
+ Constraints = "$Rn = $Rn_wb,@earlyclobber $Rn_wb" in {
+def t2STR_preidx: t2PseudoInst<(outs GPRnopc:$Rn_wb),
+ (ins rGPR:$Rt, GPRnopc:$Rn, t2am_imm8_offset:$offset, pred:$p),
+ 4, IIC_iStore_ru,
+ [(set GPRnopc:$Rn_wb,
+ (pre_store rGPR:$Rt, GPRnopc:$Rn, t2am_imm8_offset:$offset))]>;
+def t2STRB_preidx: t2PseudoInst<(outs GPRnopc:$Rn_wb),
+ (ins rGPR:$Rt, GPRnopc:$Rn, t2am_imm8_offset:$offset, pred:$p),
+ 4, IIC_iStore_ru,
+ [(set GPRnopc:$Rn_wb,
+ (pre_truncsti8 rGPR:$Rt, GPRnopc:$Rn, t2am_imm8_offset:$offset))]>;
+def t2STRH_preidx: t2PseudoInst<(outs GPRnopc:$Rn_wb),
+ (ins rGPR:$Rt, GPRnopc:$Rn, t2am_imm8_offset:$offset, pred:$p),
+ 4, IIC_iStore_ru,
+ [(set GPRnopc:$Rn_wb,
+ (pre_truncsti16 rGPR:$Rt, GPRnopc:$Rn, t2am_imm8_offset:$offset))]>;
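+ // e.g. a pre_store of r1 into [r0, #4] ("str r1, [r0, #4]!") selects
+ // t2STR_preidx; the custom inserter then rewrites it to t2STR_PRE.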
+}
+
// STRT, STRBT, STRHT all have offset mode (PUW=0b110) and are for disassembly
// only.
@@ -1424,21 +1428,31 @@ def t2STRHT : T2IstT<0b01, "strht", IIC_iStore_bh_i>;
// ldrd / strd pre / post variants
// For disassembly only.
-def t2LDRD_PRE : T2Ii8s4<1, 1, 1, (outs rGPR:$Rt, rGPR:$Rt2),
- (ins GPR:$base, t2am_imm8s4_offset:$imm), IIC_iLoad_d_ru,
- "ldrd", "\t$Rt, $Rt2, [$base, $imm]!", []>;
+def t2LDRD_PRE : T2Ii8s4<1, 1, 1, (outs rGPR:$Rt, rGPR:$Rt2, GPR:$wb),
+ (ins t2addrmode_imm8s4:$addr), IIC_iLoad_d_ru,
+ "ldrd", "\t$Rt, $Rt2, $addr!", "$addr.base = $wb", []> {
+ let AsmMatchConverter = "cvtT2LdrdPre";
+ let DecoderMethod = "DecodeT2LDRDPreInstruction";
+}
-def t2LDRD_POST : T2Ii8s4<0, 1, 1, (outs rGPR:$Rt, rGPR:$Rt2),
- (ins GPR:$base, t2am_imm8s4_offset:$imm), IIC_iLoad_d_ru,
- "ldrd", "\t$Rt, $Rt2, [$base], $imm", []>;
+def t2LDRD_POST : T2Ii8s4post<0, 1, 1, (outs rGPR:$Rt, rGPR:$Rt2, GPR:$wb),
+ (ins addr_offset_none:$addr, t2am_imm8s4_offset:$imm),
+ IIC_iLoad_d_ru, "ldrd", "\t$Rt, $Rt2, $addr$imm",
+ "$addr.base = $wb", []>;
-def t2STRD_PRE : T2Ii8s4<1, 1, 0, (outs),
- (ins rGPR:$Rt, rGPR:$Rt2, GPR:$base, t2am_imm8s4_offset:$imm),
- IIC_iStore_d_ru, "strd", "\t$Rt, $Rt2, [$base, $imm]!", []>;
+def t2STRD_PRE : T2Ii8s4<1, 1, 0, (outs GPR:$wb),
+ (ins rGPR:$Rt, rGPR:$Rt2, t2addrmode_imm8s4:$addr),
+ IIC_iStore_d_ru, "strd", "\t$Rt, $Rt2, $addr!",
+ "$addr.base = $wb", []> {
+ let AsmMatchConverter = "cvtT2StrdPre";
+ let DecoderMethod = "DecodeT2STRDPreInstruction";
+}
-def t2STRD_POST : T2Ii8s4<0, 1, 0, (outs),
- (ins rGPR:$Rt, rGPR:$Rt2, GPR:$base, t2am_imm8s4_offset:$imm),
- IIC_iStore_d_ru, "strd", "\t$Rt, $Rt2, [$base], $imm", []>;
+def t2STRD_POST : T2Ii8s4post<0, 1, 0, (outs GPR:$wb),
+ (ins rGPR:$Rt, rGPR:$Rt2, addr_offset_none:$addr,
+ t2am_imm8s4_offset:$imm),
+ IIC_iStore_d_ru, "strd", "\t$Rt, $Rt2, $addr$imm",
+ "$addr.base = $wb", []>;
// T2Ipl (Preload Data/Instruction) signals the memory system of possible future
// data/instruction access. These are for disassembly only.
@@ -1463,9 +1477,9 @@ multiclass T2Ipl<bits<1> write, bits<1> instr, string opc> {
let Inst{11-0} = addr{11-0}; // imm12
}
- def i8 : T2Ii8<(outs), (ins t2addrmode_imm8:$addr), IIC_Preload, opc,
+ def i8 : T2Ii8<(outs), (ins t2addrmode_negimm8:$addr), IIC_Preload, opc,
"\t$addr",
- [(ARMPreload t2addrmode_imm8:$addr, (i32 write), (i32 instr))]> {
+ [(ARMPreload t2addrmode_negimm8:$addr, (i32 write), (i32 instr))]> {
let Inst{31-25} = 0b1111100;
let Inst{24} = instr;
let Inst{23} = 0; // U = 0
@@ -1496,6 +1510,8 @@ multiclass T2Ipl<bits<1> write, bits<1> instr, string opc> {
let Inst{19-16} = addr{9-6}; // Rn
let Inst{3-0} = addr{5-2}; // Rm
let Inst{5-4} = addr{1-0}; // imm2
+
+ let DecoderMethod = "DecodeT2LoadShift";
}
}
@@ -1507,11 +1523,11 @@ defm t2PLI : T2Ipl<0, 1, "pli">, Requires<[IsThumb2,HasV7]>;
// Load / store multiple Instructions.
//
-multiclass thumb2_ldst_mult<string asm, InstrItinClass itin,
+multiclass thumb2_ld_mult<string asm, InstrItinClass itin,
InstrItinClass itin_upd, bit L_bit> {
def IA :
T2XI<(outs), (ins GPR:$Rn, pred:$p, reglist:$regs, variable_ops),
- itin, !strconcat(asm, "ia${p}.w\t$Rn, $regs"), []> {
+ itin, !strconcat(asm, "${p}.w\t$Rn, $regs"), []> {
bits<4> Rn;
bits<16> regs;
@@ -1522,11 +1538,12 @@ multiclass thumb2_ldst_mult<string asm, InstrItinClass itin,
let Inst{21} = 0; // No writeback
let Inst{20} = L_bit;
let Inst{19-16} = Rn;
- let Inst{15-0} = regs;
+ let Inst{15} = 0;
+ let Inst{14-0} = regs{14-0};
}
def IA_UPD :
T2XIt<(outs GPR:$wb), (ins GPR:$Rn, pred:$p, reglist:$regs, variable_ops),
- itin_upd, !strconcat(asm, "ia${p}.w\t$Rn!, $regs"), "$Rn = $wb", []> {
+ itin_upd, !strconcat(asm, "${p}.w\t$Rn!, $regs"), "$Rn = $wb", []> {
bits<4> Rn;
bits<16> regs;
@@ -1537,11 +1554,12 @@ multiclass thumb2_ldst_mult<string asm, InstrItinClass itin,
let Inst{21} = 1; // Writeback
let Inst{20} = L_bit;
let Inst{19-16} = Rn;
- let Inst{15-0} = regs;
+ let Inst{15} = 0;
+ let Inst{14-0} = regs{14-0};
}
def DB :
T2XI<(outs), (ins GPR:$Rn, pred:$p, reglist:$regs, variable_ops),
- itin, !strconcat(asm, "db${p}.w\t$Rn, $regs"), []> {
+ itin, !strconcat(asm, "db${p}\t$Rn, $regs"), []> {
bits<4> Rn;
bits<16> regs;
@@ -1552,11 +1570,12 @@ multiclass thumb2_ldst_mult<string asm, InstrItinClass itin,
let Inst{21} = 0; // No writeback
let Inst{20} = L_bit;
let Inst{19-16} = Rn;
- let Inst{15-0} = regs;
+ let Inst{15} = 0;
+ let Inst{14-0} = regs{14-0};
}
def DB_UPD :
T2XIt<(outs GPR:$wb), (ins GPR:$Rn, pred:$p, reglist:$regs, variable_ops),
- itin_upd, !strconcat(asm, "db${p}.w\t$Rn, $regs"), "$Rn = $wb", []> {
+ itin_upd, !strconcat(asm, "db${p}\t$Rn!, $regs"), "$Rn = $wb", []> {
bits<4> Rn;
bits<16> regs;
@@ -1567,17 +1586,95 @@ multiclass thumb2_ldst_mult<string asm, InstrItinClass itin,
let Inst{21} = 1; // Writeback
let Inst{20} = L_bit;
let Inst{19-16} = Rn;
- let Inst{15-0} = regs;
+ let Inst{15} = 0;
+ let Inst{14-0} = regs{14-0};
}
}
let neverHasSideEffects = 1 in {
let mayLoad = 1, hasExtraDefRegAllocReq = 1 in
-defm t2LDM : thumb2_ldst_mult<"ldm", IIC_iLoad_m, IIC_iLoad_mu, 1>;
+defm t2LDM : thumb2_ld_mult<"ldm", IIC_iLoad_m, IIC_iLoad_mu, 1>;
+
+multiclass thumb2_st_mult<string asm, InstrItinClass itin,
+ InstrItinClass itin_upd, bit L_bit> {
+ def IA :
+ T2XI<(outs), (ins GPR:$Rn, pred:$p, reglist:$regs, variable_ops),
+ itin, !strconcat(asm, "${p}.w\t$Rn, $regs"), []> {
+ bits<4> Rn;
+ bits<16> regs;
+
+ let Inst{31-27} = 0b11101;
+ let Inst{26-25} = 0b00;
+ let Inst{24-23} = 0b01; // Increment After
+ let Inst{22} = 0;
+ let Inst{21} = 0; // No writeback
+ let Inst{20} = L_bit;
+ let Inst{19-16} = Rn;
+ let Inst{15} = 0;
+ let Inst{14} = regs{14};
+ let Inst{13} = 0;
+ let Inst{12-0} = regs{12-0};
+ }
+ def IA_UPD :
+ T2XIt<(outs GPR:$wb), (ins GPR:$Rn, pred:$p, reglist:$regs, variable_ops),
+ itin_upd, !strconcat(asm, "${p}.w\t$Rn!, $regs"), "$Rn = $wb", []> {
+ bits<4> Rn;
+ bits<16> regs;
+
+ let Inst{31-27} = 0b11101;
+ let Inst{26-25} = 0b00;
+ let Inst{24-23} = 0b01; // Increment After
+ let Inst{22} = 0;
+ let Inst{21} = 1; // Writeback
+ let Inst{20} = L_bit;
+ let Inst{19-16} = Rn;
+ let Inst{15} = 0;
+ let Inst{14} = regs{14};
+ let Inst{13} = 0;
+ let Inst{12-0} = regs{12-0};
+ }
+ def DB :
+ T2XI<(outs), (ins GPR:$Rn, pred:$p, reglist:$regs, variable_ops),
+ itin, !strconcat(asm, "db${p}\t$Rn, $regs"), []> {
+ bits<4> Rn;
+ bits<16> regs;
+
+ let Inst{31-27} = 0b11101;
+ let Inst{26-25} = 0b00;
+ let Inst{24-23} = 0b10; // Decrement Before
+ let Inst{22} = 0;
+ let Inst{21} = 0; // No writeback
+ let Inst{20} = L_bit;
+ let Inst{19-16} = Rn;
+ let Inst{15} = 0;
+ let Inst{14} = regs{14};
+ let Inst{13} = 0;
+ let Inst{12-0} = regs{12-0};
+ }
+ def DB_UPD :
+ T2XIt<(outs GPR:$wb), (ins GPR:$Rn, pred:$p, reglist:$regs, variable_ops),
+ itin_upd, !strconcat(asm, "db${p}\t$Rn!, $regs"), "$Rn = $wb", []> {
+ bits<4> Rn;
+ bits<16> regs;
+
+ let Inst{31-27} = 0b11101;
+ let Inst{26-25} = 0b00;
+ let Inst{24-23} = 0b10; // Decrement Before
+ let Inst{22} = 0;
+ let Inst{21} = 1; // Writeback
+ let Inst{20} = L_bit;
+ let Inst{19-16} = Rn;
+ let Inst{15} = 0;
+ let Inst{14} = regs{14};
+ let Inst{13} = 0;
+ let Inst{12-0} = regs{12-0};
+ }
+}
+
let mayStore = 1, hasExtraSrcRegAllocReq = 1 in
-defm t2STM : thumb2_ldst_mult<"stm", IIC_iStore_m, IIC_iStore_mu, 0>;
+defm t2STM : thumb2_st_mult<"stm", IIC_iStore_m, IIC_iStore_mu, 0>;
} // neverHasSideEffects
@@ -1587,7 +1684,7 @@ defm t2STM : thumb2_ldst_mult<"stm", IIC_iStore_m, IIC_iStore_mu, 0>;
//
let neverHasSideEffects = 1 in
-def t2MOVr : T2sTwoReg<(outs GPR:$Rd), (ins GPR:$Rm), IIC_iMOVr,
+def t2MOVr : T2sTwoReg<(outs GPRnopc:$Rd), (ins GPR:$Rm), IIC_iMOVr,
"mov", ".w\t$Rd, $Rm", []> {
let Inst{31-27} = 0b11101;
let Inst{26-25} = 0b01;
@@ -1596,6 +1693,10 @@ def t2MOVr : T2sTwoReg<(outs GPR:$Rd), (ins GPR:$Rm), IIC_iMOVr,
let Inst{14-12} = 0b000;
let Inst{7-4} = 0b0000;
}
+def : t2InstAlias<"movs${p}.w $Rd, $Rm", (t2MOVr GPRnopc:$Rd, GPR:$Rm,
+ pred:$p, CPSR)>;
+def : t2InstAlias<"movs${p} $Rd, $Rm", (t2MOVr GPRnopc:$Rd, GPR:$Rm,
+ pred:$p, CPSR)>;
// AddedComplexity to ensure isel tries t2MOVi before t2MOVi16.
let isReMaterializable = 1, isAsCheapAsAMove = 1, isMoveImm = 1,
@@ -1610,12 +1711,20 @@ def t2MOVi : T2sOneRegImm<(outs rGPR:$Rd), (ins t2_so_imm:$imm), IIC_iMOVi,
let Inst{15} = 0;
}
-def : InstAlias<"mov${s}${p} $Rd, $imm", (t2MOVi rGPR:$Rd, t2_so_imm:$imm,
- pred:$p, cc_out:$s)>,
- Requires<[IsThumb2]>;
+// cc_out is handled as part of the explicit mnemonic in the parser for 'mov'.
+// Use aliases to get that to play nice here.
+def : t2InstAlias<"movs${p}.w $Rd, $imm", (t2MOVi rGPR:$Rd, t2_so_imm:$imm,
+ pred:$p, CPSR)>;
+def : t2InstAlias<"movs${p} $Rd, $imm", (t2MOVi rGPR:$Rd, t2_so_imm:$imm,
+ pred:$p, CPSR)>;
+
+def : t2InstAlias<"mov${p}.w $Rd, $imm", (t2MOVi rGPR:$Rd, t2_so_imm:$imm,
+ pred:$p, zero_reg)>;
+def : t2InstAlias<"mov${p} $Rd, $imm", (t2MOVi rGPR:$Rd, t2_so_imm:$imm,
+ pred:$p, zero_reg)>;
let isReMaterializable = 1, isAsCheapAsAMove = 1, isMoveImm = 1 in
-def t2MOVi16 : T2I<(outs rGPR:$Rd), (ins i32imm_hilo16:$imm), IIC_iMOVi,
+def t2MOVi16 : T2I<(outs rGPR:$Rd), (ins imm0_65535_expr:$imm), IIC_iMOVi,
"movw", "\t$Rd, $imm",
[(set rGPR:$Rd, imm0_65535:$imm)]> {
let Inst{31-27} = 0b11110;
@@ -1632,6 +1741,7 @@ def t2MOVi16 : T2I<(outs rGPR:$Rd), (ins i32imm_hilo16:$imm), IIC_iMOVi,
let Inst{26} = imm{11};
let Inst{14-12} = imm{10-8};
let Inst{7-0} = imm{7-0};
+ let DecoderMethod = "DecodeT2MOVTWInstruction";
}
def t2MOVi16_ga_pcrel : PseudoInst<(outs rGPR:$Rd),
@@ -1639,7 +1749,7 @@ def t2MOVi16_ga_pcrel : PseudoInst<(outs rGPR:$Rd),
let Constraints = "$src = $Rd" in {
def t2MOVTi16 : T2I<(outs rGPR:$Rd),
- (ins rGPR:$src, i32imm_hilo16:$imm), IIC_iMOVi,
+ (ins rGPR:$src, imm0_65535_expr:$imm), IIC_iMOVi,
"movt", "\t$Rd, $imm",
[(set rGPR:$Rd,
(or (and rGPR:$src, 0xffff), lo16AllZero:$imm))]> {
@@ -1657,6 +1767,7 @@ def t2MOVTi16 : T2I<(outs rGPR:$Rd),
let Inst{26} = imm{11};
let Inst{14-12} = imm{10-8};
let Inst{7-0} = imm{7-0};
+ let DecoderMethod = "DecodeT2MOVTWInstruction";
}
def t2MOVTi16_ga_pcrel : PseudoInst<(outs rGPR:$Rd),
@@ -1671,28 +1782,26 @@ def : T2Pat<(or rGPR:$src, 0xffff0000), (t2MOVTi16 rGPR:$src, 0xffff)>;
// Sign extenders
-defm t2SXTB : T2I_ext_rrot<0b100, "sxtb",
+def t2SXTB : T2I_ext_rrot<0b100, "sxtb",
UnOpFrag<(sext_inreg node:$Src, i8)>>;
-defm t2SXTH : T2I_ext_rrot<0b000, "sxth",
+def t2SXTH : T2I_ext_rrot<0b000, "sxth",
UnOpFrag<(sext_inreg node:$Src, i16)>>;
-defm t2SXTB16 : T2I_ext_rrot_sxtb16<0b010, "sxtb16">;
+def t2SXTB16 : T2I_ext_rrot_sxtb16<0b010, "sxtb16">;
-defm t2SXTAB : T2I_exta_rrot<0b100, "sxtab",
+def t2SXTAB : T2I_exta_rrot<0b100, "sxtab",
BinOpFrag<(add node:$LHS, (sext_inreg node:$RHS, i8))>>;
-defm t2SXTAH : T2I_exta_rrot<0b000, "sxtah",
+def t2SXTAH : T2I_exta_rrot<0b000, "sxtah",
BinOpFrag<(add node:$LHS, (sext_inreg node:$RHS,i16))>>;
-defm t2SXTAB16 : T2I_exta_rrot_DO<0b010, "sxtab16">;
-
-// TODO: SXT(A){B|H}16 - done for disassembly only
+def t2SXTAB16 : T2I_exta_rrot_np<0b010, "sxtab16">;
// Zero extenders
let AddedComplexity = 16 in {
-defm t2UXTB : T2I_ext_rrot<0b101, "uxtb",
+def t2UXTB : T2I_ext_rrot<0b101, "uxtb",
UnOpFrag<(and node:$Src, 0x000000FF)>>;
-defm t2UXTH : T2I_ext_rrot<0b001, "uxth",
+def t2UXTH : T2I_ext_rrot<0b001, "uxth",
UnOpFrag<(and node:$Src, 0x0000FFFF)>>;
-defm t2UXTB16 : T2I_ext_rrot_uxtb16<0b011, "uxtb16",
+def t2UXTB16 : T2I_ext_rrot_uxtb16<0b011, "uxtb16",
UnOpFrag<(and node:$Src, 0x00FF00FF)>>;
// FIXME: This pattern incorrectly assumes the shl operator is a rotate.
@@ -1700,17 +1809,17 @@ defm t2UXTB16 : T2I_ext_rrot_uxtb16<0b011, "uxtb16",
// instead so we can include a check for masking back in the upper
// eight bits of the source into the lower eight bits of the result.
//def : T2Pat<(and (shl rGPR:$Src, (i32 8)), 0xFF00FF),
-// (t2UXTB16r_rot rGPR:$Src, 24)>,
+// (t2UXTB16 rGPR:$Src, 3)>,
// Requires<[HasT2ExtractPack, IsThumb2]>;
def : T2Pat<(and (srl rGPR:$Src, (i32 8)), 0xFF00FF),
- (t2UXTB16r_rot rGPR:$Src, 8)>,
+ (t2UXTB16 rGPR:$Src, 1)>,
Requires<[HasT2ExtractPack, IsThumb2]>;
-defm t2UXTAB : T2I_exta_rrot<0b101, "uxtab",
+def t2UXTAB : T2I_exta_rrot<0b101, "uxtab",
BinOpFrag<(add node:$LHS, (and node:$RHS, 0x00FF))>>;
-defm t2UXTAH : T2I_exta_rrot<0b001, "uxtah",
+def t2UXTAH : T2I_exta_rrot<0b001, "uxtah",
BinOpFrag<(add node:$LHS, (and node:$RHS, 0xFFFF))>>;
-defm t2UXTAB16 : T2I_exta_rrot_DO<0b011, "uxtab16">;
+def t2UXTAB16 : T2I_exta_rrot_np<0b011, "uxtab16">;
}
//===----------------------------------------------------------------------===//
@@ -1723,27 +1832,37 @@ defm t2SUB : T2I_bin_ii12rs<0b101, "sub",
BinOpFrag<(sub node:$LHS, node:$RHS)>>;
// ADD and SUB with 's' bit set. No 12-bit immediate (T4) variants.
+//
+// Currently, t2ADDS/t2SUBS are pseudo opcodes that exist only in the
+// selection DAG. They are "lowered" to real t2ADD/t2SUB opcodes by
+// AdjustInstrPostInstrSelection where we determine whether or not to
+// set the "s" bit based on CPSR liveness.
+//
+// FIXME: Eliminate t2ADDS/t2SUBS pseudo opcodes after adding tablegen
+// support for an optional CPSR definition that corresponds to the DAG
+// node's second value. We can then eliminate the implicit def of CPSR.
defm t2ADDS : T2I_bin_s_irs <0b1000, "add",
IIC_iALUi, IIC_iALUr, IIC_iALUsi,
- BinOpFrag<(addc node:$LHS, node:$RHS)>, 1>;
+ BinOpFrag<(ARMaddc node:$LHS, node:$RHS)>, 1>;
defm t2SUBS : T2I_bin_s_irs <0b1101, "sub",
IIC_iALUi, IIC_iALUr, IIC_iALUsi,
- BinOpFrag<(subc node:$LHS, node:$RHS)>>;
+ BinOpFrag<(ARMsubc node:$LHS, node:$RHS)>>;
+let hasPostISelHook = 1 in {
defm t2ADC : T2I_adde_sube_irs<0b1010, "adc",
- BinOpFrag<(adde_dead_carry node:$LHS, node:$RHS)>, 1>;
+ BinOpWithFlagFrag<(ARMadde node:$LHS, node:$RHS, node:$FLAG)>, 1>;
defm t2SBC : T2I_adde_sube_irs<0b1011, "sbc",
- BinOpFrag<(sube_dead_carry node:$LHS, node:$RHS)>>;
-defm t2ADCS : T2I_adde_sube_s_irs<BinOpFrag<(adde_live_carry node:$LHS,
- node:$RHS)>, 1>;
-defm t2SBCS : T2I_adde_sube_s_irs<BinOpFrag<(sube_live_carry node:$LHS,
- node:$RHS)>>;
+ BinOpWithFlagFrag<(ARMsube node:$LHS, node:$RHS, node:$FLAG)>>;
+}
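
For reference, the CPSR-liveness decision described above can be modeled in a
few lines of C++. This is an illustrative toy, not the actual LLVM hook; the
Inst type, the opcode strings, and the flat block representation are all
invented for the sketch.

#include <cassert>
#include <cstddef>
#include <string>
#include <vector>

// Toy model: a pseudo t2ADDS at position i is lowered to a plain t2ADD when
// CPSR is dead, i.e. no later instruction reads the flags before they are
// redefined. This mirrors the decision described above, not LLVM's API.
struct Inst {
  std::string opcode;
  bool definesFlags; // would write CPSR (the "s" bit)
  bool readsFlags;   // consumes CPSR (e.g. adc/sbc, predicated code)
};

void adjustPostISel(std::vector<Inst> &block, std::size_t i) {
  bool cpsrLive = false;
  for (std::size_t j = i + 1; j < block.size(); ++j) {
    if (block[j].readsFlags) { cpsrLive = true; break; }
    if (block[j].definesFlags) break; // clobbered before any use
  }
  if (!cpsrLive && block[i].opcode == "t2ADDSri") {
    block[i].opcode = "t2ADDri"; // lower the pseudo to the real opcode
    block[i].definesFlags = false;
  }
}

int main() {
  std::vector<Inst> b = {{"t2ADDSri", true, false}, {"t2MOVr", false, false}};
  adjustPostISel(b, 0);
  assert(b[0].opcode == "t2ADDri"); // flags were dead, so "s" is dropped
  return 0;
}
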
// RSB
defm t2RSB : T2I_rbin_irs <0b1110, "rsb",
BinOpFrag<(sub node:$LHS, node:$RHS)>>;
+
+// FIXME: Eliminate these if we can write def : Pat patterns that define
+// CPSR, so that the implicit def of CPSR is no longer needed.
defm t2RSBS : T2I_rbin_s_is <0b1110, "rsb",
- BinOpFrag<(subc node:$LHS, node:$RHS)>>;
+ BinOpFrag<(ARMsubc node:$LHS, node:$RHS)>>;
// (sub X, imm) gets canonicalized to (add X, -imm). Match this form.
// The assume-no-carry-in form uses the negation of the input since add/sub
@@ -1760,23 +1879,18 @@ def : T2Pat<(add GPR:$src, t2_so_imm_neg:$imm),
def : T2Pat<(add GPR:$src, imm0_4095_neg:$imm),
(t2SUBri12 GPR:$src, imm0_4095_neg:$imm)>;
let AddedComplexity = 1 in
-def : T2Pat<(addc rGPR:$src, imm0_255_neg:$imm),
+def : T2Pat<(ARMaddc rGPR:$src, imm0_255_neg:$imm),
(t2SUBSri rGPR:$src, imm0_255_neg:$imm)>;
-def : T2Pat<(addc rGPR:$src, t2_so_imm_neg:$imm),
+def : T2Pat<(ARMaddc rGPR:$src, t2_so_imm_neg:$imm),
(t2SUBSri rGPR:$src, t2_so_imm_neg:$imm)>;
// The with-carry-in form matches bitwise not instead of the negation.
// Effectively, the inverse interpretation of the carry flag already accounts
// for part of the negation.
let AddedComplexity = 1 in
-def : T2Pat<(adde_dead_carry rGPR:$src, imm0_255_not:$imm),
+def : T2Pat<(ARMadde rGPR:$src, imm0_255_not:$imm, CPSR),
(t2SBCri rGPR:$src, imm0_255_not:$imm)>;
-def : T2Pat<(adde_dead_carry rGPR:$src, t2_so_imm_not:$imm),
+def : T2Pat<(ARMadde rGPR:$src, t2_so_imm_not:$imm, CPSR),
(t2SBCri rGPR:$src, t2_so_imm_not:$imm)>;
-let AddedComplexity = 1 in
-def : T2Pat<(adde_live_carry rGPR:$src, imm0_255_not:$imm),
- (t2SBCSri rGPR:$src, imm0_255_not:$imm)>;
-def : T2Pat<(adde_live_carry rGPR:$src, t2_so_imm_not:$imm),
- (t2SBCSri rGPR:$src, t2_so_imm_not:$imm)>;
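
The two's-complement identity behind that remark is easy to verify; the
following is a minimal self-contained C++ check, illustrative only.

#include <cassert>
#include <cstdint>

// SBC computes Rn - imm - (1 - C). Since -imm - 1 == ~imm in two's
// complement, that equals Rn + ~imm + C, which is why the ARMadde patterns
// above match the bitwise NOT of the immediate rather than its negation.
int main() {
  uint32_t rn = 1000, imm = 37;
  for (uint32_t c = 0; c <= 1; ++c)
    assert(rn + ~imm + c == rn - imm - (1 - c));
  return 0;
}
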
// Select Bytes -- for disassembly only
@@ -1893,8 +2007,7 @@ class T2FourReg_mac<bit long, bits<3> op22_20, bits<4> op7_4, dag oops,
let Inst{7-4} = op7_4;
}
-// Unsigned Sum of Absolute Differences [and Accumulate] -- for disassembly only
-
+// Unsigned Sum of Absolute Differences [and Accumulate].
def t2USAD8 : T2ThreeReg_mac<0, 0b111, 0b0000, (outs rGPR:$Rd),
(ins rGPR:$Rn, rGPR:$Rm),
NoItinerary, "usad8", "\t$Rd, $Rn, $Rm", []>,
@@ -1906,8 +2019,7 @@ def t2USADA8 : T2FourReg_mac<0, 0b111, 0b0000, (outs rGPR:$Rd),
"usada8", "\t$Rd, $Rn, $Rm, $Ra", []>,
Requires<[IsThumb2, HasThumb2DSP]>;
-// Signed/Unsigned saturate -- for disassembly only
-
+// Signed/Unsigned saturate.
class T2SatI<dag oops, dag iops, InstrItinClass itin,
string opc, string asm, list<dag> pattern>
: T2I<oops, iops, itin, opc, asm, pattern> {
@@ -1918,26 +2030,26 @@ class T2SatI<dag oops, dag iops, InstrItinClass itin,
let Inst{11-8} = Rd;
let Inst{19-16} = Rn;
- let Inst{4-0} = sat_imm{4-0};
- let Inst{21} = sh{6};
+ let Inst{4-0} = sat_imm;
+ let Inst{21} = sh{5};
let Inst{14-12} = sh{4-2};
let Inst{7-6} = sh{1-0};
}
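
As a reference for the $sh operand wiring above, here is a small
self-contained C++ sketch of where the six operand bits land in the
instruction word; it assumes bit 5 is the LSL/ASR selector and the low five
bits are the shift amount.

#include <cassert>
#include <cstdint>

// Mirror of the let statements above: sh{5} -> Inst{21}, sh{4-2} ->
// Inst{14-12} (imm3), sh{1-0} -> Inst{7-6} (imm2).
static uint32_t placeSatShift(unsigned sh) {
  assert(sh < 64);
  return ((sh >> 5) & 1u) << 21   // shift type -> Inst{21}
       | ((sh >> 2) & 7u) << 12   // sh{4-2}    -> Inst{14-12}
       | (sh & 3u) << 6;          // sh{1-0}    -> Inst{7-6}
}

int main() {
  // asr #3 -> sh = 0b100011: selector bit set, amount 3
  assert(placeSatShift(0x23) == ((1u << 21) | (3u << 6)));
  return 0;
}
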
def t2SSAT: T2SatI<
- (outs rGPR:$Rd), (ins ssat_imm:$sat_imm, rGPR:$Rn, shift_imm:$sh),
- NoItinerary, "ssat", "\t$Rd, $sat_imm, $Rn$sh",
- [/* For disassembly only; pattern left blank */]> {
+ (outs rGPR:$Rd),
+ (ins imm1_32:$sat_imm, rGPR:$Rn, t2_shift_imm:$sh),
+ NoItinerary, "ssat", "\t$Rd, $sat_imm, $Rn$sh", []> {
let Inst{31-27} = 0b11110;
let Inst{25-22} = 0b1100;
let Inst{20} = 0;
let Inst{15} = 0;
+ let Inst{5} = 0;
}
def t2SSAT16: T2SatI<
- (outs rGPR:$Rd), (ins ssat_imm:$sat_imm, rGPR:$Rn), NoItinerary,
- "ssat16", "\t$Rd, $sat_imm, $Rn",
- [/* For disassembly only; pattern left blank */]>,
+ (outs rGPR:$Rd), (ins imm1_16:$sat_imm, rGPR:$Rn), NoItinerary,
+ "ssat16", "\t$Rd, $sat_imm, $Rn", []>,
Requires<[IsThumb2, HasThumb2DSP]> {
let Inst{31-27} = 0b11110;
let Inst{25-22} = 0b1100;
@@ -1946,30 +2058,30 @@ def t2SSAT16: T2SatI<
let Inst{21} = 1; // sh = '1'
let Inst{14-12} = 0b000; // imm3 = '000'
let Inst{7-6} = 0b00; // imm2 = '00'
+ let Inst{5-4} = 0b00;
}
def t2USAT: T2SatI<
- (outs rGPR:$Rd), (ins i32imm:$sat_imm, rGPR:$Rn, shift_imm:$sh),
- NoItinerary, "usat", "\t$Rd, $sat_imm, $Rn$sh",
- [/* For disassembly only; pattern left blank */]> {
+ (outs rGPR:$Rd),
+ (ins imm0_31:$sat_imm, rGPR:$Rn, t2_shift_imm:$sh),
+ NoItinerary, "usat", "\t$Rd, $sat_imm, $Rn$sh", []> {
let Inst{31-27} = 0b11110;
let Inst{25-22} = 0b1110;
let Inst{20} = 0;
let Inst{15} = 0;
}
-def t2USAT16: T2SatI<(outs rGPR:$dst), (ins i32imm:$sat_imm, rGPR:$Rn),
+def t2USAT16: T2SatI<(outs rGPR:$Rd), (ins imm0_15:$sat_imm, rGPR:$Rn),
NoItinerary,
- "usat16", "\t$dst, $sat_imm, $Rn",
- [/* For disassembly only; pattern left blank */]>,
+ "usat16", "\t$Rd, $sat_imm, $Rn", []>,
Requires<[IsThumb2, HasThumb2DSP]> {
- let Inst{31-27} = 0b11110;
- let Inst{25-22} = 0b1110;
+ let Inst{31-22} = 0b1111001110;
let Inst{20} = 0;
let Inst{15} = 0;
let Inst{21} = 1; // sh = '1'
let Inst{14-12} = 0b000; // imm3 = '000'
let Inst{7-6} = 0b00; // imm2 = '00'
+ let Inst{5-4} = 0b00;
}
def : T2Pat<(int_arm_ssat GPR:$a, imm:$pos), (t2SSAT imm:$pos, GPR:$a, 0)>;
@@ -1979,10 +2091,14 @@ def : T2Pat<(int_arm_usat GPR:$a, imm:$pos), (t2USAT imm:$pos, GPR:$a, 0)>;
// Shift and rotate Instructions.
//
-defm t2LSL : T2I_sh_ir<0b00, "lsl", BinOpFrag<(shl node:$LHS, node:$RHS)>>;
-defm t2LSR : T2I_sh_ir<0b01, "lsr", BinOpFrag<(srl node:$LHS, node:$RHS)>>;
-defm t2ASR : T2I_sh_ir<0b10, "asr", BinOpFrag<(sra node:$LHS, node:$RHS)>>;
-defm t2ROR : T2I_sh_ir<0b11, "ror", BinOpFrag<(rotr node:$LHS, node:$RHS)>>;
+defm t2LSL : T2I_sh_ir<0b00, "lsl", imm0_31,
+ BinOpFrag<(shl node:$LHS, node:$RHS)>, "t2LSL">;
+defm t2LSR : T2I_sh_ir<0b01, "lsr", imm_sr,
+ BinOpFrag<(srl node:$LHS, node:$RHS)>, "t2LSR">;
+defm t2ASR : T2I_sh_ir<0b10, "asr", imm_sr,
+ BinOpFrag<(sra node:$LHS, node:$RHS)>, "t2ASR">;
+defm t2ROR : T2I_sh_ir<0b11, "ror", imm0_31,
+ BinOpFrag<(rotr node:$LHS, node:$RHS)>, "t2ROR">;
// (rotr x, (and y, 0x...1f)) ==> (ROR x, y)
def : Pat<(rotr rGPR:$lhs, (and rGPR:$rhs, lo5AllOne)),
@@ -2090,7 +2206,7 @@ def t2BFC : T2BitFI<(outs rGPR:$Rd), (ins rGPR:$src, bf_inv_mask_imm:$imm),
}
def t2SBFX: T2TwoRegBitFI<
- (outs rGPR:$Rd), (ins rGPR:$Rn, imm0_31:$lsb, imm0_31_m1:$msb),
+ (outs rGPR:$Rd), (ins rGPR:$Rn, imm0_31:$lsb, imm1_32:$msb),
IIC_iUNAsi, "sbfx", "\t$Rd, $Rn, $lsb, $msb", []> {
let Inst{31-27} = 0b11110;
let Inst{25} = 1;
@@ -2099,7 +2215,7 @@ def t2SBFX: T2TwoRegBitFI<
}
def t2UBFX: T2TwoRegBitFI<
- (outs rGPR:$Rd), (ins rGPR:$Rn, imm0_31:$lsb, imm0_31_m1:$msb),
+ (outs rGPR:$Rd), (ins rGPR:$Rn, imm0_31:$lsb, imm1_32:$msb),
IIC_iUNAsi, "ubfx", "\t$Rd, $Rn, $lsb, $msb", []> {
let Inst{31-27} = 0b11110;
let Inst{25} = 1;
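
The operand change here reflects that the value written for $msb in assembly
is really a field width in the range 1-32 (the encoding stores width minus
one, which is what imm1_32 models). A self-contained C++ model of the
ubfx/sbfx semantics, for illustration only:

#include <cassert>
#include <cstdint>

// UBFX Rd, Rn, #lsb, #width extracts 'width' bits starting at 'lsb' and
// zero-extends the field; SBFX sign-extends it instead.
static uint32_t ubfx(uint32_t rn, unsigned lsb, unsigned width) {
  assert(width >= 1 && width <= 32 && lsb + width <= 32);
  uint64_t mask = (width == 32) ? 0xFFFFFFFFull : ((1ull << width) - 1);
  return (uint32_t)((rn >> lsb) & mask);
}

static int32_t sbfx(uint32_t rn, unsigned lsb, unsigned width) {
  uint32_t field = ubfx(rn, lsb, width);
  uint32_t sign = 1u << (width - 1);
  return (int32_t)((field ^ sign) - sign); // sign-extend 'width' bits
}

int main() {
  assert(ubfx(0xABCD1234u, 8, 8) == 0x12);
  assert(sbfx(0x0000FF00u, 8, 8) == -1);
  return 0;
}
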
@@ -2125,26 +2241,6 @@ let Constraints = "$src = $Rd" in {
let msb{4-0} = imm{9-5};
let lsb{4-0} = imm{4-0};
}
-
- // GNU as only supports this form of bfi (w/ 4 arguments)
- let isAsmParserOnly = 1 in
- def t2BFI4p : T2TwoRegBitFI<(outs rGPR:$Rd),
- (ins rGPR:$src, rGPR:$Rn, lsb_pos_imm:$lsbit,
- width_imm:$width),
- IIC_iBITi, "bfi", "\t$Rd, $Rn, $lsbit, $width",
- []> {
- let Inst{31-27} = 0b11110;
- let Inst{26} = 0; // should be 0.
- let Inst{25} = 1;
- let Inst{24-20} = 0b10110;
- let Inst{15} = 0;
- let Inst{5} = 0; // should be 0.
-
- bits<5> lsbit;
- bits<5> width;
- let msb{4-0} = width; // Custom encoder => lsb+width-1
- let lsb{4-0} = lsbit;
- }
}
defm t2ORN : T2I_bin_irs<0b0011, "orn",
@@ -2152,13 +2248,53 @@ defm t2ORN : T2I_bin_irs<0b0011, "orn",
BinOpFrag<(or node:$LHS, (not node:$RHS))>,
"t2ORN", 0, "">;
+/// T2I_un_irs - Defines a set of (op reg, {so_imm|r|so_reg}) patterns for a
+/// unary operation that produces a value. These are predicable and can be
+/// changed to modify CPSR.
+multiclass T2I_un_irs<bits<4> opcod, string opc,
+ InstrItinClass iii, InstrItinClass iir, InstrItinClass iis,
+ PatFrag opnode, bit Cheap = 0, bit ReMat = 0> {
+ // shifted imm
+ def i : T2sOneRegImm<(outs rGPR:$Rd), (ins t2_so_imm:$imm), iii,
+ opc, "\t$Rd, $imm",
+ [(set rGPR:$Rd, (opnode t2_so_imm:$imm))]> {
+ let isAsCheapAsAMove = Cheap;
+ let isReMaterializable = ReMat;
+ let Inst{31-27} = 0b11110;
+ let Inst{25} = 0;
+ let Inst{24-21} = opcod;
+ let Inst{19-16} = 0b1111; // Rn
+ let Inst{15} = 0;
+ }
+ // register
+ def r : T2sTwoReg<(outs rGPR:$Rd), (ins rGPR:$Rm), iir,
+ opc, ".w\t$Rd, $Rm",
+ [(set rGPR:$Rd, (opnode rGPR:$Rm))]> {
+ let Inst{31-27} = 0b11101;
+ let Inst{26-25} = 0b01;
+ let Inst{24-21} = opcod;
+ let Inst{19-16} = 0b1111; // Rn
+ let Inst{14-12} = 0b000; // imm3
+ let Inst{7-6} = 0b00; // imm2
+ let Inst{5-4} = 0b00; // type
+ }
+ // shifted register
+ def s : T2sOneRegShiftedReg<(outs rGPR:$Rd), (ins t2_so_reg:$ShiftedRm), iis,
+ opc, ".w\t$Rd, $ShiftedRm",
+ [(set rGPR:$Rd, (opnode t2_so_reg:$ShiftedRm))]> {
+ let Inst{31-27} = 0b11101;
+ let Inst{26-25} = 0b01;
+ let Inst{24-21} = opcod;
+ let Inst{19-16} = 0b1111; // Rn
+ }
+}
+
// Prefer t2MVN over t2EORri ra, rb, -1 because mvn has a 16-bit version
let AddedComplexity = 1 in
defm t2MVN : T2I_un_irs <0b0011, "mvn",
IIC_iMVNi, IIC_iMVNr, IIC_iMVNsi,
UnOpFrag<(not node:$Src)>, 1, 1>;
-
let AddedComplexity = 1 in
def : T2Pat<(and rGPR:$src, t2_so_imm_not:$imm),
(t2BICri rGPR:$src, t2_so_imm_not:$imm)>;
@@ -2209,9 +2345,9 @@ def t2MLS: T2FourReg<
let neverHasSideEffects = 1 in {
let isCommutable = 1 in {
def t2SMULL : T2MulLong<0b000, 0b0000,
- (outs rGPR:$Rd, rGPR:$Ra),
+ (outs rGPR:$RdLo, rGPR:$RdHi),
(ins rGPR:$Rn, rGPR:$Rm), IIC_iMUL64,
- "smull", "\t$Rd, $Ra, $Rn, $Rm", []>;
+ "smull", "\t$RdLo, $RdHi, $Rn, $Rm", []>;
def t2UMULL : T2MulLong<0b010, 0b0000,
(outs rGPR:$RdLo, rGPR:$RdHi),
@@ -2468,7 +2604,7 @@ multiclass T2I_smla<string opc, PatFrag opnode> {
defm t2SMUL : T2I_smul<"smul", BinOpFrag<(mul node:$LHS, node:$RHS)>>;
defm t2SMLA : T2I_smla<"smla", BinOpFrag<(mul node:$LHS, node:$RHS)>>;
-// Halfword multiple accumulate long: SMLAL<x><y> -- for disassembly only
+// Halfword multiply accumulate long: SMLAL<x><y>
def t2SMLALBB : T2FourReg_mac<1, 0b100, 0b1000, (outs rGPR:$Ra,rGPR:$Rd),
(ins rGPR:$Rn,rGPR:$Rm), IIC_iMAC64, "smlalbb", "\t$Ra, $Rd, $Rn, $Rm",
[/* For disassembly only; pattern left blank */]>,
@@ -2487,8 +2623,6 @@ def t2SMLALTT : T2FourReg_mac<1, 0b100, 0b1011, (outs rGPR:$Ra,rGPR:$Rd),
Requires<[IsThumb2, HasThumb2DSP]>;
// Dual halfword multiply: SMUAD, SMUSD, SMLAD, SMLSD, SMLALD, SMLSLD
-// These are for disassembly only.
-
def t2SMUAD: T2ThreeReg_mac<
0, 0b010, 0b0000, (outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm),
IIC_iMAC32, "smuad", "\t$Rd, $Rn, $Rm", []>,
@@ -2513,7 +2647,7 @@ def t2SMUSDX:T2ThreeReg_mac<
Requires<[IsThumb2, HasThumb2DSP]> {
let Inst{15-12} = 0b1111;
}
-def t2SMLAD : T2ThreeReg_mac<
+def t2SMLAD : T2FourReg_mac<
0, 0b010, 0b0000, (outs rGPR:$Rd),
(ins rGPR:$Rn, rGPR:$Rm, rGPR:$Ra), IIC_iMAC32, "smlad",
"\t$Rd, $Rn, $Rm, $Ra", []>,
@@ -2532,20 +2666,20 @@ def t2SMLSDX : T2FourReg_mac<0, 0b100, 0b0001, (outs rGPR:$Rd),
"\t$Rd, $Rn, $Rm, $Ra", []>,
Requires<[IsThumb2, HasThumb2DSP]>;
def t2SMLALD : T2FourReg_mac<1, 0b100, 0b1100, (outs rGPR:$Ra,rGPR:$Rd),
- (ins rGPR:$Rm, rGPR:$Rn), IIC_iMAC64, "smlald",
- "\t$Ra, $Rd, $Rm, $Rn", []>,
+ (ins rGPR:$Rn, rGPR:$Rm), IIC_iMAC64, "smlald",
+ "\t$Ra, $Rd, $Rn, $Rm", []>,
Requires<[IsThumb2, HasThumb2DSP]>;
def t2SMLALDX : T2FourReg_mac<1, 0b100, 0b1101, (outs rGPR:$Ra,rGPR:$Rd),
- (ins rGPR:$Rm,rGPR:$Rn), IIC_iMAC64, "smlaldx",
- "\t$Ra, $Rd, $Rm, $Rn", []>,
+ (ins rGPR:$Rn,rGPR:$Rm), IIC_iMAC64, "smlaldx",
+ "\t$Ra, $Rd, $Rn, $Rm", []>,
Requires<[IsThumb2, HasThumb2DSP]>;
def t2SMLSLD : T2FourReg_mac<1, 0b101, 0b1100, (outs rGPR:$Ra,rGPR:$Rd),
- (ins rGPR:$Rm,rGPR:$Rn), IIC_iMAC64, "smlsld",
- "\t$Ra, $Rd, $Rm, $Rn", []>,
+ (ins rGPR:$Rn,rGPR:$Rm), IIC_iMAC64, "smlsld",
+ "\t$Ra, $Rd, $Rn, $Rm", []>,
Requires<[IsThumb2, HasThumb2DSP]>;
def t2SMLSLDX : T2FourReg_mac<1, 0b101, 0b1101, (outs rGPR:$Ra,rGPR:$Rd),
(ins rGPR:$Rm,rGPR:$Rn), IIC_iMAC64, "smlsldx",
- "\t$Ra, $Rd, $Rm, $Rn", []>,
+ "\t$Ra, $Rd, $Rn, $Rm", []>,
Requires<[IsThumb2, HasThumb2DSP]>;
//===----------------------------------------------------------------------===//
@@ -2613,10 +2747,10 @@ def : T2Pat<(or (sra (shl rGPR:$Rm, (i32 24)), (i32 16)),
(t2REVSH rGPR:$Rm)>;
def t2PKHBT : T2ThreeReg<
- (outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm, shift_imm:$sh),
+ (outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm, pkh_lsl_amt:$sh),
IIC_iBITsi, "pkhbt", "\t$Rd, $Rn, $Rm$sh",
[(set rGPR:$Rd, (or (and rGPR:$Rn, 0xFFFF),
- (and (shl rGPR:$Rm, lsl_amt:$sh),
+ (and (shl rGPR:$Rm, pkh_lsl_amt:$sh),
0xFFFF0000)))]>,
Requires<[HasT2ExtractPack, IsThumb2]> {
let Inst{31-27} = 0b11101;
@@ -2625,9 +2759,9 @@ def t2PKHBT : T2ThreeReg<
let Inst{5} = 0; // BT form
let Inst{4} = 0;
- bits<8> sh;
- let Inst{14-12} = sh{7-5};
- let Inst{7-6} = sh{4-3};
+ bits<5> sh;
+ let Inst{14-12} = sh{4-2};
+ let Inst{7-6} = sh{1-0};
}
// Alternate cases for PKHBT where identities eliminate some nodes.
@@ -2635,16 +2769,16 @@ def : T2Pat<(or (and rGPR:$src1, 0xFFFF), (and rGPR:$src2, 0xFFFF0000)),
(t2PKHBT rGPR:$src1, rGPR:$src2, 0)>,
Requires<[HasT2ExtractPack, IsThumb2]>;
def : T2Pat<(or (and rGPR:$src1, 0xFFFF), (shl rGPR:$src2, imm16_31:$sh)),
- (t2PKHBT rGPR:$src1, rGPR:$src2, (lsl_shift_imm imm16_31:$sh))>,
+ (t2PKHBT rGPR:$src1, rGPR:$src2, imm16_31:$sh)>,
Requires<[HasT2ExtractPack, IsThumb2]>;
// Note: Shifts of 1-15 bits will be transformed to srl instead of sra and
// will match the pattern below.
def t2PKHTB : T2ThreeReg<
- (outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm, shift_imm:$sh),
+ (outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm, pkh_asr_amt:$sh),
IIC_iBITsi, "pkhtb", "\t$Rd, $Rn, $Rm$sh",
[(set rGPR:$Rd, (or (and rGPR:$Rn, 0xFFFF0000),
- (and (sra rGPR:$Rm, asr_amt:$sh),
+ (and (sra rGPR:$Rm, pkh_asr_amt:$sh),
0xFFFF)))]>,
Requires<[HasT2ExtractPack, IsThumb2]> {
let Inst{31-27} = 0b11101;
@@ -2653,19 +2787,19 @@ def t2PKHTB : T2ThreeReg<
let Inst{5} = 1; // TB form
let Inst{4} = 0;
- bits<8> sh;
- let Inst{14-12} = sh{7-5};
- let Inst{7-6} = sh{4-3};
+ bits<5> sh;
+ let Inst{14-12} = sh{4-2};
+ let Inst{7-6} = sh{1-0};
}
// Alternate cases for PKHTB where identities eliminate some nodes. Note that
// a shift amount of 0 is *not legal* here; that form is PKHBT instead.
def : T2Pat<(or (and rGPR:$src1, 0xFFFF0000), (srl rGPR:$src2, imm16_31:$sh)),
- (t2PKHTB rGPR:$src1, rGPR:$src2, (asr_shift_imm imm16_31:$sh))>,
+ (t2PKHTB rGPR:$src1, rGPR:$src2, imm16_31:$sh)>,
Requires<[HasT2ExtractPack, IsThumb2]>;
def : T2Pat<(or (and rGPR:$src1, 0xFFFF0000),
(and (srl rGPR:$src2, imm1_15:$sh), 0xFFFF)),
- (t2PKHTB rGPR:$src1, rGPR:$src2, (asr_shift_imm imm1_15:$sh))>,
+ (t2PKHTB rGPR:$src1, rGPR:$src2, imm1_15:$sh)>,
Requires<[HasT2ExtractPack, IsThumb2]>;
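
For reference, a behavioral C++ sketch of the pack-halfword operations these
patterns target; it is illustrative only, and the PKHTB shift amount is
assumed to be in 1..31 since an amount of 0 denotes PKHBT, as noted above.

#include <cassert>
#include <cstdint>

// PKHBT keeps the bottom half of Rn and the (optionally left-shifted) top
// half of Rm; PKHTB keeps the top half of Rn and the (arithmetically
// right-shifted) bottom half of Rm.
static uint32_t pkhbt(uint32_t rn, uint32_t rm, unsigned lsl) {
  return (rn & 0x0000FFFFu) | ((rm << lsl) & 0xFFFF0000u);
}
static uint32_t pkhtb(uint32_t rn, uint32_t rm, unsigned asr) {
  return (rn & 0xFFFF0000u) | ((uint32_t)((int32_t)rm >> asr) & 0x0000FFFFu);
}

int main() {
  assert(pkhbt(0x1111AAAAu, 0x00002222u, 16) == 0x2222AAAAu);
  assert(pkhtb(0xAAAA1111u, 0xBBBB2222u, 16) == 0xAAAABBBBu);
  return 0;
}
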
//===----------------------------------------------------------------------===//
@@ -2673,14 +2807,14 @@ def : T2Pat<(or (and rGPR:$src1, 0xFFFF0000),
//
defm t2CMP : T2I_cmp_irs<0b1101, "cmp",
IIC_iCMPi, IIC_iCMPr, IIC_iCMPsi,
- BinOpFrag<(ARMcmp node:$LHS, node:$RHS)>>;
+ BinOpFrag<(ARMcmp node:$LHS, node:$RHS)>, "t2CMP">;
-def : T2Pat<(ARMcmpZ GPR:$lhs, t2_so_imm:$imm),
- (t2CMPri GPR:$lhs, t2_so_imm:$imm)>;
-def : T2Pat<(ARMcmpZ GPR:$lhs, rGPR:$rhs),
- (t2CMPrr GPR:$lhs, rGPR:$rhs)>;
-def : T2Pat<(ARMcmpZ GPR:$lhs, t2_so_reg:$rhs),
- (t2CMPrs GPR:$lhs, t2_so_reg:$rhs)>;
+def : T2Pat<(ARMcmpZ GPRnopc:$lhs, t2_so_imm:$imm),
+ (t2CMPri GPRnopc:$lhs, t2_so_imm:$imm)>;
+def : T2Pat<(ARMcmpZ GPRnopc:$lhs, rGPR:$rhs),
+ (t2CMPrr GPRnopc:$lhs, rGPR:$rhs)>;
+def : T2Pat<(ARMcmpZ GPRnopc:$lhs, t2_so_reg:$rhs),
+ (t2CMPrs GPRnopc:$lhs, t2_so_reg:$rhs)>;
//FIXME: Disable CMN, as CCodes are backwards from compare expectations
// Compare-to-zero still works out, just not the relationals
@@ -2688,20 +2822,23 @@ def : T2Pat<(ARMcmpZ GPR:$lhs, t2_so_reg:$rhs),
// BinOpFrag<(ARMcmp node:$LHS,(ineg node:$RHS))>>;
defm t2CMNz : T2I_cmp_irs<0b1000, "cmn",
IIC_iCMPi, IIC_iCMPr, IIC_iCMPsi,
- BinOpFrag<(ARMcmpZ node:$LHS,(ineg node:$RHS))>>;
+ BinOpFrag<(ARMcmpZ node:$LHS,(ineg node:$RHS))>,
+ "t2CMNz">;
//def : T2Pat<(ARMcmp GPR:$src, t2_so_imm_neg:$imm),
// (t2CMNri GPR:$src, t2_so_imm_neg:$imm)>;
-def : T2Pat<(ARMcmpZ GPR:$src, t2_so_imm_neg:$imm),
- (t2CMNzri GPR:$src, t2_so_imm_neg:$imm)>;
+def : T2Pat<(ARMcmpZ GPRnopc:$src, t2_so_imm_neg:$imm),
+ (t2CMNzri GPRnopc:$src, t2_so_imm_neg:$imm)>;
defm t2TST : T2I_cmp_irs<0b0000, "tst",
IIC_iTSTi, IIC_iTSTr, IIC_iTSTsi,
- BinOpFrag<(ARMcmpZ (and_su node:$LHS, node:$RHS), 0)>>;
+ BinOpFrag<(ARMcmpZ (and_su node:$LHS, node:$RHS), 0)>,
+ "t2TST">;
defm t2TEQ : T2I_cmp_irs<0b0100, "teq",
IIC_iTSTi, IIC_iTSTr, IIC_iTSTsi,
- BinOpFrag<(ARMcmpZ (xor_su node:$LHS, node:$RHS), 0)>>;
+ BinOpFrag<(ARMcmpZ (xor_su node:$LHS, node:$RHS), 0)>,
+ "t2TEQ">;
// Conditional moves
// FIXME: should be able to write a pattern for ARMcmov, but can't use
@@ -2723,7 +2860,7 @@ def t2MOVCCi : t2PseudoInst<(outs rGPR:$Rd),
// FIXME: Pseudo-ize these. For now, just mark codegen only.
let isCodeGenOnly = 1 in {
let isMoveImm = 1 in
-def t2MOVCCi16 : T2I<(outs rGPR:$Rd), (ins rGPR:$false, i32imm_hilo16:$imm),
+def t2MOVCCi16 : T2I<(outs rGPR:$Rd), (ins rGPR:$false, imm0_65535_expr:$imm),
IIC_iCMOVi,
"movw", "\t$Rd, $imm", []>,
RegConstraint<"$false = $Rd"> {
@@ -2807,20 +2944,19 @@ def t2DMB : AInoP<(outs), (ins memb_opt:$opt), ThumbFrm, NoItinerary,
}
def t2DSB : AInoP<(outs), (ins memb_opt:$opt), ThumbFrm, NoItinerary,
- "dsb", "\t$opt",
- [/* For disassembly only; pattern left blank */]>,
+ "dsb", "\t$opt", []>,
Requires<[IsThumb, HasDB]> {
bits<4> opt;
let Inst{31-4} = 0xf3bf8f4;
let Inst{3-0} = opt;
}
-// ISB has only full system option -- for disassembly only
-def t2ISB : AInoP<(outs), (ins), ThumbFrm, NoItinerary, "isb", "",
- [/* For disassembly only; pattern left blank */]>,
- Requires<[IsThumb2, HasV7]> {
+def t2ISB : AInoP<(outs), (ins memb_opt:$opt), ThumbFrm, NoItinerary,
+ "isb", "\t$opt",
+ []>, Requires<[IsThumb2, HasDB]> {
+ bits<4> opt;
let Inst{31-4} = 0xf3bf8f6;
- let Inst{3-0} = 0b1111;
+ let Inst{3-0} = opt;
}
class T2I_ldrex<bits<2> opcod, dag oops, dag iops, AddrMode am, int sz,
@@ -2858,28 +2994,27 @@ class T2I_strex<bits<2> opcod, dag oops, dag iops, AddrMode am, int sz,
}
let mayLoad = 1 in {
-def t2LDREXB : T2I_ldrex<0b00, (outs rGPR:$Rt), (ins t2addrmode_reg:$addr),
+def t2LDREXB : T2I_ldrex<0b00, (outs rGPR:$Rt), (ins addr_offset_none:$addr),
AddrModeNone, 4, NoItinerary,
"ldrexb", "\t$Rt, $addr", "", []>;
-def t2LDREXH : T2I_ldrex<0b01, (outs rGPR:$Rt), (ins t2addrmode_reg:$addr),
+def t2LDREXH : T2I_ldrex<0b01, (outs rGPR:$Rt), (ins addr_offset_none:$addr),
AddrModeNone, 4, NoItinerary,
"ldrexh", "\t$Rt, $addr", "", []>;
-def t2LDREX : Thumb2I<(outs rGPR:$Rt), (ins t2addrmode_reg:$addr),
+def t2LDREX : Thumb2I<(outs rGPR:$Rt), (ins t2addrmode_imm0_1020s4:$addr),
AddrModeNone, 4, NoItinerary,
"ldrex", "\t$Rt, $addr", "", []> {
+ bits<4> Rt;
+ bits<12> addr;
let Inst{31-27} = 0b11101;
let Inst{26-20} = 0b0000101;
- let Inst{11-8} = 0b1111;
- let Inst{7-0} = 0b00000000; // imm8 = 0
-
- bits<4> Rt;
- bits<4> addr;
- let Inst{19-16} = addr;
+ let Inst{19-16} = addr{11-8};
let Inst{15-12} = Rt;
+ let Inst{11-8} = 0b1111;
+ let Inst{7-0} = addr{7-0};
}
let hasExtraDefRegAllocReq = 1 in
def t2LDREXD : T2I_ldrex<0b11, (outs rGPR:$Rt, rGPR:$Rt2),
- (ins t2addrmode_reg:$addr),
+ (ins addr_offset_none:$addr),
AddrModeNone, 4, NoItinerary,
"ldrexd", "\t$Rt, $Rt2, $addr", "",
[], {?, ?, ?, ?}> {
@@ -2890,33 +3025,33 @@ def t2LDREXD : T2I_ldrex<0b11, (outs rGPR:$Rt, rGPR:$Rt2),
let mayStore = 1, Constraints = "@earlyclobber $Rd" in {
def t2STREXB : T2I_strex<0b00, (outs rGPR:$Rd),
- (ins rGPR:$Rt, t2addrmode_reg:$addr),
+ (ins rGPR:$Rt, addr_offset_none:$addr),
AddrModeNone, 4, NoItinerary,
"strexb", "\t$Rd, $Rt, $addr", "", []>;
def t2STREXH : T2I_strex<0b01, (outs rGPR:$Rd),
- (ins rGPR:$Rt, t2addrmode_reg:$addr),
+ (ins rGPR:$Rt, addr_offset_none:$addr),
AddrModeNone, 4, NoItinerary,
"strexh", "\t$Rd, $Rt, $addr", "", []>;
-def t2STREX : Thumb2I<(outs rGPR:$Rd), (ins rGPR:$Rt, t2addrmode_reg:$addr),
+def t2STREX : Thumb2I<(outs rGPR:$Rd), (ins rGPR:$Rt,
+ t2addrmode_imm0_1020s4:$addr),
AddrModeNone, 4, NoItinerary,
"strex", "\t$Rd, $Rt, $addr", "",
[]> {
- let Inst{31-27} = 0b11101;
- let Inst{26-20} = 0b0000100;
- let Inst{7-0} = 0b00000000; // imm8 = 0
-
bits<4> Rd;
- bits<4> addr;
bits<4> Rt;
- let Inst{11-8} = Rd;
- let Inst{19-16} = addr;
+ bits<12> addr;
+ let Inst{31-27} = 0b11101;
+ let Inst{26-20} = 0b0000100;
+ let Inst{19-16} = addr{11-8};
let Inst{15-12} = Rt;
+ let Inst{11-8} = Rd;
+ let Inst{7-0} = addr{7-0};
}
}
let hasExtraSrcRegAllocReq = 1, Constraints = "@earlyclobber $Rd" in
def t2STREXD : T2I_strex<0b11, (outs rGPR:$Rd),
- (ins rGPR:$Rt, rGPR:$Rt2, t2addrmode_reg:$addr),
+ (ins rGPR:$Rt, rGPR:$Rt2, addr_offset_none:$addr),
AddrModeNone, 4, NoItinerary,
"strexd", "\t$Rd, $Rt, $Rt2, $addr", "", [],
{?, ?, ?, ?}> {
@@ -2924,9 +3059,7 @@ def t2STREXD : T2I_strex<0b11, (outs rGPR:$Rd),
let Inst{11-8} = Rt2;
}
-// Clear-Exclusive is for disassembly only.
-def t2CLREX : T2XI<(outs), (ins), NoItinerary, "clrex",
- [/* For disassembly only; pattern left blank */]>,
+def t2CLREX : T2I<(outs), (ins), NoItinerary, "clrex", "", []>,
Requires<[IsThumb2, HasV7]> {
let Inst{31-16} = 0xf3bf;
let Inst{15-14} = 0b10;
@@ -2986,8 +3119,8 @@ def t2LDMIA_RET: t2PseudoExpand<(outs GPR:$wb), (ins GPR:$Rn, pred:$p,
let isBranch = 1, isTerminator = 1, isBarrier = 1 in {
let isPredicable = 1 in
-def t2B : T2XI<(outs), (ins uncondbrtarget:$target), IIC_Br,
- "b.w\t$target",
+def t2B : T2I<(outs), (ins uncondbrtarget:$target), IIC_Br,
+ "b", ".w\t$target",
[(br bb:$target)]> {
let Inst{31-27} = 0b11110;
let Inst{15-14} = 0b10;
@@ -3009,15 +3142,13 @@ def t2BR_JT : t2PseudoInst<(outs),
// FIXME: Add a non-pc based case that can be predicated.
def t2TBB_JT : t2PseudoInst<(outs),
- (ins GPR:$index, i32imm:$jt, i32imm:$id),
- 0, IIC_Br, []>;
+ (ins GPR:$index, i32imm:$jt, i32imm:$id), 0, IIC_Br, []>;
def t2TBH_JT : t2PseudoInst<(outs),
- (ins GPR:$index, i32imm:$jt, i32imm:$id),
- 0, IIC_Br, []>;
+ (ins GPR:$index, i32imm:$jt, i32imm:$id), 0, IIC_Br, []>;
-def t2TBB : T2I<(outs), (ins GPR:$Rn, GPR:$Rm), IIC_Br,
- "tbb", "\t[$Rn, $Rm]", []> {
+def t2TBB : T2I<(outs), (ins addrmode_tbb:$addr), IIC_Br,
+ "tbb", "\t$addr", []> {
bits<4> Rn;
bits<4> Rm;
let Inst{31-20} = 0b111010001101;
@@ -3025,10 +3156,12 @@ def t2TBB : T2I<(outs), (ins GPR:$Rn, GPR:$Rm), IIC_Br,
let Inst{15-5} = 0b11110000000;
let Inst{4} = 0; // B form
let Inst{3-0} = Rm;
+
+ let DecoderMethod = "DecodeThumbTableBranch";
}
-def t2TBH : T2I<(outs), (ins GPR:$Rn, GPR:$Rm), IIC_Br,
- "tbh", "\t[$Rn, $Rm, lsl #1]", []> {
+def t2TBH : T2I<(outs), (ins addrmode_tbh:$addr), IIC_Br,
+ "tbh", "\t$addr", []> {
bits<4> Rn;
bits<4> Rm;
let Inst{31-20} = 0b111010001101;
@@ -3036,13 +3169,15 @@ def t2TBH : T2I<(outs), (ins GPR:$Rn, GPR:$Rm), IIC_Br,
let Inst{15-5} = 0b11110000000;
let Inst{4} = 1; // H form
let Inst{3-0} = Rm;
+
+ let DecoderMethod = "DecodeThumbTableBranch";
}
} // isNotDuplicable, isIndirectBranch
} // isBranch, isTerminator, isBarrier
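
For readers unfamiliar with the table-branch instructions behind the new
addrmode_tbb/addrmode_tbh operands, a simplified behavioral C++ sketch
follows; "pc" stands for the value the PC reads as at the branch, and
alignment and ISA-state details are ignored.

#include <cassert>
#include <cstdint>

// TBB loads a byte from the table at Rn indexed by Rm and branches forward
// by twice that value; TBH does the same with halfword entries indexed by
// 2*Rm. With Rn = PC the table sits right after the instruction, which is
// how the t2TBB_JT/t2TBH_JT pseudos above use it for jump tables.
static uint32_t tbb(uint32_t pc, const uint8_t *table, uint32_t rm) {
  return pc + 2 * table[rm];
}
static uint32_t tbh(uint32_t pc, const uint16_t *table, uint32_t rm) {
  return pc + 2 * table[rm];
}

int main() {
  const uint8_t  bt[] = {0, 4, 9};
  const uint16_t ht[] = {0, 300};
  assert(tbb(0x8000, bt, 2) == 0x8012);
  assert(tbh(0x8000, ht, 1) == 0x8258);
  return 0;
}
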
// FIXME: should be able to write a pattern for ARMBrcond, but can't use
-// a two-value operand where a dag node expects two operands. :(
+// a two-value operand where a dag node expects two operands. :(
let isBranch = 1, isTerminator = 1 in
def t2Bcc : T2I<(outs), (ins brtarget:$target), IIC_Br,
"b", ".w\t$target",
@@ -3060,6 +3195,8 @@ def t2Bcc : T2I<(outs), (ins brtarget:$target), IIC_Br,
let Inst{13} = target{18};
let Inst{21-16} = target{17-12};
let Inst{10-0} = target{11-1};
+
+ let DecoderMethod = "DecodeThumb2BCCInstruction";
}
// Tail calls. The Darwin version of thumb tail calls uses a t2 branch, so
@@ -3068,9 +3205,10 @@ let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in {
// Darwin version.
let Defs = [R0, R1, R2, R3, R9, R12, QQQQ0, QQQQ2, QQQQ3, PC],
Uses = [SP] in
- def tTAILJMPd: tPseudoExpand<(outs), (ins uncondbrtarget:$dst, variable_ops),
+ def tTAILJMPd: tPseudoExpand<(outs),
+ (ins uncondbrtarget:$dst, pred:$p, variable_ops),
4, IIC_Br, [],
- (t2B uncondbrtarget:$dst)>,
+ (t2B uncondbrtarget:$dst, pred:$p)>,
Requires<[IsThumb2, IsDarwin]>;
}
@@ -3087,30 +3225,55 @@ def t2IT : Thumb2XI<(outs), (ins it_pred:$cc, it_mask:$mask),
bits<4> mask;
let Inst{7-4} = cc;
let Inst{3-0} = mask;
+
+ let DecoderMethod = "DecodeIT";
}
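
The DecodeIT method named above has to unpack the 4-bit mask into a
then/else pattern. Here is a self-contained C++ sketch of that rule, written
from the architectural description of IT rather than from the LLVM decoder.

#include <cassert>
#include <string>

// After the implicit 'T' for the first instruction, each mask bit from bit
// 3 down contributes a 'T' when it equals firstcond<0> and an 'E' otherwise,
// until the terminating 1 bit is reached (so mask 0b1000 is a plain IT).
static std::string decodeITMask(unsigned firstcond, unsigned mask) {
  assert(mask >= 1 && mask <= 15);
  unsigned fc0 = firstcond & 1;
  std::string s = "T";
  unsigned stop = 0;
  while (!((mask >> stop) & 1)) ++stop; // find the terminating 1 bit
  for (int bit = 3; bit > (int)stop; --bit)
    s += (((mask >> bit) & 1) == fc0) ? 'T' : 'E';
  return s;
}

int main() {
  assert(decodeITMask(0x0 /*EQ*/, 0x8) == "T");  // it eq
  assert(decodeITMask(0x0 /*EQ*/, 0x4) == "TT"); // itt eq
  assert(decodeITMask(0x0 /*EQ*/, 0xC) == "TE"); // ite eq
  return 0;
}
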
// Branch and Exchange Jazelle -- for disassembly only
// Rm = Inst{19-16}
-def t2BXJ : T2I<(outs), (ins rGPR:$func), NoItinerary, "bxj", "\t$func",
- [/* For disassembly only; pattern left blank */]> {
+def t2BXJ : T2I<(outs), (ins rGPR:$func), NoItinerary, "bxj", "\t$func", []> {
+ bits<4> func;
let Inst{31-27} = 0b11110;
let Inst{26} = 0;
let Inst{25-20} = 0b111100;
- let Inst{15-14} = 0b10;
- let Inst{12} = 0;
-
- bits<4> func;
let Inst{19-16} = func;
+ let Inst{15-0} = 0b1000111100000000;
+}
+
+// Compare and branch on zero / non-zero
+let isBranch = 1, isTerminator = 1 in {
+ def tCBZ : T1I<(outs), (ins tGPR:$Rn, t_cbtarget:$target), IIC_Br,
+ "cbz\t$Rn, $target", []>,
+ T1Misc<{0,0,?,1,?,?,?}>,
+ Requires<[IsThumb2]> {
+ // A8.6.27
+ bits<6> target;
+ bits<3> Rn;
+ let Inst{9} = target{5};
+ let Inst{7-3} = target{4-0};
+ let Inst{2-0} = Rn;
+ }
+
+ def tCBNZ : T1I<(outs), (ins tGPR:$Rn, t_cbtarget:$target), IIC_Br,
+ "cbnz\t$Rn, $target", []>,
+ T1Misc<{1,0,?,1,?,?,?}>,
+ Requires<[IsThumb2]> {
+ // A8.6.27
+ bits<6> target;
+ bits<3> Rn;
+ let Inst{9} = target{5};
+ let Inst{7-3} = target{4-0};
+ let Inst{2-0} = Rn;
+ }
}
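
A C++ sketch of the resulting cbz/cbnz encoding, read off the bit
assignments above; the PC-relative semantics are simplified away and the
helper is illustrative only.

#include <cassert>
#include <cstdint>

// The six operand bits are the forward branch offset in halfwords: Inst{9}
// takes target{5} and Inst{7-3} take target{4-0}, for a reach of 0..126
// bytes. Base opcode 0xB100; bit 11 selects cbnz.
static uint16_t encodeCB(unsigned rn, unsigned byteOffset, bool nonzero) {
  assert(rn < 8 && byteOffset <= 126 && (byteOffset & 1) == 0);
  unsigned t = byteOffset >> 1; // the bits<6> target operand
  return (uint16_t)(0xB100 | (nonzero ? 0x0800 : 0) |
                    ((t >> 5) << 9) | ((t & 0x1F) << 3) | rn);
}

int main() {
  assert(encodeCB(2, 40, false) == 0xB1A2); // cbz r2, <label 40 bytes ahead>
  return 0;
}
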
-// Change Processor State is a system instruction -- for disassembly and
-// parsing only.
+
+// Change Processor State is a system instruction.
// FIXME: Since the asm parser has currently no clean way to handle optional
// operands, create 3 versions of the same instruction. Once there's a clean
// framework to represent optional operands, change this behavior.
class t2CPS<dag iops, string asm_op> : T2XI<(outs), iops, NoItinerary,
- !strconcat("cps", asm_op),
- [/* For disassembly only; pattern left blank */]> {
+ !strconcat("cps", asm_op), []> {
bits<2> imod;
bits<3> iflags;
bits<5> mode;
@@ -3126,6 +3289,7 @@ class t2CPS<dag iops, string asm_op> : T2XI<(outs), iops, NoItinerary,
let Inst{8} = M;
let Inst{7-5} = iflags;
let Inst{4-0} = mode;
+ let DecoderMethod = "DecodeT2CPSInstruction";
}
let M = 1 in
@@ -3135,14 +3299,12 @@ let mode = 0, M = 0 in
def t2CPS2p : t2CPS<(ins imod_op:$imod, iflags_op:$iflags),
"$imod.w\t$iflags">;
let imod = 0, iflags = 0, M = 1 in
- def t2CPS1p : t2CPS<(ins i32imm:$mode), "\t$mode">;
+ def t2CPS1p : t2CPS<(ins imm0_31:$mode), "\t$mode">;
// A6.3.4 Branches and miscellaneous control
// Table A6-14 Change Processor State, and hint instructions
-// Helper class for disassembly only.
class T2I_hint<bits<8> op7_0, string opc, string asm>
- : T2I<(outs), (ins), NoItinerary, opc, asm,
- [/* For disassembly only; pattern left blank */]> {
+ : T2I<(outs), (ins), NoItinerary, opc, asm, []> {
let Inst{31-20} = 0xf3a;
let Inst{19-16} = 0b1111;
let Inst{15-14} = 0b10;
@@ -3158,20 +3320,17 @@ def t2WFI : T2I_hint<0b00000011, "wfi", ".w">;
def t2SEV : T2I_hint<0b00000100, "sev", ".w">;
def t2DBG : T2I<(outs), (ins imm0_15:$opt), NoItinerary, "dbg", "\t$opt", []> {
- let Inst{31-20} = 0xf3a;
- let Inst{15-14} = 0b10;
- let Inst{12} = 0;
- let Inst{10-8} = 0b000;
- let Inst{7-4} = 0b1111;
-
bits<4> opt;
+ let Inst{31-20} = 0b111100111010;
+ let Inst{19-16} = 0b1111;
+ let Inst{15-8} = 0b10000000;
+ let Inst{7-4} = 0b1111;
let Inst{3-0} = opt;
}
-// Secure Monitor Call is a system instruction -- for disassembly only
+// Secure Monitor Call is a system instruction.
// Option = Inst{19-16}
-def t2SMC : T2I<(outs), (ins i32imm:$opt), NoItinerary, "smc", "\t$opt",
- [/* For disassembly only; pattern left blank */]> {
+def t2SMC : T2I<(outs), (ins imm0_15:$opt), NoItinerary, "smc", "\t$opt", []> {
let Inst{31-27} = 0b11110;
let Inst{26-20} = 0b1111111;
let Inst{15-12} = 0b1000;
@@ -3180,32 +3339,30 @@ def t2SMC : T2I<(outs), (ins i32imm:$opt), NoItinerary, "smc", "\t$opt",
let Inst{19-16} = opt;
}
-class T2SRS<bits<12> op31_20,
- dag oops, dag iops, InstrItinClass itin,
- string opc, string asm, list<dag> pattern>
+class T2SRS<bits<2> Op, bit W, dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
: T2I<oops, iops, itin, opc, asm, pattern> {
- let Inst{31-20} = op31_20{11-0};
-
bits<5> mode;
+ let Inst{31-25} = 0b1110100;
+ let Inst{24-23} = Op;
+ let Inst{22} = 0;
+ let Inst{21} = W;
+ let Inst{20-16} = 0b01101;
+ let Inst{15-5} = 0b11000000000;
let Inst{4-0} = mode{4-0};
}
-// Store Return State is a system instruction -- for disassembly only
-def t2SRSDBW : T2SRS<0b111010000010,
- (outs),(ins i32imm:$mode),NoItinerary,"srsdb","\tsp!, $mode",
- [/* For disassembly only; pattern left blank */]>;
-def t2SRSDB : T2SRS<0b111010000000,
- (outs),(ins i32imm:$mode),NoItinerary,"srsdb","\tsp, $mode",
- [/* For disassembly only; pattern left blank */]>;
-def t2SRSIAW : T2SRS<0b111010011010,
- (outs),(ins i32imm:$mode),NoItinerary,"srsia","\tsp!, $mode",
- [/* For disassembly only; pattern left blank */]>;
-def t2SRSIA : T2SRS<0b111010011000,
- (outs), (ins i32imm:$mode),NoItinerary,"srsia","\tsp, $mode",
- [/* For disassembly only; pattern left blank */]>;
-
-// Return From Exception is a system instruction -- for disassembly only
+// Store Return State is a system instruction.
+def t2SRSDB_UPD : T2SRS<0b00, 1, (outs), (ins imm0_31:$mode), NoItinerary,
+ "srsdb", "\tsp!, $mode", []>;
+def t2SRSDB : T2SRS<0b00, 0, (outs), (ins imm0_31:$mode), NoItinerary,
+ "srsdb","\tsp, $mode", []>;
+def t2SRSIA_UPD : T2SRS<0b11, 1, (outs), (ins imm0_31:$mode), NoItinerary,
+ "srsia","\tsp!, $mode", []>;
+def t2SRSIA : T2SRS<0b11, 0, (outs), (ins imm0_31:$mode), NoItinerary,
+ "srsia","\tsp, $mode", []>;
+// Return From Exception is a system instruction.
class T2RFE<bits<12> op31_20, dag oops, dag iops, InstrItinClass itin,
string opc, string asm, list<dag> pattern>
: T2I<oops, iops, itin, opc, asm, pattern> {
@@ -3277,53 +3434,186 @@ def t2LDRpci_pic : PseudoInst<(outs rGPR:$dst), (ins i32imm:$addr, pclabel:$cp),
imm:$cp))]>,
Requires<[IsThumb2]>;
+// Pseudo instruction that combines movs + predicated rsbmi
+// to implement integer ABS
+let usesCustomInserter = 1, Defs = [CPSR] in {
+def t2ABS : PseudoInst<(outs rGPR:$dst), (ins rGPR:$src),
+ NoItinerary, []>, Requires<[IsThumb2]>;
+}
+
+//===----------------------------------------------------------------------===//
+// Coprocessor load/store -- for disassembly only
+//
+class T2CI<bits<4> op31_28, dag oops, dag iops, string opc, string asm>
+ : T2I<oops, iops, NoItinerary, opc, asm, []> {
+ let Inst{31-28} = op31_28;
+ let Inst{27-25} = 0b110;
+}
+
+multiclass t2LdStCop<bits<4> op31_28, bit load, bit Dbit, string asm> {
+ def _OFFSET : T2CI<op31_28,
+ (outs), (ins p_imm:$cop, c_imm:$CRd, addrmode5:$addr),
+ asm, "\t$cop, $CRd, $addr"> {
+ bits<13> addr;
+ bits<4> cop;
+ bits<4> CRd;
+ let Inst{24} = 1; // P = 1
+ let Inst{23} = addr{8};
+ let Inst{22} = Dbit;
+ let Inst{21} = 0; // W = 0
+ let Inst{20} = load;
+ let Inst{19-16} = addr{12-9};
+ let Inst{15-12} = CRd;
+ let Inst{11-8} = cop;
+ let Inst{7-0} = addr{7-0};
+ let DecoderMethod = "DecodeCopMemInstruction";
+ }
+ def _PRE : T2CI<op31_28,
+ (outs), (ins p_imm:$cop, c_imm:$CRd, addrmode5:$addr),
+ asm, "\t$cop, $CRd, $addr!"> {
+ bits<13> addr;
+ bits<4> cop;
+ bits<4> CRd;
+ let Inst{24} = 1; // P = 1
+ let Inst{23} = addr{8};
+ let Inst{22} = Dbit;
+ let Inst{21} = 1; // W = 1
+ let Inst{20} = load;
+ let Inst{19-16} = addr{12-9};
+ let Inst{15-12} = CRd;
+ let Inst{11-8} = cop;
+ let Inst{7-0} = addr{7-0};
+ let DecoderMethod = "DecodeCopMemInstruction";
+ }
+ def _POST: T2CI<op31_28,
+ (outs), (ins p_imm:$cop, c_imm:$CRd, addr_offset_none:$addr,
+ postidx_imm8s4:$offset),
+ asm, "\t$cop, $CRd, $addr, $offset"> {
+ bits<9> offset;
+ bits<4> addr;
+ bits<4> cop;
+ bits<4> CRd;
+ let Inst{24} = 0; // P = 0
+ let Inst{23} = offset{8};
+ let Inst{22} = Dbit;
+ let Inst{21} = 1; // W = 1
+ let Inst{20} = load;
+ let Inst{19-16} = addr;
+ let Inst{15-12} = CRd;
+ let Inst{11-8} = cop;
+ let Inst{7-0} = offset{7-0};
+ let DecoderMethod = "DecodeCopMemInstruction";
+ }
+ def _OPTION : T2CI<op31_28, (outs),
+ (ins p_imm:$cop, c_imm:$CRd, addr_offset_none:$addr,
+ coproc_option_imm:$option),
+ asm, "\t$cop, $CRd, $addr, $option"> {
+ bits<8> option;
+ bits<4> addr;
+ bits<4> cop;
+ bits<4> CRd;
+ let Inst{24} = 0; // P = 0
+ let Inst{23} = 1; // U = 1
+ let Inst{22} = Dbit;
+ let Inst{21} = 0; // W = 0
+ let Inst{20} = load;
+ let Inst{19-16} = addr;
+ let Inst{15-12} = CRd;
+ let Inst{11-8} = cop;
+ let Inst{7-0} = option;
+ let DecoderMethod = "DecodeCopMemInstruction";
+ }
+}
+
+defm t2LDC : t2LdStCop<0b1110, 1, 0, "ldc">;
+defm t2LDCL : t2LdStCop<0b1110, 1, 1, "ldcl">;
+defm t2STC : t2LdStCop<0b1110, 0, 0, "stc">;
+defm t2STCL : t2LdStCop<0b1110, 0, 1, "stcl">;
+defm t2LDC2 : t2LdStCop<0b1111, 1, 0, "ldc2">;
+defm t2LDC2L : t2LdStCop<0b1111, 1, 1, "ldc2l">;
+defm t2STC2 : t2LdStCop<0b1111, 0, 0, "stc2">;
+defm t2STC2L : t2LdStCop<0b1111, 0, 1, "stc2l">;
+
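
For reference, a self-contained C++ sketch of the 13-bit addrmode5 operand
packing used by the _OFFSET and _PRE forms above; the helper is illustrative
only.

#include <cassert>
#include <cstdint>

// Bits 12-9 hold the base register, bit 8 is the U (add) flag that lands in
// Inst{23}, and bits 7-0 hold the byte offset divided by 4.
static uint16_t packAddrMode5(unsigned rn, int byteOffset) {
  assert(rn < 16 && byteOffset % 4 == 0 &&
         byteOffset >= -1020 && byteOffset <= 1020);
  unsigned add = byteOffset >= 0;
  unsigned imm8 = (unsigned)(byteOffset >= 0 ? byteOffset : -byteOffset) / 4;
  return (uint16_t)((rn << 9) | (add << 8) | imm8);
}

int main() {
  // ldc p14, c5, [r3, #-16] -> Rn = 3, U = 0, imm8 = 4
  assert(packAddrMode5(3, -16) == ((3u << 9) | 4u));
  return 0;
}
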
+
//===----------------------------------------------------------------------===//
// Move between special register and ARM core register -- for disassembly only
//
+// Move to ARM core register from Special Register
-class T2SpecialReg<bits<12> op31_20, bits<2> op15_14, bits<1> op12,
- dag oops, dag iops, InstrItinClass itin,
- string opc, string asm, list<dag> pattern>
- : T2I<oops, iops, itin, opc, asm, pattern> {
- let Inst{31-20} = op31_20{11-0};
- let Inst{15-14} = op15_14{1-0};
- let Inst{12} = op12{0};
+// A/R class MRS.
+//
+// A/R class can only move from CPSR or SPSR.
+def t2MRS_AR : T2I<(outs GPR:$Rd), (ins), NoItinerary, "mrs", "\t$Rd, apsr", []>,
+ Requires<[IsThumb2,IsARClass]> {
+ bits<4> Rd;
+ let Inst{31-12} = 0b11110011111011111000;
+ let Inst{11-8} = Rd;
+ let Inst{7-0} = 0b0000;
}
-class T2MRS<bits<12> op31_20, bits<2> op15_14, bits<1> op12,
- dag oops, dag iops, InstrItinClass itin,
- string opc, string asm, list<dag> pattern>
- : T2SpecialReg<op31_20, op15_14, op12, oops, iops, itin, opc, asm, pattern> {
+def : t2InstAlias<"mrs${p} $Rd, cpsr", (t2MRS_AR GPR:$Rd, pred:$p)>;
+
+def t2MRSsys_AR: T2I<(outs GPR:$Rd), (ins), NoItinerary, "mrs", "\t$Rd, spsr", []>,
+ Requires<[IsThumb2,IsARClass]> {
bits<4> Rd;
+ let Inst{31-12} = 0b11110011111111111000;
+ let Inst{11-8} = Rd;
+ let Inst{7-0} = 0b0000;
+}
+
+// M class MRS.
+//
+// This MRS has a mask field in bits 7-0 and can take more values than
+// the A/R class (a full msr_mask).
+def t2MRS_M : T2I<(outs rGPR:$Rd), (ins msr_mask:$mask), NoItinerary,
+ "mrs", "\t$Rd, $mask", []>,
+ Requires<[IsThumb2,IsMClass]> {
+ bits<4> Rd;
+ bits<8> mask;
+ let Inst{31-12} = 0b11110011111011111000;
let Inst{11-8} = Rd;
let Inst{19-16} = 0b1111;
+ let Inst{7-0} = mask;
}
-def t2MRS : T2MRS<0b111100111110, 0b10, 0,
- (outs rGPR:$Rd), (ins), NoItinerary, "mrs", "\t$Rd, cpsr",
- [/* For disassembly only; pattern left blank */]>;
-def t2MRSsys : T2MRS<0b111100111111, 0b10, 0,
- (outs rGPR:$Rd), (ins), NoItinerary, "mrs", "\t$Rd, spsr",
- [/* For disassembly only; pattern left blank */]>;
// Move from ARM core register to Special Register
//
+// A/R class MSR.
+//
// No need to have both system and application versions; the encodings are the
// same and the assembly parser has no way to distinguish between them. The mask
// operand carries the special register (R bit) in bit 4, and bits 3-0 contain
// the mask of the fields to be accessed in the special register.
-def t2MSR : T2SpecialReg<0b111100111000 /* op31-20 */, 0b10 /* op15-14 */,
- 0 /* op12 */, (outs), (ins msr_mask:$mask, rGPR:$Rn),
- NoItinerary, "msr", "\t$mask, $Rn",
- [/* For disassembly only; pattern left blank */]> {
+def t2MSR_AR : T2I<(outs), (ins msr_mask:$mask, rGPR:$Rn),
+ NoItinerary, "msr", "\t$mask, $Rn", []>,
+ Requires<[IsThumb2,IsARClass]> {
bits<5> mask;
bits<4> Rn;
- let Inst{19-16} = Rn;
+ let Inst{31-21} = 0b11110011100;
let Inst{20} = mask{4}; // R Bit
- let Inst{13} = 0b0;
+ let Inst{19-16} = Rn;
+ let Inst{15-12} = 0b1000;
let Inst{11-8} = mask{3-0};
+ let Inst{7-0} = 0;
}
+// M class MSR.
+//
+// Move from ARM core register to Special Register
+def t2MSR_M : T2I<(outs), (ins msr_mask:$SYSm, rGPR:$Rn),
+ NoItinerary, "msr", "\t$SYSm, $Rn", []>,
+ Requires<[IsThumb2,IsMClass]> {
+ bits<8> SYSm;
+ bits<4> Rn;
+ let Inst{31-21} = 0b11110011100;
+ let Inst{20} = 0b0;
+ let Inst{19-16} = Rn;
+ let Inst{15-12} = 0b1000;
+ let Inst{7-0} = SYSm;
+}
+
+
//===----------------------------------------------------------------------===//
// Move between coprocessor and ARM core register
//
@@ -3389,13 +3679,12 @@ def t2MCR2 : t2MovRCopro<0b1111, "mcr2", 0,
/* from coprocessor to ARM core register */
def t2MRC : t2MovRCopro<0b1110, "mrc", 1,
- (outs GPR:$Rt),
- (ins p_imm:$cop, i32imm:$opc1, c_imm:$CRn, c_imm:$CRm, i32imm:$opc2),
- []>;
+ (outs GPR:$Rt), (ins p_imm:$cop, imm0_7:$opc1, c_imm:$CRn,
+ c_imm:$CRm, imm0_7:$opc2), []>;
def t2MRC2 : t2MovRCopro<0b1111, "mrc2", 1,
- (outs GPR:$Rt), (ins p_imm:$cop, i32imm:$opc1, c_imm:$CRn,
- c_imm:$CRm, i32imm:$opc2), []>;
+ (outs GPR:$Rt), (ins p_imm:$cop, imm0_7:$opc1, c_imm:$CRn,
+ c_imm:$CRm, imm0_7:$opc2), []>;
def : T2v6Pat<(int_arm_mrc imm:$cop, imm:$opc1, imm:$CRn, imm:$CRm, imm:$opc2),
(t2MRC imm:$cop, imm:$opc1, imm:$CRn, imm:$CRm, imm:$opc2)>;
@@ -3465,3 +3754,269 @@ def t2CDP2 : T2Cop<0b1111, (outs), (ins p_imm:$cop, imm0_15:$opc1,
let Inst{19-16} = CRn;
let Inst{23-20} = opc1;
}
+
+
+
+//===----------------------------------------------------------------------===//
+// Non-Instruction Patterns
+//
+
+// SXT/UXT with no rotate
+let AddedComplexity = 16 in {
+def : T2Pat<(and rGPR:$Rm, 0x000000FF), (t2UXTB rGPR:$Rm, 0)>,
+ Requires<[IsThumb2]>;
+def : T2Pat<(and rGPR:$Rm, 0x0000FFFF), (t2UXTH rGPR:$Rm, 0)>,
+ Requires<[IsThumb2]>;
+def : T2Pat<(and rGPR:$Rm, 0x00FF00FF), (t2UXTB16 rGPR:$Rm, 0)>,
+ Requires<[HasT2ExtractPack, IsThumb2]>;
+def : T2Pat<(add rGPR:$Rn, (and rGPR:$Rm, 0x00FF)),
+ (t2UXTAB rGPR:$Rn, rGPR:$Rm, 0)>,
+ Requires<[HasT2ExtractPack, IsThumb2]>;
+def : T2Pat<(add rGPR:$Rn, (and rGPR:$Rm, 0xFFFF)),
+ (t2UXTAH rGPR:$Rn, rGPR:$Rm, 0)>,
+ Requires<[HasT2ExtractPack, IsThumb2]>;
+}
+
+def : T2Pat<(sext_inreg rGPR:$Src, i8), (t2SXTB rGPR:$Src, 0)>,
+ Requires<[IsThumb2]>;
+def : T2Pat<(sext_inreg rGPR:$Src, i16), (t2SXTH rGPR:$Src, 0)>,
+ Requires<[IsThumb2]>;
+def : T2Pat<(add rGPR:$Rn, (sext_inreg rGPR:$Rm, i8)),
+ (t2SXTAB rGPR:$Rn, rGPR:$Rm, 0)>,
+ Requires<[HasT2ExtractPack, IsThumb2]>;
+def : T2Pat<(add rGPR:$Rn, (sext_inreg rGPR:$Rm, i16)),
+ (t2SXTAH rGPR:$Rn, rGPR:$Rm, 0)>,
+ Requires<[HasT2ExtractPack, IsThumb2]>;
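+
A quick self-contained C++ sanity check of why these selections are sound
when the rotate amount is 0, illustrative only:

#include <cassert>
#include <cstdint>

int main() {
  uint32_t x = 0x12AB34CDu;
  // With rotate 0, uxtb16 is exactly an AND with 0x00FF00FF.
  assert((x & 0x00FF00FFu) == 0x00AB00CDu);        // t2UXTB16 x, 0
  // sxtab adds a sign-extended byte: sext_inreg i8 of 0xFF is -1.
  int32_t n = 0x77;
  uint32_t m = 0xFF;
  assert(n + (int32_t)(int8_t)(uint8_t)m == 0x76); // t2SXTAB n, m, 0
  return 0;
}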
+
+// Atomic load/store patterns
+def : T2Pat<(atomic_load_8 t2addrmode_imm12:$addr),
+ (t2LDRBi12 t2addrmode_imm12:$addr)>;
+def : T2Pat<(atomic_load_8 t2addrmode_negimm8:$addr),
+ (t2LDRBi8 t2addrmode_negimm8:$addr)>;
+def : T2Pat<(atomic_load_8 t2addrmode_so_reg:$addr),
+ (t2LDRBs t2addrmode_so_reg:$addr)>;
+def : T2Pat<(atomic_load_16 t2addrmode_imm12:$addr),
+ (t2LDRHi12 t2addrmode_imm12:$addr)>;
+def : T2Pat<(atomic_load_16 t2addrmode_negimm8:$addr),
+ (t2LDRHi8 t2addrmode_negimm8:$addr)>;
+def : T2Pat<(atomic_load_16 t2addrmode_so_reg:$addr),
+ (t2LDRHs t2addrmode_so_reg:$addr)>;
+def : T2Pat<(atomic_load_32 t2addrmode_imm12:$addr),
+ (t2LDRi12 t2addrmode_imm12:$addr)>;
+def : T2Pat<(atomic_load_32 t2addrmode_negimm8:$addr),
+ (t2LDRi8 t2addrmode_negimm8:$addr)>;
+def : T2Pat<(atomic_load_32 t2addrmode_so_reg:$addr),
+ (t2LDRs t2addrmode_so_reg:$addr)>;
+def : T2Pat<(atomic_store_8 t2addrmode_imm12:$addr, GPR:$val),
+ (t2STRBi12 GPR:$val, t2addrmode_imm12:$addr)>;
+def : T2Pat<(atomic_store_8 t2addrmode_negimm8:$addr, GPR:$val),
+ (t2STRBi8 GPR:$val, t2addrmode_negimm8:$addr)>;
+def : T2Pat<(atomic_store_8 t2addrmode_so_reg:$addr, GPR:$val),
+ (t2STRBs GPR:$val, t2addrmode_so_reg:$addr)>;
+def : T2Pat<(atomic_store_16 t2addrmode_imm12:$addr, GPR:$val),
+ (t2STRHi12 GPR:$val, t2addrmode_imm12:$addr)>;
+def : T2Pat<(atomic_store_16 t2addrmode_negimm8:$addr, GPR:$val),
+ (t2STRHi8 GPR:$val, t2addrmode_negimm8:$addr)>;
+def : T2Pat<(atomic_store_16 t2addrmode_so_reg:$addr, GPR:$val),
+ (t2STRHs GPR:$val, t2addrmode_so_reg:$addr)>;
+def : T2Pat<(atomic_store_32 t2addrmode_imm12:$addr, GPR:$val),
+ (t2STRi12 GPR:$val, t2addrmode_imm12:$addr)>;
+def : T2Pat<(atomic_store_32 t2addrmode_negimm8:$addr, GPR:$val),
+ (t2STRi8 GPR:$val, t2addrmode_negimm8:$addr)>;
+def : T2Pat<(atomic_store_32 t2addrmode_so_reg:$addr, GPR:$val),
+ (t2STRs GPR:$val, t2addrmode_so_reg:$addr)>;
+
+
+//===----------------------------------------------------------------------===//
+// Assembler aliases
+//
+
+// Aliases for ADC without the ".w" optional width specifier.
+def : t2InstAlias<"adc${s}${p} $Rd, $Rn, $Rm",
+ (t2ADCrr rGPR:$Rd, rGPR:$Rn, rGPR:$Rm, pred:$p, cc_out:$s)>;
+def : t2InstAlias<"adc${s}${p} $Rd, $Rn, $ShiftedRm",
+ (t2ADCrs rGPR:$Rd, rGPR:$Rn, t2_so_reg:$ShiftedRm,
+ pred:$p, cc_out:$s)>;
+
+// Aliases for SBC without the ".w" optional width specifier.
+def : t2InstAlias<"sbc${s}${p} $Rd, $Rn, $Rm",
+ (t2SBCrr rGPR:$Rd, rGPR:$Rn, rGPR:$Rm, pred:$p, cc_out:$s)>;
+def : t2InstAlias<"sbc${s}${p} $Rd, $Rn, $ShiftedRm",
+ (t2SBCrs rGPR:$Rd, rGPR:$Rn, t2_so_reg:$ShiftedRm,
+ pred:$p, cc_out:$s)>;
+
+// Aliases for ADD without the ".w" optional width specifier.
+def : t2InstAlias<"add${s}${p} $Rd, $Rn, $imm",
+ (t2ADDri GPRnopc:$Rd, GPRnopc:$Rn, t2_so_imm:$imm, pred:$p, cc_out:$s)>;
+def : t2InstAlias<"add${p} $Rd, $Rn, $imm",
+ (t2ADDri12 GPRnopc:$Rd, GPR:$Rn, imm0_4095:$imm, pred:$p)>;
+def : t2InstAlias<"add${s}${p} $Rd, $Rn, $Rm",
+ (t2ADDrr GPRnopc:$Rd, GPRnopc:$Rn, rGPR:$Rm, pred:$p, cc_out:$s)>;
+def : t2InstAlias<"add${s}${p} $Rd, $Rn, $ShiftedRm",
+ (t2ADDrs GPRnopc:$Rd, GPRnopc:$Rn, t2_so_reg:$ShiftedRm,
+ pred:$p, cc_out:$s)>;
+
+// Aliases for SUB without the ".w" optional width specifier.
+def : t2InstAlias<"sub${s}${p} $Rd, $Rn, $imm",
+ (t2SUBri GPRnopc:$Rd, GPRnopc:$Rn, t2_so_imm:$imm, pred:$p, cc_out:$s)>;
+def : t2InstAlias<"sub${p} $Rd, $Rn, $imm",
+ (t2SUBri12 GPRnopc:$Rd, GPR:$Rn, imm0_4095:$imm, pred:$p)>;
+def : t2InstAlias<"sub${s}${p} $Rd, $Rn, $Rm",
+ (t2SUBrr GPRnopc:$Rd, GPRnopc:$Rn, rGPR:$Rm, pred:$p, cc_out:$s)>;
+def : t2InstAlias<"sub${s}${p} $Rd, $Rn, $ShiftedRm",
+ (t2SUBrs GPRnopc:$Rd, GPRnopc:$Rn, t2_so_reg:$ShiftedRm,
+ pred:$p, cc_out:$s)>;
+
+// Alias for compares without the ".w" optional width specifier.
+def : t2InstAlias<"cmn${p} $Rn, $Rm",
+ (t2CMNzrr GPRnopc:$Rn, rGPR:$Rm, pred:$p)>;
+def : t2InstAlias<"teq${p} $Rn, $Rm",
+ (t2TEQrr GPRnopc:$Rn, rGPR:$Rm, pred:$p)>;
+def : t2InstAlias<"tst${p} $Rn, $Rm",
+ (t2TSTrr GPRnopc:$Rn, rGPR:$Rm, pred:$p)>;
+
+// Memory barriers
+def : InstAlias<"dmb", (t2DMB 0xf)>, Requires<[IsThumb2, HasDB]>;
+def : InstAlias<"dsb", (t2DSB 0xf)>, Requires<[IsThumb2, HasDB]>;
+def : InstAlias<"isb", (t2ISB 0xf)>, Requires<[IsThumb2, HasDB]>;
+
+// Alias for LDR, LDRB, LDRH, LDRSB, and LDRSH without the ".w" optional
+// width specifier.
+def : t2InstAlias<"ldr${p} $Rt, $addr",
+ (t2LDRi12 GPR:$Rt, t2addrmode_imm12:$addr, pred:$p)>;
+def : t2InstAlias<"ldrb${p} $Rt, $addr",
+ (t2LDRBi12 rGPR:$Rt, t2addrmode_imm12:$addr, pred:$p)>;
+def : t2InstAlias<"ldrh${p} $Rt, $addr",
+ (t2LDRHi12 rGPR:$Rt, t2addrmode_imm12:$addr, pred:$p)>;
+def : t2InstAlias<"ldrsb${p} $Rt, $addr",
+ (t2LDRSBi12 rGPR:$Rt, t2addrmode_imm12:$addr, pred:$p)>;
+def : t2InstAlias<"ldrsh${p} $Rt, $addr",
+ (t2LDRSHi12 rGPR:$Rt, t2addrmode_imm12:$addr, pred:$p)>;
+
+def : t2InstAlias<"ldr${p} $Rt, $addr",
+ (t2LDRs GPR:$Rt, t2addrmode_so_reg:$addr, pred:$p)>;
+def : t2InstAlias<"ldrb${p} $Rt, $addr",
+ (t2LDRBs rGPR:$Rt, t2addrmode_so_reg:$addr, pred:$p)>;
+def : t2InstAlias<"ldrh${p} $Rt, $addr",
+ (t2LDRHs rGPR:$Rt, t2addrmode_so_reg:$addr, pred:$p)>;
+def : t2InstAlias<"ldrsb${p} $Rt, $addr",
+ (t2LDRSBs rGPR:$Rt, t2addrmode_so_reg:$addr, pred:$p)>;
+def : t2InstAlias<"ldrsh${p} $Rt, $addr",
+ (t2LDRSHs rGPR:$Rt, t2addrmode_so_reg:$addr, pred:$p)>;
+
+// Alias for MVN without the ".w" optional width specifier.
+def : t2InstAlias<"mvn${s}${p} $Rd, $Rm",
+ (t2MVNr rGPR:$Rd, rGPR:$Rm, pred:$p, cc_out:$s)>;
+def : t2InstAlias<"mvn${s}${p} $Rd, $ShiftedRm",
+ (t2MVNs rGPR:$Rd, t2_so_reg:$ShiftedRm, pred:$p, cc_out:$s)>;
+
+// PKHBT/PKHTB with default shift amount. PKHTB is equivalent to PKHBT when the
+// shift amount is zero (i.e., unspecified).
+def : InstAlias<"pkhbt${p} $Rd, $Rn, $Rm",
+ (t2PKHBT rGPR:$Rd, rGPR:$Rn, rGPR:$Rm, 0, pred:$p)>,
+ Requires<[HasT2ExtractPack, IsThumb2]>;
+def : InstAlias<"pkhtb${p} $Rd, $Rn, $Rm",
+ (t2PKHBT rGPR:$Rd, rGPR:$Rn, rGPR:$Rm, 0, pred:$p)>,
+ Requires<[HasT2ExtractPack, IsThumb2]>;
+
+// PUSH/POP aliases for STM/LDM
+def : t2InstAlias<"push${p}.w $regs", (t2STMDB_UPD SP, pred:$p, reglist:$regs)>;
+def : t2InstAlias<"push${p} $regs", (t2STMDB_UPD SP, pred:$p, reglist:$regs)>;
+def : t2InstAlias<"pop${p}.w $regs", (t2LDMIA_UPD SP, pred:$p, reglist:$regs)>;
+def : t2InstAlias<"pop${p} $regs", (t2LDMIA_UPD SP, pred:$p, reglist:$regs)>;
+
+// Alias for REV/REV16/REVSH without the ".w" optional width specifier.
+def : t2InstAlias<"rev${p} $Rd, $Rm", (t2REV rGPR:$Rd, rGPR:$Rm, pred:$p)>;
+def : t2InstAlias<"rev16${p} $Rd, $Rm", (t2REV16 rGPR:$Rd, rGPR:$Rm, pred:$p)>;
+def : t2InstAlias<"revsh${p} $Rd, $Rm", (t2REVSH rGPR:$Rd, rGPR:$Rm, pred:$p)>;
+
+
+// Alias for RSB without the ".w" optional width specifier, and with optional
+// implied destination register.
+def : t2InstAlias<"rsb${s}${p} $Rd, $Rn, $imm",
+ (t2RSBri rGPR:$Rd, rGPR:$Rn, t2_so_imm:$imm, pred:$p, cc_out:$s)>;
+def : t2InstAlias<"rsb${s}${p} $Rdn, $imm",
+ (t2RSBri rGPR:$Rdn, rGPR:$Rdn, t2_so_imm:$imm, pred:$p, cc_out:$s)>;
+def : t2InstAlias<"rsb${s}${p} $Rdn, $Rm",
+ (t2RSBrr rGPR:$Rdn, rGPR:$Rdn, rGPR:$Rm, pred:$p, cc_out:$s)>;
+def : t2InstAlias<"rsb${s}${p} $Rdn, $ShiftedRm",
+ (t2RSBrs rGPR:$Rdn, rGPR:$Rdn, t2_so_reg:$ShiftedRm, pred:$p,
+ cc_out:$s)>;
+
+// SSAT/USAT optional shift operand.
+def : t2InstAlias<"ssat${p} $Rd, $sat_imm, $Rn",
+ (t2SSAT rGPR:$Rd, imm1_32:$sat_imm, rGPR:$Rn, 0, pred:$p)>;
+def : t2InstAlias<"usat${p} $Rd, $sat_imm, $Rn",
+ (t2USAT rGPR:$Rd, imm0_31:$sat_imm, rGPR:$Rn, 0, pred:$p)>;
+
+// STM w/o the .w suffix.
+def : t2InstAlias<"stm${p} $Rn, $regs",
+ (t2STMIA GPR:$Rn, pred:$p, reglist:$regs)>;
+
+// Alias for STR, STRB, and STRH without the ".w" optional
+// width specifier.
+def : t2InstAlias<"str${p} $Rt, $addr",
+ (t2STRi12 GPR:$Rt, t2addrmode_imm12:$addr, pred:$p)>;
+def : t2InstAlias<"strb${p} $Rt, $addr",
+ (t2STRBi12 rGPR:$Rt, t2addrmode_imm12:$addr, pred:$p)>;
+def : t2InstAlias<"strh${p} $Rt, $addr",
+ (t2STRHi12 rGPR:$Rt, t2addrmode_imm12:$addr, pred:$p)>;
+
+def : t2InstAlias<"str${p} $Rt, $addr",
+ (t2STRs GPR:$Rt, t2addrmode_so_reg:$addr, pred:$p)>;
+def : t2InstAlias<"strb${p} $Rt, $addr",
+ (t2STRBs rGPR:$Rt, t2addrmode_so_reg:$addr, pred:$p)>;
+def : t2InstAlias<"strh${p} $Rt, $addr",
+ (t2STRHs rGPR:$Rt, t2addrmode_so_reg:$addr, pred:$p)>;
+
+// Extend instruction optional rotate operand.
+def : t2InstAlias<"sxtab${p} $Rd, $Rn, $Rm",
+ (t2SXTAB rGPR:$Rd, rGPR:$Rn, rGPR:$Rm, 0, pred:$p)>;
+def : t2InstAlias<"sxtah${p} $Rd, $Rn, $Rm",
+ (t2SXTAH rGPR:$Rd, rGPR:$Rn, rGPR:$Rm, 0, pred:$p)>;
+def : t2InstAlias<"sxtab16${p} $Rd, $Rn, $Rm",
+ (t2SXTAB16 rGPR:$Rd, rGPR:$Rn, rGPR:$Rm, 0, pred:$p)>;
+
+def : t2InstAlias<"sxtb${p} $Rd, $Rm",
+ (t2SXTB rGPR:$Rd, rGPR:$Rm, 0, pred:$p)>;
+def : t2InstAlias<"sxtb16${p} $Rd, $Rm",
+ (t2SXTB16 rGPR:$Rd, rGPR:$Rm, 0, pred:$p)>;
+def : t2InstAlias<"sxth${p} $Rd, $Rm",
+ (t2SXTH rGPR:$Rd, rGPR:$Rm, 0, pred:$p)>;
+def : t2InstAlias<"sxtb${p}.w $Rd, $Rm",
+ (t2SXTB rGPR:$Rd, rGPR:$Rm, 0, pred:$p)>;
+def : t2InstAlias<"sxth${p}.w $Rd, $Rm",
+ (t2SXTH rGPR:$Rd, rGPR:$Rm, 0, pred:$p)>;
+
+def : t2InstAlias<"uxtab${p} $Rd, $Rn, $Rm",
+ (t2UXTAB rGPR:$Rd, rGPR:$Rn, rGPR:$Rm, 0, pred:$p)>;
+def : t2InstAlias<"uxtah${p} $Rd, $Rn, $Rm",
+ (t2UXTAH rGPR:$Rd, rGPR:$Rn, rGPR:$Rm, 0, pred:$p)>;
+def : t2InstAlias<"uxtab16${p} $Rd, $Rn, $Rm",
+ (t2UXTAB16 rGPR:$Rd, rGPR:$Rn, rGPR:$Rm, 0, pred:$p)>;
+def : t2InstAlias<"uxtb${p} $Rd, $Rm",
+ (t2UXTB rGPR:$Rd, rGPR:$Rm, 0, pred:$p)>;
+def : t2InstAlias<"uxtb16${p} $Rd, $Rm",
+ (t2UXTB16 rGPR:$Rd, rGPR:$Rm, 0, pred:$p)>;
+def : t2InstAlias<"uxth${p} $Rd, $Rm",
+ (t2UXTH rGPR:$Rd, rGPR:$Rm, 0, pred:$p)>;
+
+def : t2InstAlias<"uxtb${p}.w $Rd, $Rm",
+ (t2UXTB rGPR:$Rd, rGPR:$Rm, 0, pred:$p)>;
+def : t2InstAlias<"uxth${p}.w $Rd, $Rm",
+ (t2UXTH rGPR:$Rd, rGPR:$Rm, 0, pred:$p)>;
+
+// Extend instruction w/o the ".w" optional width specifier.
+def : t2InstAlias<"uxtb${p} $Rd, $Rm$rot",
+ (t2UXTB rGPR:$Rd, rGPR:$Rm, rot_imm:$rot, pred:$p)>;
+def : t2InstAlias<"uxtb16${p} $Rd, $Rm$rot",
+ (t2UXTB16 rGPR:$Rd, rGPR:$Rm, rot_imm:$rot, pred:$p)>;
+def : t2InstAlias<"uxth${p} $Rd, $Rm$rot",
+ (t2UXTH rGPR:$Rd, rGPR:$Rm, rot_imm:$rot, pred:$p)>;
+
+def : t2InstAlias<"sxtb${p} $Rd, $Rm$rot",
+ (t2SXTB rGPR:$Rd, rGPR:$Rm, rot_imm:$rot, pred:$p)>;
+def : t2InstAlias<"sxtb16${p} $Rd, $Rm$rot",
+ (t2SXTB16 rGPR:$Rd, rGPR:$Rm, rot_imm:$rot, pred:$p)>;
+def : t2InstAlias<"sxth${p} $Rd, $Rm$rot",
+ (t2SXTH rGPR:$Rd, rGPR:$Rm, rot_imm:$rot, pred:$p)>;
diff --git a/lib/Target/ARM/ARMInstrVFP.td b/lib/Target/ARM/ARMInstrVFP.td
index f1f3cb9..e746cf2 100644
--- a/lib/Target/ARM/ARMInstrVFP.td
+++ b/lib/Target/ARM/ARMInstrVFP.td
@@ -31,18 +31,34 @@ def arm_fmdrr : SDNode<"ARMISD::VMOVDRR", SDT_VMOVDRR>;
// Operand Definitions.
//
+// 8-bit floating-point immediate encodings.
+def FPImmOperand : AsmOperandClass {
+ let Name = "FPImm";
+ let ParserMethod = "parseFPImm";
+}
+
def vfp_f32imm : Operand<f32>,
PatLeaf<(f32 fpimm), [{
- return ARM::getVFPf32Imm(N->getValueAPF()) != -1;
- }]> {
- let PrintMethod = "printVFPf32ImmOperand";
+ return ARM_AM::getFP32Imm(N->getValueAPF()) != -1;
+ }], SDNodeXForm<fpimm, [{
+ APFloat InVal = N->getValueAPF();
+ uint32_t enc = ARM_AM::getFP32Imm(InVal);
+ return CurDAG->getTargetConstant(enc, MVT::i32);
+ }]>> {
+ let PrintMethod = "printFPImmOperand";
+ let ParserMatchClass = FPImmOperand;
}
def vfp_f64imm : Operand<f64>,
PatLeaf<(f64 fpimm), [{
- return ARM::getVFPf64Imm(N->getValueAPF()) != -1;
- }]> {
- let PrintMethod = "printVFPf64ImmOperand";
+ return ARM_AM::getFP64Imm(N->getValueAPF()) != -1;
+ }], SDNodeXForm<fpimm, [{
+ APFloat InVal = N->getValueAPF();
+ uint32_t enc = ARM_AM::getFP64Imm(InVal);
+ return CurDAG->getTargetConstant(enc, MVT::i32);
+ }]>> {
+ let PrintMethod = "printFPImmOperand";
+ let ParserMatchClass = FPImmOperand;
}
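
For reference, a sketch (not LLVM code, and covering only the f32 case) of the value the 8-bit encoding produced by `ARM_AM::getFP32Imm` above stands for, assuming the standard VFPv3 modified-immediate reading of the bits "abcdefgh":

```cpp
#include <cmath>
#include <cstdint>

// Minimal sketch: sign a, exponent in [-3,4] from b/c/d, fraction efgh.
static float decodeVFP32Imm(uint8_t Enc) {
  unsigned Sign = (Enc >> 7) & 1;           // a
  unsigned B    = (Enc >> 6) & 1;           // b picks the exponent half-range
  unsigned CD   = (Enc >> 4) & 3;           // c,d: low exponent bits
  unsigned Frac = Enc & 0xf;                // e,f,g,h: top fraction bits
  int Exp = B ? (int)CD - 3 : (int)CD + 1;  // unbiased exponent, -3..4
  float Mant = 1.0f + Frac / 16.0f;         // 1.0 .. 1.9375
  return (Sign ? -1.0f : 1.0f) * std::ldexp(Mant, Exp);
}
// decodeVFP32Imm(0x70) == 1.0f; getFP32Imm returns -1 for any float not of
// this form, which is exactly what the PatLeaf predicates above test.
```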
@@ -385,26 +401,26 @@ def VCVTSD : VFPAI<(outs SPR:$Sd), (ins DPR:$Dm), VFPUnaryFrm,
// Between half-precision and single-precision. For disassembly only.
// FIXME: Verify encoding after integrated assembler is working.
-def VCVTBSH: ASuI<0b11101, 0b11, 0b0010, 0b01, 0, (outs SPR:$dst), (ins SPR:$a),
- /* FIXME */ IIC_fpCVTSH, "vcvtb", ".f32.f16\t$dst, $a",
+def VCVTBSH: ASuI<0b11101, 0b11, 0b0010, 0b01, 0, (outs SPR:$Sd), (ins SPR:$Sm),
+ /* FIXME */ IIC_fpCVTSH, "vcvtb", ".f32.f16\t$Sd, $Sm",
[/* For disassembly only; pattern left blank */]>;
def : ARMPat<(f32_to_f16 SPR:$a),
(i32 (COPY_TO_REGCLASS (VCVTBSH SPR:$a), GPR))>;
-def VCVTBHS: ASuI<0b11101, 0b11, 0b0011, 0b01, 0, (outs SPR:$dst), (ins SPR:$a),
- /* FIXME */ IIC_fpCVTHS, "vcvtb", ".f16.f32\t$dst, $a",
+def VCVTBHS: ASuI<0b11101, 0b11, 0b0011, 0b01, 0, (outs SPR:$Sd), (ins SPR:$Sm),
+ /* FIXME */ IIC_fpCVTHS, "vcvtb", ".f16.f32\t$Sd, $Sm",
[/* For disassembly only; pattern left blank */]>;
def : ARMPat<(f16_to_f32 GPR:$a),
(VCVTBHS (COPY_TO_REGCLASS GPR:$a, SPR))>;
-def VCVTTSH: ASuI<0b11101, 0b11, 0b0010, 0b11, 0, (outs SPR:$dst), (ins SPR:$a),
- /* FIXME */ IIC_fpCVTSH, "vcvtt", ".f32.f16\t$dst, $a",
+def VCVTTSH: ASuI<0b11101, 0b11, 0b0010, 0b11, 0, (outs SPR:$Sd), (ins SPR:$Sm),
+ /* FIXME */ IIC_fpCVTSH, "vcvtt", ".f32.f16\t$Sd, $Sm",
[/* For disassembly only; pattern left blank */]>;
-def VCVTTHS: ASuI<0b11101, 0b11, 0b0011, 0b11, 0, (outs SPR:$dst), (ins SPR:$a),
- /* FIXME */ IIC_fpCVTHS, "vcvtt", ".f16.f32\t$dst, $a",
+def VCVTTHS: ASuI<0b11101, 0b11, 0b0011, 0b11, 0, (outs SPR:$Sd), (ins SPR:$Sm),
+ /* FIXME */ IIC_fpCVTHS, "vcvtt", ".f16.f32\t$Sd, $Sm",
[/* For disassembly only; pattern left blank */]>;
def VNEGD : ADuI<0b11101, 0b11, 0b0001, 0b01, 0,
@@ -511,14 +527,25 @@ def VMOVRRD : AVConv3I<0b11000101, 0b1011,
}
def VMOVRRS : AVConv3I<0b11000101, 0b1010,
- (outs GPR:$wb, GPR:$dst2), (ins SPR:$src1, SPR:$src2),
- IIC_fpMOVDI, "vmov", "\t$wb, $dst2, $src1, $src2",
+ (outs GPR:$Rt, GPR:$Rt2), (ins SPR:$src1, SPR:$src2),
+ IIC_fpMOVDI, "vmov", "\t$Rt, $Rt2, $src1, $src2",
[/* For disassembly only; pattern left blank */]> {
+ bits<5> src1;
+ bits<4> Rt;
+ bits<4> Rt2;
+
+ // Encode instruction operands.
+ let Inst{3-0} = src1{3-0};
+ let Inst{5} = src1{4};
+ let Inst{15-12} = Rt;
+ let Inst{19-16} = Rt2;
+
let Inst{7-6} = 0b00;
// Some single precision VFP instructions may be executed on both NEON and VFP
// pipelines.
let D = VFPNeonDomain;
+ let DecoderMethod = "DecodeVMOVRRS";
}
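
The operand-encoding lets added above split the 5-bit S-register number across two instruction fields. A standalone illustration (hypothetical helper, not part of the patch):

```cpp
#include <cstdint>

// S<n>, n in [0,31]: low four bits go to Inst{3-0}, the top bit to Inst{5}.
static uint32_t encodeSm(uint32_t Inst, unsigned SRegNum) {
  Inst |= (SRegNum & 0xf);            // Inst{3-0} = src1{3-0}
  Inst |= ((SRegNum >> 4) & 1) << 5;  // Inst{5}   = src1{4}
  return Inst;
}
```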
} // neverHasSideEffects
@@ -552,11 +579,24 @@ def VMOVSRR : AVConv5I<0b11000100, 0b1010,
(outs SPR:$dst1, SPR:$dst2), (ins GPR:$src1, GPR:$src2),
IIC_fpMOVID, "vmov", "\t$dst1, $dst2, $src1, $src2",
[/* For disassembly only; pattern left blank */]> {
+ // Instruction operands.
+ bits<5> dst1;
+ bits<4> src1;
+ bits<4> src2;
+
+ // Encode instruction operands.
+ let Inst{3-0} = dst1{3-0};
+ let Inst{5} = dst1{4};
+ let Inst{15-12} = src1;
+ let Inst{19-16} = src2;
+
let Inst{7-6} = 0b00;
// Some single precision VFP instructions may be executed on both NEON and VFP
// pipelines.
let D = VFPNeonDomain;
+
+ let DecoderMethod = "DecodeVMOVSRR";
}
// FMRDH: SPR -> GPR
@@ -1084,45 +1124,42 @@ def FCONSTD : VFPAI<(outs DPR:$Dd), (ins vfp_f64imm:$imm),
VFPMiscFrm, IIC_fpUNA64,
"vmov", ".f64\t$Dd, $imm",
[(set DPR:$Dd, vfp_f64imm:$imm)]>, Requires<[HasVFP3]> {
- // Instruction operands.
- bits<5> Dd;
- bits<32> imm;
-
- // Encode instruction operands.
- let Inst{15-12} = Dd{3-0};
- let Inst{22} = Dd{4};
- let Inst{19} = imm{31};
- let Inst{18-16} = imm{22-20};
- let Inst{3-0} = imm{19-16};
+ bits<5> Dd;
+ bits<8> imm;
- // Encode remaining instruction bits.
let Inst{27-23} = 0b11101;
+ let Inst{22} = Dd{4};
let Inst{21-20} = 0b11;
+ let Inst{19-16} = imm{7-4};
+ let Inst{15-12} = Dd{3-0};
let Inst{11-9} = 0b101;
let Inst{8} = 1; // Double precision.
let Inst{7-4} = 0b0000;
+ let Inst{3-0} = imm{3-0};
}
def FCONSTS : VFPAI<(outs SPR:$Sd), (ins vfp_f32imm:$imm),
VFPMiscFrm, IIC_fpUNA32,
"vmov", ".f32\t$Sd, $imm",
[(set SPR:$Sd, vfp_f32imm:$imm)]>, Requires<[HasVFP3]> {
- // Instruction operands.
- bits<5> Sd;
- bits<32> imm;
-
- // Encode instruction operands.
- let Inst{15-12} = Sd{4-1};
- let Inst{22} = Sd{0};
- let Inst{19} = imm{31}; // The immediate is handled as a double.
- let Inst{18-16} = imm{22-20};
- let Inst{3-0} = imm{19-16};
+ bits<5> Sd;
+ bits<8> imm;
- // Encode remaining instruction bits.
let Inst{27-23} = 0b11101;
+ let Inst{22} = Sd{0};
let Inst{21-20} = 0b11;
+ let Inst{19-16} = imm{7-4};
+ let Inst{15-12} = Sd{4-1};
let Inst{11-9} = 0b101;
let Inst{8} = 0; // Single precision.
let Inst{7-4} = 0b0000;
+ let Inst{3-0} = imm{3-0};
}
}
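
With the immediate now carried as 8 bits rather than 32, encoding it reduces to placing two nibbles. A sketch of the packing done by the lets above (illustrative helper, not patch code):

```cpp
#include <cstdint>

// imm{7-4} -> Inst{19-16}, imm{3-0} -> Inst{3-0}.
static uint32_t packFConstImm(uint32_t Inst, unsigned Imm8) {
  Inst |= ((Imm8 >> 4) & 0xf) << 16;  // Inst{19-16} = imm{7-4}
  Inst |= (Imm8 & 0xf);               // Inst{3-0}   = imm{3-0}
  return Inst;
}
```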
+
+//===----------------------------------------------------------------------===//
+// Assembler aliases.
+//
+
+def : VFP2InstAlias<"fmstat${p}", (FMSTAT pred:$p)>;
+
diff --git a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
index c6efea1..faa8ba7 100644
--- a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
+++ b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
@@ -14,10 +14,10 @@
#define DEBUG_TYPE "arm-ldst-opt"
#include "ARM.h"
-#include "ARMAddressingModes.h"
#include "ARMBaseInstrInfo.h"
#include "ARMMachineFunctionInfo.h"
#include "ARMRegisterInfo.h"
+#include "MCTargetDesc/ARMAddressingModes.h"
#include "llvm/DerivedTypes.h"
#include "llvm/Function.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
@@ -26,6 +26,7 @@
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RegisterScavenging.h"
+#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetMachine.h"
@@ -763,9 +764,9 @@ static unsigned getPreIndexedLoadStoreOpcode(unsigned Opc,
ARM_AM::AddrOpc Mode) {
switch (Opc) {
case ARM::LDRi12:
- return ARM::LDR_PRE;
+ return ARM::LDR_PRE_IMM;
case ARM::STRi12:
- return ARM::STR_PRE;
+ return ARM::STR_PRE_IMM;
case ARM::VLDRS:
return Mode == ARM_AM::add ? ARM::VLDMSIA_UPD : ARM::VLDMSDB_UPD;
case ARM::VLDRD:
@@ -789,9 +790,9 @@ static unsigned getPostIndexedLoadStoreOpcode(unsigned Opc,
ARM_AM::AddrOpc Mode) {
switch (Opc) {
case ARM::LDRi12:
- return ARM::LDR_POST;
+ return ARM::LDR_POST_IMM;
case ARM::STRi12:
- return ARM::STR_POST;
+ return ARM::STR_POST_IMM;
case ARM::VLDRS:
return Mode == ARM_AM::add ? ARM::VLDMSIA_UPD : ARM::VLDMSDB_UPD;
case ARM::VLDRD:
@@ -892,12 +893,6 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineBasicBlock &MBB,
if (!DoMerge)
return false;
- unsigned Offset = 0;
- if (isAM2)
- Offset = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift);
- else if (!isAM5)
- Offset = AddSub == ARM_AM::sub ? -Bytes : Bytes;
-
if (isAM5) {
// VLDM[SD]_UPD, VSTM[SD]_UPD
// (There are no base-updating versions of VLDR/VSTR instructions, but the
@@ -911,28 +906,44 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineBasicBlock &MBB,
.addReg(MO.getReg(), (isLd ? getDefRegState(true) :
getKillRegState(MO.isKill())));
} else if (isLd) {
- if (isAM2)
- // LDR_PRE, LDR_POST,
- BuildMI(MBB, MBBI, dl, TII->get(NewOpc), MI->getOperand(0).getReg())
- .addReg(Base, RegState::Define)
- .addReg(Base).addReg(0).addImm(Offset).addImm(Pred).addReg(PredReg);
- else
+ if (isAM2) {
+ // LDR_PRE, LDR_POST
+ if (NewOpc == ARM::LDR_PRE_IMM || NewOpc == ARM::LDRB_PRE_IMM) {
+ int Offset = AddSub == ARM_AM::sub ? -Bytes : Bytes;
+ BuildMI(MBB, MBBI, dl, TII->get(NewOpc), MI->getOperand(0).getReg())
+ .addReg(Base, RegState::Define)
+ .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
+ } else {
+ int Offset = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift);
+ BuildMI(MBB, MBBI, dl, TII->get(NewOpc), MI->getOperand(0).getReg())
+ .addReg(Base, RegState::Define)
+ .addReg(Base).addReg(0).addImm(Offset).addImm(Pred).addReg(PredReg);
+ }
+ } else {
+ int Offset = AddSub == ARM_AM::sub ? -Bytes : Bytes;
// t2LDR_PRE, t2LDR_POST
BuildMI(MBB, MBBI, dl, TII->get(NewOpc), MI->getOperand(0).getReg())
.addReg(Base, RegState::Define)
.addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
+ }
} else {
MachineOperand &MO = MI->getOperand(0);
- if (isAM2)
+ // FIXME: post-indexed stores use am2offset_imm, which still encodes
+ // the vestigial zero-reg offset register. When that's fixed, this clause
+ // can be removed entirely.
+ if (isAM2 && NewOpc == ARM::STR_POST_IMM) {
+ int Offset = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift);
// STR_PRE, STR_POST
BuildMI(MBB, MBBI, dl, TII->get(NewOpc), Base)
.addReg(MO.getReg(), getKillRegState(MO.isKill()))
.addReg(Base).addReg(0).addImm(Offset).addImm(Pred).addReg(PredReg);
- else
+ } else {
+ int Offset = AddSub == ARM_AM::sub ? -Bytes : Bytes;
// t2STR_PRE, t2STR_POST
BuildMI(MBB, MBBI, dl, TII->get(NewOpc), Base)
.addReg(MO.getReg(), getKillRegState(MO.isKill()))
.addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
+ }
}
MBB.erase(MBBI);
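
The rewritten clauses above exist because the merged instructions no longer share one offset encoding. A sketch of the two conventions in play (helper names are invented; `getAM2Opc` is the real API this file already uses):

```cpp
#include "MCTargetDesc/ARMAddressingModes.h"
using namespace llvm;

// *_PRE_IMM/*_POST_IMM and the t2 forms take a plain signed immediate:
static int plainImmOffset(ARM_AM::AddrOpc AddSub, unsigned Bytes) {
  return AddSub == ARM_AM::sub ? -(int)Bytes : (int)Bytes;
}
// The remaining addrmode2 forms pack direction, imm12 and shift together:
static unsigned packedAM2Offset(ARM_AM::AddrOpc AddSub, unsigned Bytes) {
  return ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift);
}
```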
diff --git a/lib/Target/ARM/ARMMCInstLower.cpp b/lib/Target/ARM/ARMMCInstLower.cpp
index 7411b59..daa126d 100644
--- a/lib/Target/ARM/ARMMCInstLower.cpp
+++ b/lib/Target/ARM/ARMMCInstLower.cpp
@@ -14,7 +14,7 @@
#include "ARM.h"
#include "ARMAsmPrinter.h"
-#include "ARMMCExpr.h"
+#include "MCTargetDesc/ARMMCExpr.h"
#include "llvm/Constants.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/MC/MCExpr.h"
diff --git a/lib/Target/ARM/ARMRegisterInfo.td b/lib/Target/ARM/ARMRegisterInfo.td
index 76eb496..036822d 100644
--- a/lib/Target/ARM/ARMRegisterInfo.td
+++ b/lib/Target/ARM/ARMRegisterInfo.td
@@ -182,8 +182,10 @@ def QQQQ3 : ARMReg<3, "qqqq3", [QQ6, QQ7]>;
// Current Program Status Register.
def CPSR : ARMReg<0, "cpsr">;
-def FPSCR : ARMReg<1, "fpscr">;
-def ITSTATE : ARMReg<2, "itstate">;
+def APSR : ARMReg<1, "apsr">;
+def SPSR : ARMReg<2, "spsr">;
+def FPSCR : ARMReg<3, "fpscr">;
+def ITSTATE : ARMReg<4, "itstate">;
// Special Registers - only available in privileged mode.
def FPSID : ARMReg<0, "fpsid">;
@@ -213,6 +215,23 @@ def GPR : RegisterClass<"ARM", [i32], 32, (add (sequence "R%u", 0, 12),
}];
}
+// GPRs without the PC. Some ARM instructions do not allow the PC in
+// certain operand slots, particularly as the destination. Primarily
+// useful for disassembly.
+def GPRnopc : RegisterClass<"ARM", [i32], 32, (sub GPR, PC)> {
+ let AltOrders = [(add LR, GPRnopc), (trunc GPRnopc, 8)];
+ let AltOrderSelect = [{
+ return 1 + MF.getTarget().getSubtarget<ARMSubtarget>().isThumb1Only();
+ }];
+}
+
+// GPRsp - Only the SP is legal. Used by Thumb1 instructions that want the
+// implied SP argument list.
+// FIXME: It would be better to not use this at all and refactor the
+// instructions to not have SP as an explicit argument. That makes
+// frame index resolution a bit trickier, though.
+def GPRsp : RegisterClass<"ARM", [i32], 32, (add SP)>;
+
// restricted GPR register class. Many Thumb2 instructions allow the full
// register range for operands, but have undefined behaviours when PC
// or SP (R15 or R13) are used. The ARM ISA refers to these operands
@@ -328,5 +347,6 @@ def QQQQPR : RegisterClass<"ARM", [v8i64], 256, (sequence "QQQQ%u", 0, 3)> {
// Condition code registers.
def CCR : RegisterClass<"ARM", [i32], 32, (add CPSR)> {
+ let CopyCost = -1; // Don't allow copying of status registers.
let isAllocatable = 0;
}
diff --git a/lib/Target/ARM/ARMSelectionDAGInfo.cpp b/lib/Target/ARM/ARMSelectionDAGInfo.cpp
index ef0aaf2..a3a3d58 100644
--- a/lib/Target/ARM/ARMSelectionDAGInfo.cpp
+++ b/lib/Target/ARM/ARMSelectionDAGInfo.cpp
@@ -138,13 +138,12 @@ ARMSelectionDAGInfo::EmitTargetCodeForMemcpy(SelectionDAG &DAG, DebugLoc dl,
// Adjust parameters for memset: EABI uses the format (ptr, size, value),
// while the GNU library uses (ptr, value, size).
// See RTABI section 4.3.4
-SDValue
-ARMSelectionDAGInfo::EmitTargetCodeForMemset(SelectionDAG &DAG, DebugLoc dl,
- SDValue Chain, SDValue Dst,
- SDValue Src, SDValue Size,
- unsigned Align, bool isVolatile,
- MachinePointerInfo DstPtrInfo) const
-{
+SDValue ARMSelectionDAGInfo::
+EmitTargetCodeForMemset(SelectionDAG &DAG, DebugLoc dl,
+ SDValue Chain, SDValue Dst,
+ SDValue Src, SDValue Size,
+ unsigned Align, bool isVolatile,
+ MachinePointerInfo DstPtrInfo) const {
// Use default for non AAPCS subtargets
if (!Subtarget->isAAPCS_ABI())
return SDValue();
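
For reference, the two calling conventions this routine adjusts between, as described by the comment above (declarations only, not code from this file; the `__aeabi_memset` shape follows the cited RTABI section):

```cpp
#include <cstddef>

extern "C" void *memset(void *Dest, int Value, std::size_t N);        // GNU
extern "C" void __aeabi_memset(void *Dest, std::size_t N, int Value); // EABI
```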
@@ -155,7 +154,7 @@ ARMSelectionDAGInfo::EmitTargetCodeForMemset(SelectionDAG &DAG, DebugLoc dl,
TargetLowering::ArgListEntry Entry;
// First argument: data pointer
- const Type *IntPtrTy = TLI.getTargetData()->getIntPtrType(*DAG.getContext());
+ Type *IntPtrTy = TLI.getTargetData()->getIntPtrType(*DAG.getContext());
Entry.Node = Dst;
Entry.Ty = IntPtrTy;
Args.push_back(Entry);
diff --git a/lib/Target/ARM/ARMSelectionDAGInfo.h b/lib/Target/ARM/ARMSelectionDAGInfo.h
index ec1bf5c..6419a73 100644
--- a/lib/Target/ARM/ARMSelectionDAGInfo.h
+++ b/lib/Target/ARM/ARMSelectionDAGInfo.h
@@ -14,10 +14,27 @@
#ifndef ARMSELECTIONDAGINFO_H
#define ARMSELECTIONDAGINFO_H
+#include "MCTargetDesc/ARMAddressingModes.h"
#include "llvm/Target/TargetSelectionDAGInfo.h"
namespace llvm {
+namespace ARM_AM {
+ static inline ShiftOpc getShiftOpcForNode(unsigned Opcode) {
+ switch (Opcode) {
+ default: return ARM_AM::no_shift;
+ case ISD::SHL: return ARM_AM::lsl;
+ case ISD::SRL: return ARM_AM::lsr;
+ case ISD::SRA: return ARM_AM::asr;
+ case ISD::ROTR: return ARM_AM::ror;
+ //case ISD::ROTL: // Only if imm -> turn into ROTR.
+ // Can't handle RRX here, because it would require folding a flag into
+ // the addressing mode. :( This causes us to miss certain things.
+ //case ARMISD::RRX: return ARM_AM::rrx;
+ }
+ }
+} // end namespace ARM_AM
+
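A quick usage sketch for the helper above (hypothetical caller; the `ISD` opcodes come from llvm/CodeGen/ISDOpcodes.h, already in scope here):

```cpp
// Anything mapping to something other than no_shift can be folded into an
// ARM shifter operand.
static bool isShifterFoldable(unsigned Opcode) {
  return ARM_AM::getShiftOpcForNode(Opcode) != ARM_AM::no_shift;
}
// isShifterFoldable(ISD::SHL) -> true, isShifterFoldable(ISD::ADD) -> false
```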
class ARMSelectionDAGInfo : public TargetSelectionDAGInfo {
/// Subtarget - Keep a pointer to the ARMSubtarget around so that we can
/// make the right decision when generating code for different targets.
diff --git a/lib/Target/ARM/ARMSubtarget.cpp b/lib/Target/ARM/ARMSubtarget.cpp
index 1cab9e4..247d6be 100644
--- a/lib/Target/ARM/ARMSubtarget.cpp
+++ b/lib/Target/ARM/ARMSubtarget.cpp
@@ -53,11 +53,14 @@ ARMSubtarget::ARMSubtarget(const std::string &TT, const std::string &CPU,
, HasVMLxForwarding(false)
, SlowFPBrcc(false)
, InThumbMode(false)
+ , InNaClMode(false)
, HasThumb2(false)
+ , IsMClass(false)
, NoARM(false)
, PostRAScheduler(false)
, IsR9Reserved(ReserveR9)
, UseMovt(false)
+ , SupportsTailCall(false)
, HasFP16(false)
, HasD16(false)
, HasHardwareDivide(false)
@@ -111,6 +114,8 @@ ARMSubtarget::ARMSubtarget(const std::string &TT, const std::string &CPU,
else {
IsR9Reserved = ReserveR9 | !HasV6Ops;
UseMovt = DarwinUseMOVT && hasV6T2Ops();
+ const Triple &T = getTargetTriple();
+ SupportsTailCall = T.getOS() == Triple::IOS && !T.isOSVersionLT(5, 0);
}
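
A sketch of how the new gate evaluates, with illustrative triples (assuming the usual "arm*-apple-ios<version>" spelling):

```cpp
#include "llvm/ADT/Triple.h"

static bool osSupportsTailCall(const llvm::Triple &T) {
  return T.getOS() == llvm::Triple::IOS && !T.isOSVersionLT(5, 0);
}
// osSupportsTailCall(llvm::Triple("armv7-apple-ios5.0")) -> true
// osSupportsTailCall(llvm::Triple("armv6-apple-ios4.3")) -> false
```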
if (!isThumb() || hasThumb2())
diff --git a/lib/Target/ARM/ARMSubtarget.h b/lib/Target/ARM/ARMSubtarget.h
index c650872..b63e108 100644
--- a/lib/Target/ARM/ARMSubtarget.h
+++ b/lib/Target/ARM/ARMSubtarget.h
@@ -70,9 +70,16 @@ protected:
/// InThumbMode - True if compiling for Thumb, false for ARM.
bool InThumbMode;
+ /// InNaClMode - True if targeting Native Client.
+ bool InNaClMode;
+
/// HasThumb2 - True if Thumb2 instructions are supported.
bool HasThumb2;
+ /// IsMClass - True if the subtarget belongs to the 'M' profile of CPUs,
+ /// e.g. v6m and v7m.
+ bool IsMClass;
+
/// NoARM - True if subtarget does not support ARM mode execution.
bool NoARM;
@@ -86,6 +93,11 @@ protected:
/// imms (including global addresses).
bool UseMovt;
+ /// SupportsTailCall - True if the OS supports tail calls. The dynamic linker
+ /// must be able to synthesize call stubs for interworking between ARM and
+ /// Thumb.
+ bool SupportsTailCall;
+
/// HasFP16 - True if subtarget supports half-precision FP (We support VFP+HF
/// only so far)
bool HasFP16;
@@ -209,6 +221,9 @@ protected:
const Triple &getTargetTriple() const { return TargetTriple; }
bool isTargetDarwin() const { return TargetTriple.isOSDarwin(); }
+ bool isTargetNaCl() const {
+ return TargetTriple.getOS() == Triple::NativeClient;
+ }
bool isTargetELF() const { return !isTargetDarwin(); }
bool isAPCS_ABI() const { return TargetABI == ARM_ABI_APCS; }
@@ -218,10 +233,13 @@ protected:
bool isThumb1Only() const { return InThumbMode && !HasThumb2; }
bool isThumb2() const { return InThumbMode && HasThumb2; }
bool hasThumb2() const { return HasThumb2; }
+ bool isMClass() const { return IsMClass; }
+ bool isARClass() const { return !IsMClass; }
bool isR9Reserved() const { return IsR9Reserved; }
bool useMovt() const { return UseMovt && hasV6T2Ops(); }
+ bool supportsTailCall() const { return SupportsTailCall; }
bool allowsUnalignedMem() const { return AllowsUnalignedMem; }
diff --git a/lib/Target/ARM/ARMTargetMachine.cpp b/lib/Target/ARM/ARMTargetMachine.cpp
index f0b176a..96b1e89 100644
--- a/lib/Target/ARM/ARMTargetMachine.cpp
+++ b/lib/Target/ARM/ARMTargetMachine.cpp
@@ -15,77 +15,50 @@
#include "ARM.h"
#include "llvm/PassManager.h"
#include "llvm/CodeGen/Passes.h"
+#include "llvm/MC/MCAsmInfo.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/FormattedStream.h"
+#include "llvm/Support/TargetRegistry.h"
#include "llvm/Target/TargetOptions.h"
-#include "llvm/Target/TargetRegistry.h"
using namespace llvm;
-// This is duplicated code. Refactor this.
-static MCStreamer *createMCStreamer(const Target &T, const std::string &TT,
- MCContext &Ctx, TargetAsmBackend &TAB,
- raw_ostream &OS,
- MCCodeEmitter *Emitter,
- bool RelaxAll,
- bool NoExecStack) {
- Triple TheTriple(TT);
-
- if (TheTriple.isOSDarwin())
- return createMachOStreamer(Ctx, TAB, OS, Emitter, RelaxAll);
-
- if (TheTriple.isOSWindows()) {
- llvm_unreachable("ARM does not support Windows COFF format");
- return NULL;
- }
-
- return createELFStreamer(Ctx, TAB, OS, Emitter, RelaxAll, NoExecStack);
-}
+static cl::opt<bool>
+EnableGlobalMerge("global-merge", cl::Hidden,
+ cl::desc("Enable global merge pass"),
+ cl::init(true));
extern "C" void LLVMInitializeARMTarget() {
// Register the target.
RegisterTargetMachine<ARMTargetMachine> X(TheARMTarget);
RegisterTargetMachine<ThumbTargetMachine> Y(TheThumbTarget);
-
- // Register the MC Code Emitter
- TargetRegistry::RegisterCodeEmitter(TheARMTarget, createARMMCCodeEmitter);
- TargetRegistry::RegisterCodeEmitter(TheThumbTarget, createARMMCCodeEmitter);
-
- // Register the asm backend.
- TargetRegistry::RegisterAsmBackend(TheARMTarget, createARMAsmBackend);
- TargetRegistry::RegisterAsmBackend(TheThumbTarget, createARMAsmBackend);
-
- // Register the object streamer.
- TargetRegistry::RegisterObjectStreamer(TheARMTarget, createMCStreamer);
- TargetRegistry::RegisterObjectStreamer(TheThumbTarget, createMCStreamer);
-
}
/// TargetMachine ctor - Create an ARM architecture model.
///
-ARMBaseTargetMachine::ARMBaseTargetMachine(const Target &T,
- const std::string &TT,
- const std::string &CPU,
- const std::string &FS)
- : LLVMTargetMachine(T, TT, CPU, FS),
+ARMBaseTargetMachine::ARMBaseTargetMachine(const Target &T, StringRef TT,
+ StringRef CPU, StringRef FS,
+ Reloc::Model RM, CodeModel::Model CM)
+ : LLVMTargetMachine(T, TT, CPU, FS, RM, CM),
Subtarget(TT, CPU, FS),
JITInfo(),
InstrItins(Subtarget.getInstrItineraryData()) {
- DefRelocModel = getRelocationModel();
-
// Default to soft float ABI
if (FloatABIType == FloatABI::Default)
FloatABIType = FloatABI::Soft;
}
-ARMTargetMachine::ARMTargetMachine(const Target &T, const std::string &TT,
- const std::string &CPU,
- const std::string &FS)
- : ARMBaseTargetMachine(T, TT, CPU, FS), InstrInfo(Subtarget),
+ARMTargetMachine::ARMTargetMachine(const Target &T, StringRef TT,
+ StringRef CPU, StringRef FS,
+ Reloc::Model RM, CodeModel::Model CM)
+ : ARMBaseTargetMachine(T, TT, CPU, FS, RM, CM), InstrInfo(Subtarget),
DataLayout(Subtarget.isAPCS_ABI() ?
std::string("e-p:32:32-f64:32:64-i64:32:64-"
- "v128:32:128-v64:32:64-n32") :
+ "v128:32:128-v64:32:64-n32-S32") :
+ Subtarget.isAAPCS_ABI() ?
+ std::string("e-p:32:32-f64:64:64-i64:64:64-"
+ "v128:64:128-v64:64:64-n32-S64") :
std::string("e-p:32:32-f64:64:64-i64:64:64-"
- "v128:64:128-v64:64:64-n32")),
+ "v128:64:128-v64:64:64-n32-S32")),
ELFWriterInfo(*this),
TLInfo(*this),
TSInfo(*this),
@@ -95,20 +68,24 @@ ARMTargetMachine::ARMTargetMachine(const Target &T, const std::string &TT,
"support ARM mode execution!");
}
-ThumbTargetMachine::ThumbTargetMachine(const Target &T, const std::string &TT,
- const std::string &CPU,
- const std::string &FS)
- : ARMBaseTargetMachine(T, TT, CPU, FS),
+ThumbTargetMachine::ThumbTargetMachine(const Target &T, StringRef TT,
+ StringRef CPU, StringRef FS,
+ Reloc::Model RM, CodeModel::Model CM)
+ : ARMBaseTargetMachine(T, TT, CPU, FS, RM, CM),
InstrInfo(Subtarget.hasThumb2()
? ((ARMBaseInstrInfo*)new Thumb2InstrInfo(Subtarget))
: ((ARMBaseInstrInfo*)new Thumb1InstrInfo(Subtarget))),
DataLayout(Subtarget.isAPCS_ABI() ?
std::string("e-p:32:32-f64:32:64-i64:32:64-"
"i16:16:32-i8:8:32-i1:8:32-"
- "v128:32:128-v64:32:64-a:0:32-n32") :
+ "v128:32:128-v64:32:64-a:0:32-n32-S32") :
+ Subtarget.isAAPCS_ABI() ?
+ std::string("e-p:32:32-f64:64:64-i64:64:64-"
+ "i16:16:32-i8:8:32-i1:8:32-"
+ "v128:64:128-v64:64:64-a:0:32-n32-S64") :
std::string("e-p:32:32-f64:64:64-i64:64:64-"
"i16:16:32-i8:8:32-i1:8:32-"
- "v128:64:128-v64:64:64-a:0:32-n32")),
+ "v128:64:128-v64:64:64-a:0:32-n32-S32")),
ELFWriterInfo(*this),
TLInfo(*this),
TSInfo(*this),
@@ -117,10 +94,9 @@ ThumbTargetMachine::ThumbTargetMachine(const Target &T, const std::string &TT,
: (ARMFrameLowering*)new Thumb1FrameLowering(Subtarget)) {
}
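
The only change to the layout strings above is the trailing `S<bits>` token, the natural stack alignment: AAPCS mandates an 8-byte-aligned stack (S64), while APCS and the default keep 4 bytes (S32). A sketch, assuming (as elsewhere in this tree) that TargetData parses the string directly:

```cpp
#include "llvm/Target/TargetData.h"

// AAPCS layout with the new 64-bit natural stack alignment.
llvm::TargetData AAPCSData("e-p:32:32-f64:64:64-i64:64:64-"
                           "v128:64:128-v64:64:64-n32-S64");
```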
-// Pass Pipeline Configuration
bool ARMBaseTargetMachine::addPreISel(PassManagerBase &PM,
CodeGenOpt::Level OptLevel) {
- if (OptLevel != CodeGenOpt::None)
+ if (OptLevel != CodeGenOpt::None && EnableGlobalMerge)
PM.add(createARMGlobalMergePass(getTargetLowering()));
return false;
@@ -139,7 +115,6 @@ bool ARMBaseTargetMachine::addPreRegAlloc(PassManagerBase &PM,
PM.add(createARMLoadStoreOptimizationPass(true));
if (OptLevel != CodeGenOpt::None && Subtarget.isCortexA9())
PM.add(createMLxExpansionPass());
-
return true;
}
@@ -150,7 +125,7 @@ bool ARMBaseTargetMachine::addPreSched2(PassManagerBase &PM,
if (!Subtarget.isThumb1Only())
PM.add(createARMLoadStoreOptimizationPass());
if (Subtarget.hasNEON())
- PM.add(createNEONMoveFixPass());
+ PM.add(createExecutionDependencyFixPass(&ARM::DPRRegClass));
}
// Expand some pseudo instructions into multiple instructions to allow
@@ -179,10 +154,6 @@ bool ARMBaseTargetMachine::addPreEmitPass(PassManagerBase &PM,
bool ARMBaseTargetMachine::addCodeEmitter(PassManagerBase &PM,
CodeGenOpt::Level OptLevel,
JITCodeEmitter &JCE) {
- // FIXME: Move this to TargetJITInfo!
- if (DefRelocModel == Reloc::Default)
- setRelocationModel(Reloc::Static);
-
// Machine code emitter pass for ARM.
PM.add(createARMJITCodeEmitterPass(*this, JCE));
return false;
diff --git a/lib/Target/ARM/ARMTargetMachine.h b/lib/Target/ARM/ARMTargetMachine.h
index bc3d46a..c8c601c 100644
--- a/lib/Target/ARM/ARMTargetMachine.h
+++ b/lib/Target/ARM/ARMTargetMachine.h
@@ -37,11 +37,11 @@ protected:
private:
ARMJITInfo JITInfo;
InstrItineraryData InstrItins;
- Reloc::Model DefRelocModel; // Reloc model before it's overridden.
public:
- ARMBaseTargetMachine(const Target &T, const std::string &TT,
- const std::string &CPU, const std::string &FS);
+ ARMBaseTargetMachine(const Target &T, StringRef TT,
+ StringRef CPU, StringRef FS,
+ Reloc::Model RM, CodeModel::Model CM);
virtual ARMJITInfo *getJITInfo() { return &JITInfo; }
virtual const ARMSubtarget *getSubtargetImpl() const { return &Subtarget; }
@@ -69,8 +69,9 @@ class ARMTargetMachine : public ARMBaseTargetMachine {
ARMSelectionDAGInfo TSInfo;
ARMFrameLowering FrameLowering;
public:
- ARMTargetMachine(const Target &T, const std::string &TT,
- const std::string &CPU, const std::string &FS);
+ ARMTargetMachine(const Target &T, StringRef TT,
+ StringRef CPU, StringRef FS,
+ Reloc::Model RM, CodeModel::Model CM);
virtual const ARMRegisterInfo *getRegisterInfo() const {
return &InstrInfo.getRegisterInfo();
@@ -108,8 +109,9 @@ class ThumbTargetMachine : public ARMBaseTargetMachine {
// Either Thumb1FrameLowering or ARMFrameLowering.
OwningPtr<ARMFrameLowering> FrameLowering;
public:
- ThumbTargetMachine(const Target &T, const std::string &TT,
- const std::string &CPU, const std::string &FS);
+ ThumbTargetMachine(const Target &T, StringRef TT,
+ StringRef CPU, StringRef FS,
+ Reloc::Model RM, CodeModel::Model CM);
/// returns either Thumb1RegisterInfo or Thumb2RegisterInfo
virtual const ARMBaseRegisterInfo *getRegisterInfo() const {
diff --git a/lib/Target/ARM/AsmParser/ARMAsmLexer.cpp b/lib/Target/ARM/AsmParser/ARMAsmLexer.cpp
index d9a5fa2..14d35ba 100644
--- a/lib/Target/ARM/AsmParser/ARMAsmLexer.cpp
+++ b/lib/Target/ARM/AsmParser/ARMAsmLexer.cpp
@@ -7,16 +7,15 @@
//
//===----------------------------------------------------------------------===//
-#include "ARM.h"
-#include "ARMTargetMachine.h"
+#include "MCTargetDesc/ARMBaseInfo.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCParser/MCAsmLexer.h"
#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/MC/MCTargetAsmLexer.h"
-#include "llvm/Target/TargetAsmLexer.h"
-#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetRegistry.h"
+#include "llvm/Support/TargetRegistry.h"
#include "llvm/ADT/OwningPtr.h"
#include "llvm/ADT/SmallVector.h"
@@ -30,7 +29,7 @@ using namespace llvm;
namespace {
-class ARMBaseAsmLexer : public TargetAsmLexer {
+class ARMBaseAsmLexer : public MCTargetAsmLexer {
const MCAsmInfo &AsmInfo;
const AsmToken &lexDefinite() {
@@ -43,7 +42,7 @@ protected:
rmap_ty RegisterMap;
- void InitRegisterMap(const TargetRegisterInfo *info) {
+ void InitRegisterMap(const MCRegisterInfo *info) {
unsigned numRegs = info->getNumRegs();
for (unsigned i = 0; i < numRegs; ++i) {
@@ -77,33 +76,23 @@ protected:
}
public:
ARMBaseAsmLexer(const Target &T, const MCAsmInfo &MAI)
- : TargetAsmLexer(T), AsmInfo(MAI) {
+ : MCTargetAsmLexer(T), AsmInfo(MAI) {
}
};
class ARMAsmLexer : public ARMBaseAsmLexer {
public:
- ARMAsmLexer(const Target &T, const MCAsmInfo &MAI)
+ ARMAsmLexer(const Target &T, const MCRegisterInfo &MRI, const MCAsmInfo &MAI)
: ARMBaseAsmLexer(T, MAI) {
- std::string tripleString("arm-unknown-unknown");
- std::string featureString;
- std::string CPU;
- OwningPtr<const TargetMachine>
- targetMachine(T.createTargetMachine(tripleString, CPU, featureString));
- InitRegisterMap(targetMachine->getRegisterInfo());
+ InitRegisterMap(&MRI);
}
};
class ThumbAsmLexer : public ARMBaseAsmLexer {
public:
- ThumbAsmLexer(const Target &T, const MCAsmInfo &MAI)
+ ThumbAsmLexer(const Target &T, const MCRegisterInfo &MRI,const MCAsmInfo &MAI)
: ARMBaseAsmLexer(T, MAI) {
- std::string tripleString("thumb-unknown-unknown");
- std::string featureString;
- std::string CPU;
- OwningPtr<const TargetMachine>
- targetMachine(T.createTargetMachine(tripleString, CPU, featureString));
- InitRegisterMap(targetMachine->getRegisterInfo());
+ InitRegisterMap(&MRI);
}
};
@@ -149,6 +138,6 @@ AsmToken ARMBaseAsmLexer::LexTokenUAL() {
}
extern "C" void LLVMInitializeARMAsmLexer() {
- RegisterAsmLexer<ARMAsmLexer> X(TheARMTarget);
- RegisterAsmLexer<ThumbAsmLexer> Y(TheThumbTarget);
+ RegisterMCAsmLexer<ARMAsmLexer> X(TheARMTarget);
+ RegisterMCAsmLexer<ThumbAsmLexer> Y(TheThumbTarget);
}
diff --git a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
index a474127..24f15b4 100644
--- a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
+++ b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
@@ -7,11 +7,9 @@
//
//===----------------------------------------------------------------------===//
-#include "ARM.h"
-#include "ARMAddressingModes.h"
-#include "ARMMCExpr.h"
-#include "ARMBaseRegisterInfo.h"
-#include "ARMSubtarget.h"
+#include "MCTargetDesc/ARMBaseInfo.h"
+#include "MCTargetDesc/ARMAddressingModes.h"
+#include "MCTargetDesc/ARMMCExpr.h"
#include "llvm/MC/MCParser/MCAsmLexer.h"
#include "llvm/MC/MCParser/MCAsmParser.h"
#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
@@ -20,12 +18,17 @@
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCInstrDesc.h"
+#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSubtargetInfo.h"
-#include "llvm/Target/TargetRegistry.h"
-#include "llvm/Target/TargetAsmParser.h"
+#include "llvm/MC/MCTargetAsmParser.h"
+#include "llvm/Support/MathExtras.h"
#include "llvm/Support/SourceMgr.h"
+#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/OwningPtr.h"
+#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringSwitch.h"
@@ -37,49 +40,65 @@ namespace {
class ARMOperand;
-class ARMAsmParser : public TargetAsmParser {
+class ARMAsmParser : public MCTargetAsmParser {
MCSubtargetInfo &STI;
MCAsmParser &Parser;
+ struct {
+ ARMCC::CondCodes Cond; // Condition for IT block.
+ unsigned Mask:4; // Condition mask for instructions.
+ // Starting at first 1 (from lsb).
+ // '1' condition as indicated in IT.
+ // '0' inverse of condition (else).
+ // Count of instructions in IT block is
+ // 4 - trailingzeroes(mask)
+
+ bool FirstCond; // Explicit flag for when we're parsing the
+ // first instruction in the IT block. It's
+ // implied in the mask, so needs special
+ // handling.
+
+ unsigned CurPosition; // Current position in parsing of IT
+ // block. In range [0,3]. Initialized
+ // according to count of instructions in block.
+ // ~0U if no active IT block.
+ } ITState;
+ bool inITBlock() { return ITState.CurPosition != ~0U; }
+ void forwardITPosition() {
+ if (!inITBlock()) return;
+ // Move to the next instruction in the IT block, if there is one. If not,
+ // mark the block as done.
+ unsigned TZ = CountTrailingZeros_32(ITState.Mask);
+ if (++ITState.CurPosition == 5 - TZ)
+ ITState.CurPosition = ~0U; // Done with the IT block after this.
+ }
+
+
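The length rule buried in the comments above ("4 - trailingzeroes(mask)") in standalone form, as a sketch rather than the parser's own code:

```cpp
// The 4-bit IT mask carries a terminating '1' after the condition bits, so
// the block covers 4 - trailing_zeros(mask) instructions.
static unsigned itBlockLength(unsigned Mask4) {
  unsigned TZ = 0;
  while (TZ < 4 && ((Mask4 >> TZ) & 1) == 0)
    ++TZ;                 // Mask4 is nonzero for any well-formed IT
  return 4 - TZ;
}
// itBlockLength(0x8) == 1 (a plain "it"); a mask with bit 0 set covers 4,
// which is why forwardITPosition retires the block at position 5 - TZ.
```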
MCAsmParser &getParser() const { return Parser; }
MCAsmLexer &getLexer() const { return Parser.getLexer(); }
void Warning(SMLoc L, const Twine &Msg) { Parser.Warning(L, Msg); }
bool Error(SMLoc L, const Twine &Msg) { return Parser.Error(L, Msg); }
- int TryParseRegister();
- virtual bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc);
- bool TryParseRegisterWithWriteBack(SmallVectorImpl<MCParsedAsmOperand*> &);
- int TryParseShiftRegister(SmallVectorImpl<MCParsedAsmOperand*> &);
- bool ParseRegisterList(SmallVectorImpl<MCParsedAsmOperand*> &);
- bool ParseMemory(SmallVectorImpl<MCParsedAsmOperand*> &,
- ARMII::AddrMode AddrMode);
- bool ParseOperand(SmallVectorImpl<MCParsedAsmOperand*> &, StringRef Mnemonic);
- bool ParsePrefix(ARMMCExpr::VariantKind &RefKind);
- const MCExpr *ApplyPrefixToExpr(const MCExpr *E,
- MCSymbolRefExpr::VariantKind Variant);
-
-
- bool ParseMemoryOffsetReg(bool &Negative,
- bool &OffsetRegShifted,
- enum ARM_AM::ShiftOpc &ShiftType,
- const MCExpr *&ShiftAmount,
- const MCExpr *&Offset,
- bool &OffsetIsReg,
- int &OffsetRegNum,
- SMLoc &E);
- bool ParseShift(enum ARM_AM::ShiftOpc &St,
- const MCExpr *&ShiftAmount, SMLoc &E);
- bool ParseDirectiveWord(unsigned Size, SMLoc L);
- bool ParseDirectiveThumb(SMLoc L);
- bool ParseDirectiveThumbFunc(SMLoc L);
- bool ParseDirectiveCode(SMLoc L);
- bool ParseDirectiveSyntax(SMLoc L);
-
- bool MatchAndEmitInstruction(SMLoc IDLoc,
- SmallVectorImpl<MCParsedAsmOperand*> &Operands,
- MCStreamer &Out);
- void GetMnemonicAcceptInfo(StringRef Mnemonic, bool &CanAcceptCarrySet,
+ int tryParseRegister();
+ bool tryParseRegisterWithWriteBack(SmallVectorImpl<MCParsedAsmOperand*> &);
+ int tryParseShiftRegister(SmallVectorImpl<MCParsedAsmOperand*> &);
+ bool parseRegisterList(SmallVectorImpl<MCParsedAsmOperand*> &);
+ bool parseMemory(SmallVectorImpl<MCParsedAsmOperand*> &);
+ bool parseOperand(SmallVectorImpl<MCParsedAsmOperand*> &, StringRef Mnemonic);
+ bool parsePrefix(ARMMCExpr::VariantKind &RefKind);
+ bool parseMemRegOffsetShift(ARM_AM::ShiftOpc &ShiftType,
+ unsigned &ShiftAmount);
+ bool parseDirectiveWord(unsigned Size, SMLoc L);
+ bool parseDirectiveThumb(SMLoc L);
+ bool parseDirectiveThumbFunc(SMLoc L);
+ bool parseDirectiveCode(SMLoc L);
+ bool parseDirectiveSyntax(SMLoc L);
+
+ StringRef splitMnemonic(StringRef Mnemonic, unsigned &PredicationCode,
+ bool &CarrySetting, unsigned &ProcessorIMod,
+ StringRef &ITMask);
+ void getMnemonicAcceptInfo(StringRef Mnemonic, bool &CanAcceptCarrySet,
bool &CanAcceptPredicationCode);
bool isThumb() const {
@@ -89,10 +108,22 @@ class ARMAsmParser : public TargetAsmParser {
bool isThumbOne() const {
return isThumb() && (STI.getFeatureBits() & ARM::FeatureThumb2) == 0;
}
+ bool isThumbTwo() const {
+ return isThumb() && (STI.getFeatureBits() & ARM::FeatureThumb2);
+ }
+ bool hasV6Ops() const {
+ return STI.getFeatureBits() & ARM::HasV6Ops;
+ }
+ bool hasV7Ops() const {
+ return STI.getFeatureBits() & ARM::HasV7Ops;
+ }
void SwitchMode() {
unsigned FB = ComputeAvailableFeatures(STI.ToggleFeature(ARM::ModeThumb));
setAvailableFeatures(FB);
}
+ bool isMClass() const {
+ return STI.getFeatureBits() & ARM::FeatureMClass;
+ }
/// @name Auto-generated Match Functions
/// {
@@ -102,43 +133,108 @@ class ARMAsmParser : public TargetAsmParser {
/// }
- OperandMatchResultTy tryParseCoprocNumOperand(
- SmallVectorImpl<MCParsedAsmOperand*>&);
- OperandMatchResultTy tryParseCoprocRegOperand(
+ OperandMatchResultTy parseITCondCode(SmallVectorImpl<MCParsedAsmOperand*>&);
+ OperandMatchResultTy parseCoprocNumOperand(
SmallVectorImpl<MCParsedAsmOperand*>&);
- OperandMatchResultTy tryParseMemBarrierOptOperand(
+ OperandMatchResultTy parseCoprocRegOperand(
SmallVectorImpl<MCParsedAsmOperand*>&);
- OperandMatchResultTy tryParseProcIFlagsOperand(
+ OperandMatchResultTy parseCoprocOptionOperand(
SmallVectorImpl<MCParsedAsmOperand*>&);
- OperandMatchResultTy tryParseMSRMaskOperand(
+ OperandMatchResultTy parseMemBarrierOptOperand(
SmallVectorImpl<MCParsedAsmOperand*>&);
- OperandMatchResultTy tryParseMemMode2Operand(
+ OperandMatchResultTy parseProcIFlagsOperand(
SmallVectorImpl<MCParsedAsmOperand*>&);
- OperandMatchResultTy tryParseMemMode3Operand(
+ OperandMatchResultTy parseMSRMaskOperand(
SmallVectorImpl<MCParsedAsmOperand*>&);
+ OperandMatchResultTy parsePKHImm(SmallVectorImpl<MCParsedAsmOperand*> &O,
+ StringRef Op, int Low, int High);
+ OperandMatchResultTy parsePKHLSLImm(SmallVectorImpl<MCParsedAsmOperand*> &O) {
+ return parsePKHImm(O, "lsl", 0, 31);
+ }
+ OperandMatchResultTy parsePKHASRImm(SmallVectorImpl<MCParsedAsmOperand*> &O) {
+ return parsePKHImm(O, "asr", 1, 32);
+ }
+ OperandMatchResultTy parseSetEndImm(SmallVectorImpl<MCParsedAsmOperand*>&);
+ OperandMatchResultTy parseShifterImm(SmallVectorImpl<MCParsedAsmOperand*>&);
+ OperandMatchResultTy parseRotImm(SmallVectorImpl<MCParsedAsmOperand*>&);
+ OperandMatchResultTy parseBitfield(SmallVectorImpl<MCParsedAsmOperand*>&);
+ OperandMatchResultTy parsePostIdxReg(SmallVectorImpl<MCParsedAsmOperand*>&);
+ OperandMatchResultTy parseAM3Offset(SmallVectorImpl<MCParsedAsmOperand*>&);
+ OperandMatchResultTy parseFPImm(SmallVectorImpl<MCParsedAsmOperand*>&);
// Asm Match Converter Methods
- bool CvtLdWriteBackRegAddrMode2(MCInst &Inst, unsigned Opcode,
+ bool cvtT2LdrdPre(MCInst &Inst, unsigned Opcode,
+ const SmallVectorImpl<MCParsedAsmOperand*> &);
+ bool cvtT2StrdPre(MCInst &Inst, unsigned Opcode,
+ const SmallVectorImpl<MCParsedAsmOperand*> &);
+ bool cvtLdWriteBackRegT2AddrModeImm8(MCInst &Inst, unsigned Opcode,
+ const SmallVectorImpl<MCParsedAsmOperand*> &);
+ bool cvtStWriteBackRegT2AddrModeImm8(MCInst &Inst, unsigned Opcode,
+ const SmallVectorImpl<MCParsedAsmOperand*> &);
+ bool cvtLdWriteBackRegAddrMode2(MCInst &Inst, unsigned Opcode,
const SmallVectorImpl<MCParsedAsmOperand*> &);
- bool CvtStWriteBackRegAddrMode2(MCInst &Inst, unsigned Opcode,
+ bool cvtLdWriteBackRegAddrModeImm12(MCInst &Inst, unsigned Opcode,
const SmallVectorImpl<MCParsedAsmOperand*> &);
- bool CvtLdWriteBackRegAddrMode3(MCInst &Inst, unsigned Opcode,
+ bool cvtStWriteBackRegAddrModeImm12(MCInst &Inst, unsigned Opcode,
const SmallVectorImpl<MCParsedAsmOperand*> &);
- bool CvtStWriteBackRegAddrMode3(MCInst &Inst, unsigned Opcode,
+ bool cvtStWriteBackRegAddrMode2(MCInst &Inst, unsigned Opcode,
const SmallVectorImpl<MCParsedAsmOperand*> &);
+ bool cvtStWriteBackRegAddrMode3(MCInst &Inst, unsigned Opcode,
+ const SmallVectorImpl<MCParsedAsmOperand*> &);
+ bool cvtLdExtTWriteBackImm(MCInst &Inst, unsigned Opcode,
+ const SmallVectorImpl<MCParsedAsmOperand*> &);
+ bool cvtLdExtTWriteBackReg(MCInst &Inst, unsigned Opcode,
+ const SmallVectorImpl<MCParsedAsmOperand*> &);
+ bool cvtStExtTWriteBackImm(MCInst &Inst, unsigned Opcode,
+ const SmallVectorImpl<MCParsedAsmOperand*> &);
+ bool cvtStExtTWriteBackReg(MCInst &Inst, unsigned Opcode,
+ const SmallVectorImpl<MCParsedAsmOperand*> &);
+ bool cvtLdrdPre(MCInst &Inst, unsigned Opcode,
+ const SmallVectorImpl<MCParsedAsmOperand*> &);
+ bool cvtStrdPre(MCInst &Inst, unsigned Opcode,
+ const SmallVectorImpl<MCParsedAsmOperand*> &);
+ bool cvtLdWriteBackRegAddrMode3(MCInst &Inst, unsigned Opcode,
+ const SmallVectorImpl<MCParsedAsmOperand*> &);
+ bool cvtThumbMultiply(MCInst &Inst, unsigned Opcode,
+ const SmallVectorImpl<MCParsedAsmOperand*> &);
+
+ bool validateInstruction(MCInst &Inst,
+ const SmallVectorImpl<MCParsedAsmOperand*> &Ops);
+ void processInstruction(MCInst &Inst,
+ const SmallVectorImpl<MCParsedAsmOperand*> &Ops);
+ bool shouldOmitCCOutOperand(StringRef Mnemonic,
+ SmallVectorImpl<MCParsedAsmOperand*> &Operands);
public:
+ enum ARMMatchResultTy {
+ Match_RequiresITBlock = FIRST_TARGET_MATCH_RESULT_TY,
+ Match_RequiresNotITBlock,
+ Match_RequiresV6,
+ Match_RequiresThumb2
+ };
+
ARMAsmParser(MCSubtargetInfo &_STI, MCAsmParser &_Parser)
- : TargetAsmParser(), STI(_STI), Parser(_Parser) {
+ : MCTargetAsmParser(), STI(_STI), Parser(_Parser) {
MCAsmParserExtension::Initialize(_Parser);
// Initialize the set of available features.
setAvailableFeatures(ComputeAvailableFeatures(STI.getFeatureBits()));
+
+ // Not in an ITBlock to start with.
+ ITState.CurPosition = ~0U;
}
- virtual bool ParseInstruction(StringRef Name, SMLoc NameLoc,
- SmallVectorImpl<MCParsedAsmOperand*> &Operands);
- virtual bool ParseDirective(AsmToken DirectiveID);
+ // Implementation of the MCTargetAsmParser interface:
+ bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc);
+ bool ParseInstruction(StringRef Name, SMLoc NameLoc,
+ SmallVectorImpl<MCParsedAsmOperand*> &Operands);
+ bool ParseDirective(AsmToken DirectiveID);
+
+ unsigned checkTargetMatchPredicate(MCInst &Inst);
+
+ bool MatchAndEmitInstruction(SMLoc IDLoc,
+ SmallVectorImpl<MCParsedAsmOperand*> &Operands,
+ MCStreamer &Out);
};
} // end anonymous namespace
@@ -148,22 +244,30 @@ namespace {
/// instruction.
class ARMOperand : public MCParsedAsmOperand {
enum KindTy {
- CondCode,
- CCOut,
- CoprocNum,
- CoprocReg,
- Immediate,
- MemBarrierOpt,
- Memory,
- MSRMask,
- ProcIFlags,
- Register,
- RegisterList,
- DPRRegisterList,
- SPRRegisterList,
- ShiftedRegister,
- Shifter,
- Token
+ k_CondCode,
+ k_CCOut,
+ k_ITCondMask,
+ k_CoprocNum,
+ k_CoprocReg,
+ k_CoprocOption,
+ k_Immediate,
+ k_FPImmediate,
+ k_MemBarrierOpt,
+ k_Memory,
+ k_PostIndexRegister,
+ k_MSRMask,
+ k_ProcIFlags,
+ k_VectorIndex,
+ k_Register,
+ k_RegisterList,
+ k_DPRRegisterList,
+ k_SPRRegisterList,
+ k_ShiftedRegister,
+ k_ShiftedImmediate,
+ k_ShifterImmediate,
+ k_RotateImmediate,
+ k_BitfieldDescriptor,
+ k_Token
} Kind;
SMLoc StartLoc, EndLoc;
@@ -175,12 +279,20 @@ class ARMOperand : public MCParsedAsmOperand {
} CC;
struct {
- ARM_MB::MemBOpt Val;
- } MBOpt;
+ unsigned Val;
+ } Cop;
struct {
unsigned Val;
- } Cop;
+ } CoprocOption;
+
+ struct {
+ unsigned Mask:4;
+ } ITMask;
+
+ struct {
+ ARM_MB::MemBOpt Val;
+ } MBOpt;
struct {
ARM_PROC::IFlags Val;
@@ -200,37 +312,60 @@ class ARMOperand : public MCParsedAsmOperand {
} Reg;
struct {
+ unsigned Val;
+ } VectorIndex;
+
+ struct {
const MCExpr *Val;
} Imm;
+ struct {
+ unsigned Val; // encoded 8-bit representation
+ } FPImm;
+
/// Combined record for all forms of ARM address expressions.
struct {
- ARMII::AddrMode AddrMode;
unsigned BaseRegNum;
- union {
- unsigned RegNum; ///< Offset register num, when OffsetIsReg.
- const MCExpr *Value; ///< Offset value, when !OffsetIsReg.
- } Offset;
- const MCExpr *ShiftAmount; // used when OffsetRegShifted is true
- enum ARM_AM::ShiftOpc ShiftType; // used when OffsetRegShifted is true
- unsigned OffsetRegShifted : 1; // only used when OffsetIsReg is true
- unsigned Preindexed : 1;
- unsigned Postindexed : 1;
- unsigned OffsetIsReg : 1;
- unsigned Negative : 1; // only used when OffsetIsReg is true
- unsigned Writeback : 1;
- } Mem;
+ // Offset is in OffsetReg or OffsetImm. If both are zero, no offset
+ // was specified.
+ const MCConstantExpr *OffsetImm; // Offset immediate value
+ unsigned OffsetRegNum; // Offset register num, when OffsetImm == NULL
+ ARM_AM::ShiftOpc ShiftType; // Shift type for OffsetReg
+ unsigned ShiftImm; // shift for OffsetReg.
+ unsigned Alignment; // 0 = no alignment specified
+ // n = alignment in bytes (8, 16, or 32)
+ unsigned isNegative : 1; // Negated OffsetReg? (~'U' bit)
+ } Memory;
struct {
+ unsigned RegNum;
+ bool isAdd;
ARM_AM::ShiftOpc ShiftTy;
+ unsigned ShiftImm;
+ } PostIdxReg;
+
+ struct {
+ bool isASR;
unsigned Imm;
- } Shift;
+ } ShifterImm;
struct {
ARM_AM::ShiftOpc ShiftTy;
unsigned SrcReg;
unsigned ShiftReg;
unsigned ShiftImm;
- } ShiftedReg;
+ } RegShiftedReg;
+ struct {
+ ARM_AM::ShiftOpc ShiftTy;
+ unsigned SrcReg;
+ unsigned ShiftImm;
+ } RegShiftedImm;
+ struct {
+ unsigned Imm;
+ } RotImm;
+ struct {
+ unsigned LSB;
+ unsigned Width;
+ } Bitfield;
};
ARMOperand(KindTy K) : MCParsedAsmOperand(), Kind(K) {}
@@ -240,45 +375,69 @@ public:
StartLoc = o.StartLoc;
EndLoc = o.EndLoc;
switch (Kind) {
- case CondCode:
+ case k_CondCode:
CC = o.CC;
break;
- case Token:
+ case k_ITCondMask:
+ ITMask = o.ITMask;
+ break;
+ case k_Token:
Tok = o.Tok;
break;
- case CCOut:
- case Register:
+ case k_CCOut:
+ case k_Register:
Reg = o.Reg;
break;
- case RegisterList:
- case DPRRegisterList:
- case SPRRegisterList:
+ case k_RegisterList:
+ case k_DPRRegisterList:
+ case k_SPRRegisterList:
Registers = o.Registers;
break;
- case CoprocNum:
- case CoprocReg:
+ case k_CoprocNum:
+ case k_CoprocReg:
Cop = o.Cop;
break;
- case Immediate:
+ case k_CoprocOption:
+ CoprocOption = o.CoprocOption;
+ break;
+ case k_Immediate:
Imm = o.Imm;
break;
- case MemBarrierOpt:
+ case k_FPImmediate:
+ FPImm = o.FPImm;
+ break;
+ case k_MemBarrierOpt:
MBOpt = o.MBOpt;
break;
- case Memory:
- Mem = o.Mem;
+ case k_Memory:
+ Memory = o.Memory;
+ break;
+ case k_PostIndexRegister:
+ PostIdxReg = o.PostIdxReg;
break;
- case MSRMask:
+ case k_MSRMask:
MMask = o.MMask;
break;
- case ProcIFlags:
+ case k_ProcIFlags:
IFlags = o.IFlags;
break;
- case Shifter:
- Shift = o.Shift;
+ case k_ShifterImmediate:
+ ShifterImm = o.ShifterImm;
+ break;
+ case k_ShiftedRegister:
+ RegShiftedReg = o.RegShiftedReg;
+ break;
+ case k_ShiftedImmediate:
+ RegShiftedImm = o.RegShiftedImm;
break;
- case ShiftedRegister:
- ShiftedReg = o.ShiftedReg;
+ case k_RotateImmediate:
+ RotImm = o.RotImm;
+ break;
+ case k_BitfieldDescriptor:
+ Bitfield = o.Bitfield;
+ break;
+ case k_VectorIndex:
+ VectorIndex = o.VectorIndex;
break;
}
}
@@ -289,94 +448,96 @@ public:
SMLoc getEndLoc() const { return EndLoc; }
ARMCC::CondCodes getCondCode() const {
- assert(Kind == CondCode && "Invalid access!");
+ assert(Kind == k_CondCode && "Invalid access!");
return CC.Val;
}
unsigned getCoproc() const {
- assert((Kind == CoprocNum || Kind == CoprocReg) && "Invalid access!");
+ assert((Kind == k_CoprocNum || Kind == k_CoprocReg) && "Invalid access!");
return Cop.Val;
}
StringRef getToken() const {
- assert(Kind == Token && "Invalid access!");
+ assert(Kind == k_Token && "Invalid access!");
return StringRef(Tok.Data, Tok.Length);
}
unsigned getReg() const {
- assert((Kind == Register || Kind == CCOut) && "Invalid access!");
+ assert((Kind == k_Register || Kind == k_CCOut) && "Invalid access!");
return Reg.RegNum;
}
const SmallVectorImpl<unsigned> &getRegList() const {
- assert((Kind == RegisterList || Kind == DPRRegisterList ||
- Kind == SPRRegisterList) && "Invalid access!");
+ assert((Kind == k_RegisterList || Kind == k_DPRRegisterList ||
+ Kind == k_SPRRegisterList) && "Invalid access!");
return Registers;
}
const MCExpr *getImm() const {
- assert(Kind == Immediate && "Invalid access!");
+ assert(Kind == k_Immediate && "Invalid access!");
return Imm.Val;
}
+ unsigned getFPImm() const {
+ assert(Kind == k_FPImmediate && "Invalid access!");
+ return FPImm.Val;
+ }
+
+ unsigned getVectorIndex() const {
+ assert(Kind == k_VectorIndex && "Invalid access!");
+ return VectorIndex.Val;
+ }
+
ARM_MB::MemBOpt getMemBarrierOpt() const {
- assert(Kind == MemBarrierOpt && "Invalid access!");
+ assert(Kind == k_MemBarrierOpt && "Invalid access!");
return MBOpt.Val;
}
ARM_PROC::IFlags getProcIFlags() const {
- assert(Kind == ProcIFlags && "Invalid access!");
+ assert(Kind == k_ProcIFlags && "Invalid access!");
return IFlags.Val;
}
unsigned getMSRMask() const {
- assert(Kind == MSRMask && "Invalid access!");
+ assert(Kind == k_MSRMask && "Invalid access!");
return MMask.Val;
}
- /// @name Memory Operand Accessors
- /// @{
- ARMII::AddrMode getMemAddrMode() const {
- return Mem.AddrMode;
- }
- unsigned getMemBaseRegNum() const {
- return Mem.BaseRegNum;
- }
- unsigned getMemOffsetRegNum() const {
- assert(Mem.OffsetIsReg && "Invalid access!");
- return Mem.Offset.RegNum;
- }
- const MCExpr *getMemOffset() const {
- assert(!Mem.OffsetIsReg && "Invalid access!");
- return Mem.Offset.Value;
- }
- unsigned getMemOffsetRegShifted() const {
- assert(Mem.OffsetIsReg && "Invalid access!");
- return Mem.OffsetRegShifted;
+ bool isCoprocNum() const { return Kind == k_CoprocNum; }
+ bool isCoprocReg() const { return Kind == k_CoprocReg; }
+ bool isCoprocOption() const { return Kind == k_CoprocOption; }
+ bool isCondCode() const { return Kind == k_CondCode; }
+ bool isCCOut() const { return Kind == k_CCOut; }
+ bool isITMask() const { return Kind == k_ITCondMask; }
+ bool isITCondCode() const { return Kind == k_CondCode; }
+ bool isImm() const { return Kind == k_Immediate; }
+ bool isFPImm() const { return Kind == k_FPImmediate; }
+ bool isImm8s4() const {
+ if (Kind != k_Immediate)
+ return false;
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ if (!CE) return false;
+ int64_t Value = CE->getValue();
+ return ((Value & 3) == 0) && Value >= -1020 && Value <= 1020;
}
- const MCExpr *getMemShiftAmount() const {
- assert(Mem.OffsetIsReg && Mem.OffsetRegShifted && "Invalid access!");
- return Mem.ShiftAmount;
+ bool isImm0_1020s4() const {
+ if (Kind != k_Immediate)
+ return false;
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ if (!CE) return false;
+ int64_t Value = CE->getValue();
+ return ((Value & 3) == 0) && Value >= 0 && Value <= 1020;
}
- enum ARM_AM::ShiftOpc getMemShiftType() const {
- assert(Mem.OffsetIsReg && Mem.OffsetRegShifted && "Invalid access!");
- return Mem.ShiftType;
+ bool isImm0_508s4() const {
+ if (Kind != k_Immediate)
+ return false;
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ if (!CE) return false;
+ int64_t Value = CE->getValue();
+ return ((Value & 3) == 0) && Value >= 0 && Value <= 508;
}
- bool getMemPreindexed() const { return Mem.Preindexed; }
- bool getMemPostindexed() const { return Mem.Postindexed; }
- bool getMemOffsetIsReg() const { return Mem.OffsetIsReg; }
- bool getMemNegative() const { return Mem.Negative; }
- bool getMemWriteback() const { return Mem.Writeback; }
-
- /// @}
-
- bool isCoprocNum() const { return Kind == CoprocNum; }
- bool isCoprocReg() const { return Kind == CoprocReg; }
- bool isCondCode() const { return Kind == CondCode; }
- bool isCCOut() const { return Kind == CCOut; }
- bool isImm() const { return Kind == Immediate; }
bool isImm0_255() const {
- if (Kind != Immediate)
+ if (Kind != k_Immediate)
return false;
const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
if (!CE) return false;
@@ -384,7 +545,7 @@ public:
return Value >= 0 && Value < 256;
}
bool isImm0_7() const {
- if (Kind != Immediate)
+ if (Kind != k_Immediate)
return false;
const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
if (!CE) return false;
@@ -392,130 +553,365 @@ public:
return Value >= 0 && Value < 8;
}
bool isImm0_15() const {
- if (Kind != Immediate)
+ if (Kind != k_Immediate)
return false;
const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
if (!CE) return false;
int64_t Value = CE->getValue();
return Value >= 0 && Value < 16;
}
+ bool isImm0_31() const {
+ if (Kind != k_Immediate)
+ return false;
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ if (!CE) return false;
+ int64_t Value = CE->getValue();
+ return Value >= 0 && Value < 32;
+ }
+ bool isImm1_16() const {
+ if (Kind != k_Immediate)
+ return false;
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ if (!CE) return false;
+ int64_t Value = CE->getValue();
+ return Value > 0 && Value < 17;
+ }
+ bool isImm1_32() const {
+ if (Kind != k_Immediate)
+ return false;
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ if (!CE) return false;
+ int64_t Value = CE->getValue();
+ return Value > 0 && Value < 33;
+ }
bool isImm0_65535() const {
- if (Kind != Immediate)
+ if (Kind != k_Immediate)
return false;
const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
if (!CE) return false;
int64_t Value = CE->getValue();
return Value >= 0 && Value < 65536;
}
- bool isT2SOImm() const {
- if (Kind != Immediate)
+ bool isImm0_65535Expr() const {
+ if (Kind != k_Immediate)
return false;
const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
- if (!CE) return false;
+ // If it's not a constant expression, it'll generate a fixup and be
+ // handled later.
+ if (!CE) return true;
int64_t Value = CE->getValue();
- return ARM_AM::getT2SOImmVal(Value) != -1;
+ return Value >= 0 && Value < 65536;
}
- bool isReg() const { return Kind == Register; }
- bool isRegList() const { return Kind == RegisterList; }
- bool isDPRRegList() const { return Kind == DPRRegisterList; }
- bool isSPRRegList() const { return Kind == SPRRegisterList; }
- bool isToken() const { return Kind == Token; }
- bool isMemBarrierOpt() const { return Kind == MemBarrierOpt; }
- bool isMemory() const { return Kind == Memory; }
- bool isShifter() const { return Kind == Shifter; }
- bool isShiftedReg() const { return Kind == ShiftedRegister; }
- bool isMemMode2() const {
- if (getMemAddrMode() != ARMII::AddrMode2)
+ bool isImm24bit() const {
+ if (Kind != k_Immediate)
return false;
-
- if (getMemOffsetIsReg())
- return true;
-
- if (getMemNegative() &&
- !(getMemPostindexed() || getMemPreindexed()))
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ if (!CE) return false;
+ int64_t Value = CE->getValue();
+ return Value >= 0 && Value <= 0xffffff;
+ }
+ bool isImmThumbSR() const {
+ if (Kind != k_Immediate)
return false;
-
- const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getMemOffset());
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
if (!CE) return false;
int64_t Value = CE->getValue();
-
- // The offset must be in the range 0-4095 (imm12).
- if (Value > 4095 || Value < -4095)
+ return Value > 0 && Value < 33;
+ }
+ bool isPKHLSLImm() const {
+ if (Kind != k_Immediate)
return false;
-
- return true;
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ if (!CE) return false;
+ int64_t Value = CE->getValue();
+ return Value >= 0 && Value < 32;
}
- bool isMemMode3() const {
- if (getMemAddrMode() != ARMII::AddrMode3)
+ bool isPKHASRImm() const {
+ if (Kind != k_Immediate)
return false;
-
- if (getMemOffsetIsReg()) {
- if (getMemOffsetRegShifted())
- return false; // No shift with offset reg allowed
- return true;
- }
-
- if (getMemNegative() &&
- !(getMemPostindexed() || getMemPreindexed()))
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ if (!CE) return false;
+ int64_t Value = CE->getValue();
+ return Value > 0 && Value <= 32;
+ }
+ bool isARMSOImm() const {
+ if (Kind != k_Immediate)
return false;
-
- const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getMemOffset());
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
if (!CE) return false;
int64_t Value = CE->getValue();
-
- // The offset must be in the range 0-255 (imm8).
- if (Value > 255 || Value < -255)
+ return ARM_AM::getSOImmVal(Value) != -1;
+ }
+ bool isT2SOImm() const {
+ if (Kind != k_Immediate)
return false;
-
- return true;
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ if (!CE) return false;
+ int64_t Value = CE->getValue();
+ return ARM_AM::getT2SOImmVal(Value) != -1;
}
- bool isMemMode5() const {
- if (!isMemory() || getMemOffsetIsReg() || getMemWriteback() ||
- getMemNegative())
+ bool isSetEndImm() const {
+ if (Kind != k_Immediate)
return false;
-
- const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getMemOffset());
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
if (!CE) return false;
-
- // The offset must be a multiple of 4 in the range 0-1020.
int64_t Value = CE->getValue();
- return ((Value & 0x3) == 0 && Value <= 1020 && Value >= -1020);
- }
- bool isMemMode7() const {
- if (!isMemory() ||
- getMemPreindexed() ||
- getMemPostindexed() ||
- getMemOffsetIsReg() ||
- getMemNegative() ||
- getMemWriteback())
+ return Value == 1 || Value == 0;
+ }
+ bool isReg() const { return Kind == k_Register; }
+ bool isRegList() const { return Kind == k_RegisterList; }
+ bool isDPRRegList() const { return Kind == k_DPRRegisterList; }
+ bool isSPRRegList() const { return Kind == k_SPRRegisterList; }
+ bool isToken() const { return Kind == k_Token; }
+ bool isMemBarrierOpt() const { return Kind == k_MemBarrierOpt; }
+ bool isMemory() const { return Kind == k_Memory; }
+ bool isShifterImm() const { return Kind == k_ShifterImmediate; }
+ bool isRegShiftedReg() const { return Kind == k_ShiftedRegister; }
+ bool isRegShiftedImm() const { return Kind == k_ShiftedImmediate; }
+ bool isRotImm() const { return Kind == k_RotateImmediate; }
+ bool isBitfield() const { return Kind == k_BitfieldDescriptor; }
+ bool isPostIdxRegShifted() const { return Kind == k_PostIndexRegister; }
+ bool isPostIdxReg() const {
+ return Kind == k_PostIndexRegister && PostIdxReg.ShiftTy == ARM_AM::no_shift;
+ }
+ bool isMemNoOffset(bool alignOK = false) const {
+ if (!isMemory())
return false;
-
- const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getMemOffset());
+ // No offset of any kind.
+ return Memory.OffsetRegNum == 0 && Memory.OffsetImm == 0 &&
+ (alignOK || Memory.Alignment == 0);
+ }
+ bool isAlignedMemory() const {
+ return isMemNoOffset(true);
+ }
+ bool isAddrMode2() const {
+ if (!isMemory() || Memory.Alignment != 0) return false;
+ // Check for register offset.
+ if (Memory.OffsetRegNum) return true;
+ // Immediate offset in range [-4095, 4095].
+ if (!Memory.OffsetImm) return true;
+ int64_t Val = Memory.OffsetImm->getValue();
+ return Val > -4096 && Val < 4096;
+ }
+ bool isAM2OffsetImm() const {
+ if (Kind != k_Immediate)
+ return false;
+ // Immediate offset in range [-4095, 4095].
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
if (!CE) return false;
-
- if (CE->getValue())
+ int64_t Val = CE->getValue();
+ return Val > -4096 && Val < 4096;
+ }
+ bool isAddrMode3() const {
+ if (!isMemory() || Memory.Alignment != 0) return false;
+ // No shifts are legal for AM3.
+ if (Memory.ShiftType != ARM_AM::no_shift) return false;
+ // Check for register offset.
+ if (Memory.OffsetRegNum) return true;
+ // Immediate offset in range [-255, 255].
+ if (!Memory.OffsetImm) return true;
+ int64_t Val = Memory.OffsetImm->getValue();
+ return Val > -256 && Val < 256;
+ }
+ bool isAM3Offset() const {
+ if (Kind != k_Immediate && Kind != k_PostIndexRegister)
+ return false;
+ if (Kind == k_PostIndexRegister)
+ return PostIdxReg.ShiftTy == ARM_AM::no_shift;
+ // Immediate offset in range [-255, 255].
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ if (!CE) return false;
+ int64_t Val = CE->getValue();
+ // Special case: #-0 is represented as INT32_MIN.
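+ // (#-0 and #0 encode differently via the U add/sub bit, so the parser
+ // needs a sentinel that cannot collide with a real offset value.)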
+ return (Val > -256 && Val < 256) || Val == INT32_MIN;
+ }
+ bool isAddrMode5() const {
+ if (!isMemory() || Memory.Alignment != 0) return false;
+ // Check for register offset.
+ if (Memory.OffsetRegNum) return false;
+ // Immediate offset in range [-1020, 1020] and a multiple of 4.
+ if (!Memory.OffsetImm) return true;
+ int64_t Val = Memory.OffsetImm->getValue();
+ return (Val >= -1020 && Val <= 1020 && ((Val & 3) == 0)) ||
+ Val == INT32_MIN;
+ }
+ bool isMemTBB() const {
+ if (!isMemory() || !Memory.OffsetRegNum || Memory.isNegative ||
+ Memory.ShiftType != ARM_AM::no_shift || Memory.Alignment != 0)
+ return false;
+ return true;
+ }
+ bool isMemTBH() const {
+ if (!isMemory() || !Memory.OffsetRegNum || Memory.isNegative ||
+ Memory.ShiftType != ARM_AM::lsl || Memory.ShiftImm != 1 ||
+ Memory.Alignment != 0)
+ return false;
+ return true;
+ }
+ bool isMemRegOffset() const {
+ if (!isMemory() || !Memory.OffsetRegNum || Memory.Alignment != 0)
return false;
-
return true;
}
- bool isMemModeRegThumb() const {
- if (!isMemory() || !getMemOffsetIsReg() || getMemWriteback())
+ bool isT2MemRegOffset() const {
+ if (!isMemory() || !Memory.OffsetRegNum || Memory.isNegative ||
+ Memory.Alignment != 0)
+ return false;
+ // Only lsl #{0, 1, 2, 3} allowed.
+ if (Memory.ShiftType == ARM_AM::no_shift)
+ return true;
+ if (Memory.ShiftType != ARM_AM::lsl || Memory.ShiftImm > 3)
return false;
return true;
}
- bool isMemModeImmThumb() const {
- if (!isMemory() || getMemOffsetIsReg() || getMemWriteback())
+ bool isMemThumbRR() const {
+ // Thumb reg+reg addressing is simple. Just two registers, a base and
+ // an offset. No shifts, negations or any other complicating factors.
+ if (!isMemory() || !Memory.OffsetRegNum || Memory.isNegative ||
+ Memory.ShiftType != ARM_AM::no_shift || Memory.Alignment != 0)
+ return false;
+ return isARMLowRegister(Memory.BaseRegNum) &&
+ (!Memory.OffsetRegNum || isARMLowRegister(Memory.OffsetRegNum));
+ }
+ bool isMemThumbRIs4() const {
+ if (!isMemory() || Memory.OffsetRegNum != 0 ||
+ !isARMLowRegister(Memory.BaseRegNum) || Memory.Alignment != 0)
+ return false;
+ // Immediate offset, multiple of 4 in range [0, 124].
+ if (!Memory.OffsetImm) return true;
+ int64_t Val = Memory.OffsetImm->getValue();
+ return Val >= 0 && Val <= 124 && (Val % 4) == 0;
+ }
+ bool isMemThumbRIs2() const {
+ if (!isMemory() || Memory.OffsetRegNum != 0 ||
+ !isARMLowRegister(Memory.BaseRegNum) || Memory.Alignment != 0)
+ return false;
+ // Immediate offset, multiple of 2 in range [0, 62].
+ if (!Memory.OffsetImm) return true;
+ int64_t Val = Memory.OffsetImm->getValue();
+ return Val >= 0 && Val <= 62 && (Val % 2) == 0;
+ }
+ bool isMemThumbRIs1() const {
+ if (!isMemory() || Memory.OffsetRegNum != 0 ||
+ !isARMLowRegister(Memory.BaseRegNum) || Memory.Alignment != 0)
+ return false;
+ // Immediate offset in range [0, 31].
+ if (!Memory.OffsetImm) return true;
+ int64_t Val = Memory.OffsetImm->getValue();
+ return Val >= 0 && Val <= 31;
+ }
+ bool isMemThumbSPI() const {
+ if (!isMemory() || Memory.OffsetRegNum != 0 ||
+ Memory.BaseRegNum != ARM::SP || Memory.Alignment != 0)
+ return false;
+ // Immediate offset, multiple of 4 in range [0, 1020].
+ if (!Memory.OffsetImm) return true;
+ int64_t Val = Memory.OffsetImm->getValue();
+ return Val >= 0 && Val <= 1020 && (Val % 4) == 0;
+ }
+ bool isMemImm8s4Offset() const {
+ if (!isMemory() || Memory.OffsetRegNum != 0 || Memory.Alignment != 0)
+ return false;
+ // Immediate offset a multiple of 4 in range [-1020, 1020].
+ if (!Memory.OffsetImm) return true;
+ int64_t Val = Memory.OffsetImm->getValue();
+ return Val >= -1020 && Val <= 1020 && (Val & 3) == 0;
+ }
+ bool isMemImm0_1020s4Offset() const {
+ if (!isMemory() || Memory.OffsetRegNum != 0 || Memory.Alignment != 0)
+ return false;
+ // Immediate offset a multiple of 4 in range [0, 1020].
+ if (!Memory.OffsetImm) return true;
+ int64_t Val = Memory.OffsetImm->getValue();
+ return Val >= 0 && Val <= 1020 && (Val & 3) == 0;
+ }
+ bool isMemImm8Offset() const {
+ if (!isMemory() || Memory.OffsetRegNum != 0 || Memory.Alignment != 0)
+ return false;
+ // Immediate offset in range [-255, 255].
+ if (!Memory.OffsetImm) return true;
+ int64_t Val = Memory.OffsetImm->getValue();
+ return (Val == INT32_MIN) || (Val > -256 && Val < 256);
+ }
+ bool isMemPosImm8Offset() const {
+ if (!isMemory() || Memory.OffsetRegNum != 0 || Memory.Alignment != 0)
+ return false;
+ // Immediate offset in range [0, 255].
+ if (!Memory.OffsetImm) return true;
+ int64_t Val = Memory.OffsetImm->getValue();
+ return Val >= 0 && Val < 256;
+ }
+ bool isMemNegImm8Offset() const {
+ if (!isMemory() || Memory.OffsetRegNum != 0 || Memory.Alignment != 0)
+ return false;
+ // Immediate offset in range [-255, -1].
+ if (!Memory.OffsetImm) return true;
+ int64_t Val = Memory.OffsetImm->getValue();
+ return Val > -256 && Val < 0;
+ }
+ bool isMemUImm12Offset() const {
+ // If we have an immediate that's not a constant, treat it as a label
+ // reference needing a fixup. If it is a constant, it's something else
+ // and we reject it.
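+ // (For example, a pc-relative "ldr r0, label" typically arrives here as
+ // a bare expression.)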
+ if (Kind == k_Immediate && !isa<MCConstantExpr>(getImm()))
+ return true;
+
+ if (!isMemory() || Memory.OffsetRegNum != 0 || Memory.Alignment != 0)
return false;
+ // Immediate offset in range [0, 4095].
+ if (!Memory.OffsetImm) return true;
+ int64_t Val = Memory.OffsetImm->getValue();
+ return (Val >= 0 && Val < 4096);
+ }
+ bool isMemImm12Offset() const {
+ // If we have an immediate that's not a constant, treat it as a label
+ // reference needing a fixup. If it is a constant, it's something else
+ // and we reject it.
+ if (Kind == k_Immediate && !isa<MCConstantExpr>(getImm()))
+ return true;
- const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getMemOffset());
+ if (!isMemory() || Memory.OffsetRegNum != 0 || Memory.Alignment != 0)
+ return false;
+ // Immediate offset in range [-4095, 4095].
+ if (!Memory.OffsetImm) return true;
+ int64_t Val = Memory.OffsetImm->getValue();
+ return (Val > -4096 && Val < 4096) || (Val == INT32_MIN);
+ }
+ bool isPostIdxImm8() const {
+ if (Kind != k_Immediate)
+ return false;
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ if (!CE) return false;
+ int64_t Val = CE->getValue();
+ return (Val > -256 && Val < 256) || (Val == INT32_MIN);
+ }
+ bool isPostIdxImm8s4() const {
+ if (Kind != k_Immediate)
+ return false;
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
if (!CE) return false;
+ int64_t Val = CE->getValue();
+ return ((Val & 3) == 0 && Val >= -1020 && Val <= 1020) ||
+ (Val == INT32_MIN);
+ }
- // The offset must be a multiple of 4 in the range 0-124.
- uint64_t Value = CE->getValue();
- return ((Value & 0x3) == 0 && Value <= 124);
+ bool isMSRMask() const { return Kind == k_MSRMask; }
+ bool isProcIFlags() const { return Kind == k_ProcIFlags; }
+
+ bool isVectorIndex8() const {
+ if (Kind != k_VectorIndex) return false;
+ return VectorIndex.Val < 8;
+ }
+ bool isVectorIndex16() const {
+ if (Kind != k_VectorIndex) return false;
+ return VectorIndex.Val < 4;
}
- bool isMSRMask() const { return Kind == MSRMask; }
- bool isProcIFlags() const { return Kind == ProcIFlags; }
+ bool isVectorIndex32() const {
+ if (Kind != k_VectorIndex) return false;
+ return VectorIndex.Val < 2;
+ }
+
+
void addExpr(MCInst &Inst, const MCExpr *Expr) const {
// Add as immediates when possible. Null MCExpr = 0.
@@ -544,6 +940,21 @@ public:
Inst.addOperand(MCOperand::CreateImm(getCoproc()));
}
+ void addCoprocOptionOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ Inst.addOperand(MCOperand::CreateImm(CoprocOption.Val));
+ }
+
+ void addITMaskOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ Inst.addOperand(MCOperand::CreateImm(ITMask.Mask));
+ }
+
+ void addITCondCodeOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ Inst.addOperand(MCOperand::CreateImm(unsigned(getCondCode())));
+ }
+
void addCCOutOperands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
Inst.addOperand(MCOperand::CreateReg(getReg()));
@@ -554,22 +965,27 @@ public:
Inst.addOperand(MCOperand::CreateReg(getReg()));
}
- void addShiftedRegOperands(MCInst &Inst, unsigned N) const {
+ void addRegShiftedRegOperands(MCInst &Inst, unsigned N) const {
assert(N == 3 && "Invalid number of operands!");
- assert(isShiftedReg() && "addShiftedRegOperands() on non ShiftedReg!");
- assert((ShiftedReg.ShiftReg == 0 ||
- ARM_AM::getSORegOffset(ShiftedReg.ShiftImm) == 0) &&
- "Invalid shifted register operand!");
- Inst.addOperand(MCOperand::CreateReg(ShiftedReg.SrcReg));
- Inst.addOperand(MCOperand::CreateReg(ShiftedReg.ShiftReg));
+ assert(isRegShiftedReg() && "addRegShiftedRegOperands() on non RegShiftedReg!");
+ Inst.addOperand(MCOperand::CreateReg(RegShiftedReg.SrcReg));
+ Inst.addOperand(MCOperand::CreateReg(RegShiftedReg.ShiftReg));
Inst.addOperand(MCOperand::CreateImm(
- ARM_AM::getSORegOpc(ShiftedReg.ShiftTy, ShiftedReg.ShiftImm)));
+ ARM_AM::getSORegOpc(RegShiftedReg.ShiftTy, RegShiftedReg.ShiftImm)));
}
- void addShifterOperands(MCInst &Inst, unsigned N) const {
- assert(N == 1 && "Invalid number of operands!");
+ void addRegShiftedImmOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 2 && "Invalid number of operands!");
+ assert(isRegShiftedImm() && "addRegShiftedImmOperands() on non RegShiftedImm!");
+ Inst.addOperand(MCOperand::CreateReg(RegShiftedImm.SrcReg));
Inst.addOperand(MCOperand::CreateImm(
- ARM_AM::getSORegOpc(Shift.ShiftTy, 0)));
+ ARM_AM::getSORegOpc(RegShiftedImm.ShiftTy, RegShiftedImm.ShiftImm)));
+ }
+
+ void addShifterImmOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ Inst.addOperand(MCOperand::CreateImm((ShifterImm.isASR << 5) |
+ ShifterImm.Imm));
}
void addRegListOperands(MCInst &Inst, unsigned N) const {
@@ -588,11 +1004,57 @@ public:
addRegListOperands(Inst, N);
}
+ void addRotImmOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ // Encoded as val>>3. The printer handles display as 8, 16, 24.
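+ // For example, "ror #16" is stored as 16 and added here as 16 >> 3 == 2.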
+ Inst.addOperand(MCOperand::CreateImm(RotImm.Imm >> 3));
+ }
+
+ void addBitfieldOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ // Munge the lsb/width into a bitfield mask.
+ unsigned lsb = Bitfield.LSB;
+ unsigned width = Bitfield.Width;
+ // Make a 32-bit mask w/ the referenced bits clear and all other bits set.
+ uint32_t Mask = ~(((uint32_t)0xffffffff >> lsb) << (32 - width) >>
+ (32 - (lsb + width)));
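+ // For example, lsb = 8 and width = 4 give Mask == 0xfffff0ff: bits
+ // [11:8] clear and all other bits set, which is the BFC/BFI mask form.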
+ Inst.addOperand(MCOperand::CreateImm(Mask));
+ }
+
void addImmOperands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
addExpr(Inst, getImm());
}
+ void addFPImmOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ Inst.addOperand(MCOperand::CreateImm(getFPImm()));
+ }
+
+ void addImm8s4Operands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ // FIXME: We really want to scale the value here, but the LDRD/STRD
+ // instructions don't encode operands that way yet.
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ Inst.addOperand(MCOperand::CreateImm(CE->getValue()));
+ }
+
+ void addImm0_1020s4Operands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ // The immediate is scaled by four in the encoding and is stored
+ // in the MCInst as such. Lop off the low two bits here.
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ Inst.addOperand(MCOperand::CreateImm(CE->getValue() / 4));
+ }
+
+ void addImm0_508s4Operands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ // The immediate is scaled by four in the encoding and is stored
+ // in the MCInst as such. Lop off the low two bits here.
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ Inst.addOperand(MCOperand::CreateImm(CE->getValue() / 4));
+ }
+
void addImm0_255Operands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
addExpr(Inst, getImm());
@@ -608,137 +1070,344 @@ public:
addExpr(Inst, getImm());
}
+ void addImm0_31Operands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ addExpr(Inst, getImm());
+ }
+
+ void addImm1_16Operands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ // The constant encodes as the immediate-1, and we store in the instruction
+ // the bits as encoded, so subtract off one here.
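+ // For example, #16 is added as 15.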
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ Inst.addOperand(MCOperand::CreateImm(CE->getValue() - 1));
+ }
+
+ void addImm1_32Operands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ // The constant encodes as the immediate-1, and we store in the instruction
+ // the bits as encoded, so subtract off one here.
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ Inst.addOperand(MCOperand::CreateImm(CE->getValue() - 1));
+ }
+
void addImm0_65535Operands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
addExpr(Inst, getImm());
}
+ void addImm0_65535ExprOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ addExpr(Inst, getImm());
+ }
+
+ void addImm24bitOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ addExpr(Inst, getImm());
+ }
+
+ void addImmThumbSROperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ // The constant encodes as the immediate, except for 32, which encodes as
+ // zero.
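+ // For example, a shift of #32 is added as 0; #5 is added as 5.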
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ unsigned Imm = CE->getValue();
+ Inst.addOperand(MCOperand::CreateImm((Imm == 32 ? 0 : Imm)));
+ }
+
+ void addPKHLSLImmOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ addExpr(Inst, getImm());
+ }
+
+ void addPKHASRImmOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ // An ASR value of 32 encodes as 0, so that's how we want to add it to
+ // the instruction as well.
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ int Val = CE->getValue();
+ Inst.addOperand(MCOperand::CreateImm(Val == 32 ? 0 : Val));
+ }
+
+ void addARMSOImmOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ addExpr(Inst, getImm());
+ }
+
void addT2SOImmOperands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
addExpr(Inst, getImm());
}
+ void addSetEndImmOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ addExpr(Inst, getImm());
+ }
+
void addMemBarrierOptOperands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
Inst.addOperand(MCOperand::CreateImm(unsigned(getMemBarrierOpt())));
}
- void addMemMode7Operands(MCInst &Inst, unsigned N) const {
- assert(N == 1 && isMemMode7() && "Invalid number of operands!");
- Inst.addOperand(MCOperand::CreateReg(getMemBaseRegNum()));
-
- const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getMemOffset());
- (void)CE;
- assert((CE || CE->getValue() == 0) &&
- "No offset operand support in mode 7");
+ void addMemNoOffsetOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ Inst.addOperand(MCOperand::CreateReg(Memory.BaseRegNum));
}
- void addMemMode2Operands(MCInst &Inst, unsigned N) const {
- assert(isMemMode2() && "Invalid mode or number of operands!");
- Inst.addOperand(MCOperand::CreateReg(getMemBaseRegNum()));
- unsigned IdxMode = (getMemPreindexed() | getMemPostindexed() << 1);
+ void addAlignedMemoryOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 2 && "Invalid number of operands!");
+ Inst.addOperand(MCOperand::CreateReg(Memory.BaseRegNum));
+ Inst.addOperand(MCOperand::CreateImm(Memory.Alignment));
+ }
- if (getMemOffsetIsReg()) {
- Inst.addOperand(MCOperand::CreateReg(getMemOffsetRegNum()));
+ void addAddrMode2Operands(MCInst &Inst, unsigned N) const {
+ assert(N == 3 && "Invalid number of operands!");
+ int32_t Val = Memory.OffsetImm ? Memory.OffsetImm->getValue() : 0;
+ if (!Memory.OffsetRegNum) {
+ ARM_AM::AddrOpc AddSub = Val < 0 ? ARM_AM::sub : ARM_AM::add;
+ // Special case for #-0
+ if (Val == INT32_MIN) Val = 0;
+ if (Val < 0) Val = -Val;
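+ // getAM2Opc packs this as imm12 | (isSub << 12) | (shift opc << 13);
+ // see ARM_AM::getAM2Opc in ARMAddressingModes.h.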
+ Val = ARM_AM::getAM2Opc(AddSub, Val, ARM_AM::no_shift);
+ } else {
+ // For register offset, we encode the shift type and negation flag
+ // here.
+ Val = ARM_AM::getAM2Opc(Memory.isNegative ? ARM_AM::sub : ARM_AM::add,
+ Memory.ShiftImm, Memory.ShiftType);
+ }
+ Inst.addOperand(MCOperand::CreateReg(Memory.BaseRegNum));
+ Inst.addOperand(MCOperand::CreateReg(Memory.OffsetRegNum));
+ Inst.addOperand(MCOperand::CreateImm(Val));
+ }
- ARM_AM::AddrOpc AMOpc = getMemNegative() ? ARM_AM::sub : ARM_AM::add;
- ARM_AM::ShiftOpc ShOpc = ARM_AM::no_shift;
- int64_t ShiftAmount = 0;
+ void addAM2OffsetImmOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 2 && "Invalid number of operands!");
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ assert(CE && "non-constant AM2OffsetImm operand!");
+ int32_t Val = CE->getValue();
+ ARM_AM::AddrOpc AddSub = Val < 0 ? ARM_AM::sub : ARM_AM::add;
+ // Special case for #-0
+ if (Val == INT32_MIN) Val = 0;
+ if (Val < 0) Val = -Val;
+ Val = ARM_AM::getAM2Opc(AddSub, Val, ARM_AM::no_shift);
+ Inst.addOperand(MCOperand::CreateReg(0));
+ Inst.addOperand(MCOperand::CreateImm(Val));
+ }
- if (getMemOffsetRegShifted()) {
- ShOpc = getMemShiftType();
- const MCConstantExpr *CE =
- dyn_cast<MCConstantExpr>(getMemShiftAmount());
- ShiftAmount = CE->getValue();
- }
+ void addAddrMode3Operands(MCInst &Inst, unsigned N) const {
+ assert(N == 3 && "Invalid number of operands!");
+ int32_t Val = Memory.OffsetImm ? Memory.OffsetImm->getValue() : 0;
+ if (!Memory.OffsetRegNum) {
+ ARM_AM::AddrOpc AddSub = Val < 0 ? ARM_AM::sub : ARM_AM::add;
+ // Special case for #-0
+ if (Val == INT32_MIN) Val = 0;
+ if (Val < 0) Val = -Val;
+ Val = ARM_AM::getAM3Opc(AddSub, Val);
+ } else {
+ // For register offset, we encode the shift type and negation flag
+ // here.
+ Val = ARM_AM::getAM3Opc(Memory.isNegative ? ARM_AM::sub : ARM_AM::add, 0);
+ }
+ Inst.addOperand(MCOperand::CreateReg(Memory.BaseRegNum));
+ Inst.addOperand(MCOperand::CreateReg(Memory.OffsetRegNum));
+ Inst.addOperand(MCOperand::CreateImm(Val));
+ }
- Inst.addOperand(MCOperand::CreateImm(ARM_AM::getAM2Opc(AMOpc, ShiftAmount,
- ShOpc, IdxMode)));
+ void addAM3OffsetOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 2 && "Invalid number of operands!");
+ if (Kind == k_PostIndexRegister) {
+ int32_t Val =
+ ARM_AM::getAM3Opc(PostIdxReg.isAdd ? ARM_AM::add : ARM_AM::sub, 0);
+ Inst.addOperand(MCOperand::CreateReg(PostIdxReg.RegNum));
+ Inst.addOperand(MCOperand::CreateImm(Val));
return;
}
- // Create a operand placeholder to always yield the same number of operands.
+ // Constant offset.
+ const MCConstantExpr *CE = static_cast<const MCConstantExpr*>(getImm());
+ int32_t Val = CE->getValue();
+ ARM_AM::AddrOpc AddSub = Val < 0 ? ARM_AM::sub : ARM_AM::add;
+ // Special case for #-0
+ if (Val == INT32_MIN) Val = 0;
+ if (Val < 0) Val = -Val;
+ Val = ARM_AM::getAM3Opc(AddSub, Val);
Inst.addOperand(MCOperand::CreateReg(0));
+ Inst.addOperand(MCOperand::CreateImm(Val));
+ }
- // FIXME: #-0 is encoded differently than #0. Does the parser preserve
- // the difference?
- const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getMemOffset());
- assert(CE && "Non-constant mode 2 offset operand!");
- int64_t Offset = CE->getValue();
+ void addAddrMode5Operands(MCInst &Inst, unsigned N) const {
+ assert(N == 2 && "Invalid number of operands!");
+ // The lower two bits are always zero and as such are not encoded.
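+ // For example, an offset of -8 is emitted as sub with imm8 == 2.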
+ int32_t Val = Memory.OffsetImm ? Memory.OffsetImm->getValue() / 4 : 0;
+ ARM_AM::AddrOpc AddSub = Val < 0 ? ARM_AM::sub : ARM_AM::add;
+ // Special case for #-0
+ if (Val == INT32_MIN) Val = 0;
+ if (Val < 0) Val = -Val;
+ Val = ARM_AM::getAM5Opc(AddSub, Val);
+ Inst.addOperand(MCOperand::CreateReg(Memory.BaseRegNum));
+ Inst.addOperand(MCOperand::CreateImm(Val));
+ }
- if (Offset >= 0)
- Inst.addOperand(MCOperand::CreateImm(ARM_AM::getAM2Opc(ARM_AM::add,
- Offset, ARM_AM::no_shift, IdxMode)));
- else
- Inst.addOperand(MCOperand::CreateImm(ARM_AM::getAM2Opc(ARM_AM::sub,
- -Offset, ARM_AM::no_shift, IdxMode)));
+ void addMemImm8s4OffsetOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 2 && "Invalid number of operands!");
+ int64_t Val = Memory.OffsetImm ? Memory.OffsetImm->getValue() : 0;
+ Inst.addOperand(MCOperand::CreateReg(Memory.BaseRegNum));
+ Inst.addOperand(MCOperand::CreateImm(Val));
+ }
+
+ void addMemImm0_1020s4OffsetOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 2 && "Invalid number of operands!");
+ // The lower two bits are always zero and as such are not encoded.
+ int32_t Val = Memory.OffsetImm ? Memory.OffsetImm->getValue() / 4 : 0;
+ Inst.addOperand(MCOperand::CreateReg(Memory.BaseRegNum));
+ Inst.addOperand(MCOperand::CreateImm(Val));
+ }
+
+ void addMemImm8OffsetOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 2 && "Invalid number of operands!");
+ int64_t Val = Memory.OffsetImm ? Memory.OffsetImm->getValue() : 0;
+ Inst.addOperand(MCOperand::CreateReg(Memory.BaseRegNum));
+ Inst.addOperand(MCOperand::CreateImm(Val));
+ }
+
+ void addMemPosImm8OffsetOperands(MCInst &Inst, unsigned N) const {
+ addMemImm8OffsetOperands(Inst, N);
+ }
+
+ void addMemNegImm8OffsetOperands(MCInst &Inst, unsigned N) const {
+ addMemImm8OffsetOperands(Inst, N);
}
- void addMemMode3Operands(MCInst &Inst, unsigned N) const {
- assert(isMemMode3() && "Invalid mode or number of operands!");
- Inst.addOperand(MCOperand::CreateReg(getMemBaseRegNum()));
- unsigned IdxMode = (getMemPreindexed() | getMemPostindexed() << 1);
+ void addMemUImm12OffsetOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 2 && "Invalid number of operands!");
+ // If this is an immediate, it's a label reference.
+ if (Kind == k_Immediate) {
+ addExpr(Inst, getImm());
+ Inst.addOperand(MCOperand::CreateImm(0));
+ return;
+ }
- if (getMemOffsetIsReg()) {
- Inst.addOperand(MCOperand::CreateReg(getMemOffsetRegNum()));
+ // Otherwise, it's a normal memory reg+offset.
+ int64_t Val = Memory.OffsetImm ? Memory.OffsetImm->getValue() : 0;
+ Inst.addOperand(MCOperand::CreateReg(Memory.BaseRegNum));
+ Inst.addOperand(MCOperand::CreateImm(Val));
+ }
- ARM_AM::AddrOpc AMOpc = getMemNegative() ? ARM_AM::sub : ARM_AM::add;
- Inst.addOperand(MCOperand::CreateImm(ARM_AM::getAM3Opc(AMOpc, 0,
- IdxMode)));
+ void addMemImm12OffsetOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 2 && "Invalid number of operands!");
+ // If this is an immediate, it's a label reference.
+ if (Kind == k_Immediate) {
+ addExpr(Inst, getImm());
+ Inst.addOperand(MCOperand::CreateImm(0));
return;
}
- // Create a operand placeholder to always yield the same number of operands.
- Inst.addOperand(MCOperand::CreateReg(0));
+ // Otherwise, it's a normal memory reg+offset.
+ int64_t Val = Memory.OffsetImm ? Memory.OffsetImm->getValue() : 0;
+ Inst.addOperand(MCOperand::CreateReg(Memory.BaseRegNum));
+ Inst.addOperand(MCOperand::CreateImm(Val));
+ }
- // FIXME: #-0 is encoded differently than #0. Does the parser preserve
- // the difference?
- const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getMemOffset());
- assert(CE && "Non-constant mode 3 offset operand!");
- int64_t Offset = CE->getValue();
+ void addMemTBBOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 2 && "Invalid number of operands!");
+ Inst.addOperand(MCOperand::CreateReg(Memory.BaseRegNum));
+ Inst.addOperand(MCOperand::CreateReg(Memory.OffsetRegNum));
+ }
- if (Offset >= 0)
- Inst.addOperand(MCOperand::CreateImm(ARM_AM::getAM3Opc(ARM_AM::add,
- Offset, IdxMode)));
- else
- Inst.addOperand(MCOperand::CreateImm(ARM_AM::getAM3Opc(ARM_AM::sub,
- -Offset, IdxMode)));
+ void addMemTBHOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 2 && "Invalid number of operands!");
+ Inst.addOperand(MCOperand::CreateReg(Memory.BaseRegNum));
+ Inst.addOperand(MCOperand::CreateReg(Memory.OffsetRegNum));
}
- void addMemMode5Operands(MCInst &Inst, unsigned N) const {
- assert(N == 2 && isMemMode5() && "Invalid number of operands!");
+ void addMemRegOffsetOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 3 && "Invalid number of operands!");
+ unsigned Val = ARM_AM::getAM2Opc(Memory.isNegative ? ARM_AM::sub : ARM_AM::add,
+ Memory.ShiftImm, Memory.ShiftType);
+ Inst.addOperand(MCOperand::CreateReg(Memory.BaseRegNum));
+ Inst.addOperand(MCOperand::CreateReg(Memory.OffsetRegNum));
+ Inst.addOperand(MCOperand::CreateImm(Val));
+ }
- Inst.addOperand(MCOperand::CreateReg(getMemBaseRegNum()));
- assert(!getMemOffsetIsReg() && "Invalid mode 5 operand");
+ void addT2MemRegOffsetOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 3 && "Invalid number of operands!");
+ Inst.addOperand(MCOperand::CreateReg(Memory.BaseRegNum));
+ Inst.addOperand(MCOperand::CreateReg(Memory.OffsetRegNum));
+ Inst.addOperand(MCOperand::CreateImm(Memory.ShiftImm));
+ }
- // FIXME: #-0 is encoded differently than #0. Does the parser preserve
- // the difference?
- const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getMemOffset());
- assert(CE && "Non-constant mode 5 offset operand!");
+ void addMemThumbRROperands(MCInst &Inst, unsigned N) const {
+ assert(N == 2 && "Invalid number of operands!");
+ Inst.addOperand(MCOperand::CreateReg(Memory.BaseRegNum));
+ Inst.addOperand(MCOperand::CreateReg(Memory.OffsetRegNum));
+ }
- // The MCInst offset operand doesn't include the low two bits (like
- // the instruction encoding).
- int64_t Offset = CE->getValue() / 4;
- if (Offset >= 0)
- Inst.addOperand(MCOperand::CreateImm(ARM_AM::getAM5Opc(ARM_AM::add,
- Offset)));
- else
- Inst.addOperand(MCOperand::CreateImm(ARM_AM::getAM5Opc(ARM_AM::sub,
- -Offset)));
+ void addMemThumbRIs4Operands(MCInst &Inst, unsigned N) const {
+ assert(N == 2 && "Invalid number of operands!");
+ int64_t Val = Memory.OffsetImm ? (Memory.OffsetImm->getValue() / 4) : 0;
+ Inst.addOperand(MCOperand::CreateReg(Memory.BaseRegNum));
+ Inst.addOperand(MCOperand::CreateImm(Val));
}
- void addMemModeRegThumbOperands(MCInst &Inst, unsigned N) const {
- assert(N == 2 && isMemModeRegThumb() && "Invalid number of operands!");
- Inst.addOperand(MCOperand::CreateReg(getMemBaseRegNum()));
- Inst.addOperand(MCOperand::CreateReg(getMemOffsetRegNum()));
+ void addMemThumbRIs2Operands(MCInst &Inst, unsigned N) const {
+ assert(N == 2 && "Invalid number of operands!");
+ int64_t Val = Memory.OffsetImm ? (Memory.OffsetImm->getValue() / 2) : 0;
+ Inst.addOperand(MCOperand::CreateReg(Memory.BaseRegNum));
+ Inst.addOperand(MCOperand::CreateImm(Val));
}
- void addMemModeImmThumbOperands(MCInst &Inst, unsigned N) const {
- assert(N == 2 && isMemModeImmThumb() && "Invalid number of operands!");
- Inst.addOperand(MCOperand::CreateReg(getMemBaseRegNum()));
- const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getMemOffset());
- assert(CE && "Non-constant mode offset operand!");
- Inst.addOperand(MCOperand::CreateImm(CE->getValue()));
+ void addMemThumbRIs1Operands(MCInst &Inst, unsigned N) const {
+ assert(N == 2 && "Invalid number of operands!");
+ int64_t Val = Memory.OffsetImm ? (Memory.OffsetImm->getValue()) : 0;
+ Inst.addOperand(MCOperand::CreateReg(Memory.BaseRegNum));
+ Inst.addOperand(MCOperand::CreateImm(Val));
+ }
+
+ void addMemThumbSPIOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 2 && "Invalid number of operands!");
+ int64_t Val = Memory.OffsetImm ? (Memory.OffsetImm->getValue() / 4) : 0;
+ Inst.addOperand(MCOperand::CreateReg(Memory.BaseRegNum));
+ Inst.addOperand(MCOperand::CreateImm(Val));
+ }
+
+ void addPostIdxImm8Operands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ assert(CE && "non-constant post-idx-imm8 operand!");
+ int Imm = CE->getValue();
+ bool isAdd = Imm >= 0;
+ if (Imm == INT32_MIN) Imm = 0;
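+ // The low 8 bits hold the magnitude; bit 8 is set for add, clear for sub.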
+ Imm = (Imm < 0 ? -Imm : Imm) | (int)isAdd << 8;
+ Inst.addOperand(MCOperand::CreateImm(Imm));
+ }
+
+ void addPostIdxImm8s4Operands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ assert(CE && "non-constant post-idx-imm8s4 operand!");
+ int Imm = CE->getValue();
+ bool isAdd = Imm >= 0;
+ if (Imm == INT32_MIN) Imm = 0;
+ // Immediate is scaled by 4.
+ Imm = ((Imm < 0 ? -Imm : Imm) / 4) | (int)isAdd << 8;
+ Inst.addOperand(MCOperand::CreateImm(Imm));
+ }
+
+ void addPostIdxRegOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 2 && "Invalid number of operands!");
+ Inst.addOperand(MCOperand::CreateReg(PostIdxReg.RegNum));
+ Inst.addOperand(MCOperand::CreateImm(PostIdxReg.isAdd));
+ }
+
+ void addPostIdxRegShiftedOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 2 && "Invalid number of operands!");
+ Inst.addOperand(MCOperand::CreateReg(PostIdxReg.RegNum));
+ // The sign, shift type, and shift amount are encoded in a single operand
+ // using the AM2 encoding helpers.
+ ARM_AM::AddrOpc opc = PostIdxReg.isAdd ? ARM_AM::add : ARM_AM::sub;
+ unsigned Imm = ARM_AM::getAM2Opc(opc, PostIdxReg.ShiftImm,
+ PostIdxReg.ShiftTy);
+ Inst.addOperand(MCOperand::CreateImm(Imm));
}
void addMSRMaskOperands(MCInst &Inst, unsigned N) const {
@@ -751,10 +1420,33 @@ public:
Inst.addOperand(MCOperand::CreateImm(unsigned(getProcIFlags())));
}
+ void addVectorIndex8Operands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ Inst.addOperand(MCOperand::CreateImm(getVectorIndex()));
+ }
+
+ void addVectorIndex16Operands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ Inst.addOperand(MCOperand::CreateImm(getVectorIndex()));
+ }
+
+ void addVectorIndex32Operands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ Inst.addOperand(MCOperand::CreateImm(getVectorIndex()));
+ }
+
virtual void print(raw_ostream &OS) const;
+ static ARMOperand *CreateITMask(unsigned Mask, SMLoc S) {
+ ARMOperand *Op = new ARMOperand(k_ITCondMask);
+ Op->ITMask.Mask = Mask;
+ Op->StartLoc = S;
+ Op->EndLoc = S;
+ return Op;
+ }
+
static ARMOperand *CreateCondCode(ARMCC::CondCodes CC, SMLoc S) {
- ARMOperand *Op = new ARMOperand(CondCode);
+ ARMOperand *Op = new ARMOperand(k_CondCode);
Op->CC.Val = CC;
Op->StartLoc = S;
Op->EndLoc = S;
@@ -762,7 +1454,7 @@ public:
}
static ARMOperand *CreateCoprocNum(unsigned CopVal, SMLoc S) {
- ARMOperand *Op = new ARMOperand(CoprocNum);
+ ARMOperand *Op = new ARMOperand(k_CoprocNum);
Op->Cop.Val = CopVal;
Op->StartLoc = S;
Op->EndLoc = S;
@@ -770,15 +1462,23 @@ public:
}
static ARMOperand *CreateCoprocReg(unsigned CopVal, SMLoc S) {
- ARMOperand *Op = new ARMOperand(CoprocReg);
+ ARMOperand *Op = new ARMOperand(k_CoprocReg);
Op->Cop.Val = CopVal;
Op->StartLoc = S;
Op->EndLoc = S;
return Op;
}
+ static ARMOperand *CreateCoprocOption(unsigned Val, SMLoc S, SMLoc E) {
+ ARMOperand *Op = new ARMOperand(k_CoprocOption);
+ Op->Cop.Val = Val;
+ Op->StartLoc = S;
+ Op->EndLoc = E;
+ return Op;
+ }
+
static ARMOperand *CreateCCOut(unsigned RegNum, SMLoc S) {
- ARMOperand *Op = new ARMOperand(CCOut);
+ ARMOperand *Op = new ARMOperand(k_CCOut);
Op->Reg.RegNum = RegNum;
Op->StartLoc = S;
Op->EndLoc = S;
@@ -786,7 +1486,7 @@ public:
}
static ARMOperand *CreateToken(StringRef Str, SMLoc S) {
- ARMOperand *Op = new ARMOperand(Token);
+ ARMOperand *Op = new ARMOperand(k_Token);
Op->Tok.Data = Str.data();
Op->Tok.Length = Str.size();
Op->StartLoc = S;
@@ -795,7 +1495,7 @@ public:
}
static ARMOperand *CreateReg(unsigned RegNum, SMLoc S, SMLoc E) {
- ARMOperand *Op = new ARMOperand(Register);
+ ARMOperand *Op = new ARMOperand(k_Register);
Op->Reg.RegNum = RegNum;
Op->StartLoc = S;
Op->EndLoc = E;
@@ -807,20 +1507,52 @@ public:
unsigned ShiftReg,
unsigned ShiftImm,
SMLoc S, SMLoc E) {
- ARMOperand *Op = new ARMOperand(ShiftedRegister);
- Op->ShiftedReg.ShiftTy = ShTy;
- Op->ShiftedReg.SrcReg = SrcReg;
- Op->ShiftedReg.ShiftReg = ShiftReg;
- Op->ShiftedReg.ShiftImm = ShiftImm;
+ ARMOperand *Op = new ARMOperand(k_ShiftedRegister);
+ Op->RegShiftedReg.ShiftTy = ShTy;
+ Op->RegShiftedReg.SrcReg = SrcReg;
+ Op->RegShiftedReg.ShiftReg = ShiftReg;
+ Op->RegShiftedReg.ShiftImm = ShiftImm;
+ Op->StartLoc = S;
+ Op->EndLoc = E;
+ return Op;
+ }
+
+ static ARMOperand *CreateShiftedImmediate(ARM_AM::ShiftOpc ShTy,
+ unsigned SrcReg,
+ unsigned ShiftImm,
+ SMLoc S, SMLoc E) {
+ ARMOperand *Op = new ARMOperand(k_ShiftedImmediate);
+ Op->RegShiftedImm.ShiftTy = ShTy;
+ Op->RegShiftedImm.SrcReg = SrcReg;
+ Op->RegShiftedImm.ShiftImm = ShiftImm;
Op->StartLoc = S;
Op->EndLoc = E;
return Op;
}
- static ARMOperand *CreateShifter(ARM_AM::ShiftOpc ShTy,
+ static ARMOperand *CreateShifterImm(bool isASR, unsigned Imm,
SMLoc S, SMLoc E) {
- ARMOperand *Op = new ARMOperand(Shifter);
- Op->Shift.ShiftTy = ShTy;
+ ARMOperand *Op = new ARMOperand(k_ShifterImmediate);
+ Op->ShifterImm.isASR = isASR;
+ Op->ShifterImm.Imm = Imm;
+ Op->StartLoc = S;
+ Op->EndLoc = E;
+ return Op;
+ }
+
+ static ARMOperand *CreateRotImm(unsigned Imm, SMLoc S, SMLoc E) {
+ ARMOperand *Op = new ARMOperand(k_RotateImmediate);
+ Op->RotImm.Imm = Imm;
+ Op->StartLoc = S;
+ Op->EndLoc = E;
+ return Op;
+ }
+
+ static ARMOperand *CreateBitfield(unsigned LSB, unsigned Width,
+ SMLoc S, SMLoc E) {
+ ARMOperand *Op = new ARMOperand(k_BitfieldDescriptor);
+ Op->Bitfield.LSB = LSB;
+ Op->Bitfield.Width = Width;
Op->StartLoc = S;
Op->EndLoc = E;
return Op;
@@ -829,12 +1561,13 @@ public:
static ARMOperand *
CreateRegList(const SmallVectorImpl<std::pair<unsigned, SMLoc> > &Regs,
SMLoc StartLoc, SMLoc EndLoc) {
- KindTy Kind = RegisterList;
+ KindTy Kind = k_RegisterList;
- if (ARM::DPRRegClass.contains(Regs.front().first))
- Kind = DPRRegisterList;
- else if (ARM::SPRRegClass.contains(Regs.front().first))
- Kind = SPRRegisterList;
+ if (ARMMCRegisterClasses[ARM::DPRRegClassID].contains(Regs.front().first))
+ Kind = k_DPRRegisterList;
+ else if (ARMMCRegisterClasses[ARM::SPRRegClassID].
+ contains(Regs.front().first))
+ Kind = k_SPRRegisterList;
ARMOperand *Op = new ARMOperand(Kind);
for (SmallVectorImpl<std::pair<unsigned, SMLoc> >::const_iterator
@@ -846,55 +1579,68 @@ public:
return Op;
}
+ static ARMOperand *CreateVectorIndex(unsigned Idx, SMLoc S, SMLoc E,
+ MCContext &Ctx) {
+ ARMOperand *Op = new ARMOperand(k_VectorIndex);
+ Op->VectorIndex.Val = Idx;
+ Op->StartLoc = S;
+ Op->EndLoc = E;
+ return Op;
+ }
+
static ARMOperand *CreateImm(const MCExpr *Val, SMLoc S, SMLoc E) {
- ARMOperand *Op = new ARMOperand(Immediate);
+ ARMOperand *Op = new ARMOperand(k_Immediate);
Op->Imm.Val = Val;
Op->StartLoc = S;
Op->EndLoc = E;
return Op;
}
- static ARMOperand *CreateMem(ARMII::AddrMode AddrMode, unsigned BaseRegNum,
- bool OffsetIsReg, const MCExpr *Offset,
- int OffsetRegNum, bool OffsetRegShifted,
- enum ARM_AM::ShiftOpc ShiftType,
- const MCExpr *ShiftAmount, bool Preindexed,
- bool Postindexed, bool Negative, bool Writeback,
+ static ARMOperand *CreateFPImm(unsigned Val, SMLoc S, MCContext &Ctx) {
+ ARMOperand *Op = new ARMOperand(k_FPImmediate);
+ Op->FPImm.Val = Val;
+ Op->StartLoc = S;
+ Op->EndLoc = S;
+ return Op;
+ }
+
+ static ARMOperand *CreateMem(unsigned BaseRegNum,
+ const MCConstantExpr *OffsetImm,
+ unsigned OffsetRegNum,
+ ARM_AM::ShiftOpc ShiftType,
+ unsigned ShiftImm,
+ unsigned Alignment,
+ bool isNegative,
SMLoc S, SMLoc E) {
- assert((OffsetRegNum == -1 || OffsetIsReg) &&
- "OffsetRegNum must imply OffsetIsReg!");
- assert((!OffsetRegShifted || OffsetIsReg) &&
- "OffsetRegShifted must imply OffsetIsReg!");
- assert((Offset || OffsetIsReg) &&
- "Offset must exists unless register offset is used!");
- assert((!ShiftAmount || (OffsetIsReg && OffsetRegShifted)) &&
- "Cannot have shift amount without shifted register offset!");
- assert((!Offset || !OffsetIsReg) &&
- "Cannot have expression offset and register offset!");
-
- ARMOperand *Op = new ARMOperand(Memory);
- Op->Mem.AddrMode = AddrMode;
- Op->Mem.BaseRegNum = BaseRegNum;
- Op->Mem.OffsetIsReg = OffsetIsReg;
- if (OffsetIsReg)
- Op->Mem.Offset.RegNum = OffsetRegNum;
- else
- Op->Mem.Offset.Value = Offset;
- Op->Mem.OffsetRegShifted = OffsetRegShifted;
- Op->Mem.ShiftType = ShiftType;
- Op->Mem.ShiftAmount = ShiftAmount;
- Op->Mem.Preindexed = Preindexed;
- Op->Mem.Postindexed = Postindexed;
- Op->Mem.Negative = Negative;
- Op->Mem.Writeback = Writeback;
+ ARMOperand *Op = new ARMOperand(k_Memory);
+ Op->Memory.BaseRegNum = BaseRegNum;
+ Op->Memory.OffsetImm = OffsetImm;
+ Op->Memory.OffsetRegNum = OffsetRegNum;
+ Op->Memory.ShiftType = ShiftType;
+ Op->Memory.ShiftImm = ShiftImm;
+ Op->Memory.Alignment = Alignment;
+ Op->Memory.isNegative = isNegative;
+ Op->StartLoc = S;
+ Op->EndLoc = E;
+ return Op;
+ }
+ static ARMOperand *CreatePostIdxReg(unsigned RegNum, bool isAdd,
+ ARM_AM::ShiftOpc ShiftTy,
+ unsigned ShiftImm,
+ SMLoc S, SMLoc E) {
+ ARMOperand *Op = new ARMOperand(k_PostIndexRegister);
+ Op->PostIdxReg.RegNum = RegNum;
+ Op->PostIdxReg.isAdd = isAdd;
+ Op->PostIdxReg.ShiftTy = ShiftTy;
+ Op->PostIdxReg.ShiftImm = ShiftImm;
Op->StartLoc = S;
Op->EndLoc = E;
return Op;
}
static ARMOperand *CreateMemBarrierOpt(ARM_MB::MemBOpt Opt, SMLoc S) {
- ARMOperand *Op = new ARMOperand(MemBarrierOpt);
+ ARMOperand *Op = new ARMOperand(k_MemBarrierOpt);
Op->MBOpt.Val = Opt;
Op->StartLoc = S;
Op->EndLoc = S;
@@ -902,7 +1648,7 @@ public:
}
static ARMOperand *CreateProcIFlags(ARM_PROC::IFlags IFlags, SMLoc S) {
- ARMOperand *Op = new ARMOperand(ProcIFlags);
+ ARMOperand *Op = new ARMOperand(k_ProcIFlags);
Op->IFlags.Val = IFlags;
Op->StartLoc = S;
Op->EndLoc = S;
@@ -910,7 +1656,7 @@ public:
}
static ARMOperand *CreateMSRMask(unsigned MMask, SMLoc S) {
- ARMOperand *Op = new ARMOperand(MSRMask);
+ ARMOperand *Op = new ARMOperand(k_MSRMask);
Op->MMask.Val = MMask;
Op->StartLoc = S;
Op->EndLoc = S;
@@ -922,53 +1668,56 @@ public:
void ARMOperand::print(raw_ostream &OS) const {
switch (Kind) {
- case CondCode:
+ case k_FPImmediate:
+ OS << "<fpimm " << getFPImm() << "(" << ARM_AM::getFPImmFloat(getFPImm())
+ << ") >";
+ break;
+ case k_CondCode:
OS << "<ARMCC::" << ARMCondCodeToString(getCondCode()) << ">";
break;
- case CCOut:
+ case k_CCOut:
OS << "<ccout " << getReg() << ">";
break;
- case CoprocNum:
+ case k_ITCondMask: {
+ static char MaskStr[][6] = { "()", "(t)", "(e)", "(tt)", "(et)", "(te)",
+ "(ee)", "(ttt)", "(ett)", "(tet)", "(eet)", "(tte)", "(ete)",
+ "(tee)", "(eee)" };
+ assert((ITMask.Mask & 0xf) == ITMask.Mask);
+ OS << "<it-mask " << MaskStr[ITMask.Mask] << ">";
+ break;
+ }
+ case k_CoprocNum:
OS << "<coprocessor number: " << getCoproc() << ">";
break;
- case CoprocReg:
+ case k_CoprocReg:
OS << "<coprocessor register: " << getCoproc() << ">";
break;
- case MSRMask:
+ case k_CoprocOption:
+ OS << "<coprocessor option: " << CoprocOption.Val << ">";
+ break;
+ case k_MSRMask:
OS << "<mask: " << getMSRMask() << ">";
break;
- case Immediate:
+ case k_Immediate:
getImm()->print(OS);
break;
- case MemBarrierOpt:
+ case k_MemBarrierOpt:
OS << "<ARM_MB::" << MemBOptToString(getMemBarrierOpt()) << ">";
break;
- case Memory:
+ case k_Memory:
OS << "<memory "
- << "am:" << ARMII::AddrModeToString(getMemAddrMode())
- << " base:" << getMemBaseRegNum();
- if (getMemOffsetIsReg()) {
- OS << " offset:<register " << getMemOffsetRegNum();
- if (getMemOffsetRegShifted()) {
- OS << " offset-shift-type:" << getMemShiftType();
- OS << " offset-shift-amount:" << *getMemShiftAmount();
- }
- } else {
- OS << " offset:" << *getMemOffset();
- }
- if (getMemOffsetIsReg())
- OS << " (offset-is-reg)";
- if (getMemPreindexed())
- OS << " (pre-indexed)";
- if (getMemPostindexed())
- OS << " (post-indexed)";
- if (getMemNegative())
- OS << " (negative)";
- if (getMemWriteback())
- OS << " (writeback)";
+ << " base:" << Memory.BaseRegNum;
+ OS << ">";
+ break;
+ case k_PostIndexRegister:
+ OS << "post-idx register " << (PostIdxReg.isAdd ? "" : "-")
+ << PostIdxReg.RegNum;
+ if (PostIdxReg.ShiftTy != ARM_AM::no_shift)
+ OS << ARM_AM::getShiftOpcStr(PostIdxReg.ShiftTy) << " "
+ << PostIdxReg.ShiftImm;
OS << ">";
break;
- case ProcIFlags: {
+ case k_ProcIFlags: {
OS << "<ARM_PROC::";
unsigned IFlags = getProcIFlags();
for (int i=2; i >= 0; --i)
@@ -977,23 +1726,38 @@ void ARMOperand::print(raw_ostream &OS) const {
OS << ">";
break;
}
- case Register:
+ case k_Register:
OS << "<register " << getReg() << ">";
break;
- case Shifter:
- OS << "<shifter " << ARM_AM::getShiftOpcStr(Shift.ShiftTy) << ">";
+ case k_ShifterImmediate:
+ OS << "<shift " << (ShifterImm.isASR ? "asr" : "lsl")
+ << " #" << ShifterImm.Imm << ">";
+ break;
+ case k_ShiftedRegister:
+ OS << "<so_reg_reg "
+ << RegShiftedReg.SrcReg
+ << ARM_AM::getShiftOpcStr(ARM_AM::getSORegShOp(RegShiftedReg.ShiftImm))
+ << ", " << RegShiftedReg.ShiftReg << ", "
+ << ARM_AM::getSORegOffset(RegShiftedReg.ShiftImm)
+ << ">";
break;
- case ShiftedRegister:
- OS << "<so_reg"
- << ShiftedReg.SrcReg
- << ARM_AM::getShiftOpcStr(ARM_AM::getSORegShOp(ShiftedReg.ShiftImm))
- << ", " << ShiftedReg.ShiftReg << ", "
- << ARM_AM::getSORegOffset(ShiftedReg.ShiftImm)
+ case k_ShiftedImmediate:
+ OS << "<so_reg_imm "
+ << RegShiftedImm.SrcReg
+ << ARM_AM::getShiftOpcStr(ARM_AM::getSORegShOp(RegShiftedImm.ShiftImm))
+ << ", " << ARM_AM::getSORegOffset(RegShiftedImm.ShiftImm)
<< ">";
break;
- case RegisterList:
- case DPRRegisterList:
- case SPRRegisterList: {
+ case k_RotateImmediate:
+ OS << "<ror " << " #" << (RotImm.Imm * 8) << ">";
+ break;
+ case k_BitfieldDescriptor:
+ OS << "<bitfield " << "lsb: " << Bitfield.LSB
+ << ", width: " << Bitfield.Width << ">";
+ break;
+ case k_RegisterList:
+ case k_DPRRegisterList:
+ case k_SPRRegisterList: {
OS << "<register_list ";
const SmallVectorImpl<unsigned> &RegList = getRegList();
@@ -1006,9 +1770,12 @@ void ARMOperand::print(raw_ostream &OS) const {
OS << ">";
break;
}
- case Token:
+ case k_Token:
OS << "'" << getToken() << "'";
break;
+ case k_VectorIndex:
+ OS << "<vectorindex " << getVectorIndex() << ">";
+ break;
}
}
@@ -1021,7 +1788,7 @@ static unsigned MatchRegisterName(StringRef Name);
bool ARMAsmParser::ParseRegister(unsigned &RegNo,
SMLoc &StartLoc, SMLoc &EndLoc) {
- RegNo = TryParseRegister();
+ RegNo = tryParseRegister();
return (RegNo == (unsigned)-1);
}
@@ -1030,9 +1797,9 @@ bool ARMAsmParser::ParseRegister(unsigned &RegNo,
/// and if it is a register name the token is eaten and the register number is
/// returned. Otherwise return -1.
///
-int ARMAsmParser::TryParseRegister() {
+int ARMAsmParser::tryParseRegister() {
const AsmToken &Tok = Parser.getTok();
- assert(Tok.is(AsmToken::Identifier) && "Token is not an Identifier");
+ if (Tok.isNot(AsmToken::Identifier)) return -1;
// FIXME: Validate register for the current architecture; we have to do
// validation later, so maybe there is no need for this here.
@@ -1050,6 +1817,39 @@ int ARMAsmParser::TryParseRegister() {
if (!RegNum) return -1;
Parser.Lex(); // Eat identifier token.
+
+#if 0
+ // Also check for an index operand. This is only legal for vector registers,
+ // but that'll get caught OK in operand matching, so we don't need to
+ // explicitly filter everything else out here.
+ if (Parser.getTok().is(AsmToken::LBrac)) {
+ SMLoc SIdx = Parser.getTok().getLoc();
+ Parser.Lex(); // Eat left bracket token.
+
+ const MCExpr *ImmVal;
+ SMLoc ExprLoc = Parser.getTok().getLoc();
+ if (getParser().ParseExpression(ImmVal))
+ return MatchOperand_ParseFail;
+ const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(ImmVal);
+ if (!MCE) {
+ TokError("immediate value expected for vector index");
+ return MatchOperand_ParseFail;
+ }
+
+ SMLoc E = Parser.getTok().getLoc();
+ if (Parser.getTok().isNot(AsmToken::RBrac)) {
+ Error(E, "']' expected");
+ return MatchOperand_ParseFail;
+ }
+
+ Parser.Lex(); // Eat right bracket token.
+
+ Operands.push_back(ARMOperand::CreateVectorIndex(MCE->getValue(),
+ SIdx, E,
+ getContext()));
+ }
+#endif
+
return RegNum;
}
@@ -1058,7 +1858,7 @@ int ARMAsmParser::TryParseRegister() {
// occurs, return -1. An irrecoverable error is one where tokens have been
// consumed in the process of trying to parse the shifter (i.e., when it is
// indeed a shifter operand, but malformed).
-int ARMAsmParser::TryParseShiftRegister(
+int ARMAsmParser::tryParseShiftRegister(
SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
SMLoc S = Parser.getTok().getLoc();
const AsmToken &Tok = Parser.getTok();
@@ -1120,7 +1920,7 @@ int ARMAsmParser::TryParseShiftRegister(
return -1;
}
} else if (Parser.getTok().is(AsmToken::Identifier)) {
- ShiftReg = TryParseRegister();
+ ShiftReg = tryParseRegister();
SMLoc L = Parser.getTok().getLoc();
if (ShiftReg == -1) {
Error (L, "expected immediate or register in shift operand");
@@ -1133,8 +1933,12 @@ int ARMAsmParser::TryParseShiftRegister(
}
}
- Operands.push_back(ARMOperand::CreateShiftedRegister(ShiftTy, SrcReg,
- ShiftReg, Imm,
+ if (ShiftReg && ShiftTy != ARM_AM::rrx)
+ Operands.push_back(ARMOperand::CreateShiftedRegister(ShiftTy, SrcReg,
+ ShiftReg, Imm,
+ S, Parser.getTok().getLoc()));
+ else
+ Operands.push_back(ARMOperand::CreateShiftedImmediate(ShiftTy, SrcReg, Imm,
S, Parser.getTok().getLoc()));
return 0;
@@ -1148,9 +1952,9 @@ int ARMAsmParser::TryParseShiftRegister(
/// TODO this is likely to change to allow different register types and or to
/// parse for a specific register type.
bool ARMAsmParser::
-TryParseRegisterWithWriteBack(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+tryParseRegisterWithWriteBack(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
SMLoc S = Parser.getTok().getLoc();
- int RegNo = TryParseRegister();
+ int RegNo = tryParseRegister();
if (RegNo == -1)
return true;
@@ -1161,6 +1965,37 @@ TryParseRegisterWithWriteBack(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
Operands.push_back(ARMOperand::CreateToken(ExclaimTok.getString(),
ExclaimTok.getLoc()));
Parser.Lex(); // Eat exclaim token
+ return false;
+ }
+
+ // Also check for an index operand. This is only legal for vector registers,
+ // but that'll get caught OK in operand matching, so we don't need to
+ // explicitly filter everything else out here.
+ if (Parser.getTok().is(AsmToken::LBrac)) {
+ SMLoc SIdx = Parser.getTok().getLoc();
+ Parser.Lex(); // Eat left bracket token.
+
+ const MCExpr *ImmVal;
+ SMLoc ExprLoc = Parser.getTok().getLoc();
+ if (getParser().ParseExpression(ImmVal))
+ return MatchOperand_ParseFail;
+ const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(ImmVal);
+ if (!MCE) {
+ TokError("immediate value expected for vector index");
+ return MatchOperand_ParseFail;
+ }
+
+ SMLoc E = Parser.getTok().getLoc();
+ if (Parser.getTok().isNot(AsmToken::RBrac)) {
+ Error(E, "']' expected");
+ return MatchOperand_ParseFail;
+ }
+
+ Parser.Lex(); // Eat right bracket token.
+
+ Operands.push_back(ARMOperand::CreateVectorIndex(MCE->getValue(),
+ SIdx, E,
+ getContext()));
}
return false;
@@ -1209,14 +2044,50 @@ static int MatchCoprocessorOperandName(StringRef Name, char CoprocOp) {
return -1;
}
-/// tryParseCoprocNumOperand - Try to parse an coprocessor number operand. The
+/// parseITCondCode - Try to parse a condition code for an IT instruction.
+ARMAsmParser::OperandMatchResultTy ARMAsmParser::
+parseITCondCode(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ SMLoc S = Parser.getTok().getLoc();
+ const AsmToken &Tok = Parser.getTok();
+ if (!Tok.is(AsmToken::Identifier))
+ return MatchOperand_NoMatch;
+ unsigned CC = StringSwitch<unsigned>(Tok.getString())
+ .Case("eq", ARMCC::EQ)
+ .Case("ne", ARMCC::NE)
+ .Case("hs", ARMCC::HS)
+ .Case("cs", ARMCC::HS)
+ .Case("lo", ARMCC::LO)
+ .Case("cc", ARMCC::LO)
+ .Case("mi", ARMCC::MI)
+ .Case("pl", ARMCC::PL)
+ .Case("vs", ARMCC::VS)
+ .Case("vc", ARMCC::VC)
+ .Case("hi", ARMCC::HI)
+ .Case("ls", ARMCC::LS)
+ .Case("ge", ARMCC::GE)
+ .Case("lt", ARMCC::LT)
+ .Case("gt", ARMCC::GT)
+ .Case("le", ARMCC::LE)
+ .Case("al", ARMCC::AL)
+ .Default(~0U);
+ if (CC == ~0U)
+ return MatchOperand_NoMatch;
+ Parser.Lex(); // Eat the token.
+
+ Operands.push_back(ARMOperand::CreateCondCode(ARMCC::CondCodes(CC), S));
+
+ return MatchOperand_Success;
+}
+
+/// parseCoprocNumOperand - Try to parse a coprocessor number operand. The
/// token must be an Identifier when called, and if it is a coprocessor
/// number, the token is eaten and the operand is added to the operand list.
ARMAsmParser::OperandMatchResultTy ARMAsmParser::
-tryParseCoprocNumOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+parseCoprocNumOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
SMLoc S = Parser.getTok().getLoc();
const AsmToken &Tok = Parser.getTok();
- assert(Tok.is(AsmToken::Identifier) && "Token is not an Identifier");
+ if (Tok.isNot(AsmToken::Identifier))
+ return MatchOperand_NoMatch;
int Num = MatchCoprocessorOperandName(Tok.getString(), 'p');
if (Num == -1)
@@ -1227,14 +2098,15 @@ tryParseCoprocNumOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
return MatchOperand_Success;
}
-/// tryParseCoprocRegOperand - Try to parse an coprocessor register operand. The
+/// parseCoprocRegOperand - Try to parse a coprocessor register operand. The
/// token must be an Identifier when called, and if it is a coprocessor
/// number, the token is eaten and the operand is added to the operand list.
ARMAsmParser::OperandMatchResultTy ARMAsmParser::
-tryParseCoprocRegOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+parseCoprocRegOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
SMLoc S = Parser.getTok().getLoc();
const AsmToken &Tok = Parser.getTok();
- assert(Tok.is(AsmToken::Identifier) && "Token is not an Identifier");
+ if (Tok.isNot(AsmToken::Identifier))
+ return MatchOperand_NoMatch;
int Reg = MatchCoprocessorOperandName(Tok.getString(), 'c');
if (Reg == -1)
@@ -1245,93 +2117,155 @@ tryParseCoprocRegOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
return MatchOperand_Success;
}
-/// Parse a register list, return it if successful else return null. The first
-/// token must be a '{' when called.
-bool ARMAsmParser::
-ParseRegisterList(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
- assert(Parser.getTok().is(AsmToken::LCurly) &&
- "Token is not a Left Curly Brace");
+/// parseCoprocOptionOperand - Try to parse a coprocessor option operand.
+/// coproc_option : '{' imm0_255 '}'
+ARMAsmParser::OperandMatchResultTy ARMAsmParser::
+parseCoprocOptionOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
SMLoc S = Parser.getTok().getLoc();
- // Read the rest of the registers in the list.
- unsigned PrevRegNum = 0;
- SmallVector<std::pair<unsigned, SMLoc>, 32> Registers;
-
- do {
- bool IsRange = Parser.getTok().is(AsmToken::Minus);
- Parser.Lex(); // Eat non-identifier token.
-
- const AsmToken &RegTok = Parser.getTok();
- SMLoc RegLoc = RegTok.getLoc();
- if (RegTok.isNot(AsmToken::Identifier)) {
- Error(RegLoc, "register expected");
- return true;
- }
-
- int RegNum = TryParseRegister();
- if (RegNum == -1) {
- Error(RegLoc, "register expected");
- return true;
- }
-
- if (IsRange) {
- int Reg = PrevRegNum;
- do {
- ++Reg;
- Registers.push_back(std::make_pair(Reg, RegLoc));
- } while (Reg != RegNum);
- } else {
- Registers.push_back(std::make_pair(RegNum, RegLoc));
- }
-
- PrevRegNum = RegNum;
- } while (Parser.getTok().is(AsmToken::Comma) ||
- Parser.getTok().is(AsmToken::Minus));
+ // If this isn't a '{', this isn't a coprocessor option operand.
+ if (Parser.getTok().isNot(AsmToken::LCurly))
+ return MatchOperand_NoMatch;
+ Parser.Lex(); // Eat the '{'
- // Process the right curly brace of the list.
- const AsmToken &RCurlyTok = Parser.getTok();
- if (RCurlyTok.isNot(AsmToken::RCurly)) {
- Error(RCurlyTok.getLoc(), "'}' expected");
- return true;
+ const MCExpr *Expr;
+ SMLoc Loc = Parser.getTok().getLoc();
+ if (getParser().ParseExpression(Expr)) {
+ Error(Loc, "illegal expression");
+ return MatchOperand_ParseFail;
}
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Expr);
+ if (!CE || CE->getValue() < 0 || CE->getValue() > 255) {
+ Error(Loc, "coprocessor option must be an immediate in range [0, 255]");
+ return MatchOperand_ParseFail;
+ }
+ int Val = CE->getValue();
- SMLoc E = RCurlyTok.getLoc();
- Parser.Lex(); // Eat right curly brace token.
-
- // Verify the register list.
- SmallVectorImpl<std::pair<unsigned, SMLoc> >::const_iterator
- RI = Registers.begin(), RE = Registers.end();
+ // Check for and consume the closing '}'
+ if (Parser.getTok().isNot(AsmToken::RCurly))
+ return MatchOperand_ParseFail;
+ SMLoc E = Parser.getTok().getLoc();
+ Parser.Lex(); // Eat the '}'
- unsigned HighRegNum = getARMRegisterNumbering(RI->first);
- bool EmittedWarning = false;
+ Operands.push_back(ARMOperand::CreateCoprocOption(Val, S, E));
+ return MatchOperand_Success;
+}
- DenseMap<unsigned, bool> RegMap;
- RegMap[HighRegNum] = true;
+// For register list parsing, we need to map from raw GPR register numbering
+// to the enumeration values. The enumeration values aren't sorted by
+// register number due to our using "sp", "lr" and "pc" as canonical names.
+static unsigned getNextRegister(unsigned Reg) {
+ // If this is a GPR, we need to do it manually; otherwise we can rely
+ // on the sort ordering of the enumeration since the other reg-classes
+ // are sane.
+ if (!ARMMCRegisterClasses[ARM::GPRRegClassID].contains(Reg))
+ return Reg + 1;
+ switch(Reg) {
+ default: assert(0 && "Invalid GPR number!");
+ case ARM::R0: return ARM::R1; case ARM::R1: return ARM::R2;
+ case ARM::R2: return ARM::R3; case ARM::R3: return ARM::R4;
+ case ARM::R4: return ARM::R5; case ARM::R5: return ARM::R6;
+ case ARM::R6: return ARM::R7; case ARM::R7: return ARM::R8;
+ case ARM::R8: return ARM::R9; case ARM::R9: return ARM::R10;
+ case ARM::R10: return ARM::R11; case ARM::R11: return ARM::R12;
+ case ARM::R12: return ARM::SP; case ARM::SP: return ARM::LR;
+ case ARM::LR: return ARM::PC; case ARM::PC: return ARM::R0;
+ }
+}
- for (++RI; RI != RE; ++RI) {
- const std::pair<unsigned, SMLoc> &RegInfo = *RI;
- unsigned Reg = getARMRegisterNumbering(RegInfo.first);
+/// Parse a register list.
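+/// E.g. "{r0, r4-r7, lr}" as used by LDM/STM and PUSH/POP (illustrative example).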
+bool ARMAsmParser::
+parseRegisterList(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ assert(Parser.getTok().is(AsmToken::LCurly) &&
+ "Token is not a Left Curly Brace");
+ SMLoc S = Parser.getTok().getLoc();
+ Parser.Lex(); // Eat '{' token.
+ SMLoc RegLoc = Parser.getTok().getLoc();
- if (RegMap[Reg]) {
- Error(RegInfo.second, "register duplicated in register list");
- return true;
+ // Check the first register in the list to see what register class
+ // this is a list of.
+ int Reg = tryParseRegister();
+ if (Reg == -1)
+ return Error(RegLoc, "register expected");
+
+ MCRegisterClass *RC;
+ if (ARMMCRegisterClasses[ARM::GPRRegClassID].contains(Reg))
+ RC = &ARMMCRegisterClasses[ARM::GPRRegClassID];
+ else if (ARMMCRegisterClasses[ARM::DPRRegClassID].contains(Reg))
+ RC = &ARMMCRegisterClasses[ARM::DPRRegClassID];
+ else if (ARMMCRegisterClasses[ARM::SPRRegClassID].contains(Reg))
+ RC = &ARMMCRegisterClasses[ARM::SPRRegClassID];
+ else
+ return Error(RegLoc, "invalid register in register list");
+
+ // The reglist instructions have at most 16 registers, so reserve
+ // space for that many.
+ SmallVector<std::pair<unsigned, SMLoc>, 16> Registers;
+ // Store the first register.
+ Registers.push_back(std::pair<unsigned, SMLoc>(Reg, RegLoc));
+
+ // This starts immediately after the first register token in the list,
+ // so we can see either a comma or a minus (range separator) as a legal
+ // next token.
+ while (Parser.getTok().is(AsmToken::Comma) ||
+ Parser.getTok().is(AsmToken::Minus)) {
+ if (Parser.getTok().is(AsmToken::Minus)) {
+ Parser.Lex(); // Eat the minus.
+ SMLoc EndLoc = Parser.getTok().getLoc();
+ int EndReg = tryParseRegister();
+ if (EndReg == -1)
+ return Error(EndLoc, "register expected");
+ // If the register is the same as the start reg, there's nothing
+ // more to do.
+ if (Reg == EndReg)
+ continue;
+ // The register must be in the same register class as the first.
+ if (!RC->contains(EndReg))
+ return Error(EndLoc, "invalid register in register list");
+ // Ranges must go from low to high.
+ if (getARMRegisterNumbering(Reg) > getARMRegisterNumbering(EndReg))
+ return Error(EndLoc, "bad range in register list");
+
+ // Add all the registers in the range to the register list.
+ while (Reg != EndReg) {
+ Reg = getNextRegister(Reg);
+ Registers.push_back(std::pair<unsigned, SMLoc>(Reg, RegLoc));
+ }
+ continue;
}
-
- if (!EmittedWarning && Reg < HighRegNum)
- Warning(RegInfo.second,
- "register not in ascending order in register list");
-
- RegMap[Reg] = true;
- HighRegNum = std::max(Reg, HighRegNum);
+ Parser.Lex(); // Eat the comma.
+ RegLoc = Parser.getTok().getLoc();
+ int OldReg = Reg;
+ Reg = tryParseRegister();
+ if (Reg == -1)
+ return Error(RegLoc, "register expected");
+ // The register must be in the same register class as the first.
+ if (!RC->contains(Reg))
+ return Error(RegLoc, "invalid register in register list");
+ // List must be monotonically increasing.
+ if (getARMRegisterNumbering(Reg) <= getARMRegisterNumbering(OldReg))
+ return Error(RegLoc, "register list not in ascending order");
+ // VFP register lists must also be contiguous.
+ // It's OK to use the enumeration values directly here, as the
+ // VFP register classes have the enum sorted properly.
+ if (RC != &ARMMCRegisterClasses[ARM::GPRRegClassID] &&
+ Reg != OldReg + 1)
+ return Error(RegLoc, "non-contiguous register range");
+ Registers.push_back(std::pair<unsigned, SMLoc>(Reg, RegLoc));
}
+ SMLoc E = Parser.getTok().getLoc();
+ if (Parser.getTok().isNot(AsmToken::RCurly))
+ return Error(E, "'}' expected");
+ Parser.Lex(); // Eat '}' token.
+
Operands.push_back(ARMOperand::CreateRegList(Registers, S, E));
return false;
}
-/// tryParseMemBarrierOptOperand - Try to parse DSB/DMB data barrier options.
+/// parseMemBarrierOptOperand - Try to parse DSB/DMB data barrier options.
ARMAsmParser::OperandMatchResultTy ARMAsmParser::
-tryParseMemBarrierOptOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+parseMemBarrierOptOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
SMLoc S = Parser.getTok().getLoc();
const AsmToken &Tok = Parser.getTok();
assert(Tok.is(AsmToken::Identifier) && "Token is not an Identifier");
@@ -1360,28 +2294,32 @@ tryParseMemBarrierOptOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
return MatchOperand_Success;
}
-/// tryParseProcIFlagsOperand - Try to parse iflags from CPS instruction.
+/// parseProcIFlagsOperand - Try to parse iflags from CPS instruction.
ARMAsmParser::OperandMatchResultTy ARMAsmParser::
-tryParseProcIFlagsOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+parseProcIFlagsOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
SMLoc S = Parser.getTok().getLoc();
const AsmToken &Tok = Parser.getTok();
assert(Tok.is(AsmToken::Identifier) && "Token is not an Identifier");
StringRef IFlagsStr = Tok.getString();
+ // An iflags string of "none" is interpreted to mean that none of the AIF
+ // bits are set. Not a terribly useful instruction, but a valid encoding.
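+ // E.g. the "if" in "cpsid if" selects the I and F bits (illustrative example).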
unsigned IFlags = 0;
- for (int i = 0, e = IFlagsStr.size(); i != e; ++i) {
- unsigned Flag = StringSwitch<unsigned>(IFlagsStr.substr(i, 1))
- .Case("a", ARM_PROC::A)
- .Case("i", ARM_PROC::I)
- .Case("f", ARM_PROC::F)
- .Default(~0U);
-
- // If some specific iflag is already set, it means that some letter is
- // present more than once, this is not acceptable.
- if (Flag == ~0U || (IFlags & Flag))
- return MatchOperand_NoMatch;
+ if (IFlagsStr != "none") {
+ for (int i = 0, e = IFlagsStr.size(); i != e; ++i) {
+ unsigned Flag = StringSwitch<unsigned>(IFlagsStr.substr(i, 1))
+ .Case("a", ARM_PROC::A)
+ .Case("i", ARM_PROC::I)
+ .Case("f", ARM_PROC::F)
+ .Default(~0U);
+
+ // If some specific iflag is already set, it means that some letter is
+ // present more than once; this is not acceptable.
+ if (Flag == ~0U || (IFlags & Flag))
+ return MatchOperand_NoMatch;
- IFlags |= Flag;
+ IFlags |= Flag;
+ }
}
Parser.Lex(); // Eat identifier token.
@@ -1389,18 +2327,49 @@ tryParseProcIFlagsOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
return MatchOperand_Success;
}
-/// tryParseMSRMaskOperand - Try to parse mask flags from MSR instruction.
+/// parseMSRMaskOperand - Try to parse mask flags from MSR instruction.
ARMAsmParser::OperandMatchResultTy ARMAsmParser::
-tryParseMSRMaskOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+parseMSRMaskOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
SMLoc S = Parser.getTok().getLoc();
const AsmToken &Tok = Parser.getTok();
assert(Tok.is(AsmToken::Identifier) && "Token is not an Identifier");
StringRef Mask = Tok.getString();
+ if (isMClass()) {
+ // See ARMv6-M 10.1.1
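+ // E.g. the "primask" in "msr primask, r0" (illustrative example).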
+ unsigned FlagsVal = StringSwitch<unsigned>(Mask)
+ .Case("apsr", 0)
+ .Case("iapsr", 1)
+ .Case("eapsr", 2)
+ .Case("xpsr", 3)
+ .Case("ipsr", 5)
+ .Case("epsr", 6)
+ .Case("iepsr", 7)
+ .Case("msp", 8)
+ .Case("psp", 9)
+ .Case("primask", 16)
+ .Case("basepri", 17)
+ .Case("basepri_max", 18)
+ .Case("faultmask", 19)
+ .Case("control", 20)
+ .Default(~0U);
+
+ if (FlagsVal == ~0U)
+ return MatchOperand_NoMatch;
+
+ if (!hasV7Ops() && FlagsVal >= 17 && FlagsVal <= 19)
+ // basepri, basepri_max and faultmask only valid for V7m.
+ return MatchOperand_NoMatch;
+
+ Parser.Lex(); // Eat identifier token.
+ Operands.push_back(ARMOperand::CreateMSRMask(FlagsVal, S));
+ return MatchOperand_Success;
+ }
+
// Split spec_reg from flag, example: CPSR_sxf => "CPSR" and "sxf"
size_t Start = 0, Next = Mask.find('_');
StringRef Flags = "";
- StringRef SpecReg = Mask.slice(Start, Next);
+ std::string SpecReg = LowercaseString(Mask.slice(Start, Next));
if (Next != StringRef::npos)
Flags = Mask.slice(Next+1, Mask.size());
@@ -1411,7 +2380,7 @@ tryParseMSRMaskOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
if (SpecReg == "apsr") {
FlagsVal = StringSwitch<unsigned>(Flags)
- .Case("nzcvq", 0x8) // same as CPSR_c
+ .Case("nzcvq", 0x8) // same as CPSR_f
.Case("g", 0x4) // same as CPSR_s
.Case("nzcvqg", 0xc) // same as CPSR_fs
.Default(~0U);
@@ -1420,7 +2389,7 @@ tryParseMSRMaskOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
if (!Flags.empty())
return MatchOperand_NoMatch;
else
- FlagsVal = 0; // No flag
+ FlagsVal = 8; // No flag
}
} else if (SpecReg == "cpsr" || SpecReg == "spsr") {
if (Flags == "all") // cpsr_all is an alias for cpsr_fc
@@ -1455,96 +2424,680 @@ tryParseMSRMaskOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
return MatchOperand_Success;
}
-/// tryParseMemMode2Operand - Try to parse memory addressing mode 2 operand.
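+/// parsePKHImm - Parse the shift operand for PKHBT/PKHTB, e.g. the
+/// "lsl #8" in "pkhbt r0, r1, r2, lsl #8" (illustrative example).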
ARMAsmParser::OperandMatchResultTy ARMAsmParser::
-tryParseMemMode2Operand(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
- assert(Parser.getTok().is(AsmToken::LBrac) && "Token is not a \"[\"");
+parsePKHImm(SmallVectorImpl<MCParsedAsmOperand*> &Operands, StringRef Op,
+ int Low, int High) {
+ const AsmToken &Tok = Parser.getTok();
+ if (Tok.isNot(AsmToken::Identifier)) {
+ Error(Parser.getTok().getLoc(), Op + " operand expected.");
+ return MatchOperand_ParseFail;
+ }
+ StringRef ShiftName = Tok.getString();
+ std::string LowerOp = LowercaseString(Op);
+ std::string UpperOp = UppercaseString(Op);
+ if (ShiftName != LowerOp && ShiftName != UpperOp) {
+ Error(Parser.getTok().getLoc(), Op + " operand expected.");
+ return MatchOperand_ParseFail;
+ }
+ Parser.Lex(); // Eat shift type token.
- if (ParseMemory(Operands, ARMII::AddrMode2))
- return MatchOperand_NoMatch;
+ // There must be a '#' and a shift amount.
+ if (Parser.getTok().isNot(AsmToken::Hash)) {
+ Error(Parser.getTok().getLoc(), "'#' expected");
+ return MatchOperand_ParseFail;
+ }
+ Parser.Lex(); // Eat hash token.
+
+ const MCExpr *ShiftAmount;
+ SMLoc Loc = Parser.getTok().getLoc();
+ if (getParser().ParseExpression(ShiftAmount)) {
+ Error(Loc, "illegal expression");
+ return MatchOperand_ParseFail;
+ }
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(ShiftAmount);
+ if (!CE) {
+ Error(Loc, "constant expression expected");
+ return MatchOperand_ParseFail;
+ }
+ int Val = CE->getValue();
+ if (Val < Low || Val > High) {
+ Error(Loc, "immediate value out of range");
+ return MatchOperand_ParseFail;
+ }
+
+ Operands.push_back(ARMOperand::CreateImm(CE, Loc, Parser.getTok().getLoc()));
return MatchOperand_Success;
}
-/// tryParseMemMode3Operand - Try to parse memory addressing mode 3 operand.
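+/// parseSetEndImm - Parse the endianness operand for SETEND, e.g. the
+/// "be" in "setend be" (illustrative example).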
ARMAsmParser::OperandMatchResultTy ARMAsmParser::
-tryParseMemMode3Operand(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
- assert(Parser.getTok().is(AsmToken::LBrac) && "Token is not a \"[\"");
+parseSetEndImm(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ const AsmToken &Tok = Parser.getTok();
+ SMLoc S = Tok.getLoc();
+ if (Tok.isNot(AsmToken::Identifier)) {
+ Error(Tok.getLoc(), "'be' or 'le' operand expected");
+ return MatchOperand_ParseFail;
+ }
+ int Val = StringSwitch<int>(Tok.getString())
+ .Case("be", 1)
+ .Case("le", 0)
+ .Default(-1);
+ Parser.Lex(); // Eat the token.
+
+ if (Val == -1) {
+ Error(Tok.getLoc(), "'be' or 'le' operand expected");
+ return MatchOperand_ParseFail;
+ }
+ Operands.push_back(ARMOperand::CreateImm(MCConstantExpr::Create(Val,
+ getContext()),
+ S, Parser.getTok().getLoc()));
+ return MatchOperand_Success;
+}
+
+/// parseShifterImm - Parse the shifter immediate operand for SSAT/USAT
+/// instructions. Legal values are:
+/// lsl #n 'n' in [0,31]
+/// asr #n 'n' in [1,32]
+/// n == 32 encoded as n == 0.
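+/// E.g. the "asr #4" in "ssat r0, #8, r1, asr #4" (illustrative example).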
+ARMAsmParser::OperandMatchResultTy ARMAsmParser::
+parseShifterImm(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ const AsmToken &Tok = Parser.getTok();
+ SMLoc S = Tok.getLoc();
+ if (Tok.isNot(AsmToken::Identifier)) {
+ Error(S, "shift operator 'asr' or 'lsl' expected");
+ return MatchOperand_ParseFail;
+ }
+ StringRef ShiftName = Tok.getString();
+ bool isASR;
+ if (ShiftName == "lsl" || ShiftName == "LSL")
+ isASR = false;
+ else if (ShiftName == "asr" || ShiftName == "ASR")
+ isASR = true;
+ else {
+ Error(S, "shift operator 'asr' or 'lsl' expected");
+ return MatchOperand_ParseFail;
+ }
+ Parser.Lex(); // Eat the operator.
+
+ // A '#' and a shift amount.
+ if (Parser.getTok().isNot(AsmToken::Hash)) {
+ Error(Parser.getTok().getLoc(), "'#' expected");
+ return MatchOperand_ParseFail;
+ }
+ Parser.Lex(); // Eat hash token.
+
+ const MCExpr *ShiftAmount;
+ SMLoc E = Parser.getTok().getLoc();
+ if (getParser().ParseExpression(ShiftAmount)) {
+ Error(E, "malformed shift expression");
+ return MatchOperand_ParseFail;
+ }
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(ShiftAmount);
+ if (!CE) {
+ Error(E, "shift amount must be an immediate");
+ return MatchOperand_ParseFail;
+ }
- if (ParseMemory(Operands, ARMII::AddrMode3))
+ int64_t Val = CE->getValue();
+ if (isASR) {
+ // Shift amount must be in [1,32]
+ if (Val < 1 || Val > 32) {
+ Error(E, "'asr' shift amount must be in range [1,32]");
+ return MatchOperand_ParseFail;
+ }
+ // asr #32 encoded as asr #0, but is not allowed in Thumb2 mode.
+ if (isThumb() && Val == 32) {
+ Error(E, "'asr #32' shift amount not allowed in Thumb mode");
+ return MatchOperand_ParseFail;
+ }
+ if (Val == 32) Val = 0;
+ } else {
+ // Shift amount must be in [0,31]
+ if (Val < 0 || Val > 31) {
+ Error(E, "'lsl' shift amount must be in range [0,31]");
+ return MatchOperand_ParseFail;
+ }
+ }
+
+ E = Parser.getTok().getLoc();
+ Operands.push_back(ARMOperand::CreateShifterImm(isASR, Val, S, E));
+
+ return MatchOperand_Success;
+}
+
+/// parseRotImm - Parse the shifter immediate operand for SXTB/UXTB family
+/// of instructions. Legal values are:
+/// ror #n 'n' in {0, 8, 16, 24}
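+/// E.g. the "ror #8" in "sxtb r0, r1, ror #8" (illustrative example).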
+ARMAsmParser::OperandMatchResultTy ARMAsmParser::
+parseRotImm(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ const AsmToken &Tok = Parser.getTok();
+ SMLoc S = Tok.getLoc();
+ if (Tok.isNot(AsmToken::Identifier))
+ return MatchOperand_NoMatch;
+ StringRef ShiftName = Tok.getString();
+ if (ShiftName != "ror" && ShiftName != "ROR")
return MatchOperand_NoMatch;
+ Parser.Lex(); // Eat the operator.
+
+ // A '#' and a rotate amount.
+ if (Parser.getTok().isNot(AsmToken::Hash)) {
+ Error(Parser.getTok().getLoc(), "'#' expected");
+ return MatchOperand_ParseFail;
+ }
+ Parser.Lex(); // Eat hash token.
+
+ const MCExpr *ShiftAmount;
+ SMLoc E = Parser.getTok().getLoc();
+ if (getParser().ParseExpression(ShiftAmount)) {
+ Error(E, "malformed rotate expression");
+ return MatchOperand_ParseFail;
+ }
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(ShiftAmount);
+ if (!CE) {
+ Error(E, "rotate amount must be an immediate");
+ return MatchOperand_ParseFail;
+ }
+
+ int64_t Val = CE->getValue();
+ // Rotate amount must be in {0, 8, 16, 24}. Zero is an undocumented
+ // extension; normally it is expressed in asm by omitting the rotate
+ // operand entirely.
+ if (Val != 8 && Val != 16 && Val != 24 && Val != 0) {
+ Error(E, "'ror' rotate amount must be 8, 16, or 24");
+ return MatchOperand_ParseFail;
+ }
+
+ E = Parser.getTok().getLoc();
+ Operands.push_back(ARMOperand::CreateRotImm(Val, S, E));
+
+ return MatchOperand_Success;
+}
+
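+/// parseBitfield - Parse the lsb/width operand pair for BFC/BFI, e.g. the
+/// "#8, #4" in "bfi r0, r1, #8, #4" (illustrative example).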
+ARMAsmParser::OperandMatchResultTy ARMAsmParser::
+parseBitfield(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ SMLoc S = Parser.getTok().getLoc();
+ // The bitfield descriptor is really two operands, the LSB and the width.
+ if (Parser.getTok().isNot(AsmToken::Hash)) {
+ Error(Parser.getTok().getLoc(), "'#' expected");
+ return MatchOperand_ParseFail;
+ }
+ Parser.Lex(); // Eat hash token.
+
+ const MCExpr *LSBExpr;
+ SMLoc E = Parser.getTok().getLoc();
+ if (getParser().ParseExpression(LSBExpr)) {
+ Error(E, "malformed immediate expression");
+ return MatchOperand_ParseFail;
+ }
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(LSBExpr);
+ if (!CE) {
+ Error(E, "'lsb' operand must be an immediate");
+ return MatchOperand_ParseFail;
+ }
+
+ int64_t LSB = CE->getValue();
+ // The LSB must be in the range [0,31]
+ if (LSB < 0 || LSB > 31) {
+ Error(E, "'lsb' operand must be in the range [0,31]");
+ return MatchOperand_ParseFail;
+ }
+ E = Parser.getTok().getLoc();
+
+ // Expect another immediate operand.
+ if (Parser.getTok().isNot(AsmToken::Comma)) {
+ Error(Parser.getTok().getLoc(), "too few operands");
+ return MatchOperand_ParseFail;
+ }
+ Parser.Lex(); // Eat comma token.
+ if (Parser.getTok().isNot(AsmToken::Hash)) {
+ Error(Parser.getTok().getLoc(), "'#' expected");
+ return MatchOperand_ParseFail;
+ }
+ Parser.Lex(); // Eat hash token.
+
+ const MCExpr *WidthExpr;
+ if (getParser().ParseExpression(WidthExpr)) {
+ Error(E, "malformed immediate expression");
+ return MatchOperand_ParseFail;
+ }
+ CE = dyn_cast<MCConstantExpr>(WidthExpr);
+ if (!CE) {
+ Error(E, "'width' operand must be an immediate");
+ return MatchOperand_ParseFail;
+ }
+
+ int64_t Width = CE->getValue();
+ // The width must be in the range [1,32-lsb]
+ if (Width < 1 || Width > 32 - LSB) {
+ Error(E, "'width' operand must be in the range [1,32-lsb]");
+ return MatchOperand_ParseFail;
+ }
+ E = Parser.getTok().getLoc();
+
+ Operands.push_back(ARMOperand::CreateBitfield(LSB, Width, S, E));
return MatchOperand_Success;
}
-/// CvtLdWriteBackRegAddrMode2 - Convert parsed operands to MCInst.
+ARMAsmParser::OperandMatchResultTy ARMAsmParser::
+parsePostIdxReg(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ // Check for a post-index addressing register operand. Specifically:
+ // postidx_reg := '+' register {, shift}
+ // | '-' register {, shift}
+ // | register {, shift}
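+ // E.g. the "-r2, lsl #2" in "ldr r0, [r1], -r2, lsl #2" (illustrative example).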
+
+ // This method must return MatchOperand_NoMatch without consuming any tokens
+ // in the case where there is no match, as other alternatives take other
+ // parse methods.
+ AsmToken Tok = Parser.getTok();
+ SMLoc S = Tok.getLoc();
+ bool haveEaten = false;
+ bool isAdd = true;
+ int Reg = -1;
+ if (Tok.is(AsmToken::Plus)) {
+ Parser.Lex(); // Eat the '+' token.
+ haveEaten = true;
+ } else if (Tok.is(AsmToken::Minus)) {
+ Parser.Lex(); // Eat the '-' token.
+ isAdd = false;
+ haveEaten = true;
+ }
+ if (Parser.getTok().is(AsmToken::Identifier))
+ Reg = tryParseRegister();
+ if (Reg == -1) {
+ if (!haveEaten)
+ return MatchOperand_NoMatch;
+ Error(Parser.getTok().getLoc(), "register expected");
+ return MatchOperand_ParseFail;
+ }
+ SMLoc E = Parser.getTok().getLoc();
+
+ ARM_AM::ShiftOpc ShiftTy = ARM_AM::no_shift;
+ unsigned ShiftImm = 0;
+ if (Parser.getTok().is(AsmToken::Comma)) {
+ Parser.Lex(); // Eat the ','.
+ if (parseMemRegOffsetShift(ShiftTy, ShiftImm))
+ return MatchOperand_ParseFail;
+ }
+
+ Operands.push_back(ARMOperand::CreatePostIdxReg(Reg, isAdd, ShiftTy,
+ ShiftImm, S, E));
+
+ return MatchOperand_Success;
+}
+
+ARMAsmParser::OperandMatchResultTy ARMAsmParser::
+parseAM3Offset(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ // Check for a post-index addressing register operand. Specifically:
+ // am3offset := '+' register
+ // | '-' register
+ // | register
+ // | # imm
+ // | # + imm
+ // | # - imm
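+ // E.g. the "#-4" in "ldrh r0, [r1], #-4" (illustrative example).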
+
+ // This method must return MatchOperand_NoMatch without consuming any tokens
+ // in the case where there is no match, as other alternatives take other
+ // parse methods.
+ AsmToken Tok = Parser.getTok();
+ SMLoc S = Tok.getLoc();
+
+ // Do immediates first, as we always parse those if we have a '#'.
+ if (Parser.getTok().is(AsmToken::Hash)) {
+ Parser.Lex(); // Eat the '#'.
+ // Explicitly look for a '-', as we need to encode negative zero
+ // differently.
+ bool isNegative = Parser.getTok().is(AsmToken::Minus);
+ const MCExpr *Offset;
+ if (getParser().ParseExpression(Offset))
+ return MatchOperand_ParseFail;
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Offset);
+ if (!CE) {
+ Error(S, "constant expression expected");
+ return MatchOperand_ParseFail;
+ }
+ SMLoc E = Tok.getLoc();
+ // Negative zero is encoded as the flag value INT32_MIN.
+ int32_t Val = CE->getValue();
+ if (isNegative && Val == 0)
+ Val = INT32_MIN;
+
+ Operands.push_back(
+ ARMOperand::CreateImm(MCConstantExpr::Create(Val, getContext()), S, E));
+
+ return MatchOperand_Success;
+ }
+
+ bool haveEaten = false;
+ bool isAdd = true;
+ int Reg = -1;
+ if (Tok.is(AsmToken::Plus)) {
+ Parser.Lex(); // Eat the '+' token.
+ haveEaten = true;
+ } else if (Tok.is(AsmToken::Minus)) {
+ Parser.Lex(); // Eat the '-' token.
+ isAdd = false;
+ haveEaten = true;
+ }
+ if (Parser.getTok().is(AsmToken::Identifier))
+ Reg = tryParseRegister();
+ if (Reg == -1) {
+ if (!haveEaten)
+ return MatchOperand_NoMatch;
+ Error(Parser.getTok().getLoc(), "register expected");
+ return MatchOperand_ParseFail;
+ }
+ SMLoc E = Parser.getTok().getLoc();
+
+ Operands.push_back(ARMOperand::CreatePostIdxReg(Reg, isAdd, ARM_AM::no_shift,
+ 0, S, E));
+
+ return MatchOperand_Success;
+}
+
+/// cvtT2LdrdPre - Convert parsed operands to MCInst.
+/// Needed here because the Asm Gen Matcher can't properly handle tied operands
+/// when they refer to multiple MIOperands inside a single one.
+bool ARMAsmParser::
+cvtT2LdrdPre(MCInst &Inst, unsigned Opcode,
+ const SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ // Rt, Rt2
+ ((ARMOperand*)Operands[2])->addRegOperands(Inst, 1);
+ ((ARMOperand*)Operands[3])->addRegOperands(Inst, 1);
+ // Create a writeback register dummy placeholder.
+ Inst.addOperand(MCOperand::CreateReg(0));
+ // addr
+ ((ARMOperand*)Operands[4])->addMemImm8s4OffsetOperands(Inst, 2);
+ // pred
+ ((ARMOperand*)Operands[1])->addCondCodeOperands(Inst, 2);
+ return true;
+}
+
+/// cvtT2StrdPre - Convert parsed operands to MCInst.
+/// Needed here because the Asm Gen Matcher can't properly handle tied operands
+/// when they refer to multiple MIOperands inside a single one.
+bool ARMAsmParser::
+cvtT2StrdPre(MCInst &Inst, unsigned Opcode,
+ const SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ // Create a writeback register dummy placeholder.
+ Inst.addOperand(MCOperand::CreateReg(0));
+ // Rt, Rt2
+ ((ARMOperand*)Operands[2])->addRegOperands(Inst, 1);
+ ((ARMOperand*)Operands[3])->addRegOperands(Inst, 1);
+ // addr
+ ((ARMOperand*)Operands[4])->addMemImm8s4OffsetOperands(Inst, 2);
+ // pred
+ ((ARMOperand*)Operands[1])->addCondCodeOperands(Inst, 2);
+ return true;
+}
+
+/// cvtLdWriteBackRegT2AddrModeImm8 - Convert parsed operands to MCInst.
/// Needed here because the Asm Gen Matcher can't properly handle tied operands
/// when they refer to multiple MIOperands inside a single one.
bool ARMAsmParser::
-CvtLdWriteBackRegAddrMode2(MCInst &Inst, unsigned Opcode,
+cvtLdWriteBackRegT2AddrModeImm8(MCInst &Inst, unsigned Opcode,
const SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
((ARMOperand*)Operands[2])->addRegOperands(Inst, 1);
// Create a writeback register dummy placeholder.
Inst.addOperand(MCOperand::CreateImm(0));
- ((ARMOperand*)Operands[3])->addMemMode2Operands(Inst, 3);
+ ((ARMOperand*)Operands[3])->addMemImm8OffsetOperands(Inst, 2);
((ARMOperand*)Operands[1])->addCondCodeOperands(Inst, 2);
return true;
}
-/// CvtStWriteBackRegAddrMode2 - Convert parsed operands to MCInst.
+/// cvtStWriteBackRegT2AddrModeImm8 - Convert parsed operands to MCInst.
/// Needed here because the Asm Gen Matcher can't properly handle tied operands
/// when they refer to multiple MIOperands inside a single one.
bool ARMAsmParser::
-CvtStWriteBackRegAddrMode2(MCInst &Inst, unsigned Opcode,
+cvtStWriteBackRegT2AddrModeImm8(MCInst &Inst, unsigned Opcode,
const SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
// Create a writeback register dummy placeholder.
Inst.addOperand(MCOperand::CreateImm(0));
((ARMOperand*)Operands[2])->addRegOperands(Inst, 1);
- ((ARMOperand*)Operands[3])->addMemMode2Operands(Inst, 3);
+ ((ARMOperand*)Operands[3])->addMemImm8OffsetOperands(Inst, 2);
((ARMOperand*)Operands[1])->addCondCodeOperands(Inst, 2);
return true;
}
-/// CvtLdWriteBackRegAddrMode3 - Convert parsed operands to MCInst.
+/// cvtLdWriteBackRegAddrMode2 - Convert parsed operands to MCInst.
/// Needed here because the Asm Gen Matcher can't properly handle tied operands
/// when they refer to multiple MIOperands inside a single one.
bool ARMAsmParser::
-CvtLdWriteBackRegAddrMode3(MCInst &Inst, unsigned Opcode,
+cvtLdWriteBackRegAddrMode2(MCInst &Inst, unsigned Opcode,
const SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
((ARMOperand*)Operands[2])->addRegOperands(Inst, 1);
// Create a writeback register dummy placeholder.
Inst.addOperand(MCOperand::CreateImm(0));
- ((ARMOperand*)Operands[3])->addMemMode3Operands(Inst, 3);
+ ((ARMOperand*)Operands[3])->addAddrMode2Operands(Inst, 3);
+ ((ARMOperand*)Operands[1])->addCondCodeOperands(Inst, 2);
+ return true;
+}
+
+/// cvtLdWriteBackRegAddrModeImm12 - Convert parsed operands to MCInst.
+/// Needed here because the Asm Gen Matcher can't properly handle tied operands
+/// when they refer to multiple MIOperands inside a single one.
+bool ARMAsmParser::
+cvtLdWriteBackRegAddrModeImm12(MCInst &Inst, unsigned Opcode,
+ const SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ ((ARMOperand*)Operands[2])->addRegOperands(Inst, 1);
+
+ // Create a writeback register dummy placeholder.
+ Inst.addOperand(MCOperand::CreateImm(0));
+
+ ((ARMOperand*)Operands[3])->addMemImm12OffsetOperands(Inst, 2);
+ ((ARMOperand*)Operands[1])->addCondCodeOperands(Inst, 2);
+ return true;
+}
+
+/// cvtStWriteBackRegAddrModeImm12 - Convert parsed operands to MCInst.
+/// Needed here because the Asm Gen Matcher can't properly handle tied operands
+/// when they refer to multiple MIOperands inside a single one.
+bool ARMAsmParser::
+cvtStWriteBackRegAddrModeImm12(MCInst &Inst, unsigned Opcode,
+ const SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ // Create a writeback register dummy placeholder.
+ Inst.addOperand(MCOperand::CreateImm(0));
+ ((ARMOperand*)Operands[2])->addRegOperands(Inst, 1);
+ ((ARMOperand*)Operands[3])->addMemImm12OffsetOperands(Inst, 2);
+ ((ARMOperand*)Operands[1])->addCondCodeOperands(Inst, 2);
+ return true;
+}
+
+/// cvtStWriteBackRegAddrMode2 - Convert parsed operands to MCInst.
+/// Needed here because the Asm Gen Matcher can't properly handle tied operands
+/// when they refer to multiple MIOperands inside a single one.
+bool ARMAsmParser::
+cvtStWriteBackRegAddrMode2(MCInst &Inst, unsigned Opcode,
+ const SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ // Create a writeback register dummy placeholder.
+ Inst.addOperand(MCOperand::CreateImm(0));
+ ((ARMOperand*)Operands[2])->addRegOperands(Inst, 1);
+ ((ARMOperand*)Operands[3])->addAddrMode2Operands(Inst, 3);
((ARMOperand*)Operands[1])->addCondCodeOperands(Inst, 2);
return true;
}
-/// CvtStWriteBackRegAddrMode3 - Convert parsed operands to MCInst.
+/// cvtStWriteBackRegAddrMode3 - Convert parsed operands to MCInst.
/// Needed here because the Asm Gen Matcher can't properly handle tied operands
/// when they refer to multiple MIOperands inside a single one.
bool ARMAsmParser::
-CvtStWriteBackRegAddrMode3(MCInst &Inst, unsigned Opcode,
+cvtStWriteBackRegAddrMode3(MCInst &Inst, unsigned Opcode,
const SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
// Create a writeback register dummy placeholder.
Inst.addOperand(MCOperand::CreateImm(0));
((ARMOperand*)Operands[2])->addRegOperands(Inst, 1);
- ((ARMOperand*)Operands[3])->addMemMode3Operands(Inst, 3);
+ ((ARMOperand*)Operands[3])->addAddrMode3Operands(Inst, 3);
+ ((ARMOperand*)Operands[1])->addCondCodeOperands(Inst, 2);
+ return true;
+}
+
+/// cvtLdExtTWriteBackImm - Convert parsed operands to MCInst.
+/// Needed here because the Asm Gen Matcher can't properly handle tied operands
+/// when they refer to multiple MIOperands inside a single one.
+bool ARMAsmParser::
+cvtLdExtTWriteBackImm(MCInst &Inst, unsigned Opcode,
+ const SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ // Rt
+ ((ARMOperand*)Operands[2])->addRegOperands(Inst, 1);
+ // Create a writeback register dummy placeholder.
+ Inst.addOperand(MCOperand::CreateImm(0));
+ // addr
+ ((ARMOperand*)Operands[3])->addMemNoOffsetOperands(Inst, 1);
+ // offset
+ ((ARMOperand*)Operands[4])->addPostIdxImm8Operands(Inst, 1);
+ // pred
+ ((ARMOperand*)Operands[1])->addCondCodeOperands(Inst, 2);
+ return true;
+}
+
+/// cvtLdExtTWriteBackReg - Convert parsed operands to MCInst.
+/// Needed here because the Asm Gen Matcher can't properly handle tied operands
+/// when they refer to multiple MIOperands inside a single one.
+bool ARMAsmParser::
+cvtLdExtTWriteBackReg(MCInst &Inst, unsigned Opcode,
+ const SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ // Rt
+ ((ARMOperand*)Operands[2])->addRegOperands(Inst, 1);
+ // Create a writeback register dummy placeholder.
+ Inst.addOperand(MCOperand::CreateImm(0));
+ // addr
+ ((ARMOperand*)Operands[3])->addMemNoOffsetOperands(Inst, 1);
+ // offset
+ ((ARMOperand*)Operands[4])->addPostIdxRegOperands(Inst, 2);
+ // pred
+ ((ARMOperand*)Operands[1])->addCondCodeOperands(Inst, 2);
+ return true;
+}
+
+/// cvtStExtTWriteBackImm - Convert parsed operands to MCInst.
+/// Needed here because the Asm Gen Matcher can't properly handle tied operands
+/// when they refer to multiple MIOperands inside a single one.
+bool ARMAsmParser::
+cvtStExtTWriteBackImm(MCInst &Inst, unsigned Opcode,
+ const SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ // Create a writeback register dummy placeholder.
+ Inst.addOperand(MCOperand::CreateImm(0));
+ // Rt
+ ((ARMOperand*)Operands[2])->addRegOperands(Inst, 1);
+ // addr
+ ((ARMOperand*)Operands[3])->addMemNoOffsetOperands(Inst, 1);
+ // offset
+ ((ARMOperand*)Operands[4])->addPostIdxImm8Operands(Inst, 1);
+ // pred
+ ((ARMOperand*)Operands[1])->addCondCodeOperands(Inst, 2);
+ return true;
+}
+
+/// cvtStExtTWriteBackReg - Convert parsed operands to MCInst.
+/// Needed here because the Asm Gen Matcher can't properly handle tied operands
+/// when they refer to multiple MIOperands inside a single one.
+bool ARMAsmParser::
+cvtStExtTWriteBackReg(MCInst &Inst, unsigned Opcode,
+ const SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ // Create a writeback register dummy placeholder.
+ Inst.addOperand(MCOperand::CreateImm(0));
+ // Rt
+ ((ARMOperand*)Operands[2])->addRegOperands(Inst, 1);
+ // addr
+ ((ARMOperand*)Operands[3])->addMemNoOffsetOperands(Inst, 1);
+ // offset
+ ((ARMOperand*)Operands[4])->addPostIdxRegOperands(Inst, 2);
+ // pred
+ ((ARMOperand*)Operands[1])->addCondCodeOperands(Inst, 2);
+ return true;
+}
+
+/// cvtLdrdPre - Convert parsed operands to MCInst.
+/// Needed here because the Asm Gen Matcher can't properly handle tied operands
+/// when they refer to multiple MIOperands inside a single one.
+bool ARMAsmParser::
+cvtLdrdPre(MCInst &Inst, unsigned Opcode,
+ const SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ // Rt, Rt2
+ ((ARMOperand*)Operands[2])->addRegOperands(Inst, 1);
+ ((ARMOperand*)Operands[3])->addRegOperands(Inst, 1);
+ // Create a writeback register dummy placeholder.
+ Inst.addOperand(MCOperand::CreateImm(0));
+ // addr
+ ((ARMOperand*)Operands[4])->addAddrMode3Operands(Inst, 3);
+ // pred
+ ((ARMOperand*)Operands[1])->addCondCodeOperands(Inst, 2);
+ return true;
+}
+
+/// cvtStrdPre - Convert parsed operands to MCInst.
+/// Needed here because the Asm Gen Matcher can't properly handle tied operands
+/// when they refer to multiple MIOperands inside a single one.
+bool ARMAsmParser::
+cvtStrdPre(MCInst &Inst, unsigned Opcode,
+ const SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ // Create a writeback register dummy placeholder.
+ Inst.addOperand(MCOperand::CreateImm(0));
+ // Rt, Rt2
+ ((ARMOperand*)Operands[2])->addRegOperands(Inst, 1);
+ ((ARMOperand*)Operands[3])->addRegOperands(Inst, 1);
+ // addr
+ ((ARMOperand*)Operands[4])->addAddrMode3Operands(Inst, 3);
+ // pred
((ARMOperand*)Operands[1])->addCondCodeOperands(Inst, 2);
return true;
}
+/// cvtLdWriteBackRegAddrMode3 - Convert parsed operands to MCInst.
+/// Needed here because the Asm Gen Matcher can't properly handle tied operands
+/// when they refer to multiple MIOperands inside a single one.
+bool ARMAsmParser::
+cvtLdWriteBackRegAddrMode3(MCInst &Inst, unsigned Opcode,
+ const SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ ((ARMOperand*)Operands[2])->addRegOperands(Inst, 1);
+ // Create a writeback register dummy placeholder.
+ Inst.addOperand(MCOperand::CreateImm(0));
+ ((ARMOperand*)Operands[3])->addAddrMode3Operands(Inst, 3);
+ ((ARMOperand*)Operands[1])->addCondCodeOperands(Inst, 2);
+ return true;
+}
+
+/// cvtThumbMultiply - Convert parsed operands to MCInst.
+/// Needed here because the Asm Gen Matcher can't properly handle tied operands
+/// when they refer to multiple MIOperands inside a single one.
+bool ARMAsmParser::
+cvtThumbMultiply(MCInst &Inst, unsigned Opcode,
+ const SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ // The destination register must match one of the source registers.
+ if (Operands.size() == 6 &&
+ (((ARMOperand*)Operands[3])->getReg() !=
+ ((ARMOperand*)Operands[5])->getReg()) &&
+ (((ARMOperand*)Operands[3])->getReg() !=
+ ((ARMOperand*)Operands[4])->getReg())) {
+ Error(Operands[3]->getStartLoc(),
+ "destination register must match source register");
+ return false;
+ }
+ ((ARMOperand*)Operands[3])->addRegOperands(Inst, 1);
+ ((ARMOperand*)Operands[1])->addCCOutOperands(Inst, 1);
+ ((ARMOperand*)Operands[4])->addRegOperands(Inst, 1);
+ // If we have a three-operand form, use that, else the second source operand
+ // is just the destination operand again.
+ if (Operands.size() == 6)
+ ((ARMOperand*)Operands[5])->addRegOperands(Inst, 1);
+ else
+ Inst.addOperand(Inst.getOperand(0));
+ ((ARMOperand*)Operands[2])->addCondCodeOperands(Inst, 2);
+
+ return true;
+}
+
/// Parse an ARM memory expression. Return false if successful; otherwise
/// return true (an error will have been emitted). The first token must be a
/// '[' when called.
-///
-/// TODO Only preindexing and postindexing addressing are started, unindexed
-/// with option, etc are still to do.
bool ARMAsmParser::
-ParseMemory(SmallVectorImpl<MCParsedAsmOperand*> &Operands,
- ARMII::AddrMode AddrMode = ARMII::AddrModeNone) {
+parseMemory(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
SMLoc S, E;
assert(Parser.getTok().is(AsmToken::LBrac) &&
"Token is not a Left Bracket");
@@ -1552,185 +3105,178 @@ ParseMemory(SmallVectorImpl<MCParsedAsmOperand*> &Operands,
Parser.Lex(); // Eat left bracket token.
const AsmToken &BaseRegTok = Parser.getTok();
- if (BaseRegTok.isNot(AsmToken::Identifier)) {
- Error(BaseRegTok.getLoc(), "register expected");
- return true;
- }
- int BaseRegNum = TryParseRegister();
- if (BaseRegNum == -1) {
- Error(BaseRegTok.getLoc(), "register expected");
- return true;
- }
+ int BaseRegNum = tryParseRegister();
+ if (BaseRegNum == -1)
+ return Error(BaseRegTok.getLoc(), "register expected");
// The next token must either be a comma or a closing bracket.
const AsmToken &Tok = Parser.getTok();
if (!Tok.is(AsmToken::Comma) && !Tok.is(AsmToken::RBrac))
- return true;
+ return Error(Tok.getLoc(), "malformed memory operand");
- bool Preindexed = false;
- bool Postindexed = false;
- bool OffsetIsReg = false;
- bool Negative = false;
- bool Writeback = false;
- ARMOperand *WBOp = 0;
- int OffsetRegNum = -1;
- bool OffsetRegShifted = false;
- enum ARM_AM::ShiftOpc ShiftType = ARM_AM::lsl;
- const MCExpr *ShiftAmount = 0;
- const MCExpr *Offset = 0;
-
- // First look for preindexed address forms, that is after the "[Rn" we now
- // have to see if the next token is a comma.
- if (Tok.is(AsmToken::Comma)) {
- Preindexed = true;
- Parser.Lex(); // Eat comma token.
-
- if (ParseMemoryOffsetReg(Negative, OffsetRegShifted, ShiftType, ShiftAmount,
- Offset, OffsetIsReg, OffsetRegNum, E))
- return true;
- const AsmToken &RBracTok = Parser.getTok();
- if (RBracTok.isNot(AsmToken::RBrac)) {
- Error(RBracTok.getLoc(), "']' expected");
- return true;
- }
- E = RBracTok.getLoc();
+ if (Tok.is(AsmToken::RBrac)) {
+ E = Tok.getLoc();
Parser.Lex(); // Eat right bracket token.
- const AsmToken &ExclaimTok = Parser.getTok();
- if (ExclaimTok.is(AsmToken::Exclaim)) {
- // None of addrmode3 instruction uses "!"
- if (AddrMode == ARMII::AddrMode3)
- return true;
+ Operands.push_back(ARMOperand::CreateMem(BaseRegNum, 0, 0, ARM_AM::no_shift,
+ 0, 0, false, S, E));
- WBOp = ARMOperand::CreateToken(ExclaimTok.getString(),
- ExclaimTok.getLoc());
- Writeback = true;
- Parser.Lex(); // Eat exclaim token
- } else { // In addressing mode 2, pre-indexed mode always end with "!"
- if (AddrMode == ARMII::AddrMode2)
- Preindexed = false;
+ // If there's a pre-indexing writeback marker, '!', just add it as a token
+ // operand. It's rather odd, but syntactically valid.
+ if (Parser.getTok().is(AsmToken::Exclaim)) {
+ Operands.push_back(ARMOperand::CreateToken("!",Parser.getTok().getLoc()));
+ Parser.Lex(); // Eat the '!'.
}
- } else {
- // The "[Rn" we have so far was not followed by a comma.
- // If there's anything other than the right brace, this is a post indexing
- // addressing form.
- E = Tok.getLoc();
- Parser.Lex(); // Eat right bracket token.
+ return false;
+ }
- const AsmToken &NextTok = Parser.getTok();
+ assert(Tok.is(AsmToken::Comma) && "Lost comma in memory operand?!");
+ Parser.Lex(); // Eat the comma.
- if (NextTok.isNot(AsmToken::EndOfStatement)) {
- Postindexed = true;
- Writeback = true;
+ // If we have a ':', it's an alignment specifier.
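+ // E.g. the ":128" in "vld1.32 {d0, d1}, [r0, :128]" (illustrative example).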
+ if (Parser.getTok().is(AsmToken::Colon)) {
+ Parser.Lex(); // Eat the ':'.
+ E = Parser.getTok().getLoc();
- if (NextTok.isNot(AsmToken::Comma)) {
- Error(NextTok.getLoc(), "',' expected");
- return true;
- }
-
- Parser.Lex(); // Eat comma token.
+ const MCExpr *Expr;
+ if (getParser().ParseExpression(Expr))
+ return true;
- if (ParseMemoryOffsetReg(Negative, OffsetRegShifted, ShiftType,
- ShiftAmount, Offset, OffsetIsReg, OffsetRegNum,
- E))
- return true;
+ // The expression has to be a constant. Memory references with relocations
+ // don't come through here, as they use the <label> forms of the relevant
+ // instructions.
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Expr);
+ if (!CE)
+ return Error(E, "constant expression expected");
+
+ unsigned Align = 0;
+ switch (CE->getValue()) {
+ default:
+ return Error(E, "alignment specifier must be 64, 128, or 256 bits");
+ case 64: Align = 8; break;
+ case 128: Align = 16; break;
+ case 256: Align = 32; break;
}
- }
- // Force Offset to exist if used.
- if (!OffsetIsReg) {
- if (!Offset)
- Offset = MCConstantExpr::Create(0, getContext());
- } else {
- if (AddrMode == ARMII::AddrMode3 && OffsetRegShifted) {
- Error(E, "shift amount not supported");
- return true;
+ // Now we should have the closing ']'
+ E = Parser.getTok().getLoc();
+ if (Parser.getTok().isNot(AsmToken::RBrac))
+ return Error(E, "']' expected");
+ Parser.Lex(); // Eat right bracket token.
+
+ // Don't worry about range checking the value here. That's handled by
+ // the is*() predicates.
+ Operands.push_back(ARMOperand::CreateMem(BaseRegNum, 0, 0,
+ ARM_AM::no_shift, 0, Align,
+ false, S, E));
+
+ // If there's a pre-indexing writeback marker, '!', just add it as a token
+ // operand.
+ if (Parser.getTok().is(AsmToken::Exclaim)) {
+ Operands.push_back(ARMOperand::CreateToken("!",Parser.getTok().getLoc()));
+ Parser.Lex(); // Eat the '!'.
}
+
+ return false;
}
- Operands.push_back(ARMOperand::CreateMem(AddrMode, BaseRegNum, OffsetIsReg,
- Offset, OffsetRegNum, OffsetRegShifted,
- ShiftType, ShiftAmount, Preindexed,
- Postindexed, Negative, Writeback, S, E));
- if (WBOp)
- Operands.push_back(WBOp);
+ // If we have a '#', it's an immediate offset, else assume it's a register
+ // offset.
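+ // E.g. the "#-8" in "ldr r0, [r1, #-8]" (illustrative example).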
+ if (Parser.getTok().is(AsmToken::Hash)) {
+ Parser.Lex(); // Eat the '#'.
+ E = Parser.getTok().getLoc();
- return false;
-}
+ bool isNegative = Parser.getTok().is(AsmToken::Minus);
+ const MCExpr *Offset;
+ if (getParser().ParseExpression(Offset))
+ return true;
+
+ // The expression has to be a constant. Memory references with relocations
+ // don't come through here, as they use the <label> forms of the relevant
+ // instructions.
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Offset);
+ if (!CE)
+ return Error(E, "constant expression expected");
+
+ // If the constant was #-0, represent it as INT32_MIN.
+ int32_t Val = CE->getValue();
+ if (isNegative && Val == 0)
+ CE = MCConstantExpr::Create(INT32_MIN, getContext());
+
+ // Now we should have the closing ']'
+ E = Parser.getTok().getLoc();
+ if (Parser.getTok().isNot(AsmToken::RBrac))
+ return Error(E, "']' expected");
+ Parser.Lex(); // Eat right bracket token.
-/// Parse the offset of a memory operand after we have seen "[Rn," or "[Rn],"
-/// we will parse the following (were +/- means that a plus or minus is
-/// optional):
-/// +/-Rm
-/// +/-Rm, shift
-/// #offset
-/// we return false on success or an error otherwise.
-bool ARMAsmParser::ParseMemoryOffsetReg(bool &Negative,
- bool &OffsetRegShifted,
- enum ARM_AM::ShiftOpc &ShiftType,
- const MCExpr *&ShiftAmount,
- const MCExpr *&Offset,
- bool &OffsetIsReg,
- int &OffsetRegNum,
- SMLoc &E) {
- Negative = false;
- OffsetRegShifted = false;
- OffsetIsReg = false;
- OffsetRegNum = -1;
- const AsmToken &NextTok = Parser.getTok();
- E = NextTok.getLoc();
- if (NextTok.is(AsmToken::Plus))
- Parser.Lex(); // Eat plus token.
- else if (NextTok.is(AsmToken::Minus)) {
- Negative = true;
- Parser.Lex(); // Eat minus token
- }
- // See if there is a register following the "[Rn," or "[Rn]," we have so far.
- const AsmToken &OffsetRegTok = Parser.getTok();
- if (OffsetRegTok.is(AsmToken::Identifier)) {
- SMLoc CurLoc = OffsetRegTok.getLoc();
- OffsetRegNum = TryParseRegister();
- if (OffsetRegNum != -1) {
- OffsetIsReg = true;
- E = CurLoc;
+ // Don't worry about range checking the value here. That's handled by
+ // the is*() predicates.
+ Operands.push_back(ARMOperand::CreateMem(BaseRegNum, CE, 0,
+ ARM_AM::no_shift, 0, 0,
+ false, S, E));
+
+ // If there's a pre-indexing writeback marker, '!', just add it as a token
+ // operand.
+ if (Parser.getTok().is(AsmToken::Exclaim)) {
+ Operands.push_back(ARMOperand::CreateToken("!",Parser.getTok().getLoc()));
+ Parser.Lex(); // Eat the '!'.
}
- }
- // If we parsed a register as the offset then there can be a shift after that.
- if (OffsetRegNum != -1) {
- // Look for a comma then a shift
- const AsmToken &Tok = Parser.getTok();
- if (Tok.is(AsmToken::Comma)) {
- Parser.Lex(); // Eat comma token.
+ return false;
+ }
- const AsmToken &Tok = Parser.getTok();
- if (ParseShift(ShiftType, ShiftAmount, E))
- return Error(Tok.getLoc(), "shift expected");
- OffsetRegShifted = true;
- }
+ // The register offset is optionally preceded by a '+' or '-'
+ bool isNegative = false;
+ if (Parser.getTok().is(AsmToken::Minus)) {
+ isNegative = true;
+ Parser.Lex(); // Eat the '-'.
+ } else if (Parser.getTok().is(AsmToken::Plus)) {
+ // '+' is the default, so there's nothing to record.
+ Parser.Lex(); // Eat the '+'.
}
- else { // the "[Rn," or "[Rn,]" we have so far was not followed by "Rm"
- // Look for #offset following the "[Rn," or "[Rn],"
- const AsmToken &HashTok = Parser.getTok();
- if (HashTok.isNot(AsmToken::Hash))
- return Error(HashTok.getLoc(), "'#' expected");
- Parser.Lex(); // Eat hash token.
+ E = Parser.getTok().getLoc();
+ int OffsetRegNum = tryParseRegister();
+ if (OffsetRegNum == -1)
+ return Error(E, "register expected");
+
+ // If there's a shift operator, handle it.
+ ARM_AM::ShiftOpc ShiftType = ARM_AM::no_shift;
+ unsigned ShiftImm = 0;
+ if (Parser.getTok().is(AsmToken::Comma)) {
+ Parser.Lex(); // Eat the ','.
+ if (parseMemRegOffsetShift(ShiftType, ShiftImm))
+ return true;
+ }
- if (getParser().ParseExpression(Offset))
- return true;
- E = SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1);
+ // Now we should have the closing ']'
+ E = Parser.getTok().getLoc();
+ if (Parser.getTok().isNot(AsmToken::RBrac))
+ return Error(E, "']' expected");
+ Parser.Lex(); // Eat right bracket token.
+
+ Operands.push_back(ARMOperand::CreateMem(BaseRegNum, 0, OffsetRegNum,
+ ShiftType, ShiftImm, 0, isNegative,
+ S, E));
+
+ // If there's a pre-indexing writeback marker, '!', just add it as a token
+ // operand.
+ if (Parser.getTok().is(AsmToken::Exclaim)) {
+ Operands.push_back(ARMOperand::CreateToken("!",Parser.getTok().getLoc()));
+ Parser.Lex(); // Eat the '!'.
}
+
return false;
}
-/// ParseShift as one of these two:
+/// parseMemRegOffsetShift - Parse one of these two forms:
/// ( lsl | lsr | asr | ror ) , # shift_amount
/// rrx
-/// and returns true if it parses a shift otherwise it returns false.
-bool ARMAsmParser::ParseShift(ARM_AM::ShiftOpc &St,
- const MCExpr *&ShiftAmount, SMLoc &E) {
+/// Returns false if a shift was parsed successfully, true on error.
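+/// E.g. the "lsl #2" in "ldr r0, [r1, r2, lsl #2]" (illustrative example).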
+bool ARMAsmParser::parseMemRegOffsetShift(ARM_AM::ShiftOpc &St,
+ unsigned &Amount) {
+ SMLoc Loc = Parser.getTok().getLoc();
const AsmToken &Tok = Parser.getTok();
if (Tok.isNot(AsmToken::Identifier))
return true;
@@ -1746,28 +3292,86 @@ bool ARMAsmParser::ParseShift(ARM_AM::ShiftOpc &St,
else if (ShiftName == "rrx" || ShiftName == "RRX")
St = ARM_AM::rrx;
else
- return true;
+ return Error(Loc, "illegal shift operator");
Parser.Lex(); // Eat shift type token.
- // Rrx stands alone.
- if (St == ARM_AM::rrx)
- return false;
-
- // Otherwise, there must be a '#' and a shift amount.
- const AsmToken &HashTok = Parser.getTok();
- if (HashTok.isNot(AsmToken::Hash))
- return Error(HashTok.getLoc(), "'#' expected");
- Parser.Lex(); // Eat hash token.
+ // rrx stands alone.
+ Amount = 0;
+ if (St != ARM_AM::rrx) {
+ Loc = Parser.getTok().getLoc();
+ // A '#' and a shift amount.
+ const AsmToken &HashTok = Parser.getTok();
+ if (HashTok.isNot(AsmToken::Hash))
+ return Error(HashTok.getLoc(), "'#' expected");
+ Parser.Lex(); // Eat hash token.
- if (getParser().ParseExpression(ShiftAmount))
- return true;
+ const MCExpr *Expr;
+ if (getParser().ParseExpression(Expr))
+ return true;
+ // Range check the immediate.
+ // lsl, ror: 0 <= imm <= 31
+ // lsr, asr: 0 <= imm <= 32
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Expr);
+ if (!CE)
+ return Error(Loc, "shift amount must be an immediate");
+ int64_t Imm = CE->getValue();
+ if (Imm < 0 ||
+ ((St == ARM_AM::lsl || St == ARM_AM::ror) && Imm > 31) ||
+ ((St == ARM_AM::lsr || St == ARM_AM::asr) && Imm > 32))
+ return Error(Loc, "immediate shift value out of range");
+ Amount = Imm;
+ }
return false;
}
+/// parseFPImm - A floating point immediate expression operand.
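+/// E.g. the "#1.0" in "vmov.f32 s0, #1.0" (illustrative example).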
+ARMAsmParser::OperandMatchResultTy ARMAsmParser::
+parseFPImm(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ SMLoc S = Parser.getTok().getLoc();
+
+ if (Parser.getTok().isNot(AsmToken::Hash))
+ return MatchOperand_NoMatch;
+ Parser.Lex(); // Eat the '#'.
+
+ // Handle negation, as that still comes through as a separate token.
+ bool isNegative = false;
+ if (Parser.getTok().is(AsmToken::Minus)) {
+ isNegative = true;
+ Parser.Lex();
+ }
+ const AsmToken &Tok = Parser.getTok();
+ if (Tok.is(AsmToken::Real)) {
+ APFloat RealVal(APFloat::IEEEdouble, Tok.getString());
+ uint64_t IntVal = RealVal.bitcastToAPInt().getZExtValue();
+ // If we had a '-' in front, toggle the sign bit.
+ IntVal ^= (uint64_t)isNegative << 63;
+ int Val = ARM_AM::getFP64Imm(APInt(64, IntVal));
+ Parser.Lex(); // Eat the token.
+ if (Val == -1) {
+ TokError("floating point value out of range");
+ return MatchOperand_ParseFail;
+ }
+ Operands.push_back(ARMOperand::CreateFPImm(Val, S, getContext()));
+ return MatchOperand_Success;
+ }
+ if (Tok.is(AsmToken::Integer)) {
+ int64_t Val = Tok.getIntVal();
+ Parser.Lex(); // Eat the token.
+ if (Val > 255 || Val < 0) {
+ TokError("encoded floating point value out of range");
+ return MatchOperand_ParseFail;
+ }
+ Operands.push_back(ARMOperand::CreateFPImm(Val, S, getContext()));
+ return MatchOperand_Success;
+ }
+
+ TokError("invalid floating point immediate");
+ return MatchOperand_ParseFail;
+}
/// Parse an ARM instruction operand. For now this parses the operand regardless
/// of the mnemonic.
-bool ARMAsmParser::ParseOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands,
+bool ARMAsmParser::parseOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands,
StringRef Mnemonic) {
SMLoc S, E;
@@ -1787,13 +3391,20 @@ bool ARMAsmParser::ParseOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands,
Error(Parser.getTok().getLoc(), "unexpected token in operand");
return true;
case AsmToken::Identifier: {
- if (!TryParseRegisterWithWriteBack(Operands))
+ if (!tryParseRegisterWithWriteBack(Operands))
return false;
- int Res = TryParseShiftRegister(Operands);
+ int Res = tryParseShiftRegister(Operands);
if (Res == 0) // success
return false;
else if (Res == -1) // irrecoverable error
return true;
+ if (Mnemonic == "vmrs" && Parser.getTok().getString() == "apsr_nzcv") {
+ S = Parser.getTok().getLoc();
+ Parser.Lex();
+ Operands.push_back(ARMOperand::CreateToken("apsr_nzcv", S));
+ return false;
+ }
// Fall through for the Identifier case that is not a register or a
// special name.
@@ -1811,26 +3422,36 @@ bool ARMAsmParser::ParseOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands,
return false;
}
case AsmToken::LBrac:
- return ParseMemory(Operands);
+ return parseMemory(Operands);
case AsmToken::LCurly:
- return ParseRegisterList(Operands);
- case AsmToken::Hash:
+ return parseRegisterList(Operands);
+ case AsmToken::Hash: {
// #42 -> immediate.
// TODO: ":lower16:" and ":upper16:" modifiers after # before immediate
S = Parser.getTok().getLoc();
Parser.Lex();
+ bool isNegative = Parser.getTok().is(AsmToken::Minus);
const MCExpr *ImmVal;
if (getParser().ParseExpression(ImmVal))
return true;
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(ImmVal);
+ if (!CE) {
+ Error(S, "constant expression expected");
+ return true;
+ }
+ int32_t Val = CE->getValue();
+ if (isNegative && Val == 0)
+ ImmVal = MCConstantExpr::Create(INT32_MIN, getContext());
E = SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1);
Operands.push_back(ARMOperand::CreateImm(ImmVal, S, E));
return false;
+ }
case AsmToken::Colon: {
// ":lower16:" and ":upper16:" expression prefixes
// FIXME: Check it's an expression prefix,
// e.g. (FOO - :lower16:BAR) isn't legal.
ARMMCExpr::VariantKind RefKind;
- if (ParsePrefix(RefKind))
+ if (parsePrefix(RefKind))
return true;
const MCExpr *SubExprVal;
@@ -1846,9 +3467,9 @@ bool ARMAsmParser::ParseOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands,
}
}
-// ParsePrefix - Parse ARM 16-bit relocations expression prefix, i.e.
+// parsePrefix - Parse ARM 16-bit relocations expression prefix, i.e.
// :lower16: and :upper16:.
-bool ARMAsmParser::ParsePrefix(ARMMCExpr::VariantKind &RefKind) {
+bool ARMAsmParser::parsePrefix(ARMMCExpr::VariantKind &RefKind) {
RefKind = ARMMCExpr::VK_ARM_None;
// :lower16: and :upper16: modifiers
@@ -1879,55 +3500,16 @@ bool ARMAsmParser::ParsePrefix(ARMMCExpr::VariantKind &RefKind) {
return false;
}
-const MCExpr *
-ARMAsmParser::ApplyPrefixToExpr(const MCExpr *E,
- MCSymbolRefExpr::VariantKind Variant) {
- // Recurse over the given expression, rebuilding it to apply the given variant
- // to the leftmost symbol.
- if (Variant == MCSymbolRefExpr::VK_None)
- return E;
-
- switch (E->getKind()) {
- case MCExpr::Target:
- llvm_unreachable("Can't handle target expr yet");
- case MCExpr::Constant:
- llvm_unreachable("Can't handle lower16/upper16 of constant yet");
-
- case MCExpr::SymbolRef: {
- const MCSymbolRefExpr *SRE = cast<MCSymbolRefExpr>(E);
-
- if (SRE->getKind() != MCSymbolRefExpr::VK_None)
- return 0;
-
- return MCSymbolRefExpr::Create(&SRE->getSymbol(), Variant, getContext());
- }
-
- case MCExpr::Unary:
- llvm_unreachable("Can't handle unary expressions yet");
-
- case MCExpr::Binary: {
- const MCBinaryExpr *BE = cast<MCBinaryExpr>(E);
- const MCExpr *LHS = ApplyPrefixToExpr(BE->getLHS(), Variant);
- const MCExpr *RHS = BE->getRHS();
- if (!LHS)
- return 0;
-
- return MCBinaryExpr::Create(BE->getOpcode(), LHS, RHS, getContext());
- }
- }
-
- assert(0 && "Invalid expression kind!");
- return 0;
-}
-
/// \brief Given a mnemonic, split out possible predication code and carry
/// setting letters to form a canonical mnemonic and flags.
//
// FIXME: Would be nice to autogen this.
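// E.g. "addseq" splits into "add" plus ARMCC::EQ with CarrySetting set (illustrative example).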
-static StringRef SplitMnemonic(StringRef Mnemonic,
- unsigned &PredicationCode,
- bool &CarrySetting,
- unsigned &ProcessorIMod) {
+// FIXME: This is a bit of a maze of special cases.
+StringRef ARMAsmParser::splitMnemonic(StringRef Mnemonic,
+ unsigned &PredicationCode,
+ bool &CarrySetting,
+ unsigned &ProcessorIMod,
+ StringRef &ITMask) {
PredicationCode = ARMCC::AL;
CarrySetting = false;
ProcessorIMod = 0;
@@ -1935,23 +3517,22 @@ static StringRef SplitMnemonic(StringRef Mnemonic,
// Ignore some mnemonics we know aren't predicated forms.
//
// FIXME: Would be nice to autogen this.
- if (Mnemonic == "teq" || Mnemonic == "vceq" ||
- Mnemonic == "movs" ||
- Mnemonic == "svc" ||
- (Mnemonic == "mls" || Mnemonic == "smmls" || Mnemonic == "vcls" ||
- Mnemonic == "vmls" || Mnemonic == "vnmls") ||
- Mnemonic == "vacge" || Mnemonic == "vcge" ||
- Mnemonic == "vclt" ||
- Mnemonic == "vacgt" || Mnemonic == "vcgt" ||
- Mnemonic == "vcle" ||
- (Mnemonic == "smlal" || Mnemonic == "umaal" || Mnemonic == "umlal" ||
- Mnemonic == "vabal" || Mnemonic == "vmlal" || Mnemonic == "vpadal" ||
- Mnemonic == "vqdmlal" || Mnemonic == "bics"))
+ if ((Mnemonic == "movs" && isThumb()) ||
+ Mnemonic == "teq" || Mnemonic == "vceq" || Mnemonic == "svc" ||
+ Mnemonic == "mls" || Mnemonic == "smmls" || Mnemonic == "vcls" ||
+ Mnemonic == "vmls" || Mnemonic == "vnmls" || Mnemonic == "vacge" ||
+ Mnemonic == "vcge" || Mnemonic == "vclt" || Mnemonic == "vacgt" ||
+ Mnemonic == "vcgt" || Mnemonic == "vcle" || Mnemonic == "smlal" ||
+ Mnemonic == "umaal" || Mnemonic == "umlal" || Mnemonic == "vabal" ||
+ Mnemonic == "vmlal" || Mnemonic == "vpadal" || Mnemonic == "vqdmlal")
return Mnemonic;
// First, split out any predication code. Ignore mnemonics we know aren't
// predicated but do have a carry-set and so weren't caught above.
- if (Mnemonic != "adcs") {
+ if (Mnemonic != "adcs" && Mnemonic != "bics" && Mnemonic != "movs" &&
+ Mnemonic != "muls" && Mnemonic != "smlals" && Mnemonic != "smulls" &&
+ Mnemonic != "umlals" && Mnemonic != "umulls" && Mnemonic != "lsls" &&
+ Mnemonic != "sbcs" && Mnemonic != "rscs") {
unsigned CC = StringSwitch<unsigned>(Mnemonic.substr(Mnemonic.size()-2))
.Case("eq", ARMCC::EQ)
.Case("ne", ARMCC::NE)
@@ -1980,11 +3561,12 @@ static StringRef SplitMnemonic(StringRef Mnemonic,
// Next, determine if we have a carry setting bit. We explicitly ignore all
// the instructions we know end in 's'.
if (Mnemonic.endswith("s") &&
- !(Mnemonic == "asrs" || Mnemonic == "cps" || Mnemonic == "mls" ||
- Mnemonic == "movs" || Mnemonic == "mrs" || Mnemonic == "smmls" ||
- Mnemonic == "vabs" || Mnemonic == "vcls" || Mnemonic == "vmls" ||
- Mnemonic == "vmrs" || Mnemonic == "vnmls" || Mnemonic == "vqabs" ||
- Mnemonic == "vrecps" || Mnemonic == "vrsqrts")) {
+ !(Mnemonic == "cps" || Mnemonic == "mls" ||
+ Mnemonic == "mrs" || Mnemonic == "smmls" || Mnemonic == "vabs" ||
+ Mnemonic == "vcls" || Mnemonic == "vmls" || Mnemonic == "vmrs" ||
+ Mnemonic == "vnmls" || Mnemonic == "vqabs" || Mnemonic == "vrecps" ||
+ Mnemonic == "vrsqrts" || Mnemonic == "srs" ||
+ (Mnemonic == "movs" && isThumb()))) {
Mnemonic = Mnemonic.slice(0, Mnemonic.size() - 1);
CarrySetting = true;
}
@@ -2004,6 +3586,12 @@ static StringRef SplitMnemonic(StringRef Mnemonic,
}
}
+ // The "it" instruction has the condition mask on the end of the mnemonic.
+ if (Mnemonic.startswith("it")) {
+ ITMask = Mnemonic.slice(2, Mnemonic.size());
+ Mnemonic = Mnemonic.slice(0, 2);
+ }
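For illustration, a minimal standalone sketch of the slice above, using std::string in place of StringRef (an assumption made only to keep the sketch self-contained): it peels "itet" apart into the mnemonic "it" and the mask suffix "et".

#include <cstdio>
#include <string>

int main() {
  std::string Mnemonic = "itet", ITMask;
  if (Mnemonic.compare(0, 2, "it") == 0) {
    ITMask = Mnemonic.substr(2);      // "et"
    Mnemonic = Mnemonic.substr(0, 2); // "it"
  }
  printf("%s %s\n", Mnemonic.c_str(), ITMask.c_str()); // prints: it et
}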
+
return Mnemonic;
}
@@ -2012,37 +3600,154 @@ static StringRef SplitMnemonic(StringRef Mnemonic,
//
// FIXME: It would be nice to autogen this.
void ARMAsmParser::
-GetMnemonicAcceptInfo(StringRef Mnemonic, bool &CanAcceptCarrySet,
+getMnemonicAcceptInfo(StringRef Mnemonic, bool &CanAcceptCarrySet,
bool &CanAcceptPredicationCode) {
if (Mnemonic == "and" || Mnemonic == "lsl" || Mnemonic == "lsr" ||
Mnemonic == "rrx" || Mnemonic == "ror" || Mnemonic == "sub" ||
- Mnemonic == "smull" || Mnemonic == "add" || Mnemonic == "adc" ||
+ Mnemonic == "add" || Mnemonic == "adc" ||
Mnemonic == "mul" || Mnemonic == "bic" || Mnemonic == "asr" ||
- Mnemonic == "umlal" || Mnemonic == "orr" || Mnemonic == "mvn" ||
+ Mnemonic == "orr" || Mnemonic == "mvn" ||
Mnemonic == "rsb" || Mnemonic == "rsc" || Mnemonic == "orn" ||
- Mnemonic == "sbc" || Mnemonic == "mla" || Mnemonic == "umull" ||
- Mnemonic == "eor" || Mnemonic == "smlal" ||
- (Mnemonic == "mov" && !isThumbOne())) {
+ Mnemonic == "sbc" || Mnemonic == "eor" || Mnemonic == "neg" ||
+ (!isThumb() && (Mnemonic == "smull" || Mnemonic == "mov" ||
+ Mnemonic == "mla" || Mnemonic == "smlal" ||
+ Mnemonic == "umlal" || Mnemonic == "umull"))) {
CanAcceptCarrySet = true;
- } else {
+ } else
CanAcceptCarrySet = false;
- }
if (Mnemonic == "cbnz" || Mnemonic == "setend" || Mnemonic == "dmb" ||
Mnemonic == "cps" || Mnemonic == "mcr2" || Mnemonic == "it" ||
Mnemonic == "mcrr2" || Mnemonic == "cbz" || Mnemonic == "cdp2" ||
Mnemonic == "trap" || Mnemonic == "mrc2" || Mnemonic == "mrrc2" ||
- Mnemonic == "dsb" || Mnemonic == "movs" || Mnemonic == "isb" ||
- Mnemonic == "clrex" || Mnemonic.startswith("cps")) {
+ Mnemonic == "dsb" || Mnemonic == "isb" || Mnemonic == "setend" ||
+ (Mnemonic == "clrex" && !isThumb()) ||
+ (Mnemonic == "nop" && isThumbOne()) ||
+ ((Mnemonic == "pld" || Mnemonic == "pli" || Mnemonic == "pldw" ||
+ Mnemonic == "ldc2" || Mnemonic == "ldc2l" ||
+ Mnemonic == "stc2" || Mnemonic == "stc2l") && !isThumb()) ||
+ ((Mnemonic.startswith("rfe") || Mnemonic.startswith("srs")) &&
+ !isThumb()) ||
+ Mnemonic.startswith("cps") || (Mnemonic == "movs" && isThumbOne())) {
CanAcceptPredicationCode = false;
- } else {
+ } else
CanAcceptPredicationCode = true;
- }
- if (isThumb())
+ if (isThumb()) {
if (Mnemonic == "bkpt" || Mnemonic == "mcr" || Mnemonic == "mcrr" ||
Mnemonic == "mrc" || Mnemonic == "mrrc" || Mnemonic == "cdp")
CanAcceptPredicationCode = false;
+ }
+}
+
+bool ARMAsmParser::shouldOmitCCOutOperand(StringRef Mnemonic,
+ SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ // FIXME: This is all horribly hacky. We really need a better way to deal
+ // with optional operands like this in the matcher table.
+
+ // The 'mov' mnemonic is special. One variant has a cc_out operand, while
+ // another does not. Specifically, the MOVW instruction does not. So we
+ // special case it here and remove the defaulted (non-setting) cc_out
+ // operand if that's the instruction we're trying to match.
+ //
+ // We do this as post-processing of the explicit operands rather than just
+ // conditionally adding the cc_out in the first place because we need
+ // to check the type of the parsed immediate operand.
+ if (Mnemonic == "mov" && Operands.size() > 4 && !isThumb() &&
+ !static_cast<ARMOperand*>(Operands[4])->isARMSOImm() &&
+ static_cast<ARMOperand*>(Operands[4])->isImm0_65535Expr() &&
+ static_cast<ARMOperand*>(Operands[1])->getReg() == 0)
+ return true;
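As a worked example of the check above: "mov r0, #0x1234" names an immediate that fits in 16 bits but is not an ARM shifter-operand immediate, so only MOVW can encode it and the defaulted cc_out has to go. The sketch below is standalone; isSOImm is a crude stand-in for ARMOperand::isARMSOImm, not the real implementation.

#include <cstdio>

// An ARM SO-imm is an 8-bit value rotated right by an even amount; test by
// rotating the candidate left by every even amount and checking the result.
static bool isSOImm(unsigned V) {
  for (unsigned R = 0; R < 32; R += 2) {
    unsigned Rot = R ? ((V << R) | (V >> (32 - R))) : V;
    if (Rot <= 0xff)
      return true;
  }
  return false;
}

int main() {
  printf("%d\n", !isSOImm(0x1234) && 0x1234 <= 0xffff); // 1 -> omit cc_out
}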
+
+ // Register-register 'add' for thumb does not have a cc_out operand
+ // when there are only two register operands.
+ if (isThumb() && Mnemonic == "add" && Operands.size() == 5 &&
+ static_cast<ARMOperand*>(Operands[3])->isReg() &&
+ static_cast<ARMOperand*>(Operands[4])->isReg() &&
+ static_cast<ARMOperand*>(Operands[1])->getReg() == 0)
+ return true;
+ // Register-register 'add' for thumb does not have a cc_out operand
+ // when it's an ADD Rdm, SP, {Rdm|#imm0_255} instruction. We do
+ // have to check the immediate range here since Thumb2 has a variant
+ // that can handle a different range and has a cc_out operand.
+ if (((isThumb() && Mnemonic == "add") ||
+ (isThumbTwo() && Mnemonic == "sub")) &&
+ Operands.size() == 6 &&
+ static_cast<ARMOperand*>(Operands[3])->isReg() &&
+ static_cast<ARMOperand*>(Operands[4])->isReg() &&
+ static_cast<ARMOperand*>(Operands[4])->getReg() == ARM::SP &&
+ static_cast<ARMOperand*>(Operands[1])->getReg() == 0 &&
+ (static_cast<ARMOperand*>(Operands[5])->isReg() ||
+ static_cast<ARMOperand*>(Operands[5])->isImm0_1020s4()))
+ return true;
+ // For Thumb2, add/sub immediate does not have a cc_out operand for the
+ // imm0_4095 variant. That's the least-preferred variant when
+ // selecting via the generic "add" mnemonic, so to know that we
+ // should remove the cc_out operand, we have to explicitly check that
+ // it's not one of the other variants. Ugh.
+ if (isThumbTwo() && (Mnemonic == "add" || Mnemonic == "sub") &&
+ Operands.size() == 6 &&
+ static_cast<ARMOperand*>(Operands[3])->isReg() &&
+ static_cast<ARMOperand*>(Operands[4])->isReg() &&
+ static_cast<ARMOperand*>(Operands[5])->isImm()) {
+ // Nest conditions rather than one big 'if' statement for readability.
+ //
+ // If either register is a high reg, it's either one of the SP
+ // variants (handled above) or a 32-bit encoding, so we just
+ // check against T3.
+ if ((!isARMLowRegister(static_cast<ARMOperand*>(Operands[3])->getReg()) ||
+ !isARMLowRegister(static_cast<ARMOperand*>(Operands[4])->getReg())) &&
+ static_cast<ARMOperand*>(Operands[5])->isT2SOImm())
+ return false;
+ // If both registers are low, we're in an IT block, and the immediate is
+ // in range, we should use encoding T1 instead, which has a cc_out.
+ if (inITBlock() &&
+ isARMLowRegister(static_cast<ARMOperand*>(Operands[3])->getReg()) &&
+ isARMLowRegister(static_cast<ARMOperand*>(Operands[4])->getReg()) &&
+ static_cast<ARMOperand*>(Operands[5])->isImm0_7())
+ return false;
+
+ // Otherwise, we use encoding T4, which does not have a cc_out
+ // operand.
+ return true;
+ }
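Reduced to booleans, the triage above reads as the following standalone sketch (a simplification; the real predicates are the operand-class checks used in the code):

#include <cstdio>

static bool omitCCOut(bool bothLowRegs, bool inIT, bool isT2SOImm,
                      bool isImm0_7) {
  if (!bothLowRegs && isT2SOImm)
    return false; // encoding T3, which has a cc_out
  if (inIT && bothLowRegs && isImm0_7)
    return false; // encoding T1, which also has a cc_out
  return true;    // fall through to T4 (imm0_4095), which has none
}

int main() {
  // "add r8, r8, #1" keeps cc_out (T3); "add r0, r1, #4095" drops it (T4).
  printf("%d %d\n", omitCCOut(false, false, true, false),
         omitCCOut(true, false, false, false));
}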
+
+ // The Thumb2 multiply instruction doesn't have a CCOut register, so
+ // if we have a "mul" mnemonic in Thumb mode, check if we'll be able to
+ // use the 16-bit encoding or not.
+ if (isThumbTwo() && Mnemonic == "mul" && Operands.size() == 6 &&
+ static_cast<ARMOperand*>(Operands[1])->getReg() == 0 &&
+ static_cast<ARMOperand*>(Operands[3])->isReg() &&
+ static_cast<ARMOperand*>(Operands[4])->isReg() &&
+ static_cast<ARMOperand*>(Operands[5])->isReg() &&
+ // If the registers aren't low regs, the destination reg isn't the
+ // same as one of the source regs, or the cc_out operand is zero
+ // outside of an IT block, we have to use the 32-bit encoding, so
+ // remove the cc_out operand.
+ (!isARMLowRegister(static_cast<ARMOperand*>(Operands[3])->getReg()) ||
+ !isARMLowRegister(static_cast<ARMOperand*>(Operands[4])->getReg()) ||
+ !inITBlock() ||
+ (static_cast<ARMOperand*>(Operands[3])->getReg() !=
+ static_cast<ARMOperand*>(Operands[5])->getReg() &&
+ static_cast<ARMOperand*>(Operands[3])->getReg() !=
+ static_cast<ARMOperand*>(Operands[4])->getReg())))
+ return true;
+
+ // Register-register 'add/sub' for thumb does not have a cc_out operand
+ // when it's an ADD/SUB SP, #imm. Be lenient on count since there's also
+ // the "add/sub SP, SP, #imm" version. If the follow-up operands aren't
+ // right, this will result in better diagnostics (which operand is off)
+ // anyway.
+ if (isThumb() && (Mnemonic == "add" || Mnemonic == "sub") &&
+ (Operands.size() == 5 || Operands.size() == 6) &&
+ static_cast<ARMOperand*>(Operands[3])->isReg() &&
+ static_cast<ARMOperand*>(Operands[3])->getReg() == ARM::SP &&
+ static_cast<ARMOperand*>(Operands[1])->getReg() == 0)
+ return true;
+
+ return false;
}
/// Parse an arm instruction mnemonic followed by its operands.
@@ -2050,16 +3755,51 @@ bool ARMAsmParser::ParseInstruction(StringRef Name, SMLoc NameLoc,
SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
// Create the leading tokens for the mnemonic, split by '.' characters.
size_t Start = 0, Next = Name.find('.');
- StringRef Head = Name.slice(Start, Next);
+ StringRef Mnemonic = Name.slice(Start, Next);
// Split out the predication code and carry setting flag from the mnemonic.
unsigned PredicationCode;
unsigned ProcessorIMod;
bool CarrySetting;
- Head = SplitMnemonic(Head, PredicationCode, CarrySetting,
- ProcessorIMod);
+ StringRef ITMask;
+ Mnemonic = splitMnemonic(Mnemonic, PredicationCode, CarrySetting,
+ ProcessorIMod, ITMask);
+
+ // In Thumb1, only the branch (B) instruction can be predicated.
+ if (isThumbOne() && PredicationCode != ARMCC::AL && Mnemonic != "b") {
+ Parser.EatToEndOfStatement();
+ return Error(NameLoc, "conditional execution not supported in Thumb1");
+ }
+
+ Operands.push_back(ARMOperand::CreateToken(Mnemonic, NameLoc));
- Operands.push_back(ARMOperand::CreateToken(Head, NameLoc));
+ // Handle the IT instruction ITMask. Convert it to a bitmask. This is
+ // the mask as it will be for the IT encoding if the condition code has
+ // a '1' as its bit0 (i.e. 't' ==> '1'). In the case where the condition
+ // code's bit0 is zero, the instruction post-processing will adjust the
+ // mask accordingly.
+ if (Mnemonic == "it") {
+ SMLoc Loc = SMLoc::getFromPointer(NameLoc.getPointer() + 2);
+ if (ITMask.size() > 3) {
+ Parser.EatToEndOfStatement();
+ return Error(Loc, "too many conditions on IT instruction");
+ }
+ unsigned Mask = 8;
+ for (unsigned i = ITMask.size(); i != 0; --i) {
+ char pos = ITMask[i - 1];
+ if (pos != 't' && pos != 'e') {
+ Parser.EatToEndOfStatement();
+ return Error(Loc, "illegal IT block condition mask '" + ITMask + "'");
+ }
+ Mask >>= 1;
+ if (pos == 't')
+ Mask |= 8;
+ }
+ Operands.push_back(ARMOperand::CreateITMask(Mask, Loc));
+ }
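To make the loop above concrete, here is a standalone sketch of the same mask construction, detached from the parser; the values in the comment are what the loop produces before any post-processing.

#include <cstdio>
#include <string>

static unsigned buildITMask(const std::string &ITMask) {
  unsigned Mask = 8; // seed the terminating '1' at bit 3
  for (size_t i = ITMask.size(); i != 0; --i) {
    Mask >>= 1;
    if (ITMask[i - 1] == 't')
      Mask |= 8;
  }
  return Mask;
}

int main() {
  // "it" -> 0b1000, "itt" -> 0b1100, "ite" -> 0b0100, "itet" -> 0b0110
  printf("%#x %#x %#x %#x\n", buildITMask(""), buildITMask("t"),
         buildITMask("e"), buildITMask("et"));
}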
+
+ // FIXME: This is all a pretty gross hack. We should automatically handle
+ // optional operands like this via tblgen.
// Next, add the CCOut and ConditionCode operands, if needed.
//
@@ -2069,34 +3809,36 @@ bool ARMAsmParser::ParseInstruction(StringRef Name, SMLoc NameLoc,
// the matcher deal with finding the right instruction or generating an
// appropriate error.
bool CanAcceptCarrySet, CanAcceptPredicationCode;
- GetMnemonicAcceptInfo(Head, CanAcceptCarrySet, CanAcceptPredicationCode);
+ getMnemonicAcceptInfo(Mnemonic, CanAcceptCarrySet, CanAcceptPredicationCode);
// If we had a carry-set on an instruction that can't do that, issue an
// error.
if (!CanAcceptCarrySet && CarrySetting) {
Parser.EatToEndOfStatement();
- return Error(NameLoc, "instruction '" + Head +
+ return Error(NameLoc, "instruction '" + Mnemonic +
"' can not set flags, but 's' suffix specified");
}
+ // If we had a predication code on an instruction that can't do that, issue an
+ // error.
+ if (!CanAcceptPredicationCode && PredicationCode != ARMCC::AL) {
+ Parser.EatToEndOfStatement();
+ return Error(NameLoc, "instruction '" + Mnemonic +
+ "' is not predicable, but condition code specified");
+ }
// Add the carry setting operand, if necessary.
- //
- // FIXME: It would be awesome if we could somehow invent a location such that
- // match errors on this operand would print a nice diagnostic about how the
- // 's' character in the mnemonic resulted in a CCOut operand.
- if (CanAcceptCarrySet)
+ if (CanAcceptCarrySet) {
+ SMLoc Loc = SMLoc::getFromPointer(NameLoc.getPointer() + Mnemonic.size());
Operands.push_back(ARMOperand::CreateCCOut(CarrySetting ? ARM::CPSR : 0,
- NameLoc));
+ Loc));
+ }
// Add the predication code operand, if necessary.
if (CanAcceptPredicationCode) {
+ SMLoc Loc = SMLoc::getFromPointer(NameLoc.getPointer() + Mnemonic.size() +
+ CarrySetting);
Operands.push_back(ARMOperand::CreateCondCode(
- ARMCC::CondCodes(PredicationCode), NameLoc));
- } else {
- // This mnemonic can't ever accept a predication code, but the user wrote
- // one (or misspelled another mnemonic).
-
- // FIXME: Issue a nice error.
+ ARMCC::CondCodes(PredicationCode), Loc));
}
// Add the processor imod operand, if necessary.
@@ -2104,11 +3846,6 @@ bool ARMAsmParser::ParseInstruction(StringRef Name, SMLoc NameLoc,
Operands.push_back(ARMOperand::CreateImm(
MCConstantExpr::Create(ProcessorIMod, getContext()),
NameLoc, NameLoc));
- } else {
- // This mnemonic can't ever accept a imod, but the user wrote
- // one (or misspelled another mnemonic).
-
- // FIXME: Issue a nice error.
}
// Add the remaining tokens in the mnemonic.
@@ -2117,13 +3854,19 @@ bool ARMAsmParser::ParseInstruction(StringRef Name, SMLoc NameLoc,
Next = Name.find('.', Start + 1);
StringRef ExtraToken = Name.slice(Start, Next);
- Operands.push_back(ARMOperand::CreateToken(ExtraToken, NameLoc));
+ // For now, we're only parsing Thumb1 (for the most part), so
+ // just ignore ".n" qualifiers. We'll use them to restrict
+ // matching when we do Thumb2.
+ if (ExtraToken != ".n") {
+ SMLoc Loc = SMLoc::getFromPointer(NameLoc.getPointer() + Start);
+ Operands.push_back(ARMOperand::CreateToken(ExtraToken, Loc));
+ }
}
// Read the remaining operands.
if (getLexer().isNot(AsmToken::EndOfStatement)) {
// Read the first operand.
- if (ParseOperand(Operands, Head)) {
+ if (parseOperand(Operands, Mnemonic)) {
Parser.EatToEndOfStatement();
return true;
}
@@ -2132,7 +3875,7 @@ bool ARMAsmParser::ParseInstruction(StringRef Name, SMLoc NameLoc,
Parser.Lex(); // Eat the comma.
// Parse and remember the operand.
- if (ParseOperand(Operands, Head)) {
+ if (parseOperand(Operands, Mnemonic)) {
Parser.EatToEndOfStatement();
return true;
}
@@ -2140,75 +3883,548 @@ bool ARMAsmParser::ParseInstruction(StringRef Name, SMLoc NameLoc,
}
if (getLexer().isNot(AsmToken::EndOfStatement)) {
+ SMLoc Loc = getLexer().getLoc();
Parser.EatToEndOfStatement();
- return TokError("unexpected token in argument list");
+ return Error(Loc, "unexpected token in argument list");
}
Parser.Lex(); // Consume the EndOfStatement
+
+ // Some instructions, mostly Thumb, have forms for the same mnemonic that
+ // do and don't have a cc_out optional-def operand. With some spot-checks
+ // of the operand list, we can figure out which variant we're trying to
+ // parse and adjust accordingly before actually matching. We shouldn't ever
+ // try to remove a cc_out operand that was explicitly set on the
+ // mnemonic, of course (CarrySetting == true). Reason #317 why the
+ // table-driven matcher doesn't fit well with the ARM instruction set.
+ if (!CarrySetting && shouldOmitCCOutOperand(Mnemonic, Operands)) {
+ ARMOperand *Op = static_cast<ARMOperand*>(Operands[1]);
+ Operands.erase(Operands.begin() + 1);
+ delete Op;
+ }
+
+ // ARM mode 'blx' needs special handling, as the register operand version
+ // is predicable, but the label operand version is not. So, we can't rely
+ // on the Mnemonic based checking to correctly figure out when to put
+ // a k_CondCode operand in the list. If we're trying to match the label
+ // version, remove the k_CondCode operand here.
+ if (!isThumb() && Mnemonic == "blx" && Operands.size() == 3 &&
+ static_cast<ARMOperand*>(Operands[2])->isImm()) {
+ ARMOperand *Op = static_cast<ARMOperand*>(Operands[1]);
+ Operands.erase(Operands.begin() + 1);
+ delete Op;
+ }
+
+ // The vector-compare-to-zero instructions have a literal token "#0" at
+ // the end that arrives here as an immediate operand. Convert it to a
+ // token to play nicely with the matcher.
+ if ((Mnemonic == "vceq" || Mnemonic == "vcge" || Mnemonic == "vcgt" ||
+ Mnemonic == "vcle" || Mnemonic == "vclt") && Operands.size() == 6 &&
+ static_cast<ARMOperand*>(Operands[5])->isImm()) {
+ ARMOperand *Op = static_cast<ARMOperand*>(Operands[5]);
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Op->getImm());
+ if (CE && CE->getValue() == 0) {
+ Operands.erase(Operands.begin() + 5);
+ Operands.push_back(ARMOperand::CreateToken("#0", Op->getStartLoc()));
+ delete Op;
+ }
+ }
+ // VCMP{E} does the same thing, but with a different operand count.
+ if ((Mnemonic == "vcmp" || Mnemonic == "vcmpe") && Operands.size() == 5 &&
+ static_cast<ARMOperand*>(Operands[4])->isImm()) {
+ ARMOperand *Op = static_cast<ARMOperand*>(Operands[4]);
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Op->getImm());
+ if (CE && CE->getValue() == 0) {
+ Operands.erase(Operands.begin() + 4);
+ Operands.push_back(ARMOperand::CreateToken("#0", Op->getStartLoc()));
+ delete Op;
+ }
+ }
+ // Similarly, the Thumb1 "RSB" instruction has a literal "#0" on the
+ // end. Convert it to a token here.
+ if (Mnemonic == "rsb" && isThumb() && Operands.size() == 6 &&
+ static_cast<ARMOperand*>(Operands[5])->isImm()) {
+ ARMOperand *Op = static_cast<ARMOperand*>(Operands[5]);
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Op->getImm());
+ if (CE && CE->getValue() == 0) {
+ Operands.erase(Operands.begin() + 5);
+ Operands.push_back(ARMOperand::CreateToken("#0", Op->getStartLoc()));
+ delete Op;
+ }
+ }
+
+ return false;
+}
+
+// Validate context-sensitive operand constraints.
+
+// Return 'true' if the register list contains non-low GPR registers,
+// 'false' otherwise. If Reg is in the register list or is HiReg, set
+// 'containsReg' to true.
+static bool checkLowRegisterList(const MCInst &Inst, unsigned OpNo, unsigned Reg,
+ unsigned HiReg, bool &containsReg) {
+ containsReg = false;
+ for (unsigned i = OpNo; i < Inst.getNumOperands(); ++i) {
+ unsigned OpReg = Inst.getOperand(i).getReg();
+ if (OpReg == Reg)
+ containsReg = true;
+ // Anything other than a low register isn't legal here.
+ if (!isARMLowRegister(OpReg) && (!HiReg || OpReg != HiReg))
+ return true;
+ }
+ return false;
+}
+
+// Check if the specified register is in the register list of the
+// instruction, starting at the indicated operand number.
+static bool listContainsReg(MCInst &Inst, unsigned OpNo, unsigned Reg) {
+ for (unsigned i = OpNo; i < Inst.getNumOperands(); ++i) {
+ unsigned OpReg = Inst.getOperand(i).getReg();
+ if (OpReg == Reg)
+ return true;
+ }
+ return false;
+}
+
+// FIXME: We would really prefer to have MCInstrInfo (the wrapper around
+// the ARMInsts array) instead. Getting that here requires awkward
+// API changes, though. Better way?
+namespace llvm {
+extern MCInstrDesc ARMInsts[];
+}
+static MCInstrDesc &getInstDesc(unsigned Opcode) {
+ return ARMInsts[Opcode];
+}
+
+// FIXME: We would really like to be able to tablegen'erate this.
+bool ARMAsmParser::
+validateInstruction(MCInst &Inst,
+ const SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ MCInstrDesc &MCID = getInstDesc(Inst.getOpcode());
+ SMLoc Loc = Operands[0]->getStartLoc();
+ // Check the IT block state first.
+ // NOTE: In Thumb mode, the BKPT instruction has the interesting property of
+ // being allowed in IT blocks, but not being predicable. It just always
+ // executes.
+ if (inITBlock() && Inst.getOpcode() != ARM::tBKPT) {
+ unsigned bit = 1;
+ if (ITState.FirstCond)
+ ITState.FirstCond = false;
+ else
+ bit = (ITState.Mask >> (5 - ITState.CurPosition)) & 1;
+ // The instruction must be predicable.
+ if (!MCID.isPredicable())
+ return Error(Loc, "instructions in IT block must be predicable");
+ unsigned Cond = Inst.getOperand(MCID.findFirstPredOperandIdx()).getImm();
+ unsigned ITCond = bit ? ITState.Cond :
+ ARMCC::getOppositeCondition(ITState.Cond);
+ if (Cond != ITCond) {
+ // Find the condition code Operand to get its SMLoc information.
+ SMLoc CondLoc;
+ for (unsigned i = 1; i < Operands.size(); ++i)
+ if (static_cast<ARMOperand*>(Operands[i])->isCondCode())
+ CondLoc = Operands[i]->getStartLoc();
+ return Error(CondLoc, "incorrect condition in IT block; got '" +
+ StringRef(ARMCondCodeToString(ARMCC::CondCodes(Cond))) +
+ "', but expected '" +
+ ARMCondCodeToString(ARMCC::CondCodes(ITCond)) + "'");
+ }
+ // Check for non-'al' condition codes outside of the IT block.
+ } else if (isThumbTwo() && MCID.isPredicable() &&
+ Inst.getOperand(MCID.findFirstPredOperandIdx()).getImm() !=
+ ARMCC::AL && Inst.getOpcode() != ARM::tB &&
+ Inst.getOpcode() != ARM::t2B)
+ return Error(Loc, "predicated instructions must be in IT block");
+
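For a concrete feel of the mask arithmetic above, a standalone sketch that derives the condition each slot of an IT block must carry from the parse-time mask (where, as in the parser, a '1' bit means 'then'; the opposite-condition flip is shown for EQ/NE only, whereas the real code uses ARMCC::getOppositeCondition):

#include <cstdio>

// Pos is the 0-based instruction index inside the block; for slots 1..3 the
// 't'/'e' letter lives at bit 4-Pos of the parse-time mask.
static const char *expectedCond(unsigned Cond, unsigned Mask, unsigned Pos) {
  unsigned bit = Pos == 0 ? 1 : (Mask >> (4 - Pos)) & 1;
  unsigned ITCond = bit ? Cond : Cond ^ 1; // EQ (0) <-> NE (1) only
  return ITCond == 0 ? "eq" : "ne";
}

int main() {
  // "itet eq" parses to mask 0b0110; its slots must be eq, ne, eq.
  printf("%s %s %s\n", expectedCond(0, 0x6, 0), expectedCond(0, 0x6, 1),
         expectedCond(0, 0x6, 2));
}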
+ switch (Inst.getOpcode()) {
+ case ARM::LDRD:
+ case ARM::LDRD_PRE:
+ case ARM::LDRD_POST:
+ case ARM::LDREXD: {
+ // Rt2 must be Rt + 1.
+ unsigned Rt = getARMRegisterNumbering(Inst.getOperand(0).getReg());
+ unsigned Rt2 = getARMRegisterNumbering(Inst.getOperand(1).getReg());
+ if (Rt2 != Rt + 1)
+ return Error(Operands[3]->getStartLoc(),
+ "destination operands must be sequential");
+ return false;
+ }
+ case ARM::STRD: {
+ // Rt2 must be Rt + 1.
+ unsigned Rt = getARMRegisterNumbering(Inst.getOperand(0).getReg());
+ unsigned Rt2 = getARMRegisterNumbering(Inst.getOperand(1).getReg());
+ if (Rt2 != Rt + 1)
+ return Error(Operands[3]->getStartLoc(),
+ "source operands must be sequential");
+ return false;
+ }
+ case ARM::STRD_PRE:
+ case ARM::STRD_POST:
+ case ARM::STREXD: {
+ // Rt2 must be Rt + 1.
+ unsigned Rt = getARMRegisterNumbering(Inst.getOperand(1).getReg());
+ unsigned Rt2 = getARMRegisterNumbering(Inst.getOperand(2).getReg());
+ if (Rt2 != Rt + 1)
+ return Error(Operands[3]->getStartLoc(),
+ "source operands must be sequential");
+ return false;
+ }
+ case ARM::SBFX:
+ case ARM::UBFX: {
+ // width must be in range [1, 32-lsb]
+ unsigned lsb = Inst.getOperand(2).getImm();
+ unsigned widthm1 = Inst.getOperand(3).getImm();
+ if (widthm1 >= 32 - lsb)
+ return Error(Operands[5]->getStartLoc(),
+ "bitfield width must be in range [1,32-lsb]");
+ return false;
+ }
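The width check above, pulled out as a standalone sketch with a passing and a failing case:

#include <cstdio>

// The operand encodes width-1; lsb+width must not spill past bit 31.
static bool bitfieldOk(unsigned lsb, unsigned width) {
  return (width - 1) < 32 - lsb; // same test as widthm1 >= 32 - lsb, negated
}

int main() {
  printf("%d\n", bitfieldOk(28, 4)); // 1: "sbfx r0, r1, #28, #4" is fine
  printf("%d\n", bitfieldOk(28, 5)); // 0: width 5 runs past bit 31
}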
+ case ARM::tLDMIA: {
+ // If we're parsing Thumb2, the .w variant is available and handles
+ // most cases that are normally illegal for a Thumb1 LDM
+ // instruction. We'll make the transformation in processInstruction()
+ // if necessary.
+ //
+ // Thumb LDM instructions have writeback iff the base register is not
+ // in the register list.
+ unsigned Rn = Inst.getOperand(0).getReg();
+ bool hasWritebackToken =
+ (static_cast<ARMOperand*>(Operands[3])->isToken() &&
+ static_cast<ARMOperand*>(Operands[3])->getToken() == "!");
+ bool listContainsBase;
+ if (checkLowRegisterList(Inst, 3, Rn, 0, listContainsBase) && !isThumbTwo())
+ return Error(Operands[3 + hasWritebackToken]->getStartLoc(),
+ "registers must be in range r0-r7");
+ // If we should have writeback, then there should be a '!' token.
+ if (!listContainsBase && !hasWritebackToken && !isThumbTwo())
+ return Error(Operands[2]->getStartLoc(),
+ "writeback operator '!' expected");
+ // If we should not have writeback, there must not be a '!'. This is
+ // true even for the 32-bit wide encodings.
+ if (listContainsBase && hasWritebackToken)
+ return Error(Operands[3]->getStartLoc(),
+ "writeback operator '!' not allowed when base register "
+ "in register list");
+
+ break;
+ }
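The three diagnostics above boil down to one rule; a standalone sketch, with the assembly that triggers each outcome noted in the comments:

#include <cstdio>

static const char *checkThumbLDM(bool baseInList, bool hasBang) {
  if (!baseInList && !hasBang)
    return "error: writeback operator '!' expected";
  if (baseInList && hasBang)
    return "error: '!' not allowed when base register in register list";
  return "ok";
}

int main() {
  printf("%s\n", checkThumbLDM(false, true));  // ldmia r3!, {r0, r1}
  printf("%s\n", checkThumbLDM(true, false));  // ldmia r3, {r2, r3}
  printf("%s\n", checkThumbLDM(false, false)); // ldmia r3, {r0, r1} -> error
}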
+ case ARM::t2LDMIA_UPD: {
+ if (listContainsReg(Inst, 3, Inst.getOperand(0).getReg()))
+ return Error(Operands[4]->getStartLoc(),
+ "writeback operator '!' not allowed when base register "
+ "in register list");
+ break;
+ }
+ case ARM::tPOP: {
+ bool listContainsBase;
+ if (checkLowRegisterList(Inst, 3, 0, ARM::PC, listContainsBase))
+ return Error(Operands[2]->getStartLoc(),
+ "registers must be in range r0-r7 or pc");
+ break;
+ }
+ case ARM::tPUSH: {
+ bool listContainsBase;
+ if (checkLowRegisterList(Inst, 3, 0, ARM::LR, listContainsBase))
+ return Error(Operands[2]->getStartLoc(),
+ "registers must be in range r0-r7 or lr");
+ break;
+ }
+ case ARM::tSTMIA_UPD: {
+ bool listContainsBase;
+ if (checkLowRegisterList(Inst, 4, 0, 0, listContainsBase) && !isThumbTwo())
+ return Error(Operands[4]->getStartLoc(),
+ "registers must be in range r0-r7");
+ break;
+ }
+ }
+
return false;
}
+void ARMAsmParser::
+processInstruction(MCInst &Inst,
+ const SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ switch (Inst.getOpcode()) {
+ case ARM::LDMIA_UPD:
+ // If this is a load of a single register via a 'pop', then we should use
+ // a post-indexed LDR instruction instead, per the ARM ARM.
+ if (static_cast<ARMOperand*>(Operands[0])->getToken() == "pop" &&
+ Inst.getNumOperands() == 5) {
+ MCInst TmpInst;
+ TmpInst.setOpcode(ARM::LDR_POST_IMM);
+ TmpInst.addOperand(Inst.getOperand(4)); // Rt
+ TmpInst.addOperand(Inst.getOperand(0)); // Rn_wb
+ TmpInst.addOperand(Inst.getOperand(1)); // Rn
+ TmpInst.addOperand(MCOperand::CreateReg(0)); // am2offset
+ TmpInst.addOperand(MCOperand::CreateImm(4));
+ TmpInst.addOperand(Inst.getOperand(2)); // CondCode
+ TmpInst.addOperand(Inst.getOperand(3));
+ Inst = TmpInst;
+ }
+ break;
+ case ARM::STMDB_UPD:
+ // If this is a store of a single register via a 'push', then we should use
+ // a pre-indexed STR instruction instead, per the ARM ARM.
+ if (static_cast<ARMOperand*>(Operands[0])->getToken() == "push" &&
+ Inst.getNumOperands() == 5) {
+ MCInst TmpInst;
+ TmpInst.setOpcode(ARM::STR_PRE_IMM);
+ TmpInst.addOperand(Inst.getOperand(0)); // Rn_wb
+ TmpInst.addOperand(Inst.getOperand(4)); // Rt
+ TmpInst.addOperand(Inst.getOperand(1)); // addrmode_imm12
+ TmpInst.addOperand(MCOperand::CreateImm(-4));
+ TmpInst.addOperand(Inst.getOperand(2)); // CondCode
+ TmpInst.addOperand(Inst.getOperand(3));
+ Inst = TmpInst;
+ }
+ break;
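The net effect of these two rewrites, as a worked example in assembly (illustrative, not parser output):

#include <cstdio>

int main() {
  // Per the ARM ARM, the single-register list forms are canonicalized:
  printf("pop {r3}   ->  ldr r3, [sp], #4\n");   // post-indexed load
  printf("push {r3}  ->  str r3, [sp, #-4]!\n"); // pre-indexed store
}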
+ case ARM::tADDi8:
+ // If the immediate is in the range 0-7, we want tADDi3 iff Rd was
+ // explicitly specified. From the ARM ARM: "Encoding T1 is preferred
+ // to encoding T2 if <Rd> is specified and encoding T2 is preferred
+ // to encoding T1 if <Rd> is omitted."
+ if (Inst.getOperand(3).getImm() < 8 && Operands.size() == 6)
+ Inst.setOpcode(ARM::tADDi3);
+ break;
+ case ARM::tSUBi8:
+ // If the immediate is in the range 0-7, we want tSUBi3 iff Rd was
+ // explicitly specified. From the ARM ARM: "Encoding T1 is preferred
+ // to encoding T2 if <Rd> is specified and encoding T2 is preferred
+ // to encoding T1 if <Rd> is omitted."
+ if (Inst.getOperand(3).getImm() < 8 && Operands.size() == 6)
+ Inst.setOpcode(ARM::tSUBi3);
+ break;
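A standalone sketch of the narrowing rule shared by the two cases above; the sixth parsed operand is how an explicitly written Rd shows up here:

#include <cstdio>

static const char *pickAddEncoding(unsigned Imm, unsigned NumParsedOps) {
  if (Imm < 8 && NumParsedOps == 6)
    return "tADDi3"; // "adds r1, r2, #3" names Rd -> encoding T1
  return "tADDi8";   // "adds r1, #3" omits Rd     -> encoding T2
}

int main() {
  printf("%s %s\n", pickAddEncoding(3, 6), pickAddEncoding(3, 5));
}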
+ case ARM::tB:
+ // A Thumb conditional branch outside of an IT block is a tBcc.
+ if (Inst.getOperand(1).getImm() != ARMCC::AL && !inITBlock())
+ Inst.setOpcode(ARM::tBcc);
+ break;
+ case ARM::t2B:
+ // A Thumb2 conditional branch outside of an IT block is a t2Bcc.
+ if (Inst.getOperand(1).getImm() != ARMCC::AL && !inITBlock())
+ Inst.setOpcode(ARM::t2Bcc);
+ break;
+ case ARM::t2Bcc:
+ // If the conditional is AL or we're in an IT block, we really want t2B.
+ if (Inst.getOperand(1).getImm() == ARMCC::AL || inITBlock())
+ Inst.setOpcode(ARM::t2B);
+ break;
+ case ARM::tBcc:
+ // If the conditional is AL, we really want tB.
+ if (Inst.getOperand(1).getImm() == ARMCC::AL)
+ Inst.setOpcode(ARM::tB);
+ break;
+ case ARM::tLDMIA: {
+ // If the register list contains any high registers, or if the writeback
+ // doesn't match what tLDMIA can do, we need to use the 32-bit encoding
+ // instead if we're in Thumb2. Otherwise, this should have generated
+ // an error in validateInstruction().
+ unsigned Rn = Inst.getOperand(0).getReg();
+ bool hasWritebackToken =
+ (static_cast<ARMOperand*>(Operands[3])->isToken() &&
+ static_cast<ARMOperand*>(Operands[3])->getToken() == "!");
+ bool listContainsBase;
+ if (checkLowRegisterList(Inst, 3, Rn, 0, listContainsBase) ||
+ (!listContainsBase && !hasWritebackToken) ||
+ (listContainsBase && hasWritebackToken)) {
+ // 16-bit encoding isn't sufficient. Switch to the 32-bit version.
+ assert(isThumbTwo());
+ Inst.setOpcode(hasWritebackToken ? ARM::t2LDMIA_UPD : ARM::t2LDMIA);
+ // If we're switching to the updating version, we need to insert
+ // the writeback tied operand.
+ if (hasWritebackToken)
+ Inst.insert(Inst.begin(),
+ MCOperand::CreateReg(Inst.getOperand(0).getReg()));
+ }
+ break;
+ }
+ case ARM::tSTMIA_UPD: {
+ // If the register list contains any high registers, we need to use
+ // the 32-bit encoding instead if we're in Thumb2. Otherwise, this
+ // should have generated an error in validateInstruction().
+ unsigned Rn = Inst.getOperand(0).getReg();
+ bool listContainsBase;
+ if (checkLowRegisterList(Inst, 4, Rn, 0, listContainsBase)) {
+ // 16-bit encoding isn't sufficient. Switch to the 32-bit version.
+ assert(isThumbTwo());
+ Inst.setOpcode(ARM::t2STMIA_UPD);
+ }
+ break;
+ }
+ case ARM::t2MOVi: {
+ // If we can use the 16-bit encoding and the user didn't explicitly
+ // request the 32-bit variant, transform it here.
+ if (isARMLowRegister(Inst.getOperand(0).getReg()) &&
+ Inst.getOperand(1).getImm() <= 255 &&
+ ((!inITBlock() && Inst.getOperand(2).getImm() == ARMCC::AL &&
+ Inst.getOperand(4).getReg() == ARM::CPSR) ||
+ (inITBlock() && Inst.getOperand(4).getReg() == 0)) &&
+ (!static_cast<ARMOperand*>(Operands[2])->isToken() ||
+ static_cast<ARMOperand*>(Operands[2])->getToken() != ".w")) {
+ // The operands aren't in the same order for tMOVi8...
+ MCInst TmpInst;
+ TmpInst.setOpcode(ARM::tMOVi8);
+ TmpInst.addOperand(Inst.getOperand(0));
+ TmpInst.addOperand(Inst.getOperand(4));
+ TmpInst.addOperand(Inst.getOperand(1));
+ TmpInst.addOperand(Inst.getOperand(2));
+ TmpInst.addOperand(Inst.getOperand(3));
+ Inst = TmpInst;
+ }
+ break;
+ }
+ case ARM::t2MOVr: {
+ // If we can use the 16-bit encoding and the user didn't explicitly
+ // request the 32-bit variant, transform it here.
+ if (isARMLowRegister(Inst.getOperand(0).getReg()) &&
+ isARMLowRegister(Inst.getOperand(1).getReg()) &&
+ Inst.getOperand(2).getImm() == ARMCC::AL &&
+ Inst.getOperand(4).getReg() == ARM::CPSR &&
+ (!static_cast<ARMOperand*>(Operands[2])->isToken() ||
+ static_cast<ARMOperand*>(Operands[2])->getToken() != ".w")) {
+ // The operands aren't the same for tMOV[S]r... (no cc_out)
+ MCInst TmpInst;
+ TmpInst.setOpcode(Inst.getOperand(4).getReg() ? ARM::tMOVSr : ARM::tMOVr);
+ TmpInst.addOperand(Inst.getOperand(0));
+ TmpInst.addOperand(Inst.getOperand(1));
+ TmpInst.addOperand(Inst.getOperand(2));
+ TmpInst.addOperand(Inst.getOperand(3));
+ Inst = TmpInst;
+ }
+ break;
+ }
+ case ARM::t2SXTH:
+ case ARM::t2SXTB:
+ case ARM::t2UXTH:
+ case ARM::t2UXTB: {
+ // If we can use the 16-bit encoding and the user didn't explicitly
+ // request the 32-bit variant, transform it here.
+ if (isARMLowRegister(Inst.getOperand(0).getReg()) &&
+ isARMLowRegister(Inst.getOperand(1).getReg()) &&
+ Inst.getOperand(2).getImm() == 0 &&
+ (!static_cast<ARMOperand*>(Operands[2])->isToken() ||
+ static_cast<ARMOperand*>(Operands[2])->getToken() != ".w")) {
+ unsigned NewOpc;
+ switch (Inst.getOpcode()) {
+ default: llvm_unreachable("Illegal opcode!");
+ case ARM::t2SXTH: NewOpc = ARM::tSXTH; break;
+ case ARM::t2SXTB: NewOpc = ARM::tSXTB; break;
+ case ARM::t2UXTH: NewOpc = ARM::tUXTH; break;
+ case ARM::t2UXTB: NewOpc = ARM::tUXTB; break;
+ }
+ // The operands aren't the same for thumb1 (no rotate operand).
+ MCInst TmpInst;
+ TmpInst.setOpcode(NewOpc);
+ TmpInst.addOperand(Inst.getOperand(0));
+ TmpInst.addOperand(Inst.getOperand(1));
+ TmpInst.addOperand(Inst.getOperand(3));
+ TmpInst.addOperand(Inst.getOperand(4));
+ Inst = TmpInst;
+ }
+ break;
+ }
+ case ARM::t2IT: {
+ // In the IT encoding, the mask bits for all but the first condition
+ // are relative to the low bit of the condition code: a mask bit equal
+ // to that bit means 't'. The parser built the mask as if '1' always
+ // meant 't', so XOR-toggle the bits above the terminator if the low
+ // bit of the condition code is zero. The encoding also expects the low
+ // bit of the condition to be mirrored as bit 4 of the mask operand, so
+ // OR that in when needed.
+ MCOperand &MO = Inst.getOperand(1);
+ unsigned Mask = MO.getImm();
+ unsigned OrigMask = Mask;
+ unsigned TZ = CountTrailingZeros_32(Mask);
+ if ((Inst.getOperand(0).getImm() & 1) == 0) {
+ assert(Mask && TZ <= 3 && "illegal IT mask value!");
+ for (unsigned i = 3; i != TZ; --i)
+ Mask ^= 1 << i;
+ } else
+ Mask |= 0x10;
+ MO.setImm(Mask);
+
+ // Set up the IT block state according to the IT instruction we just
+ // matched.
+ assert(!inITBlock() && "nested IT blocks?!");
+ ITState.Cond = ARMCC::CondCodes(Inst.getOperand(0).getImm());
+ ITState.Mask = OrigMask; // Use the original mask, not the updated one.
+ ITState.CurPosition = 0;
+ ITState.FirstCond = true;
+ break;
+ }
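A standalone sketch of the fixup above, with __builtin_ctz standing in for CountTrailingZeros_32 (a GCC/Clang builtin, assumed here for self-containment). For "ite eq" the parser built 0b0100; EQ's low bit is 0, so the bit above the terminator toggles and the encoder sees 0b1100.

#include <cassert>
#include <cstdio>

static unsigned fixupITMask(unsigned CondCode, unsigned Mask) {
  assert(Mask && "IT mask must be nonzero");
  unsigned TZ = __builtin_ctz(Mask); // position of the terminating '1'
  if ((CondCode & 1) == 0) {
    assert(TZ <= 3 && "illegal IT mask value!");
    for (unsigned i = 3; i != TZ; --i)
      Mask ^= 1 << i; // toggle every mask bit above the terminator
  } else {
    Mask |= 0x10; // mirror cond bit0 into bit 4 of the mask operand
  }
  return Mask;
}

int main() {
  printf("%#x\n", fixupITMask(/*ARMCC::EQ*/ 0, 0x4)); // prints 0xc
}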
+ }
+}
+
+unsigned ARMAsmParser::checkTargetMatchPredicate(MCInst &Inst) {
+ // 16-bit Thumb arithmetic instructions either require or preclude the 'S'
+ // suffix depending on whether they're in an IT block or not.
+ unsigned Opc = Inst.getOpcode();
+ MCInstrDesc &MCID = getInstDesc(Opc);
+ if (MCID.TSFlags & ARMII::ThumbArithFlagSetting) {
+ assert(MCID.hasOptionalDef() &&
+ "optionally flag setting instruction missing optional def operand");
+ assert(MCID.NumOperands == Inst.getNumOperands() &&
+ "operand count mismatch!");
+ // Find the optional-def operand (cc_out).
+ unsigned OpNo;
+ for (OpNo = 0;
+ OpNo < MCID.NumOperands && !MCID.OpInfo[OpNo].isOptionalDef();
+ ++OpNo)
+ ;
+ // If we're parsing Thumb1, reject it completely.
+ if (isThumbOne() && Inst.getOperand(OpNo).getReg() != ARM::CPSR)
+ return Match_MnemonicFail;
+ // If we're parsing Thumb2, which form is legal depends on whether we're
+ // in an IT block.
+ if (isThumbTwo() && Inst.getOperand(OpNo).getReg() != ARM::CPSR &&
+ !inITBlock())
+ return Match_RequiresITBlock;
+ if (isThumbTwo() && Inst.getOperand(OpNo).getReg() == ARM::CPSR &&
+ inITBlock())
+ return Match_RequiresNotITBlock;
+ }
+ // Some high-register Thumb1 encodings only allow both registers to be
+ // low (r0-r7) when assembling for Thumb2.
+ else if (Opc == ARM::tADDhirr && isThumbOne() &&
+ isARMLowRegister(Inst.getOperand(1).getReg()) &&
+ isARMLowRegister(Inst.getOperand(2).getReg()))
+ return Match_RequiresThumb2;
+ // Others only require ARMv6 or later.
+ else if (Opc == ARM::tMOVr && isThumbOne() && !hasV6Ops() &&
+ isARMLowRegister(Inst.getOperand(0).getReg()) &&
+ isARMLowRegister(Inst.getOperand(1).getReg()))
+ return Match_RequiresV6;
+ return Match_Success;
+}
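The IT-block half of this predicate, isolated as a standalone sketch (Thumb2 only; the Thumb1, tADDhirr, and tMOVr legs are left out):

#include <cstdio>

static const char *matchThumb2Arith(bool setsFlags, bool inITBlock) {
  if (!setsFlags && !inITBlock)
    return "Match_RequiresITBlock";    // e.g. non-S "add" outside an IT block
  if (setsFlags && inITBlock)
    return "Match_RequiresNotITBlock"; // e.g. "adds" inside an IT block
  return "Match_Success";
}

int main() {
  printf("%s\n", matchThumb2Arith(true, false));  // "adds r0, r0, r1"
  printf("%s\n", matchThumb2Arith(false, true));  // "it eq" then "addeq ..."
}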
+
bool ARMAsmParser::
MatchAndEmitInstruction(SMLoc IDLoc,
SmallVectorImpl<MCParsedAsmOperand*> &Operands,
MCStreamer &Out) {
MCInst Inst;
unsigned ErrorInfo;
- MatchResultTy MatchResult, MatchResult2;
+ unsigned MatchResult;
MatchResult = MatchInstructionImpl(Operands, Inst, ErrorInfo);
- if (MatchResult != Match_Success) {
- // If we get a Match_InvalidOperand it might be some arithmetic instruction
- // that does not update the condition codes. So try adding a CCOut operand
- // with a value of reg0.
- if (MatchResult == Match_InvalidOperand) {
- Operands.insert(Operands.begin() + 1,
- ARMOperand::CreateCCOut(0,
- ((ARMOperand*)Operands[0])->getStartLoc()));
- MatchResult2 = MatchInstructionImpl(Operands, Inst, ErrorInfo);
- if (MatchResult2 == Match_Success)
- MatchResult = Match_Success;
- else {
- ARMOperand *CCOut = ((ARMOperand*)Operands[1]);
- Operands.erase(Operands.begin() + 1);
- delete CCOut;
- }
- }
- // If we get a Match_MnemonicFail it might be some arithmetic instruction
- // that updates the condition codes if it ends in 's'. So see if the
- // mnemonic ends in 's' and if so try removing the 's' and adding a CCOut
- // operand with a value of CPSR.
- else if (MatchResult == Match_MnemonicFail) {
- // Get the instruction mnemonic, which is the first token.
- StringRef Mnemonic = ((ARMOperand*)Operands[0])->getToken();
- if (Mnemonic.substr(Mnemonic.size()-1) == "s") {
- // removed the 's' from the mnemonic for matching.
- StringRef MnemonicNoS = Mnemonic.slice(0, Mnemonic.size() - 1);
- SMLoc NameLoc = ((ARMOperand*)Operands[0])->getStartLoc();
- ARMOperand *OldMnemonic = ((ARMOperand*)Operands[0]);
- Operands.erase(Operands.begin());
- delete OldMnemonic;
- Operands.insert(Operands.begin(),
- ARMOperand::CreateToken(MnemonicNoS, NameLoc));
- Operands.insert(Operands.begin() + 1,
- ARMOperand::CreateCCOut(ARM::CPSR, NameLoc));
- MatchResult2 = MatchInstructionImpl(Operands, Inst, ErrorInfo);
- if (MatchResult2 == Match_Success)
- MatchResult = Match_Success;
- else {
- ARMOperand *OldMnemonic = ((ARMOperand*)Operands[0]);
- Operands.erase(Operands.begin());
- delete OldMnemonic;
- Operands.insert(Operands.begin(),
- ARMOperand::CreateToken(Mnemonic, NameLoc));
- ARMOperand *CCOut = ((ARMOperand*)Operands[1]);
- Operands.erase(Operands.begin() + 1);
- delete CCOut;
- }
- }
- }
- }
switch (MatchResult) {
+ default: break;
case Match_Success:
+ // Context-sensitive operand constraints aren't handled by the matcher,
+ // so check them here.
+ if (validateInstruction(Inst, Operands)) {
+ // Still progress the IT block, otherwise one wrong condition causes
+ // nasty cascading errors.
+ forwardITPosition();
+ return true;
+ }
+
+ // Some instructions need post-processing to, for example, tweak which
+ // encoding is selected.
+ processInstruction(Inst, Operands);
+
+ // Only move forward at the very end so that everything in validate
+ // and process gets a consistent answer about whether we're in an IT
+ // block.
+ forwardITPosition();
+
Out.EmitInstruction(Inst);
return false;
case Match_MissingFeature:
@@ -2227,34 +4443,43 @@ MatchAndEmitInstruction(SMLoc IDLoc,
return Error(ErrorLoc, "invalid operand for instruction");
}
case Match_MnemonicFail:
- return Error(IDLoc, "unrecognized instruction mnemonic");
+ return Error(IDLoc, "invalid instruction");
case Match_ConversionFail:
- return Error(IDLoc, "unable to convert operands to instruction");
+ // The converter function will have already emitted a diagnostic.
+ return true;
+ case Match_RequiresNotITBlock:
+ return Error(IDLoc, "flag setting instruction only valid outside IT block");
+ case Match_RequiresITBlock:
+ return Error(IDLoc, "instruction only valid inside IT block");
+ case Match_RequiresV6:
+ return Error(IDLoc, "instruction variant requires ARMv6 or later");
+ case Match_RequiresThumb2:
+ return Error(IDLoc, "instruction variant requires Thumb2");
}
llvm_unreachable("Implement any new match types added!");
return true;
}
-/// ParseDirective parses the arm specific directives
+/// ParseDirective parses the ARM-specific directives
bool ARMAsmParser::ParseDirective(AsmToken DirectiveID) {
StringRef IDVal = DirectiveID.getIdentifier();
if (IDVal == ".word")
- return ParseDirectiveWord(4, DirectiveID.getLoc());
+ return parseDirectiveWord(4, DirectiveID.getLoc());
else if (IDVal == ".thumb")
- return ParseDirectiveThumb(DirectiveID.getLoc());
+ return parseDirectiveThumb(DirectiveID.getLoc());
else if (IDVal == ".thumb_func")
- return ParseDirectiveThumbFunc(DirectiveID.getLoc());
+ return parseDirectiveThumbFunc(DirectiveID.getLoc());
else if (IDVal == ".code")
- return ParseDirectiveCode(DirectiveID.getLoc());
+ return parseDirectiveCode(DirectiveID.getLoc());
else if (IDVal == ".syntax")
- return ParseDirectiveSyntax(DirectiveID.getLoc());
+ return parseDirectiveSyntax(DirectiveID.getLoc());
return true;
}
-/// ParseDirectiveWord
+/// parseDirectiveWord
/// ::= .word [ expression (, expression)* ]
-bool ARMAsmParser::ParseDirectiveWord(unsigned Size, SMLoc L) {
+bool ARMAsmParser::parseDirectiveWord(unsigned Size, SMLoc L) {
if (getLexer().isNot(AsmToken::EndOfStatement)) {
for (;;) {
const MCExpr *Value;
@@ -2277,9 +4502,9 @@ bool ARMAsmParser::ParseDirectiveWord(unsigned Size, SMLoc L) {
return false;
}
-/// ParseDirectiveThumb
+/// parseDirectiveThumb
/// ::= .thumb
-bool ARMAsmParser::ParseDirectiveThumb(SMLoc L) {
+bool ARMAsmParser::parseDirectiveThumb(SMLoc L) {
if (getLexer().isNot(AsmToken::EndOfStatement))
return Error(L, "unexpected token in directive");
Parser.Lex();
@@ -2290,9 +4515,9 @@ bool ARMAsmParser::ParseDirectiveThumb(SMLoc L) {
return false;
}
-/// ParseDirectiveThumbFunc
+/// parseDirectiveThumbFunc
/// ::= .thumb_func symbol_name
-bool ARMAsmParser::ParseDirectiveThumbFunc(SMLoc L) {
+bool ARMAsmParser::parseDirectiveThumbFunc(SMLoc L) {
const MCAsmInfo &MAI = getParser().getStreamer().getContext().getAsmInfo();
bool isMachO = MAI.hasSubsectionsViaSymbols();
StringRef Name;
@@ -2322,9 +4547,9 @@ bool ARMAsmParser::ParseDirectiveThumbFunc(SMLoc L) {
return false;
}
-/// ParseDirectiveSyntax
+/// parseDirectiveSyntax
/// ::= .syntax unified | divided
-bool ARMAsmParser::ParseDirectiveSyntax(SMLoc L) {
+bool ARMAsmParser::parseDirectiveSyntax(SMLoc L) {
const AsmToken &Tok = Parser.getTok();
if (Tok.isNot(AsmToken::Identifier))
return Error(L, "unexpected token in .syntax directive");
@@ -2345,9 +4570,9 @@ bool ARMAsmParser::ParseDirectiveSyntax(SMLoc L) {
return false;
}
-/// ParseDirectiveCode
+/// parseDirectiveCode
/// ::= .code 16 | 32
-bool ARMAsmParser::ParseDirectiveCode(SMLoc L) {
+bool ARMAsmParser::parseDirectiveCode(SMLoc L) {
const AsmToken &Tok = Parser.getTok();
if (Tok.isNot(AsmToken::Integer))
return Error(L, "unexpected token in .code directive");
@@ -2380,8 +4605,8 @@ extern "C" void LLVMInitializeARMAsmLexer();
/// Force static initialization.
extern "C" void LLVMInitializeARMAsmParser() {
- RegisterAsmParser<ARMAsmParser> X(TheARMTarget);
- RegisterAsmParser<ARMAsmParser> Y(TheThumbTarget);
+ RegisterMCAsmParser<ARMAsmParser> X(TheARMTarget);
+ RegisterMCAsmParser<ARMAsmParser> Y(TheThumbTarget);
LLVMInitializeARMAsmLexer();
}
diff --git a/lib/Target/ARM/AsmParser/CMakeLists.txt b/lib/Target/ARM/AsmParser/CMakeLists.txt
index 9ba7c01..3f5ad39 100644
--- a/lib/Target/ARM/AsmParser/CMakeLists.txt
+++ b/lib/Target/ARM/AsmParser/CMakeLists.txt
@@ -5,3 +5,12 @@ add_llvm_library(LLVMARMAsmParser
ARMAsmParser.cpp
)
+add_dependencies(LLVMARMAsmParser ARMCommonTableGen)
+
+add_llvm_library_dependencies(LLVMARMAsmParser
+ LLVMARMDesc
+ LLVMARMInfo
+ LLVMMC
+ LLVMMCParser
+ LLVMSupport
+ )
diff --git a/lib/Target/ARM/CMakeLists.txt b/lib/Target/ARM/CMakeLists.txt
index 21608d0..f045e83 100644
--- a/lib/Target/ARM/CMakeLists.txt
+++ b/lib/Target/ARM/CMakeLists.txt
@@ -1,21 +1,21 @@
set(LLVM_TARGET_DEFINITIONS ARM.td)
-tablegen(ARMGenRegisterInfo.inc -gen-register-info)
-tablegen(ARMGenInstrInfo.inc -gen-instr-info)
-tablegen(ARMGenCodeEmitter.inc -gen-emitter)
-tablegen(ARMGenMCCodeEmitter.inc -gen-emitter -mc-emitter)
-tablegen(ARMGenMCPseudoLowering.inc -gen-pseudo-lowering)
-tablegen(ARMGenAsmWriter.inc -gen-asm-writer)
-tablegen(ARMGenAsmMatcher.inc -gen-asm-matcher)
-tablegen(ARMGenDAGISel.inc -gen-dag-isel)
-tablegen(ARMGenFastISel.inc -gen-fast-isel)
-tablegen(ARMGenCallingConv.inc -gen-callingconv)
-tablegen(ARMGenSubtargetInfo.inc -gen-subtarget)
-tablegen(ARMGenEDInfo.inc -gen-enhanced-disassembly-info)
-tablegen(ARMGenDecoderTables.inc -gen-arm-decoder)
+llvm_tablegen(ARMGenRegisterInfo.inc -gen-register-info)
+llvm_tablegen(ARMGenInstrInfo.inc -gen-instr-info)
+llvm_tablegen(ARMGenCodeEmitter.inc -gen-emitter)
+llvm_tablegen(ARMGenMCCodeEmitter.inc -gen-emitter -mc-emitter)
+llvm_tablegen(ARMGenMCPseudoLowering.inc -gen-pseudo-lowering)
+llvm_tablegen(ARMGenAsmWriter.inc -gen-asm-writer)
+llvm_tablegen(ARMGenAsmMatcher.inc -gen-asm-matcher)
+llvm_tablegen(ARMGenDAGISel.inc -gen-dag-isel)
+llvm_tablegen(ARMGenFastISel.inc -gen-fast-isel)
+llvm_tablegen(ARMGenCallingConv.inc -gen-callingconv)
+llvm_tablegen(ARMGenSubtargetInfo.inc -gen-subtarget)
+llvm_tablegen(ARMGenEDInfo.inc -gen-enhanced-disassembly-info)
+llvm_tablegen(ARMGenDisassemblerTables.inc -gen-disassembler)
+add_public_tablegen_target(ARMCommonTableGen)
add_llvm_target(ARMCodeGen
- ARMAsmBackend.cpp
ARMAsmPrinter.cpp
ARMBaseInstrInfo.cpp
ARMBaseRegisterInfo.cpp
@@ -32,9 +32,6 @@ add_llvm_target(ARMCodeGen
ARMISelLowering.cpp
ARMInstrInfo.cpp
ARMJITInfo.cpp
- ARMMachObjectWriter.cpp
- ARMMCCodeEmitter.cpp
- ARMMCExpr.cpp
ARMLoadStoreOptimizer.cpp
ARMMCInstLower.cpp
ARMRegisterInfo.cpp
@@ -43,9 +40,8 @@ add_llvm_target(ARMCodeGen
ARMTargetMachine.cpp
ARMTargetObjectFile.cpp
MLxExpansionPass.cpp
- NEONMoveFix.cpp
- Thumb1InstrInfo.cpp
Thumb1FrameLowering.cpp
+ Thumb1InstrInfo.cpp
Thumb1RegisterInfo.cpp
Thumb2ITBlockPass.cpp
Thumb2InstrInfo.cpp
@@ -53,6 +49,20 @@ add_llvm_target(ARMCodeGen
Thumb2SizeReduction.cpp
)
+add_llvm_library_dependencies(LLVMARMCodeGen
+ LLVMARMAsmPrinter
+ LLVMARMDesc
+ LLVMARMInfo
+ LLVMAnalysis
+ LLVMAsmPrinter
+ LLVMCodeGen
+ LLVMCore
+ LLVMMC
+ LLVMSelectionDAG
+ LLVMSupport
+ LLVMTarget
+ )
+
# workaround for hanging compilation on MSVC10
if( MSVC_VERSION EQUAL 1600 )
set_property(
diff --git a/lib/Target/ARM/Disassembler/ARMDisassembler.cpp b/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
index bdce2c4..8f2f813 100644
--- a/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
+++ b/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
@@ -6,584 +6,4077 @@
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
-//
-// This file is part of the ARM Disassembler.
-// It contains code to implement the public interfaces of ARMDisassembler and
-// ThumbDisassembler, both of which are instances of MCDisassembler.
-//
-//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "arm-disassembler"
-#include "ARMDisassembler.h"
-#include "ARMDisassemblerCore.h"
-
-#include "llvm/ADT/OwningPtr.h"
+#include "ARM.h"
+#include "ARMRegisterInfo.h"
+#include "ARMSubtarget.h"
+#include "MCTargetDesc/ARMAddressingModes.h"
+#include "MCTargetDesc/ARMMCExpr.h"
+#include "MCTargetDesc/ARMBaseInfo.h"
#include "llvm/MC/EDInstInfo.h"
#include "llvm/MC/MCInst.h"
-#include "llvm/Target/TargetRegistry.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCDisassembler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/MemoryObject.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/raw_ostream.h"
-//#define DEBUG(X) do { X; } while (0)
-
-/// ARMGenDecoderTables.inc - ARMDecoderTables.inc is tblgen'ed from
-/// ARMDecoderEmitter.cpp TableGen backend. It contains:
-///
-/// o Mappings from opcode to ARM/Thumb instruction format
-///
-/// o static uint16_t decodeInstruction(uint32_t insn) - the decoding function
-/// for an ARM instruction.
-///
-/// o static uint16_t decodeThumbInstruction(field_t insn) - the decoding
-/// function for a Thumb instruction.
-///
-#include "ARMGenDecoderTables.inc"
+using namespace llvm;
+
+typedef MCDisassembler::DecodeStatus DecodeStatus;
+namespace {
+/// ARMDisassembler - ARM disassembler for all ARM platforms.
+class ARMDisassembler : public MCDisassembler {
+public:
+ /// Constructor - Initializes the disassembler.
+ ///
+ ARMDisassembler(const MCSubtargetInfo &STI) :
+ MCDisassembler(STI) {
+ }
+
+ ~ARMDisassembler() {
+ }
+
+ /// getInstruction - See MCDisassembler.
+ DecodeStatus getInstruction(MCInst &instr,
+ uint64_t &size,
+ const MemoryObject &region,
+ uint64_t address,
+ raw_ostream &vStream,
+ raw_ostream &cStream) const;
+
+ /// getEDInfo - See MCDisassembler.
+ EDInstInfo *getEDInfo() const;
+private:
+};
+
+/// ThumbDisassembler - Thumb disassembler for all Thumb platforms.
+class ThumbDisassembler : public MCDisassembler {
+public:
+ /// Constructor - Initializes the disassembler.
+ ///
+ ThumbDisassembler(const MCSubtargetInfo &STI) :
+ MCDisassembler(STI) {
+ }
+
+ ~ThumbDisassembler() {
+ }
+
+ /// getInstruction - See MCDisassembler.
+ DecodeStatus getInstruction(MCInst &instr,
+ uint64_t &size,
+ const MemoryObject &region,
+ uint64_t address,
+ raw_ostream &vStream,
+ raw_ostream &cStream) const;
+
+ /// getEDInfo - See MCDisassembler.
+ EDInstInfo *getEDInfo() const;
+private:
+ mutable std::vector<unsigned> ITBlock;
+ DecodeStatus AddThumbPredicate(MCInst&) const;
+ void UpdateThumbVFPPredicate(MCInst&) const;
+};
+}
+
+static bool Check(DecodeStatus &Out, DecodeStatus In) {
+ switch (In) {
+ case MCDisassembler::Success:
+ // Out stays the same.
+ return true;
+ case MCDisassembler::SoftFail:
+ Out = In;
+ return true;
+ case MCDisassembler::Fail:
+ Out = In;
+ return false;
+ }
+ return false;
+}
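Intended usage of Check(), as a standalone sketch; the enum values are assumed to mirror MCDisassembler's (Fail < SoftFail < Success), so a SoftFail can degrade the running status without aborting the decode:

#include <cstdio>

enum DecodeStatus { Fail = 0, SoftFail = 1, Success = 3 };

static bool Check(DecodeStatus &Out, DecodeStatus In) {
  if (In == Fail) { Out = In; return false; } // hard failure: stop decoding
  if (In == SoftFail) Out = In;               // degrade, but keep going
  return true;                                // Success leaves Out unchanged
}

int main() {
  DecodeStatus S = Success;
  if (!Check(S, Success) || !Check(S, SoftFail) || !Check(S, Success))
    return 1;
  printf("%d\n", S); // 1 (SoftFail): decoded, but flagged UNPREDICTABLE
}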
+
+
+// Forward declare these because the autogenerated code will reference them.
+// Definitions are further down.
+static DecodeStatus DecodeGPRRegisterClass(llvm::MCInst &Inst, unsigned RegNo,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeGPRnopcRegisterClass(llvm::MCInst &Inst,
+ unsigned RegNo, uint64_t Address,
+ const void *Decoder);
+static DecodeStatus DecodetGPRRegisterClass(llvm::MCInst &Inst, unsigned RegNo,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodetcGPRRegisterClass(llvm::MCInst &Inst, unsigned RegNo,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecoderGPRRegisterClass(llvm::MCInst &Inst, unsigned RegNo,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeSPRRegisterClass(llvm::MCInst &Inst, unsigned RegNo,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeDPRRegisterClass(llvm::MCInst &Inst, unsigned RegNo,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeDPR_8RegisterClass(llvm::MCInst &Inst, unsigned RegNo,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeDPR_VFP2RegisterClass(llvm::MCInst &Inst,
+ unsigned RegNo,
+ uint64_t Address,
+ const void *Decoder);
+static DecodeStatus DecodeQPRRegisterClass(llvm::MCInst &Inst, unsigned RegNo,
+ uint64_t Address, const void *Decoder);
+
+static DecodeStatus DecodePredicateOperand(llvm::MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeCCOutOperand(llvm::MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeSOImmOperand(llvm::MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeRegListOperand(llvm::MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeSPRRegListOperand(llvm::MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeDPRRegListOperand(llvm::MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder);
+
+static DecodeStatus DecodeBitfieldMaskOperand(llvm::MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeCopMemInstruction(llvm::MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeAddrMode2IdxInstruction(llvm::MCInst &Inst,
+ unsigned Insn,
+ uint64_t Address,
+ const void *Decoder);
+static DecodeStatus DecodeSORegMemOperand(llvm::MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeAddrMode3Instruction(llvm::MCInst &Inst,unsigned Insn,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeSORegImmOperand(llvm::MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeSORegRegOperand(llvm::MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder);
+
+static DecodeStatus DecodeMemMultipleWritebackInstruction(llvm::MCInst & Inst,
+ unsigned Insn,
+ uint64_t Address,
+ const void *Decoder);
+static DecodeStatus DecodeT2MOVTWInstruction(llvm::MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeArmMOVTWInstruction(llvm::MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeSMLAInstruction(llvm::MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeCPSInstruction(llvm::MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeT2CPSInstruction(llvm::MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeAddrModeImm12Operand(llvm::MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeAddrMode5Operand(llvm::MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeAddrMode7Operand(llvm::MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeBranchImmInstruction(llvm::MCInst &Inst,unsigned Insn,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeVCVTImmOperand(llvm::MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeAddrMode6Operand(llvm::MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeVLDInstruction(llvm::MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeVSTInstruction(llvm::MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeVLD1DupInstruction(llvm::MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeVLD2DupInstruction(llvm::MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeVLD3DupInstruction(llvm::MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeVLD4DupInstruction(llvm::MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeNEONModImmInstruction(llvm::MCInst &Inst,unsigned Val,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeVSHLMaxInstruction(llvm::MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeShiftRight8Imm(llvm::MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeShiftRight16Imm(llvm::MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeShiftRight32Imm(llvm::MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeShiftRight64Imm(llvm::MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeTBLInstruction(llvm::MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodePostIdxReg(llvm::MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeCoprocessor(llvm::MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeMemBarrierOption(llvm::MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeMSRMask(llvm::MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeDoubleRegLoad(llvm::MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeDoubleRegStore(llvm::MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeLDRPreImm(llvm::MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeLDRPreReg(llvm::MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeSTRPreImm(llvm::MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeSTRPreReg(llvm::MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeVLD1LN(llvm::MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeVLD2LN(llvm::MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeVLD3LN(llvm::MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeVLD4LN(llvm::MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeVST1LN(llvm::MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeVST2LN(llvm::MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeVST3LN(llvm::MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeVST4LN(llvm::MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeVMOVSRR(llvm::MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeVMOVRRS(llvm::MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder);
+
+static DecodeStatus DecodeThumbAddSpecialReg(llvm::MCInst &Inst, uint16_t Insn,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeThumbBROperand(llvm::MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeT2BROperand(llvm::MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeThumbCmpBROperand(llvm::MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeThumbAddrModeRR(llvm::MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeThumbAddrModeIS(llvm::MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeThumbAddrModePC(llvm::MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeThumbAddrModeSP(llvm::MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeT2AddrModeSOReg(llvm::MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeT2LoadShift(llvm::MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeT2Imm8S4(llvm::MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeT2AddrModeImm8s4(llvm::MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeT2AddrModeImm0_1020s4(llvm::MCInst &Inst,unsigned Val,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeT2Imm8(llvm::MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeT2AddrModeImm8(llvm::MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeThumbAddSPImm(llvm::MCInst &Inst, uint16_t Val,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeThumbAddSPReg(llvm::MCInst &Inst, uint16_t Insn,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeThumbCPS(llvm::MCInst &Inst, uint16_t Insn,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeThumbBLXOffset(llvm::MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeT2AddrModeImm12(llvm::MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeThumbTableBranch(llvm::MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeThumb2BCCInstruction(llvm::MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeT2SOImm(llvm::MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeThumbBCCTargetOperand(llvm::MCInst &Inst,unsigned Val,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeThumbBLTargetOperand(llvm::MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeIT(llvm::MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeT2LDRDPreInstruction(llvm::MCInst &Inst,unsigned Insn,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeT2STRDPreInstruction(llvm::MCInst &Inst,unsigned Insn,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeT2Adr(llvm::MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeT2LdStPre(llvm::MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeT2ShifterImmOperand(llvm::MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder);
+
+#include "ARMGenDisassemblerTables.inc"
+#include "ARMGenInstrInfo.inc"
#include "ARMGenEDInfo.inc"
-using namespace llvm;
+static MCDisassembler *createARMDisassembler(const Target &T,
+ const MCSubtargetInfo &STI) {
+ return new ARMDisassembler(STI);
+}
+
+static MCDisassembler *createThumbDisassembler(const Target &T,
+ const MCSubtargetInfo &STI) {
+ return new ThumbDisassembler(STI);
+}
-/// showBitVector - Use the raw_ostream to log a diagnostic message describing
-/// the inidividual bits of the instruction.
-///
-static inline void showBitVector(raw_ostream &os, const uint32_t &insn) {
- // Split the bit position markers into more than one lines to fit 80 columns.
- os << " 31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16 15 14 13 12 11"
- << " 10 9 8 7 6 5 4 3 2 1 0 \n";
- os << "---------------------------------------------------------------"
- << "----------------------------------\n";
- os << '|';
- for (unsigned i = 32; i != 0; --i) {
- if (insn >> (i - 1) & 0x01)
- os << " 1";
+EDInstInfo *ARMDisassembler::getEDInfo() const {
+ return instInfoARM;
+}
+
+EDInstInfo *ThumbDisassembler::getEDInfo() const {
+ return instInfoARM;
+}
+
+DecodeStatus ARMDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
+ const MemoryObject &Region,
+ uint64_t Address,
+ raw_ostream &os,
+ raw_ostream &cs) const {
+ CommentStream = &cs;
+
+ uint8_t bytes[4];
+
+ assert(!(STI.getFeatureBits() & ARM::ModeThumb) &&
+ "Asked to disassemble an ARM instruction but Subtarget is in Thumb mode!");
+
+ // We want to read exactly 4 bytes of data.
+ if (Region.readBytes(Address, 4, (uint8_t*)bytes, NULL) == -1) {
+ Size = 0;
+ return MCDisassembler::Fail;
+ }
+
+ // Encoded as a little-endian 32-bit word in the stream.
+ uint32_t insn = (bytes[3] << 24) |
+ (bytes[2] << 16) |
+ (bytes[1] << 8) |
+ (bytes[0] << 0);
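+ // e.g. the byte sequence FE FF FF EA assembles to insn = 0xEAFFFFFE, the A1
+ // encoding of "b ." (an unconditional branch-to-self).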
+
+ // Call the auto-generated decoder function.
+ DecodeStatus result = decodeARMInstruction32(MI, insn, Address, this, STI);
+ if (result != MCDisassembler::Fail) {
+ Size = 4;
+ return result;
+ }
+
+ // VFP and NEON instructions are likewise shared between ARM and Thumb
+ // modes, so try those decoder tables next.
+ MI.clear();
+ result = decodeVFPInstruction32(MI, insn, Address, this, STI);
+ if (result != MCDisassembler::Fail) {
+ Size = 4;
+ return result;
+ }
+
+ MI.clear();
+ result = decodeNEONDataInstruction32(MI, insn, Address, this, STI);
+ if (result != MCDisassembler::Fail) {
+ Size = 4;
+ // Add a fake predicate operand, because we share these instruction
+ // definitions with Thumb2 where these instructions are predicable.
+ if (!DecodePredicateOperand(MI, 0xE, Address, this))
+ return MCDisassembler::Fail;
+ return result;
+ }
+
+ MI.clear();
+ result = decodeNEONLoadStoreInstruction32(MI, insn, Address, this, STI);
+ if (result != MCDisassembler::Fail) {
+ Size = 4;
+ // Add a fake predicate operand, because we share these instruction
+ // definitions with Thumb2 where these instructions are predicable.
+ if (!DecodePredicateOperand(MI, 0xE, Address, this))
+ return MCDisassembler::Fail;
+ return result;
+ }
+
+ MI.clear();
+ result = decodeNEONDupInstruction32(MI, insn, Address, this, STI);
+ if (result != MCDisassembler::Fail) {
+ Size = 4;
+ // Add a fake predicate operand, because we share these instruction
+ // definitions with Thumb2 where these instructions are predicable.
+ if (!DecodePredicateOperand(MI, 0xE, Address, this))
+ return MCDisassembler::Fail;
+ return result;
+ }
+
+ MI.clear();
+
+ Size = 0;
+ return MCDisassembler::Fail;
+}
+
+namespace llvm {
+extern MCInstrDesc ARMInsts[];
+}
+
+/// tryAddingSymbolicOperand - tries to add a symbolic operand in place of the
+/// immediate Value in the MCInst. The immediate Value has already had any PC
+/// adjustment made by the caller. isBranch is true if the instruction is a
+/// branch, false otherwise. If a getOpInfo() function was set up by the
+/// setupForSymbolicDisassembly() call, it is called to get any symbolic
+/// information at the Address for this instruction. If it returns non-zero,
+/// the symbolic information is used to create an MCExpr, which is added as an
+/// operand to the MCInst. If getOpInfo() returns zero and isBranch is true, a
+/// symbol lookup for Value is done; if a symbol is found, an MCExpr is
+/// created with it, otherwise an MCExpr with Value is created. Returns true
+/// if it adds an operand to the MCInst and false otherwise.
+static bool tryAddingSymbolicOperand(uint64_t Address, int32_t Value,
+ bool isBranch, uint64_t InstSize,
+ MCInst &MI, const void *Decoder) {
+ const MCDisassembler *Dis = static_cast<const MCDisassembler*>(Decoder);
+ LLVMOpInfoCallback getOpInfo = Dis->getLLVMOpInfoCallback();
+ if (!getOpInfo)
+ return false;
+
+ struct LLVMOpInfo1 SymbolicOp;
+ SymbolicOp.Value = Value;
+ void *DisInfo = Dis->getDisInfoBlock();
+ if (!getOpInfo(DisInfo, Address, 0 /* Offset */, InstSize, 1, &SymbolicOp)) {
+ if (isBranch) {
+ LLVMSymbolLookupCallback SymbolLookUp =
+ Dis->getLLVMSymbolLookupCallback();
+ if (SymbolLookUp) {
+ uint64_t ReferenceType;
+ ReferenceType = LLVMDisassembler_ReferenceType_In_Branch;
+ const char *ReferenceName;
+ const char *Name = SymbolLookUp(DisInfo, Value, &ReferenceType, Address,
+ &ReferenceName);
+ if (Name) {
+ SymbolicOp.AddSymbol.Name = Name;
+ SymbolicOp.AddSymbol.Present = true;
+ SymbolicOp.Value = 0;
+ }
+ else {
+ SymbolicOp.Value = Value;
+ }
+ if (ReferenceType == LLVMDisassembler_ReferenceType_Out_SymbolStub)
+ (*Dis->CommentStream) << "symbol stub for: " << ReferenceName;
+ }
+ else {
+ return false;
+ }
+ }
+ else {
+ return false;
+ }
+ }
+
+ MCContext *Ctx = Dis->getMCContext();
+ const MCExpr *Add = NULL;
+ if (SymbolicOp.AddSymbol.Present) {
+ if (SymbolicOp.AddSymbol.Name) {
+ StringRef Name(SymbolicOp.AddSymbol.Name);
+ MCSymbol *Sym = Ctx->GetOrCreateSymbol(Name);
+ Add = MCSymbolRefExpr::Create(Sym, *Ctx);
+ } else {
+ Add = MCConstantExpr::Create(SymbolicOp.AddSymbol.Value, *Ctx);
+ }
+ }
+
+ const MCExpr *Sub = NULL;
+ if (SymbolicOp.SubtractSymbol.Present) {
+ if (SymbolicOp.SubtractSymbol.Name) {
+ StringRef Name(SymbolicOp.SubtractSymbol.Name);
+ MCSymbol *Sym = Ctx->GetOrCreateSymbol(Name);
+ Sub = MCSymbolRefExpr::Create(Sym, *Ctx);
+ } else {
+ Sub = MCConstantExpr::Create(SymbolicOp.SubtractSymbol.Value, *Ctx);
+ }
+ }
+
+ const MCExpr *Off = NULL;
+ if (SymbolicOp.Value != 0)
+ Off = MCConstantExpr::Create(SymbolicOp.Value, *Ctx);
+
+ const MCExpr *Expr;
+ if (Sub) {
+ const MCExpr *LHS;
+ if (Add)
+ LHS = MCBinaryExpr::CreateSub(Add, Sub, *Ctx);
else
- os << " 0";
- os << (i%4 == 1 ? '|' : ':');
- }
- os << '\n';
- // Split the bit position markers into more than one lines to fit 80 columns.
- os << "---------------------------------------------------------------"
- << "----------------------------------\n";
- os << '\n';
-}
-
-/// decodeARMInstruction is a decorator function which tries special cases of
-/// instruction matching before calling the auto-generated decoder function.
-static unsigned decodeARMInstruction(uint32_t &insn) {
- if (slice(insn, 31, 28) == 15)
- goto AutoGenedDecoder;
-
- // Special case processing, if any, goes here....
-
- // LLVM combines the offset mode of A8.6.197 & A8.6.198 into STRB.
- // The insufficient encoding information of the combined instruction confuses
- // the decoder wrt BFC/BFI. Therefore, we try to recover here.
- // For BFC, Inst{27-21} = 0b0111110 & Inst{6-0} = 0b0011111.
- // For BFI, Inst{27-21} = 0b0111110 & Inst{6-4} = 0b001 & Inst{3-0} =! 0b1111.
- if (slice(insn, 27, 21) == 0x3e && slice(insn, 6, 4) == 1) {
- if (slice(insn, 3, 0) == 15)
- return ARM::BFC;
+ LHS = MCUnaryExpr::CreateMinus(Sub, *Ctx);
+ if (Off != 0)
+ Expr = MCBinaryExpr::CreateAdd(LHS, Off, *Ctx);
else
- return ARM::BFI;
- }
-
- // Ditto for STRBT, which is a super-instruction for A8.6.199 Encodings
- // A1 & A2.
- // As a result, the decoder fails to deocode USAT properly.
- if (slice(insn, 27, 21) == 0x37 && slice(insn, 5, 4) == 1)
- return ARM::USAT;
- // As a result, the decoder fails to deocode UQADD16 properly.
- if (slice(insn, 27, 20) == 0x66 && slice(insn, 7, 4) == 1)
- return ARM::UQADD16;
-
- // Ditto for ADDSrs, which is a super-instruction for A8.6.7 & A8.6.8.
- // As a result, the decoder fails to decode UMULL properly.
- if (slice(insn, 27, 21) == 0x04 && slice(insn, 7, 4) == 9) {
- return ARM::UMULL;
- }
-
- // Ditto for STR_PRE, which is a super-instruction for A8.6.194 & A8.6.195.
- // As a result, the decoder fails to decode SBFX properly.
- if (slice(insn, 27, 21) == 0x3d && slice(insn, 6, 4) == 5)
- return ARM::SBFX;
-
- // And STRB_PRE, which is a super-instruction for A8.6.197 & A8.6.198.
- // As a result, the decoder fails to decode UBFX properly.
- if (slice(insn, 27, 21) == 0x3f && slice(insn, 6, 4) == 5)
- return ARM::UBFX;
-
- // Ditto for STRT, which is a super-instruction for A8.6.210 Encoding A1 & A2.
- // As a result, the decoder fails to deocode SSAT properly.
- if (slice(insn, 27, 21) == 0x35 && slice(insn, 5, 4) == 1)
- return ARM::SSAT;
-
- // Ditto for RSCrs, which is a super-instruction for A8.6.146 & A8.6.147.
- // As a result, the decoder fails to decode STRHT/LDRHT/LDRSHT/LDRSBT.
- if (slice(insn, 27, 24) == 0) {
- switch (slice(insn, 21, 20)) {
- case 2:
- switch (slice(insn, 7, 4)) {
- case 11:
- return ARM::STRHT;
- default:
- break; // fallthrough
+ Expr = LHS;
+ } else if (Add) {
+ if (Off != 0)
+ Expr = MCBinaryExpr::CreateAdd(Add, Off, *Ctx);
+ else
+ Expr = Add;
+ } else {
+ if (Off != 0)
+ Expr = Off;
+ else
+ Expr = MCConstantExpr::Create(0, *Ctx);
+ }
+
+ if (SymbolicOp.VariantKind == LLVMDisassembler_VariantKind_ARM_HI16)
+ MI.addOperand(MCOperand::CreateExpr(ARMMCExpr::CreateUpper16(Expr, *Ctx)));
+ else if (SymbolicOp.VariantKind == LLVMDisassembler_VariantKind_ARM_LO16)
+ MI.addOperand(MCOperand::CreateExpr(ARMMCExpr::CreateLower16(Expr, *Ctx)));
+ else if (SymbolicOp.VariantKind == LLVMDisassembler_VariantKind_None)
+ MI.addOperand(MCOperand::CreateExpr(Expr));
+ else
+ assert(0 && "bad SymbolicOp.VariantKind");
+
+ return true;
+}
+
+/// tryAddingPcLoadReferenceComment - tries to add a comment describing what
+/// is referenced by a load instruction whose base register is the PC. These
+/// are often values in a literal pool near the Address of the instruction.
+/// The Address of the instruction and its immediate Value are used to form a
+/// possible literal pool entry address. The SymbolLookUp callback will return
+/// the name of a symbol referenced by the literal pool's entry if the
+/// referenced address is that of a symbol, or a pointer to a literal 'C'
+/// string if the referenced address of the literal pool's entry is an address
+/// into a section with 'C' string literals.
+static void tryAddingPcLoadReferenceComment(uint64_t Address, int Value,
+ const void *Decoder) {
+ const MCDisassembler *Dis = static_cast<const MCDisassembler*>(Decoder);
+ LLVMSymbolLookupCallback SymbolLookUp = Dis->getLLVMSymbolLookupCallback();
+ if (SymbolLookUp) {
+ void *DisInfo = Dis->getDisInfoBlock();
+ uint64_t ReferenceType;
+ ReferenceType = LLVMDisassembler_ReferenceType_In_PCrel_Load;
+ const char *ReferenceName;
+ (void)SymbolLookUp(DisInfo, Value, &ReferenceType, Address, &ReferenceName);
+ if (ReferenceType == LLVMDisassembler_ReferenceType_Out_LitPool_SymAddr ||
+ ReferenceType == LLVMDisassembler_ReferenceType_Out_LitPool_CstrAddr)
+ (*Dis->CommentStream) << "literal pool for: " << ReferenceName;
+ }
+}
+
+// Thumb1 instructions don't have explicit S bits. Rather, they
+// implicitly set CPSR. Since it's not represented in the encoding, the
+// auto-generated decoder won't inject the CPSR operand. We need to fix
+// that as a post-pass.
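+// For example (illustrative): the 16-bit encoding of "adds r0, r1, r2"
+// carries no S bit, so outside an IT block this pass inserts an implicit
+// CPSR def; inside an IT block the slot is filled with register 0 (no flag
+// update).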
+static void AddThumb1SBit(MCInst &MI, bool InITBlock) {
+ const MCOperandInfo *OpInfo = ARMInsts[MI.getOpcode()].OpInfo;
+ unsigned short NumOps = ARMInsts[MI.getOpcode()].NumOperands;
+ MCInst::iterator I = MI.begin();
+ for (unsigned i = 0; i < NumOps; ++i, ++I) {
+ if (I == MI.end()) break;
+ if (OpInfo[i].isOptionalDef() && OpInfo[i].RegClass == ARM::CCRRegClassID) {
+ if (i > 0 && OpInfo[i-1].isPredicate()) continue;
+ MI.insert(I, MCOperand::CreateReg(InITBlock ? 0 : ARM::CPSR));
+ return;
+ }
+ }
+
+ MI.insert(I, MCOperand::CreateReg(InITBlock ? 0 : ARM::CPSR));
+}
+
+// Most Thumb instructions don't have explicit predicates in the
+// encoding, but rather get their predicates from IT context. We need
+// to fix up the predicate operands using this context information as a
+// post-pass.
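+// For example, after an "ITTE EQ" the next two instructions receive an EQ
+// predicate and the third an NE predicate.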
+MCDisassembler::DecodeStatus
+ThumbDisassembler::AddThumbPredicate(MCInst &MI) const {
+ MCDisassembler::DecodeStatus S = Success;
+
+ // A few instructions actually have predicates encoded in them. Don't
+ // try to overwrite it if we're seeing one of those.
+ switch (MI.getOpcode()) {
+ case ARM::tBcc:
+ case ARM::t2Bcc:
+ case ARM::tCBZ:
+ case ARM::tCBNZ:
+ case ARM::tCPS:
+ case ARM::t2CPS3p:
+ case ARM::t2CPS2p:
+ case ARM::t2CPS1p:
+ case ARM::tMOVSr:
+ case ARM::tSETEND:
+ // Some instructions (mostly conditional branches) are not
+ // allowed in IT blocks.
+ if (!ITBlock.empty())
+ S = SoftFail;
+ else
+ return Success;
+ break;
+ case ARM::tB:
+ case ARM::t2B:
+ case ARM::t2TBB:
+ case ARM::t2TBH:
+ // Some instructions (mostly unconditional branches) can
+ // only appear at the end of, or outside of, an IT block.
+ if (ITBlock.size() > 1)
+ S = SoftFail;
+ break;
+ default:
+ break;
+ }
+
+ // If we're in an IT block, base the predicate on that. Otherwise,
+ // assume a predicate of AL.
+ unsigned CC;
+ if (!ITBlock.empty()) {
+ CC = ITBlock.back();
+ if (CC == 0xF)
+ CC = ARMCC::AL;
+ ITBlock.pop_back();
+ } else
+ CC = ARMCC::AL;
+
+ const MCOperandInfo *OpInfo = ARMInsts[MI.getOpcode()].OpInfo;
+ unsigned short NumOps = ARMInsts[MI.getOpcode()].NumOperands;
+ MCInst::iterator I = MI.begin();
+ for (unsigned i = 0; i < NumOps; ++i, ++I) {
+ if (I == MI.end()) break;
+ if (OpInfo[i].isPredicate()) {
+ I = MI.insert(I, MCOperand::CreateImm(CC));
+ ++I;
+ if (CC == ARMCC::AL)
+ MI.insert(I, MCOperand::CreateReg(0));
+ else
+ MI.insert(I, MCOperand::CreateReg(ARM::CPSR));
+ return S;
+ }
+ }
+
+ I = MI.insert(I, MCOperand::CreateImm(CC));
+ ++I;
+ if (CC == ARMCC::AL)
+ MI.insert(I, MCOperand::CreateReg(0));
+ else
+ MI.insert(I, MCOperand::CreateReg(ARM::CPSR));
+
+ return S;
+}
+
+// Thumb VFP instructions are a special case. Because we share their
+// encodings between ARM and Thumb modes, and they are predicable in ARM
+// mode, the auto-generated decoder will give them an (incorrect)
+// predicate operand. We need to rewrite these operands based on the IT
+// context as a post-pass.
+void ThumbDisassembler::UpdateThumbVFPPredicate(MCInst &MI) const {
+ unsigned CC;
+ if (!ITBlock.empty()) {
+ CC = ITBlock.back();
+ ITBlock.pop_back();
+ } else
+ CC = ARMCC::AL;
+
+ const MCOperandInfo *OpInfo = ARMInsts[MI.getOpcode()].OpInfo;
+ MCInst::iterator I = MI.begin();
+ unsigned short NumOps = ARMInsts[MI.getOpcode()].NumOperands;
+ for (unsigned i = 0; i < NumOps; ++i, ++I) {
+ if (OpInfo[i].isPredicate()) {
+ I->setImm(CC);
+ ++I;
+ if (CC == ARMCC::AL)
+ I->setReg(0);
+ else
+ I->setReg(ARM::CPSR);
+ return;
+ }
+ }
+}
+
+DecodeStatus ThumbDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
+ const MemoryObject &Region,
+ uint64_t Address,
+ raw_ostream &os,
+ raw_ostream &cs) const {
+ CommentStream = &cs;
+
+ uint8_t bytes[4];
+
+ assert((STI.getFeatureBits() & ARM::ModeThumb) &&
+ "Asked to disassemble in Thumb mode but Subtarget is in ARM mode!");
+
+ // We want to read exactly 2 bytes of data.
+ if (Region.readBytes(Address, 2, (uint8_t*)bytes, NULL) == -1) {
+ Size = 0;
+ return MCDisassembler::Fail;
+ }
+
+ uint16_t insn16 = (bytes[1] << 8) | bytes[0];
+ DecodeStatus result = decodeThumbInstruction16(MI, insn16, Address, this, STI);
+ if (result != MCDisassembler::Fail) {
+ Size = 2;
+ Check(result, AddThumbPredicate(MI));
+ return result;
+ }
+
+ MI.clear();
+ result = decodeThumbSBitInstruction16(MI, insn16, Address, this, STI);
+ if (result != MCDisassembler::Fail) {
+ Size = 2;
+ bool InITBlock = !ITBlock.empty();
+ Check(result, AddThumbPredicate(MI));
+ AddThumb1SBit(MI, InITBlock);
+ return result;
+ }
+
+ MI.clear();
+ result = decodeThumb2Instruction16(MI, insn16, Address, this, STI);
+ if (result != MCDisassembler::Fail) {
+ Size = 2;
+
+ // Nested IT blocks are UNPREDICTABLE. Must be checked before we add
+ // the Thumb predicate.
+ if (MI.getOpcode() == ARM::t2IT && !ITBlock.empty())
+ result = MCDisassembler::SoftFail;
+
+ Check(result, AddThumbPredicate(MI));
+
+ // If we find an IT instruction, we need to parse its condition
+ // code and mask operands so that we can apply them correctly
+ // to the subsequent instructions.
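+ // e.g. "ITTE EQ" encodes firstcond = 0b0000 (EQ) with mask = 0b0110; the
+ // loop below queues the predicates for the trailing 'then'/'else' slots.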
+ if (MI.getOpcode() == ARM::t2IT) {
+
+ // (3 - the number of trailing zeros in the mask) is the number of
+ // 'then'/'else' instructions in the IT block.
+ unsigned firstcond = MI.getOperand(0).getImm();
+ unsigned Mask = MI.getOperand(1).getImm();
+ unsigned CondBit0 = Mask >> 4 & 1;
+ unsigned NumTZ = CountTrailingZeros_32(Mask);
+ assert(NumTZ <= 3 && "Invalid IT mask!");
+ for (unsigned Pos = 3, e = NumTZ; Pos > e; --Pos) {
+ bool T = ((Mask >> Pos) & 1) == CondBit0;
+ if (T)
+ ITBlock.insert(ITBlock.begin(), firstcond);
+ else
+ ITBlock.insert(ITBlock.begin(), firstcond ^ 1);
}
+
+ ITBlock.push_back(firstcond);
+ }
+
+ return result;
+ }
+
+ // We want to read exactly 4 bytes of data.
+ if (Region.readBytes(Address, 4, (uint8_t*)bytes, NULL) == -1) {
+ Size = 0;
+ return MCDisassembler::Fail;
+ }
+
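+ // A Thumb2 instruction is two little-endian halfwords with the first
+ // halfword in the most-significant position; e.g. the byte stream
+ // F0 F7 00 F8 assembles to insn32 = 0xF7F0F800.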
+ uint32_t insn32 = (bytes[3] << 8) |
+ (bytes[2] << 0) |
+ (bytes[1] << 24) |
+ (bytes[0] << 16);
+ MI.clear();
+ result = decodeThumbInstruction32(MI, insn32, Address, this, STI);
+ if (result != MCDisassembler::Fail) {
+ Size = 4;
+ bool InITBlock = !ITBlock.empty();
+ Check(result, AddThumbPredicate(MI));
+ AddThumb1SBit(MI, InITBlock);
+ return result;
+ }
+
+ MI.clear();
+ result = decodeThumb2Instruction32(MI, insn32, Address, this, STI);
+ if (result != MCDisassembler::Fail) {
+ Size = 4;
+ Check(result, AddThumbPredicate(MI));
+ return result;
+ }
+
+ MI.clear();
+ result = decodeVFPInstruction32(MI, insn32, Address, this, STI);
+ if (result != MCDisassembler::Fail) {
+ Size = 4;
+ UpdateThumbVFPPredicate(MI);
+ return result;
+ }
+
+ MI.clear();
+ result = decodeNEONDupInstruction32(MI, insn32, Address, this, STI);
+ if (result != MCDisassembler::Fail) {
+ Size = 4;
+ Check(result, AddThumbPredicate(MI));
+ return result;
+ }
+
+ if (fieldFromInstruction32(insn32, 24, 8) == 0xF9) {
+ MI.clear();
+ uint32_t NEONLdStInsn = insn32;
+ NEONLdStInsn &= 0xF0FFFFFF; // Clear bits 27-24.
+ NEONLdStInsn |= 0x04000000; // Set bit 26 (bits 27-24 become 0b0100).
+ result = decodeNEONLoadStoreInstruction32(MI, NEONLdStInsn, Address, this, STI);
+ if (result != MCDisassembler::Fail) {
+ Size = 4;
+ Check(result, AddThumbPredicate(MI));
+ return result;
+ }
+ }
+
+ if (fieldFromInstruction32(insn32, 24, 4) == 0xF) {
+ MI.clear();
+ uint32_t NEONDataInsn = insn32;
+ NEONDataInsn &= 0xF0FFFFFF; // Clear bits 27-24
+ NEONDataInsn |= (NEONDataInsn & 0x10000000) >> 4; // Move bit 28 to bit 24
+ NEONDataInsn |= 0x12000000; // Set bits 28 and 25
+ result = decodeNEONDataInstruction32(MI, NEONDataInsn, Address, this, STI);
+ if (result != MCDisassembler::Fail) {
+ Size = 4;
+ Check(result, AddThumbPredicate(MI));
+ return result;
+ }
+ }
+
+ Size = 0;
+ return MCDisassembler::Fail;
+}
+
+extern "C" void LLVMInitializeARMDisassembler() {
+ TargetRegistry::RegisterMCDisassembler(TheARMTarget,
+ createARMDisassembler);
+ TargetRegistry::RegisterMCDisassembler(TheThumbTarget,
+ createThumbDisassembler);
+}
+
+static const unsigned GPRDecoderTable[] = {
+ ARM::R0, ARM::R1, ARM::R2, ARM::R3,
+ ARM::R4, ARM::R5, ARM::R6, ARM::R7,
+ ARM::R8, ARM::R9, ARM::R10, ARM::R11,
+ ARM::R12, ARM::SP, ARM::LR, ARM::PC
+};
+
+static DecodeStatus DecodeGPRRegisterClass(llvm::MCInst &Inst, unsigned RegNo,
+ uint64_t Address, const void *Decoder) {
+ if (RegNo > 15)
+ return MCDisassembler::Fail;
+
+ unsigned Register = GPRDecoderTable[RegNo];
+ Inst.addOperand(MCOperand::CreateReg(Register));
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus
+DecodeGPRnopcRegisterClass(llvm::MCInst &Inst, unsigned RegNo,
+ uint64_t Address, const void *Decoder) {
+ if (RegNo == 15) return MCDisassembler::Fail;
+ return DecodeGPRRegisterClass(Inst, RegNo, Address, Decoder);
+}
+
+static DecodeStatus DecodetGPRRegisterClass(llvm::MCInst &Inst, unsigned RegNo,
+ uint64_t Address, const void *Decoder) {
+ if (RegNo > 7)
+ return MCDisassembler::Fail;
+ return DecodeGPRRegisterClass(Inst, RegNo, Address, Decoder);
+}
+
+static DecodeStatus DecodetcGPRRegisterClass(llvm::MCInst &Inst, unsigned RegNo,
+ uint64_t Address, const void *Decoder) {
+ unsigned Register = 0;
+ switch (RegNo) {
+ case 0:
+ Register = ARM::R0;
+ break;
+ case 1:
+ Register = ARM::R1;
+ break;
+ case 2:
+ Register = ARM::R2;
break;
case 3:
- switch (slice(insn, 7, 4)) {
- case 11:
- return ARM::LDRHT;
- case 13:
- return ARM::LDRSBT;
- case 15:
- return ARM::LDRSHT;
- default:
- break; // fallthrough
- }
+ Register = ARM::R3;
+ break;
+ case 9:
+ Register = ARM::R9;
+ break;
+ case 12:
+ Register = ARM::R12;
break;
default:
- break; // fallthrough
+ return MCDisassembler::Fail;
}
+
+ Inst.addOperand(MCOperand::CreateReg(Register));
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus DecoderGPRRegisterClass(llvm::MCInst &Inst, unsigned RegNo,
+ uint64_t Address, const void *Decoder) {
+ if (RegNo == 13 || RegNo == 15) return MCDisassembler::Fail;
+ return DecodeGPRRegisterClass(Inst, RegNo, Address, Decoder);
+}
+
+static const unsigned SPRDecoderTable[] = {
+ ARM::S0, ARM::S1, ARM::S2, ARM::S3,
+ ARM::S4, ARM::S5, ARM::S6, ARM::S7,
+ ARM::S8, ARM::S9, ARM::S10, ARM::S11,
+ ARM::S12, ARM::S13, ARM::S14, ARM::S15,
+ ARM::S16, ARM::S17, ARM::S18, ARM::S19,
+ ARM::S20, ARM::S21, ARM::S22, ARM::S23,
+ ARM::S24, ARM::S25, ARM::S26, ARM::S27,
+ ARM::S28, ARM::S29, ARM::S30, ARM::S31
+};
+
+static DecodeStatus DecodeSPRRegisterClass(llvm::MCInst &Inst, unsigned RegNo,
+ uint64_t Address, const void *Decoder) {
+ if (RegNo > 31)
+ return MCDisassembler::Fail;
+
+ unsigned Register = SPRDecoderTable[RegNo];
+ Inst.addOperand(MCOperand::CreateReg(Register));
+ return MCDisassembler::Success;
+}
+
+static const unsigned DPRDecoderTable[] = {
+ ARM::D0, ARM::D1, ARM::D2, ARM::D3,
+ ARM::D4, ARM::D5, ARM::D6, ARM::D7,
+ ARM::D8, ARM::D9, ARM::D10, ARM::D11,
+ ARM::D12, ARM::D13, ARM::D14, ARM::D15,
+ ARM::D16, ARM::D17, ARM::D18, ARM::D19,
+ ARM::D20, ARM::D21, ARM::D22, ARM::D23,
+ ARM::D24, ARM::D25, ARM::D26, ARM::D27,
+ ARM::D28, ARM::D29, ARM::D30, ARM::D31
+};
+
+static DecodeStatus DecodeDPRRegisterClass(llvm::MCInst &Inst, unsigned RegNo,
+ uint64_t Address, const void *Decoder) {
+ if (RegNo > 31)
+ return MCDisassembler::Fail;
+
+ unsigned Register = DPRDecoderTable[RegNo];
+ Inst.addOperand(MCOperand::CreateReg(Register));
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus DecodeDPR_8RegisterClass(llvm::MCInst &Inst, unsigned RegNo,
+ uint64_t Address, const void *Decoder) {
+ if (RegNo > 7)
+ return MCDisassembler::Fail;
+ return DecodeDPRRegisterClass(Inst, RegNo, Address, Decoder);
+}
+
+static DecodeStatus
+DecodeDPR_VFP2RegisterClass(llvm::MCInst &Inst, unsigned RegNo,
+ uint64_t Address, const void *Decoder) {
+ if (RegNo > 15)
+ return MCDisassembler::Fail;
+ return DecodeDPRRegisterClass(Inst, RegNo, Address, Decoder);
+}
+
+static const unsigned QPRDecoderTable[] = {
+ ARM::Q0, ARM::Q1, ARM::Q2, ARM::Q3,
+ ARM::Q4, ARM::Q5, ARM::Q6, ARM::Q7,
+ ARM::Q8, ARM::Q9, ARM::Q10, ARM::Q11,
+ ARM::Q12, ARM::Q13, ARM::Q14, ARM::Q15
+};
+
+static DecodeStatus DecodeQPRRegisterClass(llvm::MCInst &Inst, unsigned RegNo,
+ uint64_t Address, const void *Decoder) {
+ if (RegNo > 31)
+ return MCDisassembler::Fail;
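+ // Q registers are encoded as even D-register numbers, so drop the low bit;
+ // e.g. an encoded value of 6 selects Q3 (the D6/D7 pair).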
+ RegNo >>= 1;
+
+ unsigned Register = QPRDecoderTable[RegNo];
+ Inst.addOperand(MCOperand::CreateReg(Register));
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus DecodePredicateOperand(llvm::MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder) {
+ if (Val == 0xF) return MCDisassembler::Fail;
+ // AL predicate is not allowed on Thumb1 branches.
+ if (Inst.getOpcode() == ARM::tBcc && Val == 0xE)
+ return MCDisassembler::Fail;
+ Inst.addOperand(MCOperand::CreateImm(Val));
+ if (Val == ARMCC::AL) {
+ Inst.addOperand(MCOperand::CreateReg(0));
+ } else
+ Inst.addOperand(MCOperand::CreateReg(ARM::CPSR));
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus DecodeCCOutOperand(llvm::MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder) {
+ if (Val)
+ Inst.addOperand(MCOperand::CreateReg(ARM::CPSR));
+ else
+ Inst.addOperand(MCOperand::CreateReg(0));
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus DecodeSOImmOperand(llvm::MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder) {
+ uint32_t imm = Val & 0xFF;
+ uint32_t rot = (Val & 0xF00) >> 7;
+ uint32_t rot_imm = (imm >> rot) | (imm << ((32-rot) & 0x1F));
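+ // Worked example (illustrative): Val = 0x2FF has imm8 = 0xFF and a rotate
+ // field of 2, i.e. a right-rotation by 4, giving rot_imm = 0xF000000F.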
+ Inst.addOperand(MCOperand::CreateImm(rot_imm));
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus DecodeSORegImmOperand(llvm::MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder) {
+ DecodeStatus S = MCDisassembler::Success;
+
+ unsigned Rm = fieldFromInstruction32(Val, 0, 4);
+ unsigned type = fieldFromInstruction32(Val, 5, 2);
+ unsigned imm = fieldFromInstruction32(Val, 7, 5);
+
+ // Register-immediate
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rm, Address, Decoder)))
+ return MCDisassembler::Fail;
+
+ ARM_AM::ShiftOpc Shift = ARM_AM::lsl;
+ switch (type) {
+ case 0:
+ Shift = ARM_AM::lsl;
+ break;
+ case 1:
+ Shift = ARM_AM::lsr;
+ break;
+ case 2:
+ Shift = ARM_AM::asr;
+ break;
+ case 3:
+ Shift = ARM_AM::ror;
+ break;
}
- // Ditto for SBCrs, which is a super-instruction for A8.6.152 & A8.6.153.
- // As a result, the decoder fails to decode STRH_Post/LDRD_POST/STRD_POST
- // properly.
- if (slice(insn, 27, 25) == 0 && slice(insn, 20, 20) == 0) {
- unsigned PW = slice(insn, 24, 24) << 1 | slice(insn, 21, 21);
- switch (slice(insn, 7, 4)) {
- case 11:
- switch (PW) {
- case 2: // Offset
- return ARM::STRH;
- case 3: // Pre-indexed
- return ARM::STRH_PRE;
- case 0: // Post-indexed
- return ARM::STRH_POST;
- default:
- break; // fallthrough
- }
+ if (Shift == ARM_AM::ror && imm == 0)
+ Shift = ARM_AM::rrx;
+
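+ // Pack the shifter operand: the shift opcode sits in the low three bits
+ // with the immediate amount above it (the ARM_AM shifter-operand packing).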
+ unsigned Op = Shift | (imm << 3);
+ Inst.addOperand(MCOperand::CreateImm(Op));
+
+ return S;
+}
+
+static DecodeStatus DecodeSORegRegOperand(llvm::MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder) {
+ DecodeStatus S = MCDisassembler::Success;
+
+ unsigned Rm = fieldFromInstruction32(Val, 0, 4);
+ unsigned type = fieldFromInstruction32(Val, 5, 2);
+ unsigned Rs = fieldFromInstruction32(Val, 8, 4);
+
+ // Register-register
+ if (!Check(S, DecodeGPRnopcRegisterClass(Inst, Rm, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodeGPRnopcRegisterClass(Inst, Rs, Address, Decoder)))
+ return MCDisassembler::Fail;
+
+ ARM_AM::ShiftOpc Shift = ARM_AM::lsl;
+ switch (type) {
+ case 0:
+ Shift = ARM_AM::lsl;
break;
- case 13:
- switch (PW) {
- case 2: // Offset
- return ARM::LDRD;
- case 3: // Pre-indexed
- return ARM::LDRD_PRE;
- case 0: // Post-indexed
- return ARM::LDRD_POST;
- default:
- break; // fallthrough
- }
+ case 1:
+ Shift = ARM_AM::lsr;
break;
- case 15:
- switch (PW) {
- case 2: // Offset
- return ARM::STRD;
- case 3: // Pre-indexed
- return ARM::STRD_PRE;
- case 0: // Post-indexed
- return ARM::STRD_POST;
- default:
- break; // fallthrough
- }
+ case 2:
+ Shift = ARM_AM::asr;
break;
+ case 3:
+ Shift = ARM_AM::ror;
+ break;
+ }
+
+ Inst.addOperand(MCOperand::CreateImm(Shift));
+
+ return S;
+}
+
+static DecodeStatus DecodeRegListOperand(llvm::MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder) {
+ DecodeStatus S = MCDisassembler::Success;
+
+ bool writebackLoad = false;
+ unsigned writebackReg = 0;
+ switch (Inst.getOpcode()) {
default:
- break; // fallthrough
+ break;
+ case ARM::LDMIA_UPD:
+ case ARM::LDMDB_UPD:
+ case ARM::LDMIB_UPD:
+ case ARM::LDMDA_UPD:
+ case ARM::t2LDMIA_UPD:
+ case ARM::t2LDMDB_UPD:
+ writebackLoad = true;
+ writebackReg = Inst.getOperand(0).getReg();
+ break;
+ }
+
+ // Empty register lists are not allowed.
+ if (CountPopulation_32(Val) == 0) return MCDisassembler::Fail;
+ for (unsigned i = 0; i < 16; ++i) {
+ if (Val & (1 << i)) {
+ if (!Check(S, DecodeGPRRegisterClass(Inst, i, Address, Decoder)))
+ return MCDisassembler::Fail;
+ // Writeback not allowed if Rn is in the target list.
+ if (writebackLoad && writebackReg == Inst.end()[-1].getReg())
+ Check(S, MCDisassembler::SoftFail);
}
}
- // Ditto for SBCSSrs, which is a super-instruction for A8.6.152 & A8.6.153.
- // As a result, the decoder fails to decode LDRH_POST/LDRSB_POST/LDRSH_POST
- // properly.
- if (slice(insn, 27, 25) == 0 && slice(insn, 20, 20) == 1) {
- unsigned PW = slice(insn, 24, 24) << 1 | slice(insn, 21, 21);
- switch (slice(insn, 7, 4)) {
- case 11:
- switch (PW) {
- case 2: // Offset
- return ARM::LDRH;
- case 3: // Pre-indexed
- return ARM::LDRH_PRE;
- case 0: // Post-indexed
- return ARM::LDRH_POST;
- default:
- break; // fallthrough
- }
+ return S;
+}
+
+static DecodeStatus DecodeSPRRegListOperand(llvm::MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder) {
+ DecodeStatus S = MCDisassembler::Success;
+
+ unsigned Vd = fieldFromInstruction32(Val, 8, 4);
+ unsigned regs = Val & 0xFF;
+
+ if (!Check(S, DecodeSPRRegisterClass(Inst, Vd, Address, Decoder)))
+ return MCDisassembler::Fail;
+ for (unsigned i = 0; i < (regs - 1); ++i) {
+ if (!Check(S, DecodeSPRRegisterClass(Inst, ++Vd, Address, Decoder)))
+ return MCDisassembler::Fail;
+ }
+
+ return S;
+}
+
+static DecodeStatus DecodeDPRRegListOperand(llvm::MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder) {
+ DecodeStatus S = MCDisassembler::Success;
+
+ unsigned Vd = fieldFromInstruction32(Val, 8, 4);
+ unsigned regs = (Val & 0xFF) / 2;
+
+ if (!Check(S, DecodeDPRRegisterClass(Inst, Vd, Address, Decoder)))
+ return MCDisassembler::Fail;
+ for (unsigned i = 0; i < (regs - 1); ++i) {
+ if (!Check(S, DecodeDPRRegisterClass(Inst, ++Vd, Address, Decoder)))
+ return MCDisassembler::Fail;
+ }
+
+ return S;
+}
+
+static DecodeStatus DecodeBitfieldMaskOperand(llvm::MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder) {
+ // This operand encodes a mask of contiguous zeros between a specified MSB
+ // and LSB. To decode it, we create the mask of all bits MSB-and-lower and
+ // the mask of all bits LSB-and-lower, then XOR them to get the mask that is
+ // all ones on [msb, lsb]. Finally, we invert that to produce the final
+ // mask.
+ unsigned msb = fieldFromInstruction32(Val, 5, 5);
+ unsigned lsb = fieldFromInstruction32(Val, 0, 5);
+
+ DecodeStatus S = MCDisassembler::Success;
+ if (lsb > msb) Check(S, MCDisassembler::SoftFail);
+
+ uint32_t msb_mask = 0xFFFFFFFF;
+ if (msb != 31) msb_mask = (1U << (msb+1)) - 1;
+ uint32_t lsb_mask = (1U << lsb) - 1;
+
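+ // e.g. msb = 7, lsb = 4: msb_mask = 0xFF and lsb_mask = 0x0F, so the
+ // operand becomes ~(0xFF ^ 0x0F) = 0xFFFFFF0F, i.e. zeros exactly on
+ // bits [7:4].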
+ Inst.addOperand(MCOperand::CreateImm(~(msb_mask ^ lsb_mask)));
+ return S;
+}
+
+static DecodeStatus DecodeCopMemInstruction(llvm::MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder) {
+ DecodeStatus S = MCDisassembler::Success;
+
+ unsigned pred = fieldFromInstruction32(Insn, 28, 4);
+ unsigned CRd = fieldFromInstruction32(Insn, 12, 4);
+ unsigned coproc = fieldFromInstruction32(Insn, 8, 4);
+ unsigned imm = fieldFromInstruction32(Insn, 0, 8);
+ unsigned Rn = fieldFromInstruction32(Insn, 16, 4);
+ unsigned U = fieldFromInstruction32(Insn, 23, 1);
+
+ switch (Inst.getOpcode()) {
+ case ARM::LDC_OFFSET:
+ case ARM::LDC_PRE:
+ case ARM::LDC_POST:
+ case ARM::LDC_OPTION:
+ case ARM::LDCL_OFFSET:
+ case ARM::LDCL_PRE:
+ case ARM::LDCL_POST:
+ case ARM::LDCL_OPTION:
+ case ARM::STC_OFFSET:
+ case ARM::STC_PRE:
+ case ARM::STC_POST:
+ case ARM::STC_OPTION:
+ case ARM::STCL_OFFSET:
+ case ARM::STCL_PRE:
+ case ARM::STCL_POST:
+ case ARM::STCL_OPTION:
+ case ARM::t2LDC_OFFSET:
+ case ARM::t2LDC_PRE:
+ case ARM::t2LDC_POST:
+ case ARM::t2LDC_OPTION:
+ case ARM::t2LDCL_OFFSET:
+ case ARM::t2LDCL_PRE:
+ case ARM::t2LDCL_POST:
+ case ARM::t2LDCL_OPTION:
+ case ARM::t2STC_OFFSET:
+ case ARM::t2STC_PRE:
+ case ARM::t2STC_POST:
+ case ARM::t2STC_OPTION:
+ case ARM::t2STCL_OFFSET:
+ case ARM::t2STCL_PRE:
+ case ARM::t2STCL_POST:
+ case ARM::t2STCL_OPTION:
+ if (coproc == 0xA || coproc == 0xB)
+ return MCDisassembler::Fail;
+ break;
+ default:
break;
- case 13:
- switch (PW) {
- case 2: // Offset
- return ARM::LDRSB;
- case 3: // Pre-indexed
- return ARM::LDRSB_PRE;
- case 0: // Post-indexed
- return ARM::LDRSB_POST;
+ }
+
+ Inst.addOperand(MCOperand::CreateImm(coproc));
+ Inst.addOperand(MCOperand::CreateImm(CRd));
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder)))
+ return MCDisassembler::Fail;
+
+ unsigned P = fieldFromInstruction32(Insn, 24, 1);
+ unsigned W = fieldFromInstruction32(Insn, 21, 1);
+
+ bool writeback = (P == 0) || (W == 1);
+ unsigned idx_mode = 0;
+ if (P && writeback)
+ idx_mode = ARMII::IndexModePre;
+ else if (!P && writeback)
+ idx_mode = ARMII::IndexModePost;
+
+ switch (Inst.getOpcode()) {
+ case ARM::t2LDC2_OFFSET:
+ case ARM::t2LDC2L_OFFSET:
+ case ARM::t2LDC2_PRE:
+ case ARM::t2LDC2L_PRE:
+ case ARM::t2STC2_OFFSET:
+ case ARM::t2STC2L_OFFSET:
+ case ARM::t2STC2_PRE:
+ case ARM::t2STC2L_PRE:
+ case ARM::LDC2_OFFSET:
+ case ARM::LDC2L_OFFSET:
+ case ARM::LDC2_PRE:
+ case ARM::LDC2L_PRE:
+ case ARM::STC2_OFFSET:
+ case ARM::STC2L_OFFSET:
+ case ARM::STC2_PRE:
+ case ARM::STC2L_PRE:
+ case ARM::t2LDC_OFFSET:
+ case ARM::t2LDCL_OFFSET:
+ case ARM::t2LDC_PRE:
+ case ARM::t2LDCL_PRE:
+ case ARM::t2STC_OFFSET:
+ case ARM::t2STCL_OFFSET:
+ case ARM::t2STC_PRE:
+ case ARM::t2STCL_PRE:
+ case ARM::LDC_OFFSET:
+ case ARM::LDCL_OFFSET:
+ case ARM::LDC_PRE:
+ case ARM::LDCL_PRE:
+ case ARM::STC_OFFSET:
+ case ARM::STCL_OFFSET:
+ case ARM::STC_PRE:
+ case ARM::STCL_PRE:
+ imm = ARM_AM::getAM5Opc(U ? ARM_AM::add : ARM_AM::sub, imm);
+ Inst.addOperand(MCOperand::CreateImm(imm));
+ break;
+ case ARM::t2LDC2_POST:
+ case ARM::t2LDC2L_POST:
+ case ARM::t2STC2_POST:
+ case ARM::t2STC2L_POST:
+ case ARM::LDC2_POST:
+ case ARM::LDC2L_POST:
+ case ARM::STC2_POST:
+ case ARM::STC2L_POST:
+ case ARM::t2LDC_POST:
+ case ARM::t2LDCL_POST:
+ case ARM::t2STC_POST:
+ case ARM::t2STCL_POST:
+ case ARM::LDC_POST:
+ case ARM::LDCL_POST:
+ case ARM::STC_POST:
+ case ARM::STCL_POST:
+ imm |= U << 8;
+ // fall through.
+ default:
+ // The 'option' variant doesn't encode 'U' in the immediate since
+ // the immediate is unsigned [0,255].
+ Inst.addOperand(MCOperand::CreateImm(imm));
+ break;
+ }
+
+ switch (Inst.getOpcode()) {
+ case ARM::LDC_OFFSET:
+ case ARM::LDC_PRE:
+ case ARM::LDC_POST:
+ case ARM::LDC_OPTION:
+ case ARM::LDCL_OFFSET:
+ case ARM::LDCL_PRE:
+ case ARM::LDCL_POST:
+ case ARM::LDCL_OPTION:
+ case ARM::STC_OFFSET:
+ case ARM::STC_PRE:
+ case ARM::STC_POST:
+ case ARM::STC_OPTION:
+ case ARM::STCL_OFFSET:
+ case ARM::STCL_PRE:
+ case ARM::STCL_POST:
+ case ARM::STCL_OPTION:
+ if (!Check(S, DecodePredicateOperand(Inst, pred, Address, Decoder)))
+ return MCDisassembler::Fail;
+ break;
+ default:
+ break;
+ }
+
+ return S;
+}
+
+static DecodeStatus
+DecodeAddrMode2IdxInstruction(llvm::MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder) {
+ DecodeStatus S = MCDisassembler::Success;
+
+ unsigned Rn = fieldFromInstruction32(Insn, 16, 4);
+ unsigned Rt = fieldFromInstruction32(Insn, 12, 4);
+ unsigned Rm = fieldFromInstruction32(Insn, 0, 4);
+ unsigned imm = fieldFromInstruction32(Insn, 0, 12);
+ unsigned pred = fieldFromInstruction32(Insn, 28, 4);
+ unsigned reg = fieldFromInstruction32(Insn, 25, 1);
+ unsigned P = fieldFromInstruction32(Insn, 24, 1);
+ unsigned W = fieldFromInstruction32(Insn, 21, 1);
+
+ // On stores, the writeback operand precedes Rt.
+ switch (Inst.getOpcode()) {
+ case ARM::STR_POST_IMM:
+ case ARM::STR_POST_REG:
+ case ARM::STRB_POST_IMM:
+ case ARM::STRB_POST_REG:
+ case ARM::STRT_POST_REG:
+ case ARM::STRT_POST_IMM:
+ case ARM::STRBT_POST_REG:
+ case ARM::STRBT_POST_IMM:
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder)))
+ return MCDisassembler::Fail;
+ break;
+ default:
+ break;
+ }
+
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rt, Address, Decoder)))
+ return MCDisassembler::Fail;
+
+ // On loads, the writeback operand comes after Rt.
+ switch (Inst.getOpcode()) {
+ case ARM::LDR_POST_IMM:
+ case ARM::LDR_POST_REG:
+ case ARM::LDRB_POST_IMM:
+ case ARM::LDRB_POST_REG:
+ case ARM::LDRBT_POST_REG:
+ case ARM::LDRBT_POST_IMM:
+ case ARM::LDRT_POST_REG:
+ case ARM::LDRT_POST_IMM:
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder)))
+ return MCDisassembler::Fail;
+ break;
+ default:
+ break;
+ }
+
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder)))
+ return MCDisassembler::Fail;
+
+ ARM_AM::AddrOpc Op = ARM_AM::add;
+ if (!fieldFromInstruction32(Insn, 23, 1))
+ Op = ARM_AM::sub;
+
+ bool writeback = (P == 0) || (W == 1);
+ unsigned idx_mode = 0;
+ if (P && writeback)
+ idx_mode = ARMII::IndexModePre;
+ else if (!P && writeback)
+ idx_mode = ARMII::IndexModePost;
+
+ if (writeback && (Rn == 15 || Rn == Rt))
+ S = MCDisassembler::SoftFail; // UNPREDICTABLE
+
+ if (reg) {
+ if (!Check(S, DecodeGPRnopcRegisterClass(Inst, Rm, Address, Decoder)))
+ return MCDisassembler::Fail;
+ ARM_AM::ShiftOpc Opc = ARM_AM::lsl;
+ switch (fieldFromInstruction32(Insn, 5, 2)) {
+ case 0:
+ Opc = ARM_AM::lsl;
+ break;
+ case 1:
+ Opc = ARM_AM::lsr;
+ break;
+ case 2:
+ Opc = ARM_AM::asr;
+ break;
+ case 3:
+ Opc = ARM_AM::ror;
+ break;
default:
- break; // fallthrough
- }
+ return MCDisassembler::Fail;
+ }
+ unsigned amt = fieldFromInstruction32(Insn, 7, 5);
+ unsigned imm = ARM_AM::getAM2Opc(Op, amt, Opc, idx_mode);
+
+ Inst.addOperand(MCOperand::CreateImm(imm));
+ } else {
+ Inst.addOperand(MCOperand::CreateReg(0));
+ unsigned tmp = ARM_AM::getAM2Opc(Op, imm, ARM_AM::lsl, idx_mode);
+ Inst.addOperand(MCOperand::CreateImm(tmp));
+ }
+
+ if (!Check(S, DecodePredicateOperand(Inst, pred, Address, Decoder)))
+ return MCDisassembler::Fail;
+
+ return S;
+}
+
+static DecodeStatus DecodeSORegMemOperand(llvm::MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder) {
+ DecodeStatus S = MCDisassembler::Success;
+
+ unsigned Rn = fieldFromInstruction32(Val, 13, 4);
+ unsigned Rm = fieldFromInstruction32(Val, 0, 4);
+ unsigned type = fieldFromInstruction32(Val, 5, 2);
+ unsigned imm = fieldFromInstruction32(Val, 7, 5);
+ unsigned U = fieldFromInstruction32(Val, 12, 1);
+
+ ARM_AM::ShiftOpc ShOp = ARM_AM::lsl;
+ switch (type) {
+ case 0:
+ ShOp = ARM_AM::lsl;
+ break;
+ case 1:
+ ShOp = ARM_AM::lsr;
+ break;
+ case 2:
+ ShOp = ARM_AM::asr;
+ break;
+ case 3:
+ ShOp = ARM_AM::ror;
+ break;
+ }
+
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rm, Address, Decoder)))
+ return MCDisassembler::Fail;
+ unsigned shift;
+ if (U)
+ shift = ARM_AM::getAM2Opc(ARM_AM::add, imm, ShOp);
+ else
+ shift = ARM_AM::getAM2Opc(ARM_AM::sub, imm, ShOp);
+ Inst.addOperand(MCOperand::CreateImm(shift));
+
+ return S;
+}
+
+static DecodeStatus
+DecodeAddrMode3Instruction(llvm::MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder) {
+ DecodeStatus S = MCDisassembler::Success;
+
+ unsigned Rt = fieldFromInstruction32(Insn, 12, 4);
+ unsigned Rn = fieldFromInstruction32(Insn, 16, 4);
+ unsigned Rm = fieldFromInstruction32(Insn, 0, 4);
+ unsigned type = fieldFromInstruction32(Insn, 22, 1);
+ unsigned imm = fieldFromInstruction32(Insn, 8, 4);
+ unsigned U = ((~fieldFromInstruction32(Insn, 23, 1)) & 1) << 8;
+ unsigned pred = fieldFromInstruction32(Insn, 28, 4);
+ unsigned W = fieldFromInstruction32(Insn, 21, 1);
+ unsigned P = fieldFromInstruction32(Insn, 24, 1);
+
+ bool writeback = (W == 1) || (P == 0);
+
+ // For {LD,ST}RD, Rt must be even, else undefined.
+ switch (Inst.getOpcode()) {
+ case ARM::STRD:
+ case ARM::STRD_PRE:
+ case ARM::STRD_POST:
+ case ARM::LDRD:
+ case ARM::LDRD_PRE:
+ case ARM::LDRD_POST:
+ if (Rt & 0x1) return MCDisassembler::Fail;
+ break;
+ default:
+ break;
+ }
+
+ if (writeback) { // Writeback
+ if (P)
+ U |= ARMII::IndexModePre << 9;
+ else
+ U |= ARMII::IndexModePost << 9;
+
+ // On stores, the writeback operand precedes Rt.
+ switch (Inst.getOpcode()) {
+ case ARM::STRD:
+ case ARM::STRD_PRE:
+ case ARM::STRD_POST:
+ case ARM::STRH:
+ case ARM::STRH_PRE:
+ case ARM::STRH_POST:
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder)))
+ return MCDisassembler::Fail;
+ break;
+ default:
+ break;
+ }
+ }
+
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rt, Address, Decoder)))
+ return MCDisassembler::Fail;
+ switch (Inst.getOpcode()) {
+ case ARM::STRD:
+ case ARM::STRD_PRE:
+ case ARM::STRD_POST:
+ case ARM::LDRD:
+ case ARM::LDRD_PRE:
+ case ARM::LDRD_POST:
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rt+1, Address, Decoder)))
+ return MCDisassembler::Fail;
break;
- case 15:
- switch (PW) {
- case 2: // Offset
- return ARM::LDRSH;
- case 3: // Pre-indexed
- return ARM::LDRSH_PRE;
- case 0: // Post-indexed
- return ARM::LDRSH_POST;
+ default:
+ break;
+ }
+
+ if (writeback) {
+ // On loads, the writeback operand comes after Rt.
+ switch (Inst.getOpcode()) {
+ case ARM::LDRD:
+ case ARM::LDRD_PRE:
+ case ARM::LDRD_POST:
+ case ARM::LDRH:
+ case ARM::LDRH_PRE:
+ case ARM::LDRH_POST:
+ case ARM::LDRSH:
+ case ARM::LDRSH_PRE:
+ case ARM::LDRSH_POST:
+ case ARM::LDRSB:
+ case ARM::LDRSB_PRE:
+ case ARM::LDRSB_POST:
+ case ARM::LDRHTr:
+ case ARM::LDRSBTr:
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder)))
+ return MCDisassembler::Fail;
+ break;
+ default:
+ break;
+ }
+ }
+
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder)))
+ return MCDisassembler::Fail;
+
+ if (type) {
+ Inst.addOperand(MCOperand::CreateReg(0));
+ Inst.addOperand(MCOperand::CreateImm(U | (imm << 4) | Rm));
+ } else {
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rm, Address, Decoder)))
+ return MCDisassembler::Fail;
+ Inst.addOperand(MCOperand::CreateImm(U));
+ }
+
+ if (!Check(S, DecodePredicateOperand(Inst, pred, Address, Decoder)))
+ return MCDisassembler::Fail;
+
+ return S;
+}
+
+static DecodeStatus DecodeRFEInstruction(llvm::MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder) {
+ DecodeStatus S = MCDisassembler::Success;
+
+ unsigned Rn = fieldFromInstruction32(Insn, 16, 4);
+ unsigned mode = fieldFromInstruction32(Insn, 23, 2);
+
+ switch (mode) {
+ case 0:
+ mode = ARM_AM::da;
+ break;
+ case 1:
+ mode = ARM_AM::ia;
+ break;
+ case 2:
+ mode = ARM_AM::db;
+ break;
+ case 3:
+ mode = ARM_AM::ib;
+ break;
+ }
+
+ Inst.addOperand(MCOperand::CreateImm(mode));
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder)))
+ return MCDisassembler::Fail;
+
+ return S;
+}
+
+static DecodeStatus DecodeMemMultipleWritebackInstruction(llvm::MCInst &Inst,
+ unsigned Insn,
+ uint64_t Address, const void *Decoder) {
+ DecodeStatus S = MCDisassembler::Success;
+
+ unsigned Rn = fieldFromInstruction32(Insn, 16, 4);
+ unsigned pred = fieldFromInstruction32(Insn, 28, 4);
+ unsigned reglist = fieldFromInstruction32(Insn, 0, 16);
+
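+ // A predicate field of 0b1111 means these encodings are really RFE/SRS
+ // variants, so rewrite the opcode accordingly.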
+ if (pred == 0xF) {
+ switch (Inst.getOpcode()) {
+ case ARM::LDMDA:
+ Inst.setOpcode(ARM::RFEDA);
+ break;
+ case ARM::LDMDA_UPD:
+ Inst.setOpcode(ARM::RFEDA_UPD);
+ break;
+ case ARM::LDMDB:
+ Inst.setOpcode(ARM::RFEDB);
+ break;
+ case ARM::LDMDB_UPD:
+ Inst.setOpcode(ARM::RFEDB_UPD);
+ break;
+ case ARM::LDMIA:
+ Inst.setOpcode(ARM::RFEIA);
+ break;
+ case ARM::LDMIA_UPD:
+ Inst.setOpcode(ARM::RFEIA_UPD);
+ break;
+ case ARM::LDMIB:
+ Inst.setOpcode(ARM::RFEIB);
+ break;
+ case ARM::LDMIB_UPD:
+ Inst.setOpcode(ARM::RFEIB_UPD);
+ break;
+ case ARM::STMDA:
+ Inst.setOpcode(ARM::SRSDA);
+ break;
+ case ARM::STMDA_UPD:
+ Inst.setOpcode(ARM::SRSDA_UPD);
+ break;
+ case ARM::STMDB:
+ Inst.setOpcode(ARM::SRSDB);
+ break;
+ case ARM::STMDB_UPD:
+ Inst.setOpcode(ARM::SRSDB_UPD);
+ break;
+ case ARM::STMIA:
+ Inst.setOpcode(ARM::SRSIA);
+ break;
+ case ARM::STMIA_UPD:
+ Inst.setOpcode(ARM::SRSIA_UPD);
+ break;
+ case ARM::STMIB:
+ Inst.setOpcode(ARM::SRSIB);
+ break;
+ case ARM::STMIB_UPD:
+ Inst.setOpcode(ARM::SRSIB_UPD);
+ break;
default:
- break; // fallthrough
- }
+ if (!Check(S, MCDisassembler::Fail)) return MCDisassembler::Fail;
+ }
+
+ // For stores (which become SRSs), the only operand is the mode.
+ if (fieldFromInstruction32(Insn, 20, 1) == 0) {
+ Inst.addOperand(
+ MCOperand::CreateImm(fieldFromInstruction32(Insn, 0, 4)));
+ return S;
+ }
+
+ return DecodeRFEInstruction(Inst, Insn, Address, Decoder);
+ }
+
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder)))
+ return MCDisassembler::Fail; // Tied
+ if (!Check(S, DecodePredicateOperand(Inst, pred, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodeRegListOperand(Inst, reglist, Address, Decoder)))
+ return MCDisassembler::Fail;
+
+ return S;
+}
+
+static DecodeStatus DecodeCPSInstruction(llvm::MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder) {
+ unsigned imod = fieldFromInstruction32(Insn, 18, 2);
+ unsigned M = fieldFromInstruction32(Insn, 17, 1);
+ unsigned iflags = fieldFromInstruction32(Insn, 6, 3);
+ unsigned mode = fieldFromInstruction32(Insn, 0, 5);
+
+ DecodeStatus S = MCDisassembler::Success;
+
+ // imod == '01' --> UNPREDICTABLE
+ // NOTE: Even though this is technically UNPREDICTABLE, we choose to
+ // return failure here. The '01' imod value is unprintable, so there's
+ // nothing useful we could do even if we returned UNPREDICTABLE.
+
+ if (imod == 1) return MCDisassembler::Fail;
+
+ if (imod && M) {
+ Inst.setOpcode(ARM::CPS3p);
+ Inst.addOperand(MCOperand::CreateImm(imod));
+ Inst.addOperand(MCOperand::CreateImm(iflags));
+ Inst.addOperand(MCOperand::CreateImm(mode));
+ } else if (imod && !M) {
+ Inst.setOpcode(ARM::CPS2p);
+ Inst.addOperand(MCOperand::CreateImm(imod));
+ Inst.addOperand(MCOperand::CreateImm(iflags));
+ if (mode) S = MCDisassembler::SoftFail;
+ } else if (!imod && M) {
+ Inst.setOpcode(ARM::CPS1p);
+ Inst.addOperand(MCOperand::CreateImm(mode));
+ if (iflags) S = MCDisassembler::SoftFail;
+ } else {
+ // imod == '00' && M == '0' --> UNPREDICTABLE
+ Inst.setOpcode(ARM::CPS1p);
+ Inst.addOperand(MCOperand::CreateImm(mode));
+ S = MCDisassembler::SoftFail;
+ }
+
+ return S;
+}
+
+static DecodeStatus DecodeT2CPSInstruction(llvm::MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder) {
+ unsigned imod = fieldFromInstruction32(Insn, 9, 2);
+ unsigned M = fieldFromInstruction32(Insn, 8, 1);
+ unsigned iflags = fieldFromInstruction32(Insn, 5, 3);
+ unsigned mode = fieldFromInstruction32(Insn, 0, 5);
+
+ DecodeStatus S = MCDisassembler::Success;
+
+ // imod == '01' --> UNPREDICTABLE
+ // NOTE: Even though this is technically UNPREDICTABLE, we choose to
+ // return failure here. The '01' imod value is unprintable, so there's
+ // nothing useful we could do even if we returned UNPREDICTABLE.
+
+ if (imod == 1) return MCDisassembler::Fail;
+
+ if (imod && M) {
+ Inst.setOpcode(ARM::t2CPS3p);
+ Inst.addOperand(MCOperand::CreateImm(imod));
+ Inst.addOperand(MCOperand::CreateImm(iflags));
+ Inst.addOperand(MCOperand::CreateImm(mode));
+ } else if (imod && !M) {
+ Inst.setOpcode(ARM::t2CPS2p);
+ Inst.addOperand(MCOperand::CreateImm(imod));
+ Inst.addOperand(MCOperand::CreateImm(iflags));
+ if (mode) S = MCDisassembler::SoftFail;
+ } else if (!imod && M) {
+ Inst.setOpcode(ARM::t2CPS1p);
+ Inst.addOperand(MCOperand::CreateImm(mode));
+ if (iflags) S = MCDisassembler::SoftFail;
+ } else {
+ // imod == '00' && M == '0' --> UNPREDICTABLE
+ Inst.setOpcode(ARM::t2CPS1p);
+ Inst.addOperand(MCOperand::CreateImm(mode));
+ S = MCDisassembler::SoftFail;
+ }
+
+ return S;
+}
+
+static DecodeStatus DecodeT2MOVTWInstruction(llvm::MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder) {
+ DecodeStatus S = MCDisassembler::Success;
+
+ unsigned Rd = fieldFromInstruction32(Insn, 8, 4);
+ unsigned imm = 0;
+
+ imm |= (fieldFromInstruction32(Insn, 0, 8) << 0);
+ imm |= (fieldFromInstruction32(Insn, 12, 3) << 8);
+ imm |= (fieldFromInstruction32(Insn, 16, 4) << 12);
+ imm |= (fieldFromInstruction32(Insn, 26, 1) << 11);
+
+ if (Inst.getOpcode() == ARM::t2MOVTi16)
+ if (!Check(S, DecoderGPRRegisterClass(Inst, Rd, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecoderGPRRegisterClass(Inst, Rd, Address, Decoder)))
+ return MCDisassembler::Fail;
+
+ if (!tryAddingSymbolicOperand(Address, imm, false, 4, Inst, Decoder))
+ Inst.addOperand(MCOperand::CreateImm(imm));
+
+ return S;
+}
+
+static DecodeStatus DecodeArmMOVTWInstruction(llvm::MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder) {
+ DecodeStatus S = MCDisassembler::Success;
+
+ unsigned Rd = fieldFromInstruction32(Insn, 12, 4);
+ unsigned pred = fieldFromInstruction32(Insn, 28, 4);
+ unsigned imm = 0;
+
+ imm |= (fieldFromInstruction32(Insn, 0, 12) << 0);
+ imm |= (fieldFromInstruction32(Insn, 16, 4) << 12);
+
+ if (Inst.getOpcode() == ARM::MOVTi16)
+ if (!Check(S, DecoderGPRRegisterClass(Inst, Rd, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecoderGPRRegisterClass(Inst, Rd, Address, Decoder)))
+ return MCDisassembler::Fail;
+
+ if (!tryAddingSymbolicOperand(Address, imm, false, 4, Inst, Decoder))
+ Inst.addOperand(MCOperand::CreateImm(imm));
+
+ if (!Check(S, DecodePredicateOperand(Inst, pred, Address, Decoder)))
+ return MCDisassembler::Fail;
+
+ return S;
+}
+
+static DecodeStatus DecodeSMLAInstruction(llvm::MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder) {
+ DecodeStatus S = MCDisassembler::Success;
+
+ unsigned Rd = fieldFromInstruction32(Insn, 16, 4);
+ unsigned Rn = fieldFromInstruction32(Insn, 0, 4);
+ unsigned Rm = fieldFromInstruction32(Insn, 8, 4);
+ unsigned Ra = fieldFromInstruction32(Insn, 12, 4);
+ unsigned pred = fieldFromInstruction32(Insn, 28, 4);
+
+ if (pred == 0xF)
+ return DecodeCPSInstruction(Inst, Insn, Address, Decoder);
+
+ if (!Check(S, DecodeGPRnopcRegisterClass(Inst, Rd, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodeGPRnopcRegisterClass(Inst, Rn, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodeGPRnopcRegisterClass(Inst, Rm, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodeGPRnopcRegisterClass(Inst, Ra, Address, Decoder)))
+ return MCDisassembler::Fail;
+
+ if (!Check(S, DecodePredicateOperand(Inst, pred, Address, Decoder)))
+ return MCDisassembler::Fail;
+
+ return S;
+}
+
+static DecodeStatus DecodeAddrModeImm12Operand(llvm::MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder) {
+ DecodeStatus S = MCDisassembler::Success;
+
+ unsigned add = fieldFromInstruction32(Val, 12, 1);
+ unsigned imm = fieldFromInstruction32(Val, 0, 12);
+ unsigned Rn = fieldFromInstruction32(Val, 13, 4);
+
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder)))
+ return MCDisassembler::Fail;
+
+ if (!add) imm *= -1;
+ if (imm == 0 && !add) imm = INT32_MIN;
+ Inst.addOperand(MCOperand::CreateImm(imm));
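+ // In ARM mode the PC reads as the current instruction's address plus 8,
+ // hence the +8 when reconstructing the literal-pool address below.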
+ if (Rn == 15)
+ tryAddingPcLoadReferenceComment(Address, Address + imm + 8, Decoder);
+
+ return S;
+}
+
+static DecodeStatus DecodeAddrMode5Operand(llvm::MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder) {
+ DecodeStatus S = MCDisassembler::Success;
+
+ unsigned Rn = fieldFromInstruction32(Val, 9, 4);
+ unsigned U = fieldFromInstruction32(Val, 8, 1);
+ unsigned imm = fieldFromInstruction32(Val, 0, 8);
+
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder)))
+ return MCDisassembler::Fail;
+
+ if (U)
+ Inst.addOperand(MCOperand::CreateImm(ARM_AM::getAM5Opc(ARM_AM::add, imm)));
+ else
+ Inst.addOperand(MCOperand::CreateImm(ARM_AM::getAM5Opc(ARM_AM::sub, imm)));
+
+ return S;
+}
+
+static DecodeStatus DecodeAddrMode7Operand(llvm::MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder) {
+ return DecodeGPRRegisterClass(Inst, Val, Address, Decoder);
+}
+
+static DecodeStatus
+DecodeBranchImmInstruction(llvm::MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder) {
+ DecodeStatus S = MCDisassembler::Success;
+
+ unsigned pred = fieldFromInstruction32(Insn, 28, 4);
+ unsigned imm = fieldFromInstruction32(Insn, 0, 24) << 2;
+
+ if (pred == 0xF) {
+ Inst.setOpcode(ARM::BLXi);
+ imm |= fieldFromInstruction32(Insn, 24, 1) << 1;
+ Inst.addOperand(MCOperand::CreateImm(SignExtend32<26>(imm)));
+ return S;
+ }
+
+ if (!tryAddingSymbolicOperand(Address, Address + SignExtend32<26>(imm) + 8,
+ true, 4, Inst, Decoder))
+ Inst.addOperand(MCOperand::CreateImm(SignExtend32<26>(imm)));
+ if (!Check(S, DecodePredicateOperand(Inst, pred, Address, Decoder)))
+ return MCDisassembler::Fail;
+
+ return S;
+}
+
+static DecodeStatus DecodeVCVTImmOperand(llvm::MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder) {
+ Inst.addOperand(MCOperand::CreateImm(64 - Val));
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus DecodeAddrMode6Operand(llvm::MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder) {
+ DecodeStatus S = MCDisassembler::Success;
+
+ unsigned Rm = fieldFromInstruction32(Val, 0, 4);
+ unsigned align = fieldFromInstruction32(Val, 4, 2);
+
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rm, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!align)
+ Inst.addOperand(MCOperand::CreateImm(0));
+ else
+ Inst.addOperand(MCOperand::CreateImm(4 << align));
+
+ return S;
+}
+
+static DecodeStatus DecodeVLDInstruction(llvm::MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder) {
+ DecodeStatus S = MCDisassembler::Success;
+
+ unsigned Rd = fieldFromInstruction32(Insn, 12, 4);
+ Rd |= fieldFromInstruction32(Insn, 22, 1) << 4;
+ unsigned wb = fieldFromInstruction32(Insn, 16, 4);
+ unsigned Rn = fieldFromInstruction32(Insn, 16, 4);
+ Rn |= fieldFromInstruction32(Insn, 4, 2) << 4;
+ unsigned Rm = fieldFromInstruction32(Insn, 0, 4);
+
+ // First output register
+ if (!Check(S, DecodeDPRRegisterClass(Inst, Rd, Address, Decoder)))
+ return MCDisassembler::Fail;
+
+ // Second output register
+ switch (Inst.getOpcode()) {
+ case ARM::VLD1q8:
+ case ARM::VLD1q16:
+ case ARM::VLD1q32:
+ case ARM::VLD1q64:
+ case ARM::VLD1q8_UPD:
+ case ARM::VLD1q16_UPD:
+ case ARM::VLD1q32_UPD:
+ case ARM::VLD1q64_UPD:
+ case ARM::VLD1d8T:
+ case ARM::VLD1d16T:
+ case ARM::VLD1d32T:
+ case ARM::VLD1d64T:
+ case ARM::VLD1d8T_UPD:
+ case ARM::VLD1d16T_UPD:
+ case ARM::VLD1d32T_UPD:
+ case ARM::VLD1d64T_UPD:
+ case ARM::VLD1d8Q:
+ case ARM::VLD1d16Q:
+ case ARM::VLD1d32Q:
+ case ARM::VLD1d64Q:
+ case ARM::VLD1d8Q_UPD:
+ case ARM::VLD1d16Q_UPD:
+ case ARM::VLD1d32Q_UPD:
+ case ARM::VLD1d64Q_UPD:
+ case ARM::VLD2d8:
+ case ARM::VLD2d16:
+ case ARM::VLD2d32:
+ case ARM::VLD2d8_UPD:
+ case ARM::VLD2d16_UPD:
+ case ARM::VLD2d32_UPD:
+ case ARM::VLD2q8:
+ case ARM::VLD2q16:
+ case ARM::VLD2q32:
+ case ARM::VLD2q8_UPD:
+ case ARM::VLD2q16_UPD:
+ case ARM::VLD2q32_UPD:
+ case ARM::VLD3d8:
+ case ARM::VLD3d16:
+ case ARM::VLD3d32:
+ case ARM::VLD3d8_UPD:
+ case ARM::VLD3d16_UPD:
+ case ARM::VLD3d32_UPD:
+ case ARM::VLD4d8:
+ case ARM::VLD4d16:
+ case ARM::VLD4d32:
+ case ARM::VLD4d8_UPD:
+ case ARM::VLD4d16_UPD:
+ case ARM::VLD4d32_UPD:
+ if (!Check(S, DecodeDPRRegisterClass(Inst, (Rd+1)%32, Address, Decoder)))
+ return MCDisassembler::Fail;
break;
+ case ARM::VLD2b8:
+ case ARM::VLD2b16:
+ case ARM::VLD2b32:
+ case ARM::VLD2b8_UPD:
+ case ARM::VLD2b16_UPD:
+ case ARM::VLD2b32_UPD:
+ case ARM::VLD3q8:
+ case ARM::VLD3q16:
+ case ARM::VLD3q32:
+ case ARM::VLD3q8_UPD:
+ case ARM::VLD3q16_UPD:
+ case ARM::VLD3q32_UPD:
+ case ARM::VLD4q8:
+ case ARM::VLD4q16:
+ case ARM::VLD4q32:
+ case ARM::VLD4q8_UPD:
+ case ARM::VLD4q16_UPD:
+ case ARM::VLD4q32_UPD:
+ if (!Check(S, DecodeDPRRegisterClass(Inst, (Rd+2)%32, Address, Decoder)))
+      return MCDisassembler::Fail;
+    break;
default:
- break; // fallthrough
+    break;
+ }
+
+ // Third output register
+ switch(Inst.getOpcode()) {
+ case ARM::VLD1d8T:
+ case ARM::VLD1d16T:
+ case ARM::VLD1d32T:
+ case ARM::VLD1d64T:
+ case ARM::VLD1d8T_UPD:
+ case ARM::VLD1d16T_UPD:
+ case ARM::VLD1d32T_UPD:
+ case ARM::VLD1d64T_UPD:
+ case ARM::VLD1d8Q:
+ case ARM::VLD1d16Q:
+ case ARM::VLD1d32Q:
+ case ARM::VLD1d64Q:
+ case ARM::VLD1d8Q_UPD:
+ case ARM::VLD1d16Q_UPD:
+ case ARM::VLD1d32Q_UPD:
+ case ARM::VLD1d64Q_UPD:
+ case ARM::VLD2q8:
+ case ARM::VLD2q16:
+ case ARM::VLD2q32:
+ case ARM::VLD2q8_UPD:
+ case ARM::VLD2q16_UPD:
+ case ARM::VLD2q32_UPD:
+ case ARM::VLD3d8:
+ case ARM::VLD3d16:
+ case ARM::VLD3d32:
+ case ARM::VLD3d8_UPD:
+ case ARM::VLD3d16_UPD:
+ case ARM::VLD3d32_UPD:
+ case ARM::VLD4d8:
+ case ARM::VLD4d16:
+ case ARM::VLD4d32:
+ case ARM::VLD4d8_UPD:
+ case ARM::VLD4d16_UPD:
+ case ARM::VLD4d32_UPD:
+ if (!Check(S, DecodeDPRRegisterClass(Inst, (Rd+2)%32, Address, Decoder)))
+ return MCDisassembler::Fail;
+ break;
+ case ARM::VLD3q8:
+ case ARM::VLD3q16:
+ case ARM::VLD3q32:
+ case ARM::VLD3q8_UPD:
+ case ARM::VLD3q16_UPD:
+ case ARM::VLD3q32_UPD:
+ case ARM::VLD4q8:
+ case ARM::VLD4q16:
+ case ARM::VLD4q32:
+ case ARM::VLD4q8_UPD:
+ case ARM::VLD4q16_UPD:
+ case ARM::VLD4q32_UPD:
+ if (!Check(S, DecodeDPRRegisterClass(Inst, (Rd+4)%32, Address, Decoder)))
+ return MCDisassembler::Fail;
+ break;
+ default:
+ break;
+ }
+
+ // Fourth output register
+ switch (Inst.getOpcode()) {
+ case ARM::VLD1d8Q:
+ case ARM::VLD1d16Q:
+ case ARM::VLD1d32Q:
+ case ARM::VLD1d64Q:
+ case ARM::VLD1d8Q_UPD:
+ case ARM::VLD1d16Q_UPD:
+ case ARM::VLD1d32Q_UPD:
+ case ARM::VLD1d64Q_UPD:
+ case ARM::VLD2q8:
+ case ARM::VLD2q16:
+ case ARM::VLD2q32:
+ case ARM::VLD2q8_UPD:
+ case ARM::VLD2q16_UPD:
+ case ARM::VLD2q32_UPD:
+ case ARM::VLD4d8:
+ case ARM::VLD4d16:
+ case ARM::VLD4d32:
+ case ARM::VLD4d8_UPD:
+ case ARM::VLD4d16_UPD:
+ case ARM::VLD4d32_UPD:
+ if (!Check(S, DecodeDPRRegisterClass(Inst, (Rd+3)%32, Address, Decoder)))
+ return MCDisassembler::Fail;
+ break;
+ case ARM::VLD4q8:
+ case ARM::VLD4q16:
+ case ARM::VLD4q32:
+ case ARM::VLD4q8_UPD:
+ case ARM::VLD4q16_UPD:
+ case ARM::VLD4q32_UPD:
+ if (!Check(S, DecodeDPRRegisterClass(Inst, (Rd+6)%32, Address, Decoder)))
+ return MCDisassembler::Fail;
+ break;
+ default:
+ break;
+ }
+
+ // Writeback operand
+ switch (Inst.getOpcode()) {
+ case ARM::VLD1d8_UPD:
+ case ARM::VLD1d16_UPD:
+ case ARM::VLD1d32_UPD:
+ case ARM::VLD1d64_UPD:
+ case ARM::VLD1q8_UPD:
+ case ARM::VLD1q16_UPD:
+ case ARM::VLD1q32_UPD:
+ case ARM::VLD1q64_UPD:
+ case ARM::VLD1d8T_UPD:
+ case ARM::VLD1d16T_UPD:
+ case ARM::VLD1d32T_UPD:
+ case ARM::VLD1d64T_UPD:
+ case ARM::VLD1d8Q_UPD:
+ case ARM::VLD1d16Q_UPD:
+ case ARM::VLD1d32Q_UPD:
+ case ARM::VLD1d64Q_UPD:
+ case ARM::VLD2d8_UPD:
+ case ARM::VLD2d16_UPD:
+ case ARM::VLD2d32_UPD:
+ case ARM::VLD2q8_UPD:
+ case ARM::VLD2q16_UPD:
+ case ARM::VLD2q32_UPD:
+ case ARM::VLD2b8_UPD:
+ case ARM::VLD2b16_UPD:
+ case ARM::VLD2b32_UPD:
+ case ARM::VLD3d8_UPD:
+ case ARM::VLD3d16_UPD:
+ case ARM::VLD3d32_UPD:
+ case ARM::VLD3q8_UPD:
+ case ARM::VLD3q16_UPD:
+ case ARM::VLD3q32_UPD:
+ case ARM::VLD4d8_UPD:
+ case ARM::VLD4d16_UPD:
+ case ARM::VLD4d32_UPD:
+ case ARM::VLD4q8_UPD:
+ case ARM::VLD4q16_UPD:
+ case ARM::VLD4q32_UPD:
+ if (!Check(S, DecodeGPRRegisterClass(Inst, wb, Address, Decoder)))
+ return MCDisassembler::Fail;
+ break;
+ default:
+ break;
+ }
+
+ // AddrMode6 Base (register+alignment)
+ if (!Check(S, DecodeAddrMode6Operand(Inst, Rn, Address, Decoder)))
+ return MCDisassembler::Fail;
+
+ // AddrMode6 Offset (register)
+ if (Rm == 0xD)
+ Inst.addOperand(MCOperand::CreateReg(0));
+ else if (Rm != 0xF) {
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rm, Address, Decoder)))
+ return MCDisassembler::Fail;
+ }
+
+ return S;
+}
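+// Note (illustrative): the switches above implement the NEON single- vs.
+// double-spaced register lists, e.g. VLD4q32 with Rd == 2 emits
+// D2, D4, D6, D8 while VLD4d32 emits D2, D3, D4, D5; the %32 wrap merely
+// keeps malformed encodings near D31 inside the register file.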
+
+static DecodeStatus DecodeVSTInstruction(llvm::MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder) {
+ DecodeStatus S = MCDisassembler::Success;
+
+ unsigned Rd = fieldFromInstruction32(Insn, 12, 4);
+ Rd |= fieldFromInstruction32(Insn, 22, 1) << 4;
+ unsigned wb = fieldFromInstruction32(Insn, 16, 4);
+ unsigned Rn = fieldFromInstruction32(Insn, 16, 4);
+ Rn |= fieldFromInstruction32(Insn, 4, 2) << 4;
+ unsigned Rm = fieldFromInstruction32(Insn, 0, 4);
+
+ // Writeback Operand
+ switch (Inst.getOpcode()) {
+ case ARM::VST1d8_UPD:
+ case ARM::VST1d16_UPD:
+ case ARM::VST1d32_UPD:
+ case ARM::VST1d64_UPD:
+ case ARM::VST1q8_UPD:
+ case ARM::VST1q16_UPD:
+ case ARM::VST1q32_UPD:
+ case ARM::VST1q64_UPD:
+ case ARM::VST1d8T_UPD:
+ case ARM::VST1d16T_UPD:
+ case ARM::VST1d32T_UPD:
+ case ARM::VST1d64T_UPD:
+ case ARM::VST1d8Q_UPD:
+ case ARM::VST1d16Q_UPD:
+ case ARM::VST1d32Q_UPD:
+ case ARM::VST1d64Q_UPD:
+ case ARM::VST2d8_UPD:
+ case ARM::VST2d16_UPD:
+ case ARM::VST2d32_UPD:
+ case ARM::VST2q8_UPD:
+ case ARM::VST2q16_UPD:
+ case ARM::VST2q32_UPD:
+ case ARM::VST2b8_UPD:
+ case ARM::VST2b16_UPD:
+ case ARM::VST2b32_UPD:
+ case ARM::VST3d8_UPD:
+ case ARM::VST3d16_UPD:
+ case ARM::VST3d32_UPD:
+ case ARM::VST3q8_UPD:
+ case ARM::VST3q16_UPD:
+ case ARM::VST3q32_UPD:
+ case ARM::VST4d8_UPD:
+ case ARM::VST4d16_UPD:
+ case ARM::VST4d32_UPD:
+ case ARM::VST4q8_UPD:
+ case ARM::VST4q16_UPD:
+ case ARM::VST4q32_UPD:
+ if (!Check(S, DecodeGPRRegisterClass(Inst, wb, Address, Decoder)))
+ return MCDisassembler::Fail;
+ break;
+ default:
+ break;
+ }
+
+ // AddrMode6 Base (register+alignment)
+ if (!Check(S, DecodeAddrMode6Operand(Inst, Rn, Address, Decoder)))
+ return MCDisassembler::Fail;
+
+ // AddrMode6 Offset (register)
+ if (Rm == 0xD)
+ Inst.addOperand(MCOperand::CreateReg(0));
+ else if (Rm != 0xF) {
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rm, Address, Decoder)))
+ return MCDisassembler::Fail;
+ }
+
+ // First input register
+ if (!Check(S, DecodeDPRRegisterClass(Inst, Rd, Address, Decoder)))
+ return MCDisassembler::Fail;
+
+ // Second input register
+ switch (Inst.getOpcode()) {
+ case ARM::VST1q8:
+ case ARM::VST1q16:
+ case ARM::VST1q32:
+ case ARM::VST1q64:
+ case ARM::VST1q8_UPD:
+ case ARM::VST1q16_UPD:
+ case ARM::VST1q32_UPD:
+ case ARM::VST1q64_UPD:
+ case ARM::VST1d8T:
+ case ARM::VST1d16T:
+ case ARM::VST1d32T:
+ case ARM::VST1d64T:
+ case ARM::VST1d8T_UPD:
+ case ARM::VST1d16T_UPD:
+ case ARM::VST1d32T_UPD:
+ case ARM::VST1d64T_UPD:
+ case ARM::VST1d8Q:
+ case ARM::VST1d16Q:
+ case ARM::VST1d32Q:
+ case ARM::VST1d64Q:
+ case ARM::VST1d8Q_UPD:
+ case ARM::VST1d16Q_UPD:
+ case ARM::VST1d32Q_UPD:
+ case ARM::VST1d64Q_UPD:
+ case ARM::VST2d8:
+ case ARM::VST2d16:
+ case ARM::VST2d32:
+ case ARM::VST2d8_UPD:
+ case ARM::VST2d16_UPD:
+ case ARM::VST2d32_UPD:
+ case ARM::VST2q8:
+ case ARM::VST2q16:
+ case ARM::VST2q32:
+ case ARM::VST2q8_UPD:
+ case ARM::VST2q16_UPD:
+ case ARM::VST2q32_UPD:
+ case ARM::VST3d8:
+ case ARM::VST3d16:
+ case ARM::VST3d32:
+ case ARM::VST3d8_UPD:
+ case ARM::VST3d16_UPD:
+ case ARM::VST3d32_UPD:
+ case ARM::VST4d8:
+ case ARM::VST4d16:
+ case ARM::VST4d32:
+ case ARM::VST4d8_UPD:
+ case ARM::VST4d16_UPD:
+ case ARM::VST4d32_UPD:
+ if (!Check(S, DecodeDPRRegisterClass(Inst, (Rd+1)%32, Address, Decoder)))
+ return MCDisassembler::Fail;
+ break;
+ case ARM::VST2b8:
+ case ARM::VST2b16:
+ case ARM::VST2b32:
+ case ARM::VST2b8_UPD:
+ case ARM::VST2b16_UPD:
+ case ARM::VST2b32_UPD:
+ case ARM::VST3q8:
+ case ARM::VST3q16:
+ case ARM::VST3q32:
+ case ARM::VST3q8_UPD:
+ case ARM::VST3q16_UPD:
+ case ARM::VST3q32_UPD:
+ case ARM::VST4q8:
+ case ARM::VST4q16:
+ case ARM::VST4q32:
+ case ARM::VST4q8_UPD:
+ case ARM::VST4q16_UPD:
+ case ARM::VST4q32_UPD:
+ if (!Check(S, DecodeDPRRegisterClass(Inst, (Rd+2)%32, Address, Decoder)))
+ return MCDisassembler::Fail;
+ break;
+ default:
+ break;
+ }
+
+ // Third input register
+ switch (Inst.getOpcode()) {
+ case ARM::VST1d8T:
+ case ARM::VST1d16T:
+ case ARM::VST1d32T:
+ case ARM::VST1d64T:
+ case ARM::VST1d8T_UPD:
+ case ARM::VST1d16T_UPD:
+ case ARM::VST1d32T_UPD:
+ case ARM::VST1d64T_UPD:
+ case ARM::VST1d8Q:
+ case ARM::VST1d16Q:
+ case ARM::VST1d32Q:
+ case ARM::VST1d64Q:
+ case ARM::VST1d8Q_UPD:
+ case ARM::VST1d16Q_UPD:
+ case ARM::VST1d32Q_UPD:
+ case ARM::VST1d64Q_UPD:
+ case ARM::VST2q8:
+ case ARM::VST2q16:
+ case ARM::VST2q32:
+ case ARM::VST2q8_UPD:
+ case ARM::VST2q16_UPD:
+ case ARM::VST2q32_UPD:
+ case ARM::VST3d8:
+ case ARM::VST3d16:
+ case ARM::VST3d32:
+ case ARM::VST3d8_UPD:
+ case ARM::VST3d16_UPD:
+ case ARM::VST3d32_UPD:
+ case ARM::VST4d8:
+ case ARM::VST4d16:
+ case ARM::VST4d32:
+ case ARM::VST4d8_UPD:
+ case ARM::VST4d16_UPD:
+ case ARM::VST4d32_UPD:
+ if (!Check(S, DecodeDPRRegisterClass(Inst, (Rd+2)%32, Address, Decoder)))
+ return MCDisassembler::Fail;
+ break;
+ case ARM::VST3q8:
+ case ARM::VST3q16:
+ case ARM::VST3q32:
+ case ARM::VST3q8_UPD:
+ case ARM::VST3q16_UPD:
+ case ARM::VST3q32_UPD:
+ case ARM::VST4q8:
+ case ARM::VST4q16:
+ case ARM::VST4q32:
+ case ARM::VST4q8_UPD:
+ case ARM::VST4q16_UPD:
+ case ARM::VST4q32_UPD:
+ if (!Check(S, DecodeDPRRegisterClass(Inst, (Rd+4)%32, Address, Decoder)))
+ return MCDisassembler::Fail;
+ break;
+ default:
+ break;
+ }
+
+ // Fourth input register
+ switch (Inst.getOpcode()) {
+ case ARM::VST1d8Q:
+ case ARM::VST1d16Q:
+ case ARM::VST1d32Q:
+ case ARM::VST1d64Q:
+ case ARM::VST1d8Q_UPD:
+ case ARM::VST1d16Q_UPD:
+ case ARM::VST1d32Q_UPD:
+ case ARM::VST1d64Q_UPD:
+ case ARM::VST2q8:
+ case ARM::VST2q16:
+ case ARM::VST2q32:
+ case ARM::VST2q8_UPD:
+ case ARM::VST2q16_UPD:
+ case ARM::VST2q32_UPD:
+ case ARM::VST4d8:
+ case ARM::VST4d16:
+ case ARM::VST4d32:
+ case ARM::VST4d8_UPD:
+ case ARM::VST4d16_UPD:
+ case ARM::VST4d32_UPD:
+ if (!Check(S, DecodeDPRRegisterClass(Inst, (Rd+3)%32, Address, Decoder)))
+ return MCDisassembler::Fail;
+ break;
+ case ARM::VST4q8:
+ case ARM::VST4q16:
+ case ARM::VST4q32:
+ case ARM::VST4q8_UPD:
+ case ARM::VST4q16_UPD:
+ case ARM::VST4q32_UPD:
+ if (!Check(S, DecodeDPRRegisterClass(Inst, (Rd+6)%32, Address, Decoder)))
+ return MCDisassembler::Fail;
+ break;
+ default:
+ break;
+ }
+
+ return S;
+}
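+// Note: VST decoding mirrors DecodeVLDInstruction above, except that the
+// writeback and addressing operands are decoded first because the D
+// registers are sources rather than destinations.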
+
+static DecodeStatus DecodeVLD1DupInstruction(llvm::MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder) {
+ DecodeStatus S = MCDisassembler::Success;
+
+ unsigned Rd = fieldFromInstruction32(Insn, 12, 4);
+ Rd |= fieldFromInstruction32(Insn, 22, 1) << 4;
+ unsigned Rn = fieldFromInstruction32(Insn, 16, 4);
+ unsigned Rm = fieldFromInstruction32(Insn, 0, 4);
+ unsigned align = fieldFromInstruction32(Insn, 4, 1);
+ unsigned size = fieldFromInstruction32(Insn, 6, 2);
+ unsigned regs = fieldFromInstruction32(Insn, 5, 1) + 1;
+
+ align *= (1 << size);
+
+ if (!Check(S, DecodeDPRRegisterClass(Inst, Rd, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (regs == 2) {
+ if (!Check(S, DecodeDPRRegisterClass(Inst, (Rd+1)%32, Address, Decoder)))
+ return MCDisassembler::Fail;
+ }
+  if (Rm != 0xF) { // Writeback
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder)))
+ return MCDisassembler::Fail;
+ }
+
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder)))
+ return MCDisassembler::Fail;
+ Inst.addOperand(MCOperand::CreateImm(align));
+
+ if (Rm == 0xD)
+ Inst.addOperand(MCOperand::CreateReg(0));
+ else if (Rm != 0xF) {
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rm, Address, Decoder)))
+ return MCDisassembler::Fail;
+ }
+
+ return S;
+}
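+// Example (illustrative): bit 5 selects a one- or two-register list and the
+// alignment operand is a * (1 << size), so size == 2 with bit 4 set yields
+// a 4-byte alignment.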
+
+static DecodeStatus DecodeVLD2DupInstruction(llvm::MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder) {
+ DecodeStatus S = MCDisassembler::Success;
+
+ unsigned Rd = fieldFromInstruction32(Insn, 12, 4);
+ Rd |= fieldFromInstruction32(Insn, 22, 1) << 4;
+ unsigned Rn = fieldFromInstruction32(Insn, 16, 4);
+ unsigned Rm = fieldFromInstruction32(Insn, 0, 4);
+ unsigned align = fieldFromInstruction32(Insn, 4, 1);
+ unsigned size = 1 << fieldFromInstruction32(Insn, 6, 2);
+ unsigned inc = fieldFromInstruction32(Insn, 5, 1) + 1;
+ align *= 2*size;
+
+ if (!Check(S, DecodeDPRRegisterClass(Inst, Rd, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodeDPRRegisterClass(Inst, (Rd+inc)%32, Address, Decoder)))
+ return MCDisassembler::Fail;
+  if (Rm != 0xF) { // Writeback
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder)))
+ return MCDisassembler::Fail;
+ }
+
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder)))
+ return MCDisassembler::Fail;
+ Inst.addOperand(MCOperand::CreateImm(align));
+
+ if (Rm == 0xD)
+ Inst.addOperand(MCOperand::CreateReg(0));
+ else if (Rm != 0xF) {
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rm, Address, Decoder)))
+ return MCDisassembler::Fail;
+ }
+
+ return S;
+}
+
+static DecodeStatus DecodeVLD3DupInstruction(llvm::MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder) {
+ DecodeStatus S = MCDisassembler::Success;
+
+ unsigned Rd = fieldFromInstruction32(Insn, 12, 4);
+ Rd |= fieldFromInstruction32(Insn, 22, 1) << 4;
+ unsigned Rn = fieldFromInstruction32(Insn, 16, 4);
+ unsigned Rm = fieldFromInstruction32(Insn, 0, 4);
+ unsigned inc = fieldFromInstruction32(Insn, 5, 1) + 1;
+
+ if (!Check(S, DecodeDPRRegisterClass(Inst, Rd, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodeDPRRegisterClass(Inst, (Rd+inc)%32, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodeDPRRegisterClass(Inst, (Rd+2*inc)%32, Address, Decoder)))
+ return MCDisassembler::Fail;
+  if (Rm != 0xF) { // Writeback
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder)))
+ return MCDisassembler::Fail;
+ }
+
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder)))
+ return MCDisassembler::Fail;
+ Inst.addOperand(MCOperand::CreateImm(0));
+
+ if (Rm == 0xD)
+ Inst.addOperand(MCOperand::CreateReg(0));
+ else if (Rm != 0xF) {
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rm, Address, Decoder)))
+ return MCDisassembler::Fail;
+ }
+
+ return S;
+}
+
+static DecodeStatus DecodeVLD4DupInstruction(llvm::MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder) {
+ DecodeStatus S = MCDisassembler::Success;
+
+ unsigned Rd = fieldFromInstruction32(Insn, 12, 4);
+ Rd |= fieldFromInstruction32(Insn, 22, 1) << 4;
+ unsigned Rn = fieldFromInstruction32(Insn, 16, 4);
+ unsigned Rm = fieldFromInstruction32(Insn, 0, 4);
+ unsigned size = fieldFromInstruction32(Insn, 6, 2);
+ unsigned inc = fieldFromInstruction32(Insn, 5, 1) + 1;
+ unsigned align = fieldFromInstruction32(Insn, 4, 1);
+
+ if (size == 0x3) {
+ size = 4;
+ align = 16;
+ } else {
+ if (size == 2) {
+ size = 1 << size;
+ align *= 8;
+ } else {
+ size = 1 << size;
+ align *= 4*size;
}
}
-AutoGenedDecoder:
- // Calling the auto-generated decoder function.
- return decodeInstruction(insn);
+ if (!Check(S, DecodeDPRRegisterClass(Inst, Rd, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodeDPRRegisterClass(Inst, (Rd+inc)%32, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodeDPRRegisterClass(Inst, (Rd+2*inc)%32, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodeDPRRegisterClass(Inst, (Rd+3*inc)%32, Address, Decoder)))
+ return MCDisassembler::Fail;
+  if (Rm != 0xF) { // Writeback
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder)))
+ return MCDisassembler::Fail;
+ }
+
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder)))
+ return MCDisassembler::Fail;
+ Inst.addOperand(MCOperand::CreateImm(align));
+
+ if (Rm == 0xD)
+ Inst.addOperand(MCOperand::CreateReg(0));
+ else if (Rm != 0xF) {
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rm, Address, Decoder)))
+ return MCDisassembler::Fail;
+ }
+
+ return S;
}
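+// Example (illustrative): a size field of 0b11 is the special 32-bit form
+// with 16-byte alignment; otherwise the element size is 1 << size and the
+// a bit scales the alignment, e.g. size == 0b01 with a == 1 gives 8.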
-// Helper function for special case handling of LDR (literal) and friends.
-// See, for example, A6.3.7 Load word: Table A6-18 Load word.
-// See A8.6.57 T3, T4 & A8.6.60 T2 and friends for why we morphed the opcode
-// before returning it.
-static unsigned T2Morph2LoadLiteral(unsigned Opcode) {
- switch (Opcode) {
- default:
- return Opcode; // Return unmorphed opcode.
+static DecodeStatus
+DecodeNEONModImmInstruction(llvm::MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder) {
+ DecodeStatus S = MCDisassembler::Success;
+
+ unsigned Rd = fieldFromInstruction32(Insn, 12, 4);
+ Rd |= fieldFromInstruction32(Insn, 22, 1) << 4;
+ unsigned imm = fieldFromInstruction32(Insn, 0, 4);
+ imm |= fieldFromInstruction32(Insn, 16, 3) << 4;
+ imm |= fieldFromInstruction32(Insn, 24, 1) << 7;
+ imm |= fieldFromInstruction32(Insn, 8, 4) << 8;
+ imm |= fieldFromInstruction32(Insn, 5, 1) << 12;
+ unsigned Q = fieldFromInstruction32(Insn, 6, 1);
+
+ if (Q) {
+ if (!Check(S, DecodeQPRRegisterClass(Inst, Rd, Address, Decoder)))
+ return MCDisassembler::Fail;
+ } else {
+ if (!Check(S, DecodeDPRRegisterClass(Inst, Rd, Address, Decoder)))
+ return MCDisassembler::Fail;
+ }
+
+ Inst.addOperand(MCOperand::CreateImm(imm));
+
+ switch (Inst.getOpcode()) {
+ case ARM::VORRiv4i16:
+ case ARM::VORRiv2i32:
+ case ARM::VBICiv4i16:
+ case ARM::VBICiv2i32:
+ if (!Check(S, DecodeDPRRegisterClass(Inst, Rd, Address, Decoder)))
+ return MCDisassembler::Fail;
+ break;
+ case ARM::VORRiv8i16:
+ case ARM::VORRiv4i32:
+ case ARM::VBICiv8i16:
+ case ARM::VBICiv4i32:
+ if (!Check(S, DecodeQPRRegisterClass(Inst, Rd, Address, Decoder)))
+ return MCDisassembler::Fail;
+ break;
+ default:
+ break;
+ }
+
+ return S;
+}
+
+static DecodeStatus DecodeVSHLMaxInstruction(llvm::MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder) {
+ DecodeStatus S = MCDisassembler::Success;
+
+ unsigned Rd = fieldFromInstruction32(Insn, 12, 4);
+ Rd |= fieldFromInstruction32(Insn, 22, 1) << 4;
+ unsigned Rm = fieldFromInstruction32(Insn, 0, 4);
+ Rm |= fieldFromInstruction32(Insn, 5, 1) << 4;
+ unsigned size = fieldFromInstruction32(Insn, 18, 2);
+
+ if (!Check(S, DecodeQPRRegisterClass(Inst, Rd, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodeDPRRegisterClass(Inst, Rm, Address, Decoder)))
+ return MCDisassembler::Fail;
+ Inst.addOperand(MCOperand::CreateImm(8 << size));
+
+ return S;
+}
+
+static DecodeStatus DecodeShiftRight8Imm(llvm::MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder) {
+ Inst.addOperand(MCOperand::CreateImm(8 - Val));
+ return MCDisassembler::Success;
+}
- case ARM::t2LDR_POST: case ARM::t2LDR_PRE:
- case ARM::t2LDRi12: case ARM::t2LDRi8:
- case ARM::t2LDRs: case ARM::t2LDRT:
- return ARM::t2LDRpci;
+static DecodeStatus DecodeShiftRight16Imm(llvm::MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder) {
+ Inst.addOperand(MCOperand::CreateImm(16 - Val));
+ return MCDisassembler::Success;
+}
- case ARM::t2LDRB_POST: case ARM::t2LDRB_PRE:
- case ARM::t2LDRBi12: case ARM::t2LDRBi8:
- case ARM::t2LDRBs: case ARM::t2LDRBT:
- return ARM::t2LDRBpci;
+static DecodeStatus DecodeShiftRight32Imm(llvm::MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder) {
+ Inst.addOperand(MCOperand::CreateImm(32 - Val));
+ return MCDisassembler::Success;
+}
- case ARM::t2LDRH_POST: case ARM::t2LDRH_PRE:
- case ARM::t2LDRHi12: case ARM::t2LDRHi8:
- case ARM::t2LDRHs: case ARM::t2LDRHT:
- return ARM::t2LDRHpci;
+static DecodeStatus DecodeShiftRight64Imm(llvm::MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder) {
+ Inst.addOperand(MCOperand::CreateImm(64 - Val));
+ return MCDisassembler::Success;
+}
- case ARM::t2LDRSB_POST: case ARM::t2LDRSB_PRE:
- case ARM::t2LDRSBi12: case ARM::t2LDRSBi8:
- case ARM::t2LDRSBs: case ARM::t2LDRSBT:
- return ARM::t2LDRSBpci;
+static DecodeStatus DecodeTBLInstruction(llvm::MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder) {
+ DecodeStatus S = MCDisassembler::Success;
+
+ unsigned Rd = fieldFromInstruction32(Insn, 12, 4);
+ Rd |= fieldFromInstruction32(Insn, 22, 1) << 4;
+ unsigned Rn = fieldFromInstruction32(Insn, 16, 4);
+ Rn |= fieldFromInstruction32(Insn, 7, 1) << 4;
+ unsigned Rm = fieldFromInstruction32(Insn, 0, 4);
+ Rm |= fieldFromInstruction32(Insn, 5, 1) << 4;
+ unsigned op = fieldFromInstruction32(Insn, 6, 1);
+ unsigned length = fieldFromInstruction32(Insn, 8, 2) + 1;
+
+ if (!Check(S, DecodeDPRRegisterClass(Inst, Rd, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (op) {
+ if (!Check(S, DecodeDPRRegisterClass(Inst, Rd, Address, Decoder)))
+ return MCDisassembler::Fail; // Writeback
+ }
- case ARM::t2LDRSH_POST: case ARM::t2LDRSH_PRE:
- case ARM::t2LDRSHi12: case ARM::t2LDRSHi8:
- case ARM::t2LDRSHs: case ARM::t2LDRSHT:
- return ARM::t2LDRSHpci;
+ for (unsigned i = 0; i < length; ++i) {
+ if (!Check(S, DecodeDPRRegisterClass(Inst, (Rn+i)%32, Address, Decoder)))
+ return MCDisassembler::Fail;
}
+
+ if (!Check(S, DecodeDPRRegisterClass(Inst, Rm, Address, Decoder)))
+ return MCDisassembler::Fail;
+
+ return S;
}
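+// Example (illustrative): the length field encodes the table size minus
+// one, so length == 0b10 decodes the three-register list Dn, D(n+1),
+// D(n+2); the op bit distinguishes VTBX, which also reads Dd.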
-// Helper function for special case handling of PLD (literal) and friends.
-// See A8.6.117 T1 & T2 and friends for why we morphed the opcode
-// before returning it.
-static unsigned T2Morph2PLDLiteral(unsigned Opcode) {
- switch (Opcode) {
- default:
- return Opcode; // Return unmorphed opcode.
-
- case ARM::t2PLDi8: case ARM::t2PLDs:
- case ARM::t2PLDWi12: case ARM::t2PLDWi8:
- case ARM::t2PLDWs:
- return ARM::t2PLDi12;
-
- case ARM::t2PLIi8: case ARM::t2PLIs:
- return ARM::t2PLIi12;
- }
-}
-
-/// decodeThumbSideEffect is a decorator function which can potentially twiddle
-/// the instruction or morph the returned opcode under Thumb2.
-///
-/// First it checks whether the insn is a NEON or VFP instr; if true, bit
-/// twiddling could be performed on insn to turn it into an ARM NEON/VFP
-/// equivalent instruction and decodeInstruction is called with the transformed
-/// insn.
-///
-/// Next, there is special handling for Load byte/halfword/word instruction by
-/// checking whether Rn=0b1111 and call T2Morph2LoadLiteral() on the decoded
-/// Thumb2 instruction. See comments below for further details.
-///
-/// Finally, one last check is made to see whether the insn is a NEON/VFP and
-/// decodeInstruction(insn) is invoked on the original insn.
-///
-/// Otherwise, decodeThumbInstruction is called with the original insn.
-static unsigned decodeThumbSideEffect(bool IsThumb2, unsigned &insn) {
- if (IsThumb2) {
- uint16_t op1 = slice(insn, 28, 27);
- uint16_t op2 = slice(insn, 26, 20);
-
- // A6.3 32-bit Thumb instruction encoding
- // Table A6-9 32-bit Thumb instruction encoding
-
- // The coprocessor instructions of interest are transformed to their ARM
- // equivalents.
-
- // --------- Transform Begin Marker ---------
- if ((op1 == 1 || op1 == 3) && slice(op2, 6, 4) == 7) {
- // A7.4 Advanced SIMD data-processing instructions
- // U bit of Thumb corresponds to Inst{24} of ARM.
- uint16_t U = slice(op1, 1, 1);
-
- // Inst{28-24} of ARM = {1,0,0,1,U};
- uint16_t bits28_24 = 9 << 1 | U;
- DEBUG(showBitVector(errs(), insn));
- setSlice(insn, 28, 24, bits28_24);
- return decodeInstruction(insn);
+static DecodeStatus DecodeThumbAddSpecialReg(llvm::MCInst &Inst, uint16_t Insn,
+ uint64_t Address, const void *Decoder) {
+ DecodeStatus S = MCDisassembler::Success;
+
+ unsigned dst = fieldFromInstruction16(Insn, 8, 3);
+ unsigned imm = fieldFromInstruction16(Insn, 0, 8);
+
+ if (!Check(S, DecodetGPRRegisterClass(Inst, dst, Address, Decoder)))
+ return MCDisassembler::Fail;
+
+ switch(Inst.getOpcode()) {
+ default:
+ return MCDisassembler::Fail;
+ case ARM::tADR:
+ break; // tADR does not explicitly represent the PC as an operand.
+ case ARM::tADDrSPi:
+ Inst.addOperand(MCOperand::CreateReg(ARM::SP));
+ break;
+ }
+
+ Inst.addOperand(MCOperand::CreateImm(imm));
+ return S;
+}
+
+static DecodeStatus DecodeThumbBROperand(llvm::MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder) {
+ Inst.addOperand(MCOperand::CreateImm(SignExtend32<12>(Val << 1)));
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus DecodeT2BROperand(llvm::MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder) {
+ Inst.addOperand(MCOperand::CreateImm(SignExtend32<21>(Val)));
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus DecodeThumbCmpBROperand(llvm::MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder) {
+ Inst.addOperand(MCOperand::CreateImm(SignExtend32<7>(Val << 1)));
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus DecodeThumbAddrModeRR(llvm::MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder) {
+ DecodeStatus S = MCDisassembler::Success;
+
+ unsigned Rn = fieldFromInstruction32(Val, 0, 3);
+ unsigned Rm = fieldFromInstruction32(Val, 3, 3);
+
+ if (!Check(S, DecodetGPRRegisterClass(Inst, Rn, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodetGPRRegisterClass(Inst, Rm, Address, Decoder)))
+ return MCDisassembler::Fail;
+
+ return S;
+}
+
+static DecodeStatus DecodeThumbAddrModeIS(llvm::MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder) {
+ DecodeStatus S = MCDisassembler::Success;
+
+ unsigned Rn = fieldFromInstruction32(Val, 0, 3);
+ unsigned imm = fieldFromInstruction32(Val, 3, 5);
+
+ if (!Check(S, DecodetGPRRegisterClass(Inst, Rn, Address, Decoder)))
+ return MCDisassembler::Fail;
+ Inst.addOperand(MCOperand::CreateImm(imm));
+
+ return S;
+}
+
+static DecodeStatus DecodeThumbAddrModePC(llvm::MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder) {
+ unsigned imm = Val << 2;
+
+ Inst.addOperand(MCOperand::CreateImm(imm));
+ tryAddingPcLoadReferenceComment(Address, (Address & ~2u) + imm + 4, Decoder);
+
+ return MCDisassembler::Success;
+}
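+// Worked example (illustrative): Thumb PC-relative addressing uses
+// Align(PC, 4), hence (Address & ~2u) + imm + 4; at Address 0x1002 with
+// Val == 1 the referenced word is 0x1000 + 4 + 4 = 0x1008.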
+
+static DecodeStatus DecodeThumbAddrModeSP(llvm::MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder) {
+ Inst.addOperand(MCOperand::CreateReg(ARM::SP));
+ Inst.addOperand(MCOperand::CreateImm(Val));
+
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus DecodeT2AddrModeSOReg(llvm::MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder) {
+ DecodeStatus S = MCDisassembler::Success;
+
+ unsigned Rn = fieldFromInstruction32(Val, 6, 4);
+ unsigned Rm = fieldFromInstruction32(Val, 2, 4);
+ unsigned imm = fieldFromInstruction32(Val, 0, 2);
+
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecoderGPRRegisterClass(Inst, Rm, Address, Decoder)))
+ return MCDisassembler::Fail;
+ Inst.addOperand(MCOperand::CreateImm(imm));
+
+ return S;
+}
+
+static DecodeStatus DecodeT2LoadShift(llvm::MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder) {
+ DecodeStatus S = MCDisassembler::Success;
+
+ switch (Inst.getOpcode()) {
+ case ARM::t2PLDs:
+ case ARM::t2PLDWs:
+ case ARM::t2PLIs:
+ break;
+ default: {
+ unsigned Rt = fieldFromInstruction32(Insn, 12, 4);
+ if (!Check(S, DecoderGPRRegisterClass(Inst, Rt, Address, Decoder)))
+ return MCDisassembler::Fail;
}
+ }
- if (op1 == 3 && slice(op2, 6, 4) == 1 && slice(op2, 0, 0) == 0) {
- // A7.7 Advanced SIMD element or structure load/store instructions
- // Inst{27-24} of Thumb = 0b1001
- // Inst{27-24} of ARM = 0b0100
- DEBUG(showBitVector(errs(), insn));
- setSlice(insn, 27, 24, 4);
- return decodeInstruction(insn);
+ unsigned Rn = fieldFromInstruction32(Insn, 16, 4);
+ if (Rn == 0xF) {
+ switch (Inst.getOpcode()) {
+ case ARM::t2LDRBs:
+ Inst.setOpcode(ARM::t2LDRBpci);
+ break;
+ case ARM::t2LDRHs:
+ Inst.setOpcode(ARM::t2LDRHpci);
+ break;
+ case ARM::t2LDRSHs:
+ Inst.setOpcode(ARM::t2LDRSHpci);
+ break;
+ case ARM::t2LDRSBs:
+ Inst.setOpcode(ARM::t2LDRSBpci);
+ break;
+ case ARM::t2PLDs:
+ Inst.setOpcode(ARM::t2PLDi12);
+ Inst.addOperand(MCOperand::CreateReg(ARM::PC));
+ break;
+ default:
+ return MCDisassembler::Fail;
}
- // --------- Transform End Marker ---------
-
- unsigned unmorphed = decodeThumbInstruction(insn);
-
- // See, for example, A6.3.7 Load word: Table A6-18 Load word.
- // See A8.6.57 T3, T4 & A8.6.60 T2 and friends for why we morphed the opcode
- // before returning it to our caller.
- if (op1 == 3 && slice(op2, 6, 5) == 0 && slice(op2, 0, 0) == 1
- && slice(insn, 19, 16) == 15) {
- unsigned morphed = T2Morph2LoadLiteral(unmorphed);
- if (morphed != unmorphed)
- return morphed;
+
+ int imm = fieldFromInstruction32(Insn, 0, 12);
+ if (!fieldFromInstruction32(Insn, 23, 1)) imm *= -1;
+ Inst.addOperand(MCOperand::CreateImm(imm));
+
+ return S;
+ }
+
+ unsigned addrmode = fieldFromInstruction32(Insn, 4, 2);
+ addrmode |= fieldFromInstruction32(Insn, 0, 4) << 2;
+ addrmode |= fieldFromInstruction32(Insn, 16, 4) << 6;
+ if (!Check(S, DecodeT2AddrModeSOReg(Inst, addrmode, Address, Decoder)))
+ return MCDisassembler::Fail;
+
+ return S;
+}
+
+static DecodeStatus DecodeT2Imm8S4(llvm::MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder) {
+ int imm = Val & 0xFF;
+ if (!(Val & 0x100)) imm *= -1;
+ Inst.addOperand(MCOperand::CreateImm(imm << 2));
+
+ return MCDisassembler::Success;
+}
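+// Example (illustrative): Val packs U at bit 8 over an 8-bit magnitude
+// scaled by 4, so Val == 0x1FF decodes to +1020 and Val == 0x0FF to -1020.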
+
+static DecodeStatus DecodeT2AddrModeImm8s4(llvm::MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder) {
+ DecodeStatus S = MCDisassembler::Success;
+
+ unsigned Rn = fieldFromInstruction32(Val, 9, 4);
+ unsigned imm = fieldFromInstruction32(Val, 0, 9);
+
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodeT2Imm8S4(Inst, imm, Address, Decoder)))
+ return MCDisassembler::Fail;
+
+ return S;
+}
+
+static DecodeStatus DecodeT2AddrModeImm0_1020s4(llvm::MCInst &Inst,unsigned Val,
+ uint64_t Address, const void *Decoder) {
+ DecodeStatus S = MCDisassembler::Success;
+
+ unsigned Rn = fieldFromInstruction32(Val, 8, 4);
+ unsigned imm = fieldFromInstruction32(Val, 0, 8);
+
+ if (!Check(S, DecodeGPRnopcRegisterClass(Inst, Rn, Address, Decoder)))
+ return MCDisassembler::Fail;
+
+ Inst.addOperand(MCOperand::CreateImm(imm));
+
+ return S;
+}
+
+static DecodeStatus DecodeT2Imm8(llvm::MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder) {
+ int imm = Val & 0xFF;
+ if (Val == 0)
+ imm = INT32_MIN;
+ else if (!(Val & 0x100))
+ imm *= -1;
+ Inst.addOperand(MCOperand::CreateImm(imm));
+
+ return MCDisassembler::Success;
+}
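+// Example (illustrative): as in DecodeAddrModeImm12Operand, Val == 0 is
+// kept as the INT32_MIN "#-0" sentinel; Val == 0x07F (U clear) decodes
+// to -127.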
+
+static DecodeStatus DecodeT2AddrModeImm8(llvm::MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder) {
+ DecodeStatus S = MCDisassembler::Success;
+
+ unsigned Rn = fieldFromInstruction32(Val, 9, 4);
+ unsigned imm = fieldFromInstruction32(Val, 0, 9);
+
+ // Some instructions always use an additive offset.
+ switch (Inst.getOpcode()) {
+ case ARM::t2LDRT:
+ case ARM::t2LDRBT:
+ case ARM::t2LDRHT:
+ case ARM::t2LDRSBT:
+ case ARM::t2LDRSHT:
+ case ARM::t2STRT:
+ case ARM::t2STRBT:
+ case ARM::t2STRHT:
+ imm |= 0x100;
+ break;
+ default:
+ break;
+ }
+
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodeT2Imm8(Inst, imm, Address, Decoder)))
+ return MCDisassembler::Fail;
+
+ return S;
+}
+
+static DecodeStatus DecodeT2LdStPre(llvm::MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder) {
+ DecodeStatus S = MCDisassembler::Success;
+
+ unsigned Rt = fieldFromInstruction32(Insn, 12, 4);
+ unsigned Rn = fieldFromInstruction32(Insn, 16, 4);
+ unsigned addr = fieldFromInstruction32(Insn, 0, 8);
+ addr |= fieldFromInstruction32(Insn, 9, 1) << 8;
+ addr |= Rn << 9;
+ unsigned load = fieldFromInstruction32(Insn, 20, 1);
+
+ if (!load) {
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder)))
+ return MCDisassembler::Fail;
+ }
+
+ if (!Check(S, DecoderGPRRegisterClass(Inst, Rt, Address, Decoder)))
+ return MCDisassembler::Fail;
+
+ if (load) {
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder)))
+ return MCDisassembler::Fail;
+ }
+
+ if (!Check(S, DecodeT2AddrModeImm8(Inst, addr, Address, Decoder)))
+ return MCDisassembler::Fail;
+
+ return S;
+}
+
+static DecodeStatus DecodeT2AddrModeImm12(llvm::MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder) {
+ DecodeStatus S = MCDisassembler::Success;
+
+ unsigned Rn = fieldFromInstruction32(Val, 13, 4);
+ unsigned imm = fieldFromInstruction32(Val, 0, 12);
+
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder)))
+ return MCDisassembler::Fail;
+ Inst.addOperand(MCOperand::CreateImm(imm));
+
+ return S;
+}
+
+static DecodeStatus DecodeThumbAddSPImm(llvm::MCInst &Inst, uint16_t Insn,
+ uint64_t Address, const void *Decoder) {
+ unsigned imm = fieldFromInstruction16(Insn, 0, 7);
+
+ Inst.addOperand(MCOperand::CreateReg(ARM::SP));
+ Inst.addOperand(MCOperand::CreateReg(ARM::SP));
+ Inst.addOperand(MCOperand::CreateImm(imm));
+
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus DecodeThumbAddSPReg(llvm::MCInst &Inst, uint16_t Insn,
+ uint64_t Address, const void *Decoder) {
+ DecodeStatus S = MCDisassembler::Success;
+
+ if (Inst.getOpcode() == ARM::tADDrSP) {
+ unsigned Rdm = fieldFromInstruction16(Insn, 0, 3);
+ Rdm |= fieldFromInstruction16(Insn, 7, 1) << 3;
+
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rdm, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rdm, Address, Decoder)))
+ return MCDisassembler::Fail;
+ Inst.addOperand(MCOperand::CreateReg(ARM::SP));
+ } else if (Inst.getOpcode() == ARM::tADDspr) {
+ unsigned Rm = fieldFromInstruction16(Insn, 3, 4);
+
+ Inst.addOperand(MCOperand::CreateReg(ARM::SP));
+ Inst.addOperand(MCOperand::CreateReg(ARM::SP));
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rm, Address, Decoder)))
+ return MCDisassembler::Fail;
+ }
+
+ return S;
+}
+
+static DecodeStatus DecodeThumbCPS(llvm::MCInst &Inst, uint16_t Insn,
+ uint64_t Address, const void *Decoder) {
+ unsigned imod = fieldFromInstruction16(Insn, 4, 1) | 0x2;
+ unsigned flags = fieldFromInstruction16(Insn, 0, 3);
+
+ Inst.addOperand(MCOperand::CreateImm(imod));
+ Inst.addOperand(MCOperand::CreateImm(flags));
+
+ return MCDisassembler::Success;
+}
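+// Note (illustrative): OR-ing the single encoded bit with 0x2 maps it onto
+// the two-bit imod field, so bit 4 == 0 yields imod == 0b10 (CPSIE) and
+// bit 4 == 1 yields imod == 0b11 (CPSID).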
+
+static DecodeStatus DecodePostIdxReg(llvm::MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder) {
+ DecodeStatus S = MCDisassembler::Success;
+ unsigned Rm = fieldFromInstruction32(Insn, 0, 4);
+ unsigned add = fieldFromInstruction32(Insn, 4, 1);
+
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rm, Address, Decoder)))
+ return MCDisassembler::Fail;
+ Inst.addOperand(MCOperand::CreateImm(add));
+
+ return S;
+}
+
+static DecodeStatus DecodeThumbBLXOffset(llvm::MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder) {
+ if (!tryAddingSymbolicOperand(Address,
+ (Address & ~2u) + SignExtend32<22>(Val << 1) + 4,
+ true, 4, Inst, Decoder))
+ Inst.addOperand(MCOperand::CreateImm(SignExtend32<22>(Val << 1)));
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus DecodeCoprocessor(llvm::MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder) {
+ if (Val == 0xA || Val == 0xB)
+ return MCDisassembler::Fail;
+
+ Inst.addOperand(MCOperand::CreateImm(Val));
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus
+DecodeThumbTableBranch(llvm::MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder) {
+ DecodeStatus S = MCDisassembler::Success;
+
+ unsigned Rn = fieldFromInstruction32(Insn, 16, 4);
+ unsigned Rm = fieldFromInstruction32(Insn, 0, 4);
+
+  if (Rn == 13) S = MCDisassembler::SoftFail; // SP base is UNPREDICTABLE
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecoderGPRRegisterClass(Inst, Rm, Address, Decoder)))
+ return MCDisassembler::Fail;
+ return S;
+}
+
+static DecodeStatus
+DecodeThumb2BCCInstruction(llvm::MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder) {
+ DecodeStatus S = MCDisassembler::Success;
+
+ unsigned pred = fieldFromInstruction32(Insn, 22, 4);
+ if (pred == 0xE || pred == 0xF) {
+ unsigned opc = fieldFromInstruction32(Insn, 4, 28);
+ switch (opc) {
+ default:
+ return MCDisassembler::Fail;
+ case 0xf3bf8f4:
+ Inst.setOpcode(ARM::t2DSB);
+ break;
+ case 0xf3bf8f5:
+ Inst.setOpcode(ARM::t2DMB);
+ break;
+ case 0xf3bf8f6:
+ Inst.setOpcode(ARM::t2ISB);
+ break;
}
- // See, for example, A8.6.117 PLD,PLDW (immediate) T1 & T2, and friends for
- // why we morphed the opcode before returning it to our caller.
- if (slice(insn, 31, 25) == 0x7C && slice(insn, 15, 12) == 0xF
- && slice(insn, 22, 22) == 0 && slice(insn, 20, 20) == 1
- && slice(insn, 19, 16) == 15) {
- unsigned morphed = T2Morph2PLDLiteral(unmorphed);
- if (morphed != unmorphed)
- return morphed;
+ unsigned imm = fieldFromInstruction32(Insn, 0, 4);
+ return DecodeMemBarrierOption(Inst, imm, Address, Decoder);
+ }
+
+ unsigned brtarget = fieldFromInstruction32(Insn, 0, 11) << 1;
+ brtarget |= fieldFromInstruction32(Insn, 11, 1) << 19;
+ brtarget |= fieldFromInstruction32(Insn, 13, 1) << 18;
+ brtarget |= fieldFromInstruction32(Insn, 16, 6) << 12;
+ brtarget |= fieldFromInstruction32(Insn, 26, 1) << 20;
+
+ if (!Check(S, DecodeT2BROperand(Inst, brtarget, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodePredicateOperand(Inst, pred, Address, Decoder)))
+ return MCDisassembler::Fail;
+
+ return S;
+}
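+// Note (illustrative): pred values 0xE/0xF route the barrier encodings
+// (opc 0xf3bf8f4/5/6 -> DSB/DMB/ISB) away from the branch path; otherwise
+// the scattered S, J1, J2, imm6 and imm11 fields are reassembled into the
+// 21-bit offset consumed by DecodeT2BROperand.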
+
+// Decode a shifted immediate operand. These consist of an 8-bit value and
+// a 4-bit directive that specifies either a splat operation or a rotation.
+static DecodeStatus DecodeT2SOImm(llvm::MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder) {
+ unsigned ctrl = fieldFromInstruction32(Val, 10, 2);
+ if (ctrl == 0) {
+ unsigned byte = fieldFromInstruction32(Val, 8, 2);
+ unsigned imm = fieldFromInstruction32(Val, 0, 8);
+ switch (byte) {
+ case 0:
+ Inst.addOperand(MCOperand::CreateImm(imm));
+ break;
+ case 1:
+ Inst.addOperand(MCOperand::CreateImm((imm << 16) | imm));
+ break;
+ case 2:
+ Inst.addOperand(MCOperand::CreateImm((imm << 24) | (imm << 8)));
+ break;
+ case 3:
+ Inst.addOperand(MCOperand::CreateImm((imm << 24) | (imm << 16) |
+ (imm << 8) | imm));
+ break;
}
+ } else {
+ unsigned unrot = fieldFromInstruction32(Val, 0, 7) | 0x80;
+ unsigned rot = fieldFromInstruction32(Val, 7, 5);
+ unsigned imm = (unrot >> rot) | (unrot << ((32-rot)&31));
+ Inst.addOperand(MCOperand::CreateImm(imm));
+ }
- // One last check for NEON/VFP instructions.
- if ((op1 == 1 || op1 == 3) && slice(op2, 6, 6) == 1)
- return decodeInstruction(insn);
+ return MCDisassembler::Success;
+}
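+// Worked example (illustrative): ctrl == 0 splats the byte according to
+// the 2-bit selector, e.g. byte == 3 with imm == 0xAB gives 0xABABABAB;
+// ctrl != 0 forms 0x80 | imm7 and rotates right, so unrot == 0xD5 with
+// rot == 8 produces 0xD5000000.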
+
+static DecodeStatus
+DecodeThumbBCCTargetOperand(llvm::MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder){
+ Inst.addOperand(MCOperand::CreateImm(Val << 1));
+ return MCDisassembler::Success;
+}
- // Fall through.
+static DecodeStatus DecodeThumbBLTargetOperand(llvm::MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder){
+ Inst.addOperand(MCOperand::CreateImm(SignExtend32<22>(Val << 1)));
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus DecodeMemBarrierOption(llvm::MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder) {
+ switch (Val) {
+ default:
+ return MCDisassembler::Fail;
+ case 0xF: // SY
+ case 0xE: // ST
+ case 0xB: // ISH
+ case 0xA: // ISHST
+ case 0x7: // NSH
+ case 0x6: // NSHST
+ case 0x3: // OSH
+ case 0x2: // OSHST
+ break;
}
- return decodeThumbInstruction(insn);
+ Inst.addOperand(MCOperand::CreateImm(Val));
+ return MCDisassembler::Success;
}
-//
-// Public interface for the disassembler
-//
+static DecodeStatus DecodeMSRMask(llvm::MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder) {
+ if (!Val) return MCDisassembler::Fail;
+ Inst.addOperand(MCOperand::CreateImm(Val));
+ return MCDisassembler::Success;
+}
-bool ARMDisassembler::getInstruction(MCInst &MI,
- uint64_t &Size,
- const MemoryObject &Region,
- uint64_t Address,
- raw_ostream &os) const {
- // The machine instruction.
- uint32_t insn;
- uint8_t bytes[4];
+static DecodeStatus DecodeDoubleRegLoad(llvm::MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder) {
+ DecodeStatus S = MCDisassembler::Success;
- // We want to read exactly 4 bytes of data.
- if (Region.readBytes(Address, 4, (uint8_t*)bytes, NULL) == -1)
- return false;
+ unsigned Rt = fieldFromInstruction32(Insn, 12, 4);
+ unsigned Rn = fieldFromInstruction32(Insn, 16, 4);
+ unsigned pred = fieldFromInstruction32(Insn, 28, 4);
- // Encoded as a small-endian 32-bit word in the stream.
- insn = (bytes[3] << 24) |
- (bytes[2] << 16) |
- (bytes[1] << 8) |
- (bytes[0] << 0);
-
- unsigned Opcode = decodeARMInstruction(insn);
- ARMFormat Format = ARMFormats[Opcode];
- Size = 4;
-
- DEBUG({
- errs() << "\nOpcode=" << Opcode << " Name=" <<ARMUtils::OpcodeName(Opcode)
- << " Format=" << stringForARMFormat(Format) << '(' << (int)Format
- << ")\n";
- showBitVector(errs(), insn);
- });
-
- OwningPtr<ARMBasicMCBuilder> Builder(CreateMCBuilder(Opcode, Format));
- if (!Builder)
- return false;
+ if ((Rt & 1) || Rt == 0xE || Rn == 0xF) return MCDisassembler::Fail;
- Builder->setupBuilderForSymbolicDisassembly(getLLVMOpInfoCallback(),
- getDisInfoBlock(), getMCContext(),
- Address);
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rt, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rt+1, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodePredicateOperand(Inst, pred, Address, Decoder)))
+ return MCDisassembler::Fail;
- if (!Builder->Build(MI, insn))
- return false;
+ return S;
+}
- return true;
+
+static DecodeStatus DecodeDoubleRegStore(llvm::MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder){
+ DecodeStatus S = MCDisassembler::Success;
+
+ unsigned Rd = fieldFromInstruction32(Insn, 12, 4);
+ unsigned Rt = fieldFromInstruction32(Insn, 0, 4);
+ unsigned Rn = fieldFromInstruction32(Insn, 16, 4);
+ unsigned pred = fieldFromInstruction32(Insn, 28, 4);
+
+ if (!Check(S, DecoderGPRRegisterClass(Inst, Rd, Address, Decoder)))
+ return MCDisassembler::Fail;
+
+ if ((Rt & 1) || Rt == 0xE || Rn == 0xF) return MCDisassembler::Fail;
+ if (Rd == Rn || Rd == Rt || Rd == Rt+1) return MCDisassembler::Fail;
+
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rt, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rt+1, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodePredicateOperand(Inst, pred, Address, Decoder)))
+ return MCDisassembler::Fail;
+
+ return S;
}
-bool ThumbDisassembler::getInstruction(MCInst &MI,
- uint64_t &Size,
- const MemoryObject &Region,
- uint64_t Address,
- raw_ostream &os) const {
- // The Thumb instruction stream is a sequence of halfwords.
-
- // This represents the first halfword as well as the machine instruction
- // passed to decodeThumbInstruction(). For 16-bit Thumb instruction, the top
- // halfword of insn is 0x00 0x00; otherwise, the first halfword is moved to
- // the top half followed by the second halfword.
- unsigned insn = 0;
- // Possible second halfword.
- uint16_t insn1 = 0;
-
- // A6.1 Thumb instruction set encoding
- //
- // If bits [15:11] of the halfword being decoded take any of the following
- // values, the halfword is the first halfword of a 32-bit instruction:
- // o 0b11101
- // o 0b11110
- // o 0b11111.
- //
- // Otherwise, the halfword is a 16-bit instruction.
-
- // Read 2 bytes of data first.
- uint8_t bytes[2];
- if (Region.readBytes(Address, 2, (uint8_t*)bytes, NULL) == -1)
- return false;
+static DecodeStatus DecodeLDRPreImm(llvm::MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder) {
+ DecodeStatus S = MCDisassembler::Success;
+
+ unsigned Rn = fieldFromInstruction32(Insn, 16, 4);
+ unsigned Rt = fieldFromInstruction32(Insn, 12, 4);
+ unsigned imm = fieldFromInstruction32(Insn, 0, 12);
+ imm |= fieldFromInstruction32(Insn, 16, 4) << 13;
+ imm |= fieldFromInstruction32(Insn, 23, 1) << 12;
+ unsigned pred = fieldFromInstruction32(Insn, 28, 4);
+
+ if (Rn == 0xF || Rn == Rt) S = MCDisassembler::SoftFail;
+
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rt, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodeAddrModeImm12Operand(Inst, imm, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodePredicateOperand(Inst, pred, Address, Decoder)))
+ return MCDisassembler::Fail;
+
+ return S;
+}
- // Encoded as a small-endian 16-bit halfword in the stream.
- insn = (bytes[1] << 8) | bytes[0];
- unsigned bits15_11 = slice(insn, 15, 11);
- bool IsThumb2 = false;
+static DecodeStatus DecodeLDRPreReg(llvm::MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder) {
+ DecodeStatus S = MCDisassembler::Success;
+
+ unsigned Rn = fieldFromInstruction32(Insn, 16, 4);
+ unsigned Rt = fieldFromInstruction32(Insn, 12, 4);
+ unsigned imm = fieldFromInstruction32(Insn, 0, 12);
+ imm |= fieldFromInstruction32(Insn, 16, 4) << 13;
+ imm |= fieldFromInstruction32(Insn, 23, 1) << 12;
+ unsigned pred = fieldFromInstruction32(Insn, 28, 4);
+ unsigned Rm = fieldFromInstruction32(Insn, 0, 4);
+
+ if (Rn == 0xF || Rn == Rt) S = MCDisassembler::SoftFail;
+ if (Rm == 0xF) S = MCDisassembler::SoftFail;
+
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rt, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodeSORegMemOperand(Inst, imm, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodePredicateOperand(Inst, pred, Address, Decoder)))
+ return MCDisassembler::Fail;
+
+ return S;
+}
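+// Note (illustrative): SoftFail flags encodings that decode but are
+// UNPREDICTABLE in the architecture (e.g. Rn == PC, or Rn == Rt with
+// writeback); the instruction is still returned so the client can decide
+// whether to accept it.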
- // 32-bit instructions if the bits [15:11] of the halfword matches
- // { 0b11101 /* 0x1D */, 0b11110 /* 0x1E */, ob11111 /* 0x1F */ }.
- if (bits15_11 == 0x1D || bits15_11 == 0x1E || bits15_11 == 0x1F) {
- IsThumb2 = true;
- if (Region.readBytes(Address + 2, 2, (uint8_t*)bytes, NULL) == -1)
- return false;
- // Encoded as a small-endian 16-bit halfword in the stream.
- insn1 = (bytes[1] << 8) | bytes[0];
- insn = (insn << 16 | insn1);
- }
-
- // The insn could potentially be bit-twiddled in order to be decoded as an ARM
- // NEON/VFP opcode. In such case, the modified insn is later disassembled as
- // an ARM NEON/VFP instruction.
- //
- // This is a short term solution for lack of encoding bits specified for the
- // Thumb2 NEON/VFP instructions. The long term solution could be adding some
- // infrastructure to have each instruction support more than one encodings.
- // Which encoding is used would be based on which subtarget the compiler/
- // disassembler is working with at the time. This would allow the sharing of
- // the NEON patterns between ARM and Thumb2, as well as potential greater
- // sharing between the regular ARM instructions and the 32-bit wide Thumb2
- // instructions as well.
- unsigned Opcode = decodeThumbSideEffect(IsThumb2, insn);
-
- ARMFormat Format = ARMFormats[Opcode];
- Size = IsThumb2 ? 4 : 2;
-
- DEBUG({
- errs() << "Opcode=" << Opcode << " Name=" << ARMUtils::OpcodeName(Opcode)
- << " Format=" << stringForARMFormat(Format) << '(' << (int)Format
- << ")\n";
- showBitVector(errs(), insn);
- });
-
- OwningPtr<ARMBasicMCBuilder> Builder(CreateMCBuilder(Opcode, Format));
- if (!Builder)
- return false;
- Builder->SetSession(const_cast<Session *>(&SO));
+static DecodeStatus DecodeSTRPreImm(llvm::MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder) {
+ DecodeStatus S = MCDisassembler::Success;
- Builder->setupBuilderForSymbolicDisassembly(getLLVMOpInfoCallback(),
- getDisInfoBlock(), getMCContext(),
- Address);
+ unsigned Rn = fieldFromInstruction32(Insn, 16, 4);
+ unsigned Rt = fieldFromInstruction32(Insn, 12, 4);
+ unsigned imm = fieldFromInstruction32(Insn, 0, 12);
+ imm |= fieldFromInstruction32(Insn, 16, 4) << 13;
+ imm |= fieldFromInstruction32(Insn, 23, 1) << 12;
+ unsigned pred = fieldFromInstruction32(Insn, 28, 4);
- if (!Builder->Build(MI, insn))
- return false;
+ if (Rn == 0xF || Rn == Rt) S = MCDisassembler::SoftFail;
- return true;
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rt, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodeAddrModeImm12Operand(Inst, imm, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodePredicateOperand(Inst, pred, Address, Decoder)))
+ return MCDisassembler::Fail;
+
+ return S;
}
-// A8.6.50
-// Valid return values are {1, 2, 3, 4}, with 0 signifying an error condition.
-static unsigned short CountITSize(unsigned ITMask) {
- // First count the trailing zeros of the IT mask.
- unsigned TZ = CountTrailingZeros_32(ITMask);
- if (TZ > 3) {
- DEBUG(errs() << "Encoding error: IT Mask '0000'");
- return 0;
+static DecodeStatus DecodeSTRPreReg(llvm::MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder) {
+ DecodeStatus S = MCDisassembler::Success;
+
+ unsigned Rn = fieldFromInstruction32(Insn, 16, 4);
+ unsigned Rt = fieldFromInstruction32(Insn, 12, 4);
+ unsigned imm = fieldFromInstruction32(Insn, 0, 12);
+ imm |= fieldFromInstruction32(Insn, 16, 4) << 13;
+ imm |= fieldFromInstruction32(Insn, 23, 1) << 12;
+ unsigned pred = fieldFromInstruction32(Insn, 28, 4);
+
+ if (Rn == 0xF || Rn == Rt) S = MCDisassembler::SoftFail;
+
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rt, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodeSORegMemOperand(Inst, imm, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodePredicateOperand(Inst, pred, Address, Decoder)))
+ return MCDisassembler::Fail;
+
+ return S;
+}
+
+static DecodeStatus DecodeVLD1LN(llvm::MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder) {
+ DecodeStatus S = MCDisassembler::Success;
+
+ unsigned Rn = fieldFromInstruction32(Insn, 16, 4);
+ unsigned Rm = fieldFromInstruction32(Insn, 0, 4);
+ unsigned Rd = fieldFromInstruction32(Insn, 12, 4);
+ Rd |= fieldFromInstruction32(Insn, 22, 1) << 4;
+ unsigned size = fieldFromInstruction32(Insn, 10, 2);
+
+ unsigned align = 0;
+ unsigned index = 0;
+ switch (size) {
+ default:
+ return MCDisassembler::Fail;
+ case 0:
+ if (fieldFromInstruction32(Insn, 4, 1))
+ return MCDisassembler::Fail; // UNDEFINED
+ index = fieldFromInstruction32(Insn, 5, 3);
+ break;
+ case 1:
+ if (fieldFromInstruction32(Insn, 5, 1))
+ return MCDisassembler::Fail; // UNDEFINED
+ index = fieldFromInstruction32(Insn, 6, 2);
+ if (fieldFromInstruction32(Insn, 4, 1))
+ align = 2;
+ break;
+ case 2:
+ if (fieldFromInstruction32(Insn, 6, 1))
+ return MCDisassembler::Fail; // UNDEFINED
+ index = fieldFromInstruction32(Insn, 7, 1);
+ if (fieldFromInstruction32(Insn, 4, 2) != 0)
+ align = 4;
+ }
+
+ if (!Check(S, DecodeDPRRegisterClass(Inst, Rd, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (Rm != 0xF) { // Writeback
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder)))
+ return MCDisassembler::Fail;
+ }
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder)))
+ return MCDisassembler::Fail;
+ Inst.addOperand(MCOperand::CreateImm(align));
+ if (Rm != 0xF) {
+ if (Rm != 0xD) {
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rm, Address, Decoder)))
+ return MCDisassembler::Fail;
+ } else
+ Inst.addOperand(MCOperand::CreateReg(0));
}
- return (4 - TZ);
+
+ if (!Check(S, DecodeDPRRegisterClass(Inst, Rd, Address, Decoder)))
+ return MCDisassembler::Fail;
+ Inst.addOperand(MCOperand::CreateImm(index));
+
+ return S;
}
-/// Init ITState. Note that at least one bit is always 1 in mask.
-bool Session::InitIT(unsigned short bits7_0) {
- ITCounter = CountITSize(slice(bits7_0, 3, 0));
- if (ITCounter == 0)
- return false;
+static DecodeStatus DecodeVST1LN(llvm::MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder) {
+ DecodeStatus S = MCDisassembler::Success;
- // A8.6.50 IT
- unsigned short FirstCond = slice(bits7_0, 7, 4);
- if (FirstCond == 0xF) {
- DEBUG(errs() << "Encoding error: IT FirstCond '1111'");
- return false;
+ unsigned Rn = fieldFromInstruction32(Insn, 16, 4);
+ unsigned Rm = fieldFromInstruction32(Insn, 0, 4);
+ unsigned Rd = fieldFromInstruction32(Insn, 12, 4);
+ Rd |= fieldFromInstruction32(Insn, 22, 1) << 4;
+ unsigned size = fieldFromInstruction32(Insn, 10, 2);
+
+ unsigned align = 0;
+ unsigned index = 0;
+ switch (size) {
+ default:
+ return MCDisassembler::Fail;
+ case 0:
+ if (fieldFromInstruction32(Insn, 4, 1))
+ return MCDisassembler::Fail; // UNDEFINED
+ index = fieldFromInstruction32(Insn, 5, 3);
+ break;
+ case 1:
+ if (fieldFromInstruction32(Insn, 5, 1))
+ return MCDisassembler::Fail; // UNDEFINED
+ index = fieldFromInstruction32(Insn, 6, 2);
+ if (fieldFromInstruction32(Insn, 4, 1))
+ align = 2;
+ break;
+ case 2:
+ if (fieldFromInstruction32(Insn, 6, 1))
+ return MCDisassembler::Fail; // UNDEFINED
+ index = fieldFromInstruction32(Insn, 7, 1);
+ if (fieldFromInstruction32(Insn, 4, 2) != 0)
+ align = 4;
}
- if (FirstCond == 0xE && ITCounter != 1) {
- DEBUG(errs() << "Encoding error: IT FirstCond '1110' && Mask != '1000'");
- return false;
+
+ if (Rm != 0xF) { // Writeback
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder)))
+ return MCDisassembler::Fail;
+ }
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder)))
+ return MCDisassembler::Fail;
+ Inst.addOperand(MCOperand::CreateImm(align));
+ if (Rm != 0xF) {
+ if (Rm != 0xD) {
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rm, Address, Decoder)))
+ return MCDisassembler::Fail;
+ } else
+ Inst.addOperand(MCOperand::CreateReg(0));
}
- ITState = bits7_0;
+ if (!Check(S, DecodeDPRRegisterClass(Inst, Rd, Address, Decoder)))
+ return MCDisassembler::Fail;
+ Inst.addOperand(MCOperand::CreateImm(index));
- return true;
+ return S;
}
-/// Update ITState if necessary.
-void Session::UpdateIT() {
- assert(ITCounter);
- --ITCounter;
- if (ITCounter == 0)
- ITState = 0;
- else {
- unsigned short NewITState4_0 = slice(ITState, 4, 0) << 1;
- setSlice(ITState, 4, 0, NewITState4_0);
+
+static DecodeStatus DecodeVLD2LN(llvm::MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder) {
+ DecodeStatus S = MCDisassembler::Success;
+
+ unsigned Rn = fieldFromInstruction32(Insn, 16, 4);
+ unsigned Rm = fieldFromInstruction32(Insn, 0, 4);
+ unsigned Rd = fieldFromInstruction32(Insn, 12, 4);
+ Rd |= fieldFromInstruction32(Insn, 22, 1) << 4;
+ unsigned size = fieldFromInstruction32(Insn, 10, 2);
+
+ unsigned align = 0;
+ unsigned index = 0;
+ unsigned inc = 1;
+ switch (size) {
+ default:
+ return MCDisassembler::Fail;
+ case 0:
+ index = fieldFromInstruction32(Insn, 5, 3);
+ if (fieldFromInstruction32(Insn, 4, 1))
+ align = 2;
+ break;
+ case 1:
+ index = fieldFromInstruction32(Insn, 6, 2);
+ if (fieldFromInstruction32(Insn, 4, 1))
+ align = 4;
+ if (fieldFromInstruction32(Insn, 5, 1))
+ inc = 2;
+ break;
+ case 2:
+ if (fieldFromInstruction32(Insn, 5, 1))
+ return MCDisassembler::Fail; // UNDEFINED
+ index = fieldFromInstruction32(Insn, 7, 1);
+ if (fieldFromInstruction32(Insn, 4, 1) != 0)
+ align = 8;
+ if (fieldFromInstruction32(Insn, 6, 1))
+ inc = 2;
+ break;
+ }
+
+ if (!Check(S, DecodeDPRRegisterClass(Inst, Rd, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodeDPRRegisterClass(Inst, Rd+inc, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (Rm != 0xF) { // Writeback
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder)))
+ return MCDisassembler::Fail;
+ }
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder)))
+ return MCDisassembler::Fail;
+ Inst.addOperand(MCOperand::CreateImm(align));
+ if (Rm != 0xF) {
+ if (Rm != 0xD) {
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rm, Address, Decoder)))
+ return MCDisassembler::Fail;
+ } else
+ Inst.addOperand(MCOperand::CreateReg(0));
}
+
+ if (!Check(S, DecodeDPRRegisterClass(Inst, Rd, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodeDPRRegisterClass(Inst, Rd+inc, Address, Decoder)))
+ return MCDisassembler::Fail;
+ Inst.addOperand(MCOperand::CreateImm(index));
+
+ return S;
}
-static MCDisassembler *createARMDisassembler(const Target &T) {
- return new ARMDisassembler;
+static DecodeStatus DecodeVST2LN(llvm::MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder) {
+ DecodeStatus S = MCDisassembler::Success;
+
+ unsigned Rn = fieldFromInstruction32(Insn, 16, 4);
+ unsigned Rm = fieldFromInstruction32(Insn, 0, 4);
+ unsigned Rd = fieldFromInstruction32(Insn, 12, 4);
+ Rd |= fieldFromInstruction32(Insn, 22, 1) << 4;
+ unsigned size = fieldFromInstruction32(Insn, 10, 2);
+
+ unsigned align = 0;
+ unsigned index = 0;
+ unsigned inc = 1;
+ switch (size) {
+ default:
+ return MCDisassembler::Fail;
+ case 0:
+ index = fieldFromInstruction32(Insn, 5, 3);
+ if (fieldFromInstruction32(Insn, 4, 1))
+ align = 2;
+ break;
+ case 1:
+ index = fieldFromInstruction32(Insn, 6, 2);
+ if (fieldFromInstruction32(Insn, 4, 1))
+ align = 4;
+ if (fieldFromInstruction32(Insn, 5, 1))
+ inc = 2;
+ break;
+ case 2:
+ if (fieldFromInstruction32(Insn, 5, 1))
+ return MCDisassembler::Fail; // UNDEFINED
+ index = fieldFromInstruction32(Insn, 7, 1);
+ if (fieldFromInstruction32(Insn, 4, 1) != 0)
+ align = 8;
+ if (fieldFromInstruction32(Insn, 6, 1))
+ inc = 2;
+ break;
+ }
+
+ if (Rm != 0xF) { // Writeback
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder)))
+ return MCDisassembler::Fail;
+ }
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder)))
+ return MCDisassembler::Fail;
+ Inst.addOperand(MCOperand::CreateImm(align));
+ if (Rm != 0xF) {
+ if (Rm != 0xD) {
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rm, Address, Decoder)))
+ return MCDisassembler::Fail;
+ } else
+ Inst.addOperand(MCOperand::CreateReg(0));
+ }
+
+ if (!Check(S, DecodeDPRRegisterClass(Inst, Rd, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodeDPRRegisterClass(Inst, Rd+inc, Address, Decoder)))
+ return MCDisassembler::Fail;
+ Inst.addOperand(MCOperand::CreateImm(index));
+
+ return S;
}
-static MCDisassembler *createThumbDisassembler(const Target &T) {
- return new ThumbDisassembler;
+
+static DecodeStatus DecodeVLD3LN(llvm::MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder) {
+ DecodeStatus S = MCDisassembler::Success;
+
+ unsigned Rn = fieldFromInstruction32(Insn, 16, 4);
+ unsigned Rm = fieldFromInstruction32(Insn, 0, 4);
+ unsigned Rd = fieldFromInstruction32(Insn, 12, 4);
+ Rd |= fieldFromInstruction32(Insn, 22, 1) << 4;
+ unsigned size = fieldFromInstruction32(Insn, 10, 2);
+
+ unsigned align = 0;
+ unsigned index = 0;
+ unsigned inc = 1;
+ switch (size) {
+ default:
+ return MCDisassembler::Fail;
+ case 0:
+ if (fieldFromInstruction32(Insn, 4, 1))
+ return MCDisassembler::Fail; // UNDEFINED
+ index = fieldFromInstruction32(Insn, 5, 3);
+ break;
+ case 1:
+ if (fieldFromInstruction32(Insn, 4, 1))
+ return MCDisassembler::Fail; // UNDEFINED
+ index = fieldFromInstruction32(Insn, 6, 2);
+ if (fieldFromInstruction32(Insn, 5, 1))
+ inc = 2;
+ break;
+ case 2:
+ if (fieldFromInstruction32(Insn, 4, 2))
+ return MCDisassembler::Fail; // UNDEFINED
+ index = fieldFromInstruction32(Insn, 7, 1);
+ if (fieldFromInstruction32(Insn, 6, 1))
+ inc = 2;
+ break;
+ }
+
+ if (!Check(S, DecodeDPRRegisterClass(Inst, Rd, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodeDPRRegisterClass(Inst, Rd+inc, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodeDPRRegisterClass(Inst, Rd+2*inc, Address, Decoder)))
+ return MCDisassembler::Fail;
+
+ if (Rm != 0xF) { // Writeback
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder)))
+ return MCDisassembler::Fail;
+ }
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder)))
+ return MCDisassembler::Fail;
+ Inst.addOperand(MCOperand::CreateImm(align));
+ if (Rm != 0xF) {
+ if (Rm != 0xD) {
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rm, Address, Decoder)))
+ return MCDisassembler::Fail;
+ } else
+ Inst.addOperand(MCOperand::CreateReg(0));
+ }
+
+ if (!Check(S, DecodeDPRRegisterClass(Inst, Rd, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodeDPRRegisterClass(Inst, Rd+inc, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodeDPRRegisterClass(Inst, Rd+2*inc, Address, Decoder)))
+ return MCDisassembler::Fail;
+ Inst.addOperand(MCOperand::CreateImm(index));
+
+ return S;
}
-extern "C" void LLVMInitializeARMDisassembler() {
- // Register the disassembler.
- TargetRegistry::RegisterMCDisassembler(TheARMTarget,
- createARMDisassembler);
- TargetRegistry::RegisterMCDisassembler(TheThumbTarget,
- createThumbDisassembler);
+static DecodeStatus DecodeVST3LN(llvm::MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder) {
+ DecodeStatus S = MCDisassembler::Success;
+
+ unsigned Rn = fieldFromInstruction32(Insn, 16, 4);
+ unsigned Rm = fieldFromInstruction32(Insn, 0, 4);
+ unsigned Rd = fieldFromInstruction32(Insn, 12, 4);
+ Rd |= fieldFromInstruction32(Insn, 22, 1) << 4;
+ unsigned size = fieldFromInstruction32(Insn, 10, 2);
+
+ unsigned align = 0;
+ unsigned index = 0;
+ unsigned inc = 1;
+ switch (size) {
+ default:
+ return MCDisassembler::Fail;
+ case 0:
+ if (fieldFromInstruction32(Insn, 4, 1))
+ return MCDisassembler::Fail; // UNDEFINED
+ index = fieldFromInstruction32(Insn, 5, 3);
+ break;
+ case 1:
+ if (fieldFromInstruction32(Insn, 4, 1))
+ return MCDisassembler::Fail; // UNDEFINED
+ index = fieldFromInstruction32(Insn, 6, 2);
+ if (fieldFromInstruction32(Insn, 5, 1))
+ inc = 2;
+ break;
+ case 2:
+ if (fieldFromInstruction32(Insn, 4, 2))
+ return MCDisassembler::Fail; // UNDEFINED
+ index = fieldFromInstruction32(Insn, 7, 1);
+ if (fieldFromInstruction32(Insn, 6, 1))
+ inc = 2;
+ break;
+ }
+
+ if (Rm != 0xF) { // Writeback
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder)))
+ return MCDisassembler::Fail;
+ }
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder)))
+ return MCDisassembler::Fail;
+ Inst.addOperand(MCOperand::CreateImm(align));
+ if (Rm != 0xF) {
+ if (Rm != 0xD) {
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rm, Address, Decoder)))
+ return MCDisassembler::Fail;
+ } else
+ Inst.addOperand(MCOperand::CreateReg(0));
+ }
+
+ if (!Check(S, DecodeDPRRegisterClass(Inst, Rd, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodeDPRRegisterClass(Inst, Rd+inc, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodeDPRRegisterClass(Inst, Rd+2*inc, Address, Decoder)))
+ return MCDisassembler::Fail;
+ Inst.addOperand(MCOperand::CreateImm(index));
+
+ return S;
}
-EDInstInfo *ARMDisassembler::getEDInfo() const {
- return instInfoARM;
+
+static DecodeStatus DecodeVLD4LN(llvm::MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder) {
+ DecodeStatus S = MCDisassembler::Success;
+
+ unsigned Rn = fieldFromInstruction32(Insn, 16, 4);
+ unsigned Rm = fieldFromInstruction32(Insn, 0, 4);
+ unsigned Rd = fieldFromInstruction32(Insn, 12, 4);
+ Rd |= fieldFromInstruction32(Insn, 22, 1) << 4;
+ unsigned size = fieldFromInstruction32(Insn, 10, 2);
+
+ unsigned align = 0;
+ unsigned index = 0;
+ unsigned inc = 1;
+ switch (size) {
+ default:
+ return MCDisassembler::Fail;
+ case 0:
+ if (fieldFromInstruction32(Insn, 4, 1))
+ align = 4;
+ index = fieldFromInstruction32(Insn, 5, 3);
+ break;
+ case 1:
+ if (fieldFromInstruction32(Insn, 4, 1))
+ align = 8;
+ index = fieldFromInstruction32(Insn, 6, 2);
+ if (fieldFromInstruction32(Insn, 5, 1))
+ inc = 2;
+ break;
+ case 2:
+ if (fieldFromInstruction32(Insn, 4, 2))
+ align = 4 << fieldFromInstruction32(Insn, 4, 2);
+ index = fieldFromInstruction32(Insn, 7, 1);
+ if (fieldFromInstruction32(Insn, 6, 1))
+ inc = 2;
+ break;
+ }
+
+ if (!Check(S, DecodeDPRRegisterClass(Inst, Rd, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodeDPRRegisterClass(Inst, Rd+inc, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodeDPRRegisterClass(Inst, Rd+2*inc, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodeDPRRegisterClass(Inst, Rd+3*inc, Address, Decoder)))
+ return MCDisassembler::Fail;
+
+ if (Rm != 0xF) { // Writeback
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder)))
+ return MCDisassembler::Fail;
+ }
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder)))
+ return MCDisassembler::Fail;
+ Inst.addOperand(MCOperand::CreateImm(align));
+ if (Rm != 0xF) {
+ if (Rm != 0xD) {
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rm, Address, Decoder)))
+ return MCDisassembler::Fail;
+ } else
+ Inst.addOperand(MCOperand::CreateReg(0));
+ }
+
+ if (!Check(S, DecodeDPRRegisterClass(Inst, Rd, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodeDPRRegisterClass(Inst, Rd+inc, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodeDPRRegisterClass(Inst, Rd+2*inc, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodeDPRRegisterClass(Inst, Rd+3*inc, Address, Decoder)))
+ return MCDisassembler::Fail;
+ Inst.addOperand(MCOperand::CreateImm(index));
+
+ return S;
}
-EDInstInfo *ThumbDisassembler::getEDInfo() const {
- return instInfoARM;
+static DecodeStatus DecodeVST4LN(llvm::MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder) {
+ DecodeStatus S = MCDisassembler::Success;
+
+ unsigned Rn = fieldFromInstruction32(Insn, 16, 4);
+ unsigned Rm = fieldFromInstruction32(Insn, 0, 4);
+ unsigned Rd = fieldFromInstruction32(Insn, 12, 4);
+ Rd |= fieldFromInstruction32(Insn, 22, 1) << 4;
+ unsigned size = fieldFromInstruction32(Insn, 10, 2);
+
+ unsigned align = 0;
+ unsigned index = 0;
+ unsigned inc = 1;
+ switch (size) {
+ default:
+ return MCDisassembler::Fail;
+ case 0:
+ if (fieldFromInstruction32(Insn, 4, 1))
+ align = 4;
+ index = fieldFromInstruction32(Insn, 5, 3);
+ break;
+ case 1:
+ if (fieldFromInstruction32(Insn, 4, 1))
+ align = 8;
+ index = fieldFromInstruction32(Insn, 6, 2);
+ if (fieldFromInstruction32(Insn, 5, 1))
+ inc = 2;
+ break;
+ case 2:
+ if (fieldFromInstruction32(Insn, 4, 2))
+ align = 4 << fieldFromInstruction32(Insn, 4, 2);
+ index = fieldFromInstruction32(Insn, 7, 1);
+ if (fieldFromInstruction32(Insn, 6, 1))
+ inc = 2;
+ break;
+ }
+
+ if (Rm != 0xF) { // Writeback
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder)))
+ return MCDisassembler::Fail;
+ }
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder)))
+ return MCDisassembler::Fail;
+ Inst.addOperand(MCOperand::CreateImm(align));
+ if (Rm != 0xF) {
+ if (Rm != 0xD) {
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rm, Address, Decoder)))
+ return MCDisassembler::Fail;
+ } else
+ Inst.addOperand(MCOperand::CreateReg(0));
+ }
+
+ if (!Check(S, DecodeDPRRegisterClass(Inst, Rd, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodeDPRRegisterClass(Inst, Rd+inc, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodeDPRRegisterClass(Inst, Rd+2*inc, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodeDPRRegisterClass(Inst, Rd+3*inc, Address, Decoder)))
+ return MCDisassembler::Fail;
+ Inst.addOperand(MCOperand::CreateImm(index));
+
+ return S;
+}
+
+static DecodeStatus DecodeVMOVSRR(llvm::MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder) {
+ DecodeStatus S = MCDisassembler::Success;
+ unsigned Rt = fieldFromInstruction32(Insn, 12, 4);
+ unsigned Rt2 = fieldFromInstruction32(Insn, 16, 4);
+ unsigned Rm = fieldFromInstruction32(Insn, 0, 4);
+ unsigned pred = fieldFromInstruction32(Insn, 28, 4);
+  Rm = (Rm << 1) | fieldFromInstruction32(Insn, 5, 1); // Sm is encoded as Vm:M.
+
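+  // t == 15, t2 == 15 and m == 31 are UNPREDICTABLE; decode them anyway
+  // but flag the result as a soft failure.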
+ if (Rt == 0xF || Rt2 == 0xF || Rm == 0x1F)
+ S = MCDisassembler::SoftFail;
+
+ if (!Check(S, DecodeSPRRegisterClass(Inst, Rm , Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodeSPRRegisterClass(Inst, Rm+1, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rt , Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rt2 , Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodePredicateOperand(Inst, pred, Address, Decoder)))
+ return MCDisassembler::Fail;
+
+ return S;
+}
+
+static DecodeStatus DecodeVMOVRRS(llvm::MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder) {
+ DecodeStatus S = MCDisassembler::Success;
+ unsigned Rt = fieldFromInstruction32(Insn, 12, 4);
+ unsigned Rt2 = fieldFromInstruction32(Insn, 16, 4);
+ unsigned Rm = fieldFromInstruction32(Insn, 0, 4);
+ unsigned pred = fieldFromInstruction32(Insn, 28, 4);
+  Rm = (Rm << 1) | fieldFromInstruction32(Insn, 5, 1); // Sm is encoded as Vm:M.
+
+ if (Rt == 0xF || Rt2 == 0xF || Rm == 0x1F)
+ S = MCDisassembler::SoftFail;
+
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rt , Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rt2 , Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodeSPRRegisterClass(Inst, Rm , Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodeSPRRegisterClass(Inst, Rm+1, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodePredicateOperand(Inst, pred, Address, Decoder)))
+ return MCDisassembler::Fail;
+
+ return S;
+}
+
+static DecodeStatus DecodeIT(llvm::MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder) {
+ DecodeStatus S = MCDisassembler::Success;
+ unsigned pred = fieldFromInstruction16(Insn, 4, 4);
+ // The InstPrinter needs to have the low bit of the predicate in
+ // the mask operand to be able to print it properly.
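+  // For example, "ITTE EQ" encodes pred = 0b0000 and mask<3:0> = 0b0110,
+  // so the 5-bit field read below is 0b00110.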
+ unsigned mask = fieldFromInstruction16(Insn, 0, 5);
+
+ if (pred == 0xF) {
+ pred = 0xE;
+ S = MCDisassembler::SoftFail;
+ }
+
+ if ((mask & 0xF) == 0) {
+ // Preserve the high bit of the mask, which is the low bit of
+ // the predicate.
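+    // A mask<3:0> of zero is not a valid IT encoding; rewrite it as a
+    // single-instruction block and report a soft failure.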
+ mask &= 0x10;
+ mask |= 0x8;
+ S = MCDisassembler::SoftFail;
+ }
+
+ Inst.addOperand(MCOperand::CreateImm(pred));
+ Inst.addOperand(MCOperand::CreateImm(mask));
+ return S;
}
+
+static DecodeStatus
+DecodeT2LDRDPreInstruction(llvm::MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder) {
+ DecodeStatus S = MCDisassembler::Success;
+
+ unsigned Rt = fieldFromInstruction32(Insn, 12, 4);
+ unsigned Rt2 = fieldFromInstruction32(Insn, 8, 4);
+ unsigned Rn = fieldFromInstruction32(Insn, 16, 4);
+ unsigned addr = fieldFromInstruction32(Insn, 0, 8);
+ unsigned W = fieldFromInstruction32(Insn, 21, 1);
+ unsigned U = fieldFromInstruction32(Insn, 23, 1);
+ unsigned P = fieldFromInstruction32(Insn, 24, 1);
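+  // Post-indexed addressing (P == 0) always writes back; pre-indexed
+  // addressing (P == 1) writes back only when W is set.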
+  bool writeback = (W == 1) || (P == 0);
+
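+  // Combine Rn, the U (add) bit and imm8 into the single imm8s4
+  // addressing-mode operand consumed by DecodeT2AddrModeImm8s4 below:
+  // addr<12:9> = Rn, addr<8> = U, addr<7:0> = imm8.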
+ addr |= (U << 8) | (Rn << 9);
+
+ if (writeback && (Rn == Rt || Rn == Rt2))
+ Check(S, MCDisassembler::SoftFail);
+ if (Rt == Rt2)
+ Check(S, MCDisassembler::SoftFail);
+
+ // Rt
+ if (!Check(S, DecoderGPRRegisterClass(Inst, Rt, Address, Decoder)))
+ return MCDisassembler::Fail;
+ // Rt2
+ if (!Check(S, DecoderGPRRegisterClass(Inst, Rt2, Address, Decoder)))
+ return MCDisassembler::Fail;
+ // Writeback operand
+ if (!Check(S, DecoderGPRRegisterClass(Inst, Rn, Address, Decoder)))
+ return MCDisassembler::Fail;
+ // addr
+ if (!Check(S, DecodeT2AddrModeImm8s4(Inst, addr, Address, Decoder)))
+ return MCDisassembler::Fail;
+
+ return S;
+}
+
+static DecodeStatus
+DecodeT2STRDPreInstruction(llvm::MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder) {
+ DecodeStatus S = MCDisassembler::Success;
+
+ unsigned Rt = fieldFromInstruction32(Insn, 12, 4);
+ unsigned Rt2 = fieldFromInstruction32(Insn, 8, 4);
+ unsigned Rn = fieldFromInstruction32(Insn, 16, 4);
+ unsigned addr = fieldFromInstruction32(Insn, 0, 8);
+ unsigned W = fieldFromInstruction32(Insn, 21, 1);
+ unsigned U = fieldFromInstruction32(Insn, 23, 1);
+ unsigned P = fieldFromInstruction32(Insn, 24, 1);
+  bool writeback = (W == 1) || (P == 0);
+
+ addr |= (U << 8) | (Rn << 9);
+
+ if (writeback && (Rn == Rt || Rn == Rt2))
+ Check(S, MCDisassembler::SoftFail);
+
+ // Writeback operand
+ if (!Check(S, DecoderGPRRegisterClass(Inst, Rn, Address, Decoder)))
+ return MCDisassembler::Fail;
+ // Rt
+ if (!Check(S, DecoderGPRRegisterClass(Inst, Rt, Address, Decoder)))
+ return MCDisassembler::Fail;
+ // Rt2
+ if (!Check(S, DecoderGPRRegisterClass(Inst, Rt2, Address, Decoder)))
+ return MCDisassembler::Fail;
+ // addr
+ if (!Check(S, DecodeT2AddrModeImm8s4(Inst, addr, Address, Decoder)))
+ return MCDisassembler::Fail;
+
+ return S;
+}
+
+static DecodeStatus DecodeT2Adr(llvm::MCInst &Inst, uint32_t Insn,
+ uint64_t Address, const void *Decoder) {
+ unsigned sign1 = fieldFromInstruction32(Insn, 21, 1);
+ unsigned sign2 = fieldFromInstruction32(Insn, 23, 1);
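+  // The two copies of the sign bit must agree: 0/0 is the ADD (T3) form
+  // of ADR and 1/1 the SUB (T2) form; a mismatch is another instruction.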
+ if (sign1 != sign2) return MCDisassembler::Fail;
+
+ unsigned Val = fieldFromInstruction32(Insn, 0, 8);
+ Val |= fieldFromInstruction32(Insn, 12, 3) << 8;
+ Val |= fieldFromInstruction32(Insn, 26, 1) << 11;
+ Val |= sign1 << 12;
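+  // Val is now sign:i:imm3:imm8; sign-extend it so the SUB form becomes a
+  // negative offset.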
+ Inst.addOperand(MCOperand::CreateImm(SignExtend32<13>(Val)));
+
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus DecodeT2ShifterImmOperand(llvm::MCInst &Inst, uint32_t Val,
+ uint64_t Address,
+ const void *Decoder) {
+ DecodeStatus S = MCDisassembler::Success;
+
+ // Shift of "asr #32" is not allowed in Thumb2 mode.
+ if (Val == 0x20) S = MCDisassembler::SoftFail;
+ Inst.addOperand(MCOperand::CreateImm(Val));
+ return S;
+}
+
diff --git a/lib/Target/ARM/Disassembler/ARMDisassembler.h b/lib/Target/ARM/Disassembler/ARMDisassembler.h
deleted file mode 100644
index 0a74a38..0000000
--- a/lib/Target/ARM/Disassembler/ARMDisassembler.h
+++ /dev/null
@@ -1,99 +0,0 @@
-//===- ARMDisassembler.h - Disassembler for ARM/Thumb ISA -------*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file is part of the ARM Disassembler.
-// It contains the header for ARMDisassembler and ThumbDisassembler, both of
-// which are subclasses of MCDisassembler.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef ARMDISASSEMBLER_H
-#define ARMDISASSEMBLER_H
-
-#include "llvm/MC/MCDisassembler.h"
-
-namespace llvm {
-
-class MCInst;
-class MemoryObject;
-class raw_ostream;
-
-struct EDInstInfo;
-
-/// ARMDisassembler - ARM disassembler for all ARM platforms.
-class ARMDisassembler : public MCDisassembler {
-public:
- /// Constructor - Initializes the disassembler.
- ///
- ARMDisassembler() :
- MCDisassembler() {
- }
-
- ~ARMDisassembler() {
- }
-
- /// getInstruction - See MCDisassembler.
- bool getInstruction(MCInst &instr,
- uint64_t &size,
- const MemoryObject &region,
- uint64_t address,
- raw_ostream &vStream) const;
-
- /// getEDInfo - See MCDisassembler.
- EDInstInfo *getEDInfo() const;
-private:
-};
-
-// Forward declaration.
-class ARMBasicMCBuilder;
-
-/// Session - Keep track of the IT Block progression.
-class Session {
- friend class ARMBasicMCBuilder;
-public:
- Session() : ITCounter(0), ITState(0) {}
- ~Session() {}
- /// InitIT - Initializes ITCounter/ITState.
- bool InitIT(unsigned short bits7_0);
- /// UpdateIT - Updates ITCounter/ITState as IT Block progresses.
- void UpdateIT();
-
-private:
- unsigned ITCounter; // Possible values: 0, 1, 2, 3, 4.
- unsigned ITState; // A2.5.2 Consists of IT[7:5] and IT[4:0] initially.
-};
-
-/// ThumbDisassembler - Thumb disassembler for all ARM platforms.
-class ThumbDisassembler : public MCDisassembler {
-public:
- /// Constructor - Initializes the disassembler.
- ///
- ThumbDisassembler() :
- MCDisassembler(), SO() {
- }
-
- ~ThumbDisassembler() {
- }
-
- /// getInstruction - See MCDisassembler.
- bool getInstruction(MCInst &instr,
- uint64_t &size,
- const MemoryObject &region,
- uint64_t address,
- raw_ostream &vStream) const;
-
- /// getEDInfo - See MCDisassembler.
- EDInstInfo *getEDInfo() const;
-private:
- Session SO;
-};
-
-} // namespace llvm
-
-#endif
diff --git a/lib/Target/ARM/Disassembler/ARMDisassemblerCore.cpp b/lib/Target/ARM/Disassembler/ARMDisassemblerCore.cpp
deleted file mode 100644
index d89c80a..0000000
--- a/lib/Target/ARM/Disassembler/ARMDisassemblerCore.cpp
+++ /dev/null
@@ -1,3818 +0,0 @@
-//===- ARMDisassemblerCore.cpp - ARM disassembler helpers -------*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file is part of the ARM Disassembler.
-// It contains code for the core Builder and DisassembleFP concepts used to
-// disassemble an ARM instr.
-//
-//===----------------------------------------------------------------------===//
-
-#define DEBUG_TYPE "arm-disassembler"
-
-#include "ARMDisassemblerCore.h"
-#include "ARMAddressingModes.h"
-#include "ARMMCExpr.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/raw_ostream.h"
-
-//#define DEBUG(X) do { X; } while (0)
-
-/// ARMGenInstrInfo.inc - ARMGenInstrInfo.inc contains the static const
-/// MCInstrDesc ARMInsts[] definition and the MCOperandInfo[]'s describing the
-/// operand info for each ARMInsts[i].
-///
-/// Together with an instruction's encoding format, we can take advantage of the
-/// NumOperands and the OpInfo fields of the target instruction description in
-/// the quest to build out the MCOperand list for an MCInst.
-///
-/// The general guideline is that with a known format, the number of dst and src
-/// operands is well-known. The dst is built first, followed by the src
-/// operand(s). The operands not yet used at this point are for the Implicit
-/// Uses and Defs by this instr. For the Uses part, the pred:$p operand is
-/// defined with two components:
-///
-/// def pred { // Operand PredicateOperand
-/// ValueType Type = OtherVT;
-/// string PrintMethod = "printPredicateOperand";
-/// string AsmOperandLowerMethod = ?;
-/// dag MIOperandInfo = (ops i32imm, CCR);
-/// AsmOperandClass ParserMatchClass = ImmAsmOperand;
-/// dag DefaultOps = (ops (i32 14), (i32 zero_reg));
-/// }
-///
-/// which is manifested by the MCOperandInfo[] of:
-///
-/// { 0, 0|(1<<MCOI::Predicate), 0 },
-/// { ARM::CCRRegClassID, 0|(1<<MCOI::Predicate), 0 }
-///
-/// So the first predicate MCOperand corresponds to the immediate part of the
-/// ARM condition field (Inst{31-28}), and the second predicate MCOperand
-/// corresponds to a register kind of ARM::CPSR.
-///
-/// For the Defs part, in the simple case of only cc_out:$s, we have:
-///
-/// def cc_out { // Operand OptionalDefOperand
-/// ValueType Type = OtherVT;
-/// string PrintMethod = "printSBitModifierOperand";
-/// string AsmOperandLowerMethod = ?;
-/// dag MIOperandInfo = (ops CCR);
-/// AsmOperandClass ParserMatchClass = ImmAsmOperand;
-/// dag DefaultOps = (ops (i32 zero_reg));
-/// }
-///
-/// which is manifested by the one MCOperandInfo of:
-///
-/// { ARM::CCRRegClassID, 0|(1<<MCOI::OptionalDef), 0 }
-///
-
-namespace llvm {
-extern MCInstrDesc ARMInsts[];
-}
-
-using namespace llvm;
-
-const char *ARMUtils::OpcodeName(unsigned Opcode) {
- return ARMInsts[Opcode].Name;
-}
-
-// Return the register enum based on RegClass and the raw register number.
-// FIXME: Auto-gened?
-static unsigned
-getRegisterEnum(BO B, unsigned RegClassID, unsigned RawRegister) {
- if (RegClassID == ARM::rGPRRegClassID) {
-    // Check for the register numbers 13 and 15, which are not permitted for
-    // many Thumb register specifiers.
- if (RawRegister == 13 || RawRegister == 15) {
- B->SetErr(-1);
- return 0;
- }
- // For this purpose, we can treat rGPR as if it were GPR.
- RegClassID = ARM::GPRRegClassID;
- }
-
- // See also decodeNEONRd(), decodeNEONRn(), decodeNEONRm().
- // A7.3 register encoding
- // Qd -> bit[12] == 0
- // Qn -> bit[16] == 0
- // Qm -> bit[0] == 0
- //
- // If one of these bits is 1, the instruction is UNDEFINED.
- if (RegClassID == ARM::QPRRegClassID && slice(RawRegister, 0, 0) == 1) {
- B->SetErr(-1);
- return 0;
- }
- unsigned RegNum =
- RegClassID == ARM::QPRRegClassID ? RawRegister >> 1 : RawRegister;
-
- switch (RegNum) {
- default:
- break;
- case 0:
- switch (RegClassID) {
- case ARM::GPRRegClassID: case ARM::tGPRRegClassID: return ARM::R0;
- case ARM::DPRRegClassID: case ARM::DPR_8RegClassID:
- case ARM::DPR_VFP2RegClassID:
- return ARM::D0;
- case ARM::QPRRegClassID: case ARM::QPR_8RegClassID:
- case ARM::QPR_VFP2RegClassID:
- return ARM::Q0;
- case ARM::SPRRegClassID: case ARM::SPR_8RegClassID: return ARM::S0;
- }
- break;
- case 1:
- switch (RegClassID) {
- case ARM::GPRRegClassID: case ARM::tGPRRegClassID: return ARM::R1;
- case ARM::DPRRegClassID: case ARM::DPR_8RegClassID:
- case ARM::DPR_VFP2RegClassID:
- return ARM::D1;
- case ARM::QPRRegClassID: case ARM::QPR_8RegClassID:
- case ARM::QPR_VFP2RegClassID:
- return ARM::Q1;
- case ARM::SPRRegClassID: case ARM::SPR_8RegClassID: return ARM::S1;
- }
- break;
- case 2:
- switch (RegClassID) {
- case ARM::GPRRegClassID: case ARM::tGPRRegClassID: return ARM::R2;
- case ARM::DPRRegClassID: case ARM::DPR_8RegClassID:
- case ARM::DPR_VFP2RegClassID:
- return ARM::D2;
- case ARM::QPRRegClassID: case ARM::QPR_8RegClassID:
- case ARM::QPR_VFP2RegClassID:
- return ARM::Q2;
- case ARM::SPRRegClassID: case ARM::SPR_8RegClassID: return ARM::S2;
- }
- break;
- case 3:
- switch (RegClassID) {
- case ARM::GPRRegClassID: case ARM::tGPRRegClassID: return ARM::R3;
- case ARM::DPRRegClassID: case ARM::DPR_8RegClassID:
- case ARM::DPR_VFP2RegClassID:
- return ARM::D3;
- case ARM::QPRRegClassID: case ARM::QPR_8RegClassID:
- case ARM::QPR_VFP2RegClassID:
- return ARM::Q3;
- case ARM::SPRRegClassID: case ARM::SPR_8RegClassID: return ARM::S3;
- }
- break;
- case 4:
- switch (RegClassID) {
- case ARM::GPRRegClassID: case ARM::tGPRRegClassID: return ARM::R4;
- case ARM::DPRRegClassID: case ARM::DPR_8RegClassID:
- case ARM::DPR_VFP2RegClassID:
- return ARM::D4;
- case ARM::QPRRegClassID: case ARM::QPR_VFP2RegClassID: return ARM::Q4;
- case ARM::SPRRegClassID: case ARM::SPR_8RegClassID: return ARM::S4;
- }
- break;
- case 5:
- switch (RegClassID) {
- case ARM::GPRRegClassID: case ARM::tGPRRegClassID: return ARM::R5;
- case ARM::DPRRegClassID: case ARM::DPR_8RegClassID:
- case ARM::DPR_VFP2RegClassID:
- return ARM::D5;
- case ARM::QPRRegClassID: case ARM::QPR_VFP2RegClassID: return ARM::Q5;
- case ARM::SPRRegClassID: case ARM::SPR_8RegClassID: return ARM::S5;
- }
- break;
- case 6:
- switch (RegClassID) {
- case ARM::GPRRegClassID: case ARM::tGPRRegClassID: return ARM::R6;
- case ARM::DPRRegClassID: case ARM::DPR_8RegClassID:
- case ARM::DPR_VFP2RegClassID:
- return ARM::D6;
- case ARM::QPRRegClassID: case ARM::QPR_VFP2RegClassID: return ARM::Q6;
- case ARM::SPRRegClassID: case ARM::SPR_8RegClassID: return ARM::S6;
- }
- break;
- case 7:
- switch (RegClassID) {
- case ARM::GPRRegClassID: case ARM::tGPRRegClassID: return ARM::R7;
- case ARM::DPRRegClassID: case ARM::DPR_8RegClassID:
- case ARM::DPR_VFP2RegClassID:
- return ARM::D7;
- case ARM::QPRRegClassID: case ARM::QPR_VFP2RegClassID: return ARM::Q7;
- case ARM::SPRRegClassID: case ARM::SPR_8RegClassID: return ARM::S7;
- }
- break;
- case 8:
- switch (RegClassID) {
- case ARM::GPRRegClassID: return ARM::R8;
- case ARM::DPRRegClassID: case ARM::DPR_VFP2RegClassID: return ARM::D8;
- case ARM::QPRRegClassID: return ARM::Q8;
- case ARM::SPRRegClassID: case ARM::SPR_8RegClassID: return ARM::S8;
- }
- break;
- case 9:
- switch (RegClassID) {
- case ARM::GPRRegClassID: return ARM::R9;
- case ARM::DPRRegClassID: case ARM::DPR_VFP2RegClassID: return ARM::D9;
- case ARM::QPRRegClassID: return ARM::Q9;
- case ARM::SPRRegClassID: case ARM::SPR_8RegClassID: return ARM::S9;
- }
- break;
- case 10:
- switch (RegClassID) {
- case ARM::GPRRegClassID: return ARM::R10;
- case ARM::DPRRegClassID: case ARM::DPR_VFP2RegClassID: return ARM::D10;
- case ARM::QPRRegClassID: return ARM::Q10;
- case ARM::SPRRegClassID: case ARM::SPR_8RegClassID: return ARM::S10;
- }
- break;
- case 11:
- switch (RegClassID) {
- case ARM::GPRRegClassID: return ARM::R11;
- case ARM::DPRRegClassID: case ARM::DPR_VFP2RegClassID: return ARM::D11;
- case ARM::QPRRegClassID: return ARM::Q11;
- case ARM::SPRRegClassID: case ARM::SPR_8RegClassID: return ARM::S11;
- }
- break;
- case 12:
- switch (RegClassID) {
- case ARM::GPRRegClassID: return ARM::R12;
- case ARM::DPRRegClassID: case ARM::DPR_VFP2RegClassID: return ARM::D12;
- case ARM::QPRRegClassID: return ARM::Q12;
- case ARM::SPRRegClassID: case ARM::SPR_8RegClassID: return ARM::S12;
- }
- break;
- case 13:
- switch (RegClassID) {
- case ARM::GPRRegClassID: return ARM::SP;
- case ARM::DPRRegClassID: case ARM::DPR_VFP2RegClassID: return ARM::D13;
- case ARM::QPRRegClassID: return ARM::Q13;
- case ARM::SPRRegClassID: case ARM::SPR_8RegClassID: return ARM::S13;
- }
- break;
- case 14:
- switch (RegClassID) {
- case ARM::GPRRegClassID: return ARM::LR;
- case ARM::DPRRegClassID: case ARM::DPR_VFP2RegClassID: return ARM::D14;
- case ARM::QPRRegClassID: return ARM::Q14;
- case ARM::SPRRegClassID: case ARM::SPR_8RegClassID: return ARM::S14;
- }
- break;
- case 15:
- switch (RegClassID) {
- case ARM::GPRRegClassID: return ARM::PC;
- case ARM::DPRRegClassID: case ARM::DPR_VFP2RegClassID: return ARM::D15;
- case ARM::QPRRegClassID: return ARM::Q15;
- case ARM::SPRRegClassID: case ARM::SPR_8RegClassID: return ARM::S15;
- }
- break;
- case 16:
- switch (RegClassID) {
- case ARM::DPRRegClassID: return ARM::D16;
- case ARM::SPRRegClassID: return ARM::S16;
- }
- break;
- case 17:
- switch (RegClassID) {
- case ARM::DPRRegClassID: return ARM::D17;
- case ARM::SPRRegClassID: return ARM::S17;
- }
- break;
- case 18:
- switch (RegClassID) {
- case ARM::DPRRegClassID: return ARM::D18;
- case ARM::SPRRegClassID: return ARM::S18;
- }
- break;
- case 19:
- switch (RegClassID) {
- case ARM::DPRRegClassID: return ARM::D19;
- case ARM::SPRRegClassID: return ARM::S19;
- }
- break;
- case 20:
- switch (RegClassID) {
- case ARM::DPRRegClassID: return ARM::D20;
- case ARM::SPRRegClassID: return ARM::S20;
- }
- break;
- case 21:
- switch (RegClassID) {
- case ARM::DPRRegClassID: return ARM::D21;
- case ARM::SPRRegClassID: return ARM::S21;
- }
- break;
- case 22:
- switch (RegClassID) {
- case ARM::DPRRegClassID: return ARM::D22;
- case ARM::SPRRegClassID: return ARM::S22;
- }
- break;
- case 23:
- switch (RegClassID) {
- case ARM::DPRRegClassID: return ARM::D23;
- case ARM::SPRRegClassID: return ARM::S23;
- }
- break;
- case 24:
- switch (RegClassID) {
- case ARM::DPRRegClassID: return ARM::D24;
- case ARM::SPRRegClassID: return ARM::S24;
- }
- break;
- case 25:
- switch (RegClassID) {
- case ARM::DPRRegClassID: return ARM::D25;
- case ARM::SPRRegClassID: return ARM::S25;
- }
- break;
- case 26:
- switch (RegClassID) {
- case ARM::DPRRegClassID: return ARM::D26;
- case ARM::SPRRegClassID: return ARM::S26;
- }
- break;
- case 27:
- switch (RegClassID) {
- case ARM::DPRRegClassID: return ARM::D27;
- case ARM::SPRRegClassID: return ARM::S27;
- }
- break;
- case 28:
- switch (RegClassID) {
- case ARM::DPRRegClassID: return ARM::D28;
- case ARM::SPRRegClassID: return ARM::S28;
- }
- break;
- case 29:
- switch (RegClassID) {
- case ARM::DPRRegClassID: return ARM::D29;
- case ARM::SPRRegClassID: return ARM::S29;
- }
- break;
- case 30:
- switch (RegClassID) {
- case ARM::DPRRegClassID: return ARM::D30;
- case ARM::SPRRegClassID: return ARM::S30;
- }
- break;
- case 31:
- switch (RegClassID) {
- case ARM::DPRRegClassID: return ARM::D31;
- case ARM::SPRRegClassID: return ARM::S31;
- }
- break;
- }
- DEBUG(errs() << "Invalid (RegClassID, RawRegister) combination\n");
- // Encoding error. Mark the builder with error code != 0.
- B->SetErr(-1);
- return 0;
-}
-
-///////////////////////////////
-// //
-// Utility Functions //
-// //
-///////////////////////////////
-
-// Extract/Decode Rd: Inst{15-12}.
-static inline unsigned decodeRd(uint32_t insn) {
- return (insn >> ARMII::RegRdShift) & ARMII::GPRRegMask;
-}
-
-// Extract/Decode Rn: Inst{19-16}.
-static inline unsigned decodeRn(uint32_t insn) {
- return (insn >> ARMII::RegRnShift) & ARMII::GPRRegMask;
-}
-
-// Extract/Decode Rm: Inst{3-0}.
-static inline unsigned decodeRm(uint32_t insn) {
- return (insn & ARMII::GPRRegMask);
-}
-
-// Extract/Decode Rs: Inst{11-8}.
-static inline unsigned decodeRs(uint32_t insn) {
- return (insn >> ARMII::RegRsShift) & ARMII::GPRRegMask;
-}
-
-static inline unsigned getCondField(uint32_t insn) {
- return (insn >> ARMII::CondShift);
-}
-
-static inline unsigned getIBit(uint32_t insn) {
- return (insn >> ARMII::I_BitShift) & 1;
-}
-
-static inline unsigned getAM3IBit(uint32_t insn) {
- return (insn >> ARMII::AM3_I_BitShift) & 1;
-}
-
-static inline unsigned getPBit(uint32_t insn) {
- return (insn >> ARMII::P_BitShift) & 1;
-}
-
-static inline unsigned getUBit(uint32_t insn) {
- return (insn >> ARMII::U_BitShift) & 1;
-}
-
-static inline unsigned getPUBits(uint32_t insn) {
- return (insn >> ARMII::U_BitShift) & 3;
-}
-
-static inline unsigned getSBit(uint32_t insn) {
- return (insn >> ARMII::S_BitShift) & 1;
-}
-
-static inline unsigned getWBit(uint32_t insn) {
- return (insn >> ARMII::W_BitShift) & 1;
-}
-
-static inline unsigned getDBit(uint32_t insn) {
- return (insn >> ARMII::D_BitShift) & 1;
-}
-
-static inline unsigned getNBit(uint32_t insn) {
- return (insn >> ARMII::N_BitShift) & 1;
-}
-
-static inline unsigned getMBit(uint32_t insn) {
- return (insn >> ARMII::M_BitShift) & 1;
-}
-
-// See A8.4 Shifts applied to a register.
-// A8.4.2 Register controlled shifts.
-//
-// getShiftOpcForBits - getShiftOpcForBits translates from the ARM encoding bits
-// into llvm enums for the shift opcode. Callers must pass in a two-bit
-// encoded value; the assert catches incorrect API usage.
-//
-// A8-12: DecodeRegShift()
-static inline ARM_AM::ShiftOpc getShiftOpcForBits(unsigned bits) {
- switch (bits) {
- default: assert(0 && "No such value"); return ARM_AM::no_shift;
- case 0: return ARM_AM::lsl;
- case 1: return ARM_AM::lsr;
- case 2: return ARM_AM::asr;
- case 3: return ARM_AM::ror;
- }
-}
-
-// See A8.4 Shifts applied to a register.
-// A8.4.1 Constant shifts.
-//
-// getImmShiftSE - getImmShiftSE translates from the raw ShiftOpc and raw Imm5
-// encodings into the intended ShiftOpc and shift amount.
-//
-// A8-11: DecodeImmShift()
-static inline void getImmShiftSE(ARM_AM::ShiftOpc &ShOp, unsigned &ShImm) {
- if (ShImm != 0)
- return;
- switch (ShOp) {
- case ARM_AM::no_shift:
- case ARM_AM::rrx:
- break;
- case ARM_AM::lsl:
- ShOp = ARM_AM::no_shift;
- break;
- case ARM_AM::lsr:
- case ARM_AM::asr:
- ShImm = 32;
- break;
- case ARM_AM::ror:
- ShOp = ARM_AM::rrx;
- break;
- }
-}
-
-// getAMSubModeForBits - getAMSubModeForBits translates from the ARM encoding
-// bits Inst{24-23} (P(24) and U(23)) into llvm enums for AMSubMode. Callers
-// must pass in a two-bit encoded value; the assert catches incorrect API
-// usage.
-static inline ARM_AM::AMSubMode getAMSubModeForBits(unsigned bits) {
- switch (bits) {
- default: assert(0 && "No such value"); return ARM_AM::bad_am_submode;
- case 1: return ARM_AM::ia; // P=0 U=1
- case 3: return ARM_AM::ib; // P=1 U=1
- case 0: return ARM_AM::da; // P=0 U=0
- case 2: return ARM_AM::db; // P=1 U=0
- }
-}
-
-////////////////////////////////////////////
-// //
-// Disassemble function definitions //
-// //
-////////////////////////////////////////////
-
-/// There is a separate Disassemble*Frm function entry for disassembly of an ARM
-/// instr into a list of MCOperands in the appropriate order, with possible dst,
-/// followed by possible src(s).
-///
-/// The processing of the predicate, and the 'S' modifier bit, if MI modifies
-/// the CPSR, is factored into ARMBasicMCBuilder's method named
-/// TryPredicateAndSBitModifier.
-
-static bool DisassemblePseudo(MCInst &MI, unsigned Opcode, uint32_t insn,
- unsigned short NumOps, unsigned &NumOpsAdded, BO) {
-
- assert(0 && "Unexpected pseudo instruction!");
- return false;
-}
-
-// A8.6.94 MLA
-// if d == 15 || n == 15 || m == 15 || a == 15 then UNPREDICTABLE;
-//
-// A8.6.105 MUL
-// if d == 15 || n == 15 || m == 15 then UNPREDICTABLE;
-//
-// A8.6.246 UMULL
-// if dLo == 15 || dHi == 15 || n == 15 || m == 15 then UNPREDICTABLE;
-// if dHi == dLo then UNPREDICTABLE;
-static bool BadRegsMulFrm(unsigned Opcode, uint32_t insn) {
- unsigned R19_16 = slice(insn, 19, 16);
- unsigned R15_12 = slice(insn, 15, 12);
- unsigned R11_8 = slice(insn, 11, 8);
- unsigned R3_0 = slice(insn, 3, 0);
- switch (Opcode) {
- default:
- // Did we miss an opcode?
- DEBUG(errs() << "BadRegsMulFrm: unexpected opcode!");
- return false;
- case ARM::MLA: case ARM::MLS: case ARM::SMLABB: case ARM::SMLABT:
- case ARM::SMLATB: case ARM::SMLATT: case ARM::SMLAWB: case ARM::SMLAWT:
- case ARM::SMMLA: case ARM::SMMLAR: case ARM::SMMLS: case ARM::SMMLSR:
- case ARM::USADA8:
- if (R19_16 == 15 || R15_12 == 15 || R11_8 == 15 || R3_0 == 15)
- return true;
- return false;
- case ARM::MUL: case ARM::SMMUL: case ARM::SMMULR:
- case ARM::SMULBB: case ARM::SMULBT: case ARM::SMULTB: case ARM::SMULTT:
- case ARM::SMULWB: case ARM::SMULWT: case ARM::SMUAD: case ARM::SMUADX:
- // A8.6.167 SMLAD & A8.6.172 SMLSD
- case ARM::SMLAD: case ARM::SMLADX: case ARM::SMLSD: case ARM::SMLSDX:
- case ARM::USAD8:
- if (R19_16 == 15 || R11_8 == 15 || R3_0 == 15)
- return true;
- return false;
- case ARM::SMLAL: case ARM::SMULL: case ARM::UMAAL: case ARM::UMLAL:
- case ARM::UMULL:
- case ARM::SMLALBB: case ARM::SMLALBT: case ARM::SMLALTB: case ARM::SMLALTT:
- case ARM::SMLALD: case ARM::SMLALDX: case ARM::SMLSLD: case ARM::SMLSLDX:
- if (R19_16 == 15 || R15_12 == 15 || R11_8 == 15 || R3_0 == 15)
- return true;
- if (R19_16 == R15_12)
- return true;
-    return false;
- }
-}
-
-// Multiply Instructions.
-// MLA, MLS, SMLABB, SMLABT, SMLATB, SMLATT, SMLAWB, SMLAWT, SMMLA, SMMLAR,
-// SMMLS, SMMLSR, SMLAD, SMLADX, SMLSD, SMLSDX, and USADA8 (for convenience):
-// Rd{19-16} Rn{3-0} Rm{11-8} Ra{15-12}
-// But note that register checking for {SMLAD, SMLADX, SMLSD, SMLSDX} is
-// only for {d, n, m}.
-//
-// MUL, SMMUL, SMMULR, SMULBB, SMULBT, SMULTB, SMULTT, SMULWB, SMULWT, SMUAD,
-// SMUADX, and USAD8 (for convenience):
-// Rd{19-16} Rn{3-0} Rm{11-8}
-//
-// SMLAL, SMULL, UMAAL, UMLAL, UMULL, SMLALBB, SMLALBT, SMLALTB, SMLALTT,
-// SMLALD, SMLALDX, SMLSLD, SMLSLDX:
-// RdLo{15-12} RdHi{19-16} Rn{3-0} Rm{11-8}
-//
-// The mapping of the multiply registers to the "regular" ARM registers, where
-// there are convenience decoder functions, is:
-//
-// Inst{15-12} => Rd
-// Inst{19-16} => Rn
-// Inst{3-0} => Rm
-// Inst{11-8} => Rs
-static bool DisassembleMulFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
- unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
-
- const MCInstrDesc &MCID = ARMInsts[Opcode];
- unsigned short NumDefs = MCID.getNumDefs();
- const MCOperandInfo *OpInfo = MCID.OpInfo;
- unsigned &OpIdx = NumOpsAdded;
-
- OpIdx = 0;
-
- assert(NumDefs > 0 && "NumDefs should be greater than 0 for MulFrm");
- assert(NumOps >= 3
- && OpInfo[0].RegClass == ARM::GPRRegClassID
- && OpInfo[1].RegClass == ARM::GPRRegClassID
- && OpInfo[2].RegClass == ARM::GPRRegClassID
- && "Expect three register operands");
-
- // Sanity check for the register encodings.
- if (BadRegsMulFrm(Opcode, insn))
- return false;
-
- // Instructions with two destination registers have RdLo{15-12} first.
- if (NumDefs == 2) {
- assert(NumOps >= 4 && OpInfo[3].RegClass == ARM::GPRRegClassID &&
- "Expect 4th register operand");
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
- decodeRd(insn))));
- ++OpIdx;
- }
-
- // The destination register: RdHi{19-16} or Rd{19-16}.
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
- decodeRn(insn))));
-
-  // The two src registers: Rn{3-0}, then Rm{11-8}.
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
- decodeRm(insn))));
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
- decodeRs(insn))));
- OpIdx += 3;
-
- // Many multiply instructions (e.g., MLA) have three src registers.
- // The third register operand is Ra{15-12}.
- if (OpIdx < NumOps && OpInfo[OpIdx].RegClass == ARM::GPRRegClassID) {
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
- decodeRd(insn))));
- ++OpIdx;
- }
-
- return true;
-}
-
-// Helper routines for disassembly of coprocessor instructions.
-
-static bool LdStCopOpcode(unsigned Opcode) {
- if ((Opcode >= ARM::LDC2L_OFFSET && Opcode <= ARM::LDC_PRE) ||
- (Opcode >= ARM::STC2L_OFFSET && Opcode <= ARM::STC_PRE))
- return true;
- return false;
-}
-static bool CoprocessorOpcode(unsigned Opcode) {
- if (LdStCopOpcode(Opcode))
- return true;
-
- switch (Opcode) {
- default:
- return false;
- case ARM::CDP: case ARM::CDP2:
- case ARM::MCR: case ARM::MCR2: case ARM::MRC: case ARM::MRC2:
- case ARM::MCRR: case ARM::MCRR2: case ARM::MRRC: case ARM::MRRC2:
- return true;
- }
-}
-static inline unsigned GetCoprocessor(uint32_t insn) {
- return slice(insn, 11, 8);
-}
-static inline unsigned GetCopOpc1(uint32_t insn, bool CDP) {
- return CDP ? slice(insn, 23, 20) : slice(insn, 23, 21);
-}
-static inline unsigned GetCopOpc2(uint32_t insn) {
- return slice(insn, 7, 5);
-}
-static inline unsigned GetCopOpc(uint32_t insn) {
- return slice(insn, 7, 4);
-}
-// Most of the operands are in immediate forms, except Rd and Rn, which are ARM
-// core registers.
-//
-// CDP, CDP2: cop opc1 CRd CRn CRm opc2
-//
-// MCR, MCR2, MRC, MRC2: cop opc1 Rd CRn CRm opc2
-//
-// MCRR, MCRR2, MRRC, MRRC2: cop opc Rd Rn CRm
-//
-// LDC_OFFSET, LDC_PRE, LDC_POST: cop CRd Rn R0 [+/-]imm8:00
-// and friends
-// STC_OFFSET, STC_PRE, STC_POST: cop CRd Rn R0 [+/-]imm8:00
-// and friends
-// <-- addrmode2 -->
-//
-// LDC_OPTION: cop CRd Rn imm8
-// and friends
-// STC_OPTION: cop CRd Rn imm8
-// and friends
-//
-static bool DisassembleCoprocessor(MCInst &MI, unsigned Opcode, uint32_t insn,
- unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
-
- assert(NumOps >= 4 && "Num of operands >= 4 for coprocessor instr");
-
- unsigned &OpIdx = NumOpsAdded;
- // A8.6.92
- // if coproc == '101x' then SEE "Advanced SIMD and VFP"
- // But since the special instructions have more explicit encoding bits
- // specified, if coproc == 10 or 11, we should reject it as invalid.
- unsigned coproc = GetCoprocessor(insn);
- if ((Opcode == ARM::MCR || Opcode == ARM::MCRR ||
- Opcode == ARM::MRC || Opcode == ARM::MRRC) &&
- (coproc == 10 || coproc == 11)) {
- DEBUG(errs() << "Encoding error: coproc == 10 or 11 for MCR[R]/MR[R]C\n");
- return false;
- }
-
- bool OneCopOpc = (Opcode == ARM::MCRR || Opcode == ARM::MCRR2 ||
- Opcode == ARM::MRRC || Opcode == ARM::MRRC2);
-
- // CDP/CDP2 has no GPR operand; the opc1 operand is also wider (Inst{23-20}).
- bool NoGPR = (Opcode == ARM::CDP || Opcode == ARM::CDP2);
- bool LdStCop = LdStCopOpcode(Opcode);
- bool RtOut = (Opcode == ARM::MRC || Opcode == ARM::MRC2);
-
- OpIdx = 0;
-
- if (RtOut) {
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
- decodeRd(insn))));
- ++OpIdx;
- }
- MI.addOperand(MCOperand::CreateImm(coproc));
- ++OpIdx;
-
- if (LdStCop) {
-    // Unindexed form if P:W == 0b00 --> _OPTION variant
- unsigned PW = getPBit(insn) << 1 | getWBit(insn);
-
- MI.addOperand(MCOperand::CreateImm(decodeRd(insn)));
-
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
- decodeRn(insn))));
- OpIdx += 2;
-
- if (PW) {
- MI.addOperand(MCOperand::CreateReg(0));
- ARM_AM::AddrOpc AddrOpcode = getUBit(insn) ? ARM_AM::add : ARM_AM::sub;
- const MCInstrDesc &MCID = ARMInsts[Opcode];
- unsigned IndexMode =
- (MCID.TSFlags & ARMII::IndexModeMask) >> ARMII::IndexModeShift;
- unsigned Offset = ARM_AM::getAM2Opc(AddrOpcode, slice(insn, 7, 0) << 2,
- ARM_AM::no_shift, IndexMode);
- MI.addOperand(MCOperand::CreateImm(Offset));
- OpIdx += 2;
- } else {
- MI.addOperand(MCOperand::CreateImm(slice(insn, 7, 0)));
- ++OpIdx;
- }
- } else {
- MI.addOperand(MCOperand::CreateImm(OneCopOpc ? GetCopOpc(insn)
- : GetCopOpc1(insn, NoGPR)));
- ++OpIdx;
-
- if (!RtOut) {
- MI.addOperand(NoGPR ? MCOperand::CreateImm(decodeRd(insn))
- : MCOperand::CreateReg(
- getRegisterEnum(B, ARM::GPRRegClassID,
- decodeRd(insn))));
- ++OpIdx;
- }
-
- MI.addOperand(OneCopOpc ? MCOperand::CreateReg(
- getRegisterEnum(B, ARM::GPRRegClassID,
- decodeRn(insn)))
- : MCOperand::CreateImm(decodeRn(insn)));
-
- MI.addOperand(MCOperand::CreateImm(decodeRm(insn)));
-
- OpIdx += 2;
-
- if (!OneCopOpc) {
- MI.addOperand(MCOperand::CreateImm(GetCopOpc2(insn)));
- ++OpIdx;
- }
- }
-
- return true;
-}
-
-// Branch Instructions.
-// BL: SignExtend(Imm24:'00', 32)
-// Bcc, BL_pred: SignExtend(Imm24:'00', 32) Pred0 Pred1
-// SMC: ZeroExtend(imm4, 32)
-// SVC: ZeroExtend(Imm24, 32)
-//
-// Various coprocessor instructions are assigned BrFrm arbitrarily.
-// Delegates to DisassembleCoprocessor() helper function.
-//
-// MRS/MRSsys: Rd
-// MSR/MSRsys: Rm mask=Inst{19-16}
-// BXJ: Rm
-// MSRi/MSRsysi: so_imm
-// SRSW/SRS: ldstm_mode:$amode mode_imm
-// RFEW/RFE: ldstm_mode:$amode Rn
-static bool DisassembleBrFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
- unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
-
- if (CoprocessorOpcode(Opcode))
- return DisassembleCoprocessor(MI, Opcode, insn, NumOps, NumOpsAdded, B);
-
- const MCOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo;
- if (!OpInfo) return false;
-
- // MRS and MRSsys take one GPR reg Rd.
- if (Opcode == ARM::MRS || Opcode == ARM::MRSsys) {
- assert(NumOps >= 1 && OpInfo[0].RegClass == ARM::GPRRegClassID &&
- "Reg operand expected");
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
- decodeRd(insn))));
- NumOpsAdded = 1;
- return true;
- }
- // BXJ takes one GPR reg Rm.
- if (Opcode == ARM::BXJ) {
- assert(NumOps >= 1 && OpInfo[0].RegClass == ARM::GPRRegClassID &&
- "Reg operand expected");
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
- decodeRm(insn))));
- NumOpsAdded = 1;
- return true;
- }
-  // MSR takes a mask, followed by one GPR reg Rm. The mask contains the R Bit
-  // in bit 4, and the special register fields in bits 3-0.
- if (Opcode == ARM::MSR) {
- assert(NumOps >= 1 && OpInfo[1].RegClass == ARM::GPRRegClassID &&
- "Reg operand expected");
- MI.addOperand(MCOperand::CreateImm(slice(insn, 22, 22) << 4 /* R Bit */ |
- slice(insn, 19, 16) /* Special Reg */ ));
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
- decodeRm(insn))));
- NumOpsAdded = 2;
- return true;
- }
-  // MSRi takes a mask, followed by one so_imm operand. The mask contains the
- // R Bit in bit 4, and the special register fields in bits 3-0.
- if (Opcode == ARM::MSRi) {
- // A5.2.11 MSR (immediate), and hints & B6.1.6 MSR (immediate)
- // The hints instructions have more specific encodings, so if mask == 0,
- // we should reject this as an invalid instruction.
- if (slice(insn, 19, 16) == 0)
- return false;
- MI.addOperand(MCOperand::CreateImm(slice(insn, 22, 22) << 4 /* R Bit */ |
- slice(insn, 19, 16) /* Special Reg */ ));
- // SOImm is 4-bit rotate amount in bits 11-8 with 8-bit imm in bits 7-0.
- // A5.2.4 Rotate amount is twice the numeric value of Inst{11-8}.
- // See also ARMAddressingModes.h: getSOImmValImm() and getSOImmValRot().
- unsigned Rot = (insn >> ARMII::SoRotImmShift) & 0xF;
- unsigned Imm = insn & 0xFF;
- MI.addOperand(MCOperand::CreateImm(ARM_AM::rotr32(Imm, 2*Rot)));
- NumOpsAdded = 2;
- return true;
- }
- if (Opcode == ARM::SRSW || Opcode == ARM::SRS ||
- Opcode == ARM::RFEW || Opcode == ARM::RFE) {
- ARM_AM::AMSubMode SubMode = getAMSubModeForBits(getPUBits(insn));
- MI.addOperand(MCOperand::CreateImm(ARM_AM::getAM4ModeImm(SubMode)));
-
- if (Opcode == ARM::SRSW || Opcode == ARM::SRS)
- MI.addOperand(MCOperand::CreateImm(slice(insn, 4, 0)));
- else
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
- decodeRn(insn))));
- NumOpsAdded = 3;
- return true;
- }
-
- assert((Opcode == ARM::Bcc || Opcode == ARM::BL || Opcode == ARM::BL_pred
- || Opcode == ARM::SMC || Opcode == ARM::SVC) &&
- "Unexpected Opcode");
-
- assert(NumOps >= 1 && OpInfo[0].RegClass < 0 && "Imm operand expected");
-
- int Imm32 = 0;
- if (Opcode == ARM::SMC) {
- // ZeroExtend(imm4, 32) where imm24 = Inst{3-0}.
- Imm32 = slice(insn, 3, 0);
- } else if (Opcode == ARM::SVC) {
- // ZeroExtend(imm24, 32) where imm24 = Inst{23-0}.
- Imm32 = slice(insn, 23, 0);
- } else {
- // SignExtend(imm24:'00', 32) where imm24 = Inst{23-0}.
- unsigned Imm26 = slice(insn, 23, 0) << 2;
- //Imm32 = signextend<signed int, 26>(Imm26);
- Imm32 = SignExtend32<26>(Imm26);
- }
-
- MI.addOperand(MCOperand::CreateImm(Imm32));
- NumOpsAdded = 1;
-
- return true;
-}
-
-// Misc. Branch Instructions.
-// BX_RET, MOVPCLR
-// BLX, BLX_pred, BX, BX_pred
-// BLXi
-static bool DisassembleBrMiscFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
- unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
-
- const MCOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo;
- if (!OpInfo) return false;
-
- unsigned &OpIdx = NumOpsAdded;
-
- OpIdx = 0;
-
- // BX_RET and MOVPCLR have only two predicate operands; do an early return.
- if (Opcode == ARM::BX_RET || Opcode == ARM::MOVPCLR)
- return true;
-
- // BLX and BX take one GPR reg.
- if (Opcode == ARM::BLX || Opcode == ARM::BLX_pred ||
- Opcode == ARM::BX || Opcode == ARM::BX_pred) {
- assert(NumOps >= 1 && OpInfo[OpIdx].RegClass == ARM::GPRRegClassID &&
- "Reg operand expected");
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
- decodeRm(insn))));
- OpIdx = 1;
- return true;
- }
-
- // BLXi takes imm32 (the PC offset).
- if (Opcode == ARM::BLXi) {
- assert(NumOps >= 1 && OpInfo[0].RegClass < 0 && "Imm operand expected");
- // SignExtend(imm24:H:'0', 32) where imm24 = Inst{23-0} and H = Inst{24}.
- unsigned Imm26 = slice(insn, 23, 0) << 2 | slice(insn, 24, 24) << 1;
- int Imm32 = SignExtend32<26>(Imm26);
- MI.addOperand(MCOperand::CreateImm(Imm32));
- OpIdx = 1;
- return true;
- }
-
- return false;
-}
-
-static inline bool getBFCInvMask(uint32_t insn, uint32_t &mask) {
- uint32_t lsb = slice(insn, 11, 7);
- uint32_t msb = slice(insn, 20, 16);
- uint32_t Val = 0;
- if (msb < lsb) {
- DEBUG(errs() << "Encoding error: msb < lsb\n");
- return false;
- }
-
- for (uint32_t i = lsb; i <= msb; ++i)
- Val |= (1 << i);
- mask = ~Val;
- return true;
-}
-
-// Standard data-processing instructions allow PC as a register specifier,
-// but we should reject other DPFrm instructions that use PC as a register.
-static bool BadRegsDPFrm(unsigned Opcode, uint32_t insn) {
- switch (Opcode) {
- default:
- // Did we miss an opcode?
- if (decodeRd(insn) == 15 || decodeRn(insn) == 15 || decodeRm(insn) == 15) {
- DEBUG(errs() << "DPFrm with bad reg specifier(s)\n");
- return true;
- }
- case ARM::ADCrr: case ARM::ADDSrr: case ARM::ADDrr: case ARM::ANDrr:
- case ARM::BICrr: case ARM::CMNzrr: case ARM::CMPrr: case ARM::EORrr:
- case ARM::ORRrr: case ARM::RSBrr: case ARM::RSCrr: case ARM::SBCrr:
- case ARM::SUBSrr: case ARM::SUBrr: case ARM::TEQrr: case ARM::TSTrr:
- return false;
- }
-}
-
-// A major complication is that some of the saturating add/subtract
-// operations have Rd Rm Rn, instead of the "normal" Rd Rn Rm.
-// They are QADD, QDADD, QDSUB, and QSUB.
-static bool DisassembleDPFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
- unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
-
- const MCInstrDesc &MCID = ARMInsts[Opcode];
- unsigned short NumDefs = MCID.getNumDefs();
- bool isUnary = isUnaryDP(MCID.TSFlags);
- const MCOperandInfo *OpInfo = MCID.OpInfo;
- unsigned &OpIdx = NumOpsAdded;
-
- OpIdx = 0;
-
- // Disassemble register def if there is one.
- if (NumDefs && (OpInfo[OpIdx].RegClass == ARM::GPRRegClassID)) {
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
- decodeRd(insn))));
- ++OpIdx;
- }
-
- // Now disassemble the src operands.
- if (OpIdx >= NumOps)
- return false;
-
- // Special-case handling of BFC/BFI/SBFX/UBFX.
- if (Opcode == ARM::BFC || Opcode == ARM::BFI) {
- // A8.6.17 BFC & A8.6.18 BFI
- // Sanity check Rd.
- if (decodeRd(insn) == 15)
- return false;
- MI.addOperand(MCOperand::CreateReg(0));
- if (Opcode == ARM::BFI) {
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
- decodeRm(insn))));
- ++OpIdx;
- }
- uint32_t mask = 0;
- if (!getBFCInvMask(insn, mask))
- return false;
-
- MI.addOperand(MCOperand::CreateImm(mask));
- OpIdx += 2;
- return true;
- }
- if (Opcode == ARM::SBFX || Opcode == ARM::UBFX) {
- // Sanity check Rd and Rm.
- if (decodeRd(insn) == 15 || decodeRm(insn) == 15)
- return false;
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
- decodeRm(insn))));
- MI.addOperand(MCOperand::CreateImm(slice(insn, 11, 7)));
- MI.addOperand(MCOperand::CreateImm(slice(insn, 20, 16) + 1));
- OpIdx += 3;
- return true;
- }
-
- bool RmRn = (Opcode == ARM::QADD || Opcode == ARM::QDADD ||
- Opcode == ARM::QDSUB || Opcode == ARM::QSUB);
-
- // BinaryDP has an Rn operand.
- if (!isUnary) {
- assert(OpInfo[OpIdx].RegClass == ARM::GPRRegClassID &&
- "Reg operand expected");
- MI.addOperand(MCOperand::CreateReg(
- getRegisterEnum(B, ARM::GPRRegClassID,
- RmRn ? decodeRm(insn) : decodeRn(insn))));
- ++OpIdx;
- }
-
- // If this is a two-address operand, skip it, e.g., MOVCCr operand 1.
- if (isUnary && (MCID.getOperandConstraint(OpIdx, MCOI::TIED_TO) != -1)) {
- MI.addOperand(MCOperand::CreateReg(0));
- ++OpIdx;
- }
-
- // Now disassemble operand 2.
- if (OpIdx >= NumOps)
- return false;
-
- if (OpInfo[OpIdx].RegClass == ARM::GPRRegClassID) {
- // We have a reg/reg form.
- // Assert disabled because saturating operations, e.g., A8.6.127 QASX, are
- // routed here as well.
- // assert(getIBit(insn) == 0 && "I_Bit != '0' reg/reg form");
- if (BadRegsDPFrm(Opcode, insn))
- return false;
- MI.addOperand(MCOperand::CreateReg(
- getRegisterEnum(B, ARM::GPRRegClassID,
- RmRn? decodeRn(insn) : decodeRm(insn))));
- ++OpIdx;
- } else if (Opcode == ARM::MOVi16 || Opcode == ARM::MOVTi16) {
- // These two instructions don't allow d as 15.
- if (decodeRd(insn) == 15)
- return false;
- // We have an imm16 = imm4:imm12 (imm4=Inst{19:16}, imm12 = Inst{11:0}).
- assert(getIBit(insn) == 1 && "I_Bit != '1' reg/imm form");
- unsigned Imm16 = slice(insn, 19, 16) << 12 | slice(insn, 11, 0);
- if (!B->tryAddingSymbolicOperand(Imm16, 4, MI))
- MI.addOperand(MCOperand::CreateImm(Imm16));
- ++OpIdx;
- } else {
- // We have a reg/imm form.
- // SOImm is 4-bit rotate amount in bits 11-8 with 8-bit imm in bits 7-0.
- // A5.2.4 Rotate amount is twice the numeric value of Inst{11-8}.
- // See also ARMAddressingModes.h: getSOImmValImm() and getSOImmValRot().
- assert(getIBit(insn) == 1 && "I_Bit != '1' reg/imm form");
- unsigned Rot = (insn >> ARMII::SoRotImmShift) & 0xF;
- unsigned Imm = insn & 0xFF;
- MI.addOperand(MCOperand::CreateImm(ARM_AM::rotr32(Imm, 2*Rot)));
- ++OpIdx;
- }
-
- return true;
-}
-
-static bool DisassembleDPSoRegFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
- unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
-
- const MCInstrDesc &MCID = ARMInsts[Opcode];
- unsigned short NumDefs = MCID.getNumDefs();
- bool isUnary = isUnaryDP(MCID.TSFlags);
- const MCOperandInfo *OpInfo = MCID.OpInfo;
- unsigned &OpIdx = NumOpsAdded;
-
- OpIdx = 0;
-
- // Disassemble register def if there is one.
- if (NumDefs && (OpInfo[OpIdx].RegClass == ARM::GPRRegClassID)) {
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
- decodeRd(insn))));
- ++OpIdx;
- }
-
- // Disassemble the src operands.
- if (OpIdx >= NumOps)
- return false;
-
- // BinaryDP has an Rn operand.
- if (!isUnary) {
- assert(OpInfo[OpIdx].RegClass == ARM::GPRRegClassID &&
- "Reg operand expected");
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
- decodeRn(insn))));
- ++OpIdx;
- }
-
- // If this is a two-address operand, skip it, e.g., MOVCCs operand 1.
- if (isUnary && (MCID.getOperandConstraint(OpIdx, MCOI::TIED_TO) != -1)) {
- MI.addOperand(MCOperand::CreateReg(0));
- ++OpIdx;
- }
-
- // Disassemble operand 2, which consists of three components.
- if (OpIdx + 2 >= NumOps)
- return false;
-
- assert((OpInfo[OpIdx].RegClass == ARM::GPRRegClassID) &&
- (OpInfo[OpIdx+1].RegClass == ARM::GPRRegClassID) &&
- (OpInfo[OpIdx+2].RegClass < 0) &&
- "Expect 3 reg operands");
-
- // Register-controlled shifts have Inst{7} = 0 and Inst{4} = 1.
- unsigned Rs = slice(insn, 4, 4);
-
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
- decodeRm(insn))));
- if (Rs) {
- // If Inst{7} != 0, we should reject this insn as an invalid encoding.
- if (slice(insn, 7, 7))
- return false;
-
- // A8.6.3 ADC (register-shifted register)
- // if d == 15 || n == 15 || m == 15 || s == 15 then UNPREDICTABLE;
- //
- // This also accounts for shift instructions (register) where, fortunately,
- // Inst{19-16} = 0b0000.
- // A8.6.89 LSL (register)
- // if d == 15 || n == 15 || m == 15 then UNPREDICTABLE;
- if (decodeRd(insn) == 15 || decodeRn(insn) == 15 ||
- decodeRm(insn) == 15 || decodeRs(insn) == 15)
- return false;
-
- // Register-controlled shifts: [Rm, Rs, shift].
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
- decodeRs(insn))));
- // Inst{6-5} encodes the shift opcode.
- ARM_AM::ShiftOpc ShOp = getShiftOpcForBits(slice(insn, 6, 5));
- MI.addOperand(MCOperand::CreateImm(ARM_AM::getSORegOpc(ShOp, 0)));
- } else {
- // Constant shifts: [Rm, reg0, shift_imm].
- MI.addOperand(MCOperand::CreateReg(0)); // NoRegister
- // Inst{6-5} encodes the shift opcode.
- ARM_AM::ShiftOpc ShOp = getShiftOpcForBits(slice(insn, 6, 5));
- // Inst{11-7} encodes the imm5 shift amount.
- unsigned ShImm = slice(insn, 11, 7);
-
- // A8.4.1. Possible rrx or shift amount of 32...
- getImmShiftSE(ShOp, ShImm);
- MI.addOperand(MCOperand::CreateImm(ARM_AM::getSORegOpc(ShOp, ShImm)));
- }
- OpIdx += 3;
-
- return true;
-}
-
-static bool BadRegsLdStFrm(unsigned Opcode, uint32_t insn, bool Store, bool WBack,
- bool Imm) {
- const StringRef Name = ARMInsts[Opcode].Name;
- unsigned Rt = decodeRd(insn);
- unsigned Rn = decodeRn(insn);
- unsigned Rm = decodeRm(insn);
- unsigned P = getPBit(insn);
- unsigned W = getWBit(insn);
-
- if (Store) {
-    // STRB does not allow PC as the source (only STR does).
- if (Name.startswith("STRB") && Rt == 15) {
- DEBUG(errs() << "if t == 15 then UNPREDICTABLE\n");
- return true;
- }
- if (WBack && (Rn == 15 || Rn == Rt)) {
- DEBUG(errs() << "if wback && (n == 15 || n == t) then UNPREDICTABLE\n");
- return true;
- }
- if (!Imm && Rm == 15) {
- DEBUG(errs() << "if m == 15 then UNPREDICTABLE\n");
- return true;
- }
- } else {
-    // LDRB does not allow PC as the destination (only LDR does).
- if (Name.startswith("LDRB") && Rt == 15) {
- DEBUG(errs() << "if t == 15 then UNPREDICTABLE\n");
- return true;
- }
- if (Imm) {
- // Immediate
- if (Rn == 15) {
- // The literal form must be in offset mode; it's an encoding error
- // otherwise.
- if (!(P == 1 && W == 0)) {
- DEBUG(errs() << "Ld literal form with !(P == 1 && W == 0)\n");
- return true;
- }
- // LDRB (literal) does not allow PC as the destination.
- if (Opcode != ARM::LDRi12 && Rt == 15) {
- DEBUG(errs() << "if t == 15 then UNPREDICTABLE\n");
- return true;
- }
- } else {
- // Write back while Rn == Rt does not make sense.
- if (WBack && (Rn == Rt)) {
- DEBUG(errs() << "if wback && n == t then UNPREDICTABLE\n");
- return true;
- }
- }
- } else {
- // Register
- if (Rm == 15) {
- DEBUG(errs() << "if m == 15 then UNPREDICTABLE\n");
- return true;
- }
- if (WBack && (Rn == 15 || Rn == Rt)) {
- DEBUG(errs() << "if wback && (n == 15 || n == t) then UNPREDICTABLE\n");
- return true;
- }
- }
- }
- return false;
-}
-
-static bool DisassembleLdStFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
- unsigned short NumOps, unsigned &NumOpsAdded, bool isStore, BO B) {
-
- const MCInstrDesc &MCID = ARMInsts[Opcode];
- bool isPrePost = isPrePostLdSt(MCID.TSFlags);
- const MCOperandInfo *OpInfo = MCID.OpInfo;
- if (!OpInfo) return false;
-
- unsigned &OpIdx = NumOpsAdded;
-
- OpIdx = 0;
-
- assert(((!isStore && MCID.getNumDefs() > 0) ||
- (isStore && (MCID.getNumDefs() == 0 || isPrePost)))
- && "Invalid arguments");
-
-  // Operand 0 of a pre- or post-indexed store is the address base writeback.
- if (isPrePost && isStore) {
- assert(OpInfo[OpIdx].RegClass == ARM::GPRRegClassID &&
- "Reg operand expected");
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
- decodeRn(insn))));
- ++OpIdx;
- }
-
- // Disassemble the dst/src operand.
- if (OpIdx >= NumOps)
- return false;
-
- assert(OpInfo[OpIdx].RegClass == ARM::GPRRegClassID &&
- "Reg operand expected");
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
- decodeRd(insn))));
- ++OpIdx;
-
-  // After the dst of a pre- or post-indexed load comes the base writeback.
- if (isPrePost && !isStore) {
- assert(OpInfo[OpIdx].RegClass == ARM::GPRRegClassID &&
- "Reg operand expected");
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
- decodeRn(insn))));
- ++OpIdx;
- }
-
- // Disassemble the base operand.
- if (OpIdx >= NumOps)
- return false;
-
- assert(OpInfo[OpIdx].RegClass == ARM::GPRRegClassID &&
- "Reg operand expected");
- assert((!isPrePost || (MCID.getOperandConstraint(OpIdx, MCOI::TIED_TO) != -1))
- && "Index mode or tied_to operand expected");
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
- decodeRn(insn))));
- ++OpIdx;
-
-  // For the reg/reg form, the base reg is followed by +/- reg, shift op, imm.
-  // For the immediate form, it is followed by +/- imm12.
- // See also ARMAddressingModes.h (Addressing Mode #2).
- if (OpIdx + 1 >= NumOps)
- return false;
-
- if (BadRegsLdStFrm(Opcode, insn, isStore, isPrePost, getIBit(insn)==0))
- return false;
-
- ARM_AM::AddrOpc AddrOpcode = getUBit(insn) ? ARM_AM::add : ARM_AM::sub;
- unsigned IndexMode =
- (MCID.TSFlags & ARMII::IndexModeMask) >> ARMII::IndexModeShift;
- if (getIBit(insn) == 0) {
- // For pre- and post-indexed case, add a reg0 operand (Addressing Mode #2).
- // Otherwise, skip the reg operand since for addrmode_imm12, Rn has already
- // been populated.
- if (isPrePost) {
- MI.addOperand(MCOperand::CreateReg(0));
- OpIdx += 1;
- }
-
- unsigned Imm12 = slice(insn, 11, 0);
- if (Opcode == ARM::LDRBi12 || Opcode == ARM::LDRi12 ||
- Opcode == ARM::STRBi12 || Opcode == ARM::STRi12) {
- // Disassemble the 12-bit immediate offset, which is the second operand in
- // $addrmode_imm12 => (ops GPR:$base, i32imm:$offsimm).
- int Offset = AddrOpcode == ARM_AM::add ? 1 * Imm12 : -1 * Imm12;
- MI.addOperand(MCOperand::CreateImm(Offset));
- } else {
- // Disassemble the 12-bit immediate offset, which is the second operand in
- // $am2offset => (ops GPR, i32imm).
- unsigned Offset = ARM_AM::getAM2Opc(AddrOpcode, Imm12, ARM_AM::no_shift,
- IndexMode);
- MI.addOperand(MCOperand::CreateImm(Offset));
- }
- OpIdx += 1;
- } else {
- // If Inst{25} = 1 and Inst{4} != 0, we should reject this as invalid.
- if (slice(insn,4,4) == 1)
- return false;
-
- // Disassemble the offset reg (Rm), shift type, and immediate shift length.
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
- decodeRm(insn))));
- // Inst{6-5} encodes the shift opcode.
- ARM_AM::ShiftOpc ShOp = getShiftOpcForBits(slice(insn, 6, 5));
- // Inst{11-7} encodes the imm5 shift amount.
- unsigned ShImm = slice(insn, 11, 7);
-
- // A8.4.1. Possible rrx or shift amount of 32...
- getImmShiftSE(ShOp, ShImm);
- MI.addOperand(MCOperand::CreateImm(
- ARM_AM::getAM2Opc(AddrOpcode, ShImm, ShOp, IndexMode)));
- OpIdx += 2;
- }
-
- return true;
-}
-
-static bool DisassembleLdFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
- unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
- return DisassembleLdStFrm(MI, Opcode, insn, NumOps, NumOpsAdded, false, B);
-}
-
-static bool DisassembleStFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
- unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
- return DisassembleLdStFrm(MI, Opcode, insn, NumOps, NumOpsAdded, true, B);
-}
-
-static bool HasDualReg(unsigned Opcode) {
- switch (Opcode) {
- default:
- return false;
- case ARM::LDRD: case ARM::LDRD_PRE: case ARM::LDRD_POST:
- case ARM::STRD: case ARM::STRD_PRE: case ARM::STRD_POST:
- return true;
- }
-}
-
-static bool DisassembleLdStMiscFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
- unsigned short NumOps, unsigned &NumOpsAdded, bool isStore, BO B) {
-
- const MCInstrDesc &MCID = ARMInsts[Opcode];
- bool isPrePost = isPrePostLdSt(MCID.TSFlags);
- const MCOperandInfo *OpInfo = MCID.OpInfo;
- if (!OpInfo) return false;
-
- unsigned &OpIdx = NumOpsAdded;
-
- OpIdx = 0;
-
- assert(((!isStore && MCID.getNumDefs() > 0) ||
- (isStore && (MCID.getNumDefs() == 0 || isPrePost)))
- && "Invalid arguments");
-
-  // Operand 0 of a pre- or post-indexed store is the address base writeback.
- if (isPrePost && isStore) {
- assert(OpInfo[OpIdx].RegClass == ARM::GPRRegClassID &&
- "Reg operand expected");
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
- decodeRn(insn))));
- ++OpIdx;
- }
-
- // Disassemble the dst/src operand.
- if (OpIdx >= NumOps)
- return false;
-
- assert(OpInfo[OpIdx].RegClass == ARM::GPRRegClassID &&
- "Reg operand expected");
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
- decodeRd(insn))));
- ++OpIdx;
-
-  // Fill in the second register operand (Rt2) of LDRD and STRD.
- if (HasDualReg(Opcode)) {
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
- decodeRd(insn) + 1)));
- ++OpIdx;
- }
-
-  // After the dst of a pre- or post-indexed load comes the base writeback.
- if (isPrePost && !isStore) {
- assert(OpInfo[OpIdx].RegClass == ARM::GPRRegClassID &&
- "Reg operand expected");
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
- decodeRn(insn))));
- ++OpIdx;
- }
-
- // Disassemble the base operand.
- if (OpIdx >= NumOps)
- return false;
-
- assert(OpInfo[OpIdx].RegClass == ARM::GPRRegClassID &&
- "Reg operand expected");
- assert((!isPrePost || (MCID.getOperandConstraint(OpIdx, MCOI::TIED_TO) != -1))
- && "Offset mode or tied_to operand expected");
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
- decodeRn(insn))));
- ++OpIdx;
-
- // For reg/reg form, base reg is followed by +/- reg.
- // For immediate form, it is followed by +/- imm8.
- // See also ARMAddressingModes.h (Addressing Mode #3).
- if (OpIdx + 1 >= NumOps)
- return false;
-
- assert((OpInfo[OpIdx].RegClass == ARM::GPRRegClassID) &&
- (OpInfo[OpIdx+1].RegClass < 0) &&
- "Expect 1 reg operand followed by 1 imm operand");
-
- ARM_AM::AddrOpc AddrOpcode = getUBit(insn) ? ARM_AM::add : ARM_AM::sub;
- unsigned IndexMode =
- (MCID.TSFlags & ARMII::IndexModeMask) >> ARMII::IndexModeShift;
- if (getAM3IBit(insn) == 1) {
- MI.addOperand(MCOperand::CreateReg(0));
-
- // Disassemble the 8-bit immediate offset.
- unsigned Imm4H = (insn >> ARMII::ImmHiShift) & 0xF;
- unsigned Imm4L = insn & 0xF;
- unsigned Offset = ARM_AM::getAM3Opc(AddrOpcode, (Imm4H << 4) | Imm4L,
- IndexMode);
- MI.addOperand(MCOperand::CreateImm(Offset));
- } else {
- // Disassemble the offset reg (Rm).
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
- decodeRm(insn))));
- unsigned Offset = ARM_AM::getAM3Opc(AddrOpcode, 0, IndexMode);
- MI.addOperand(MCOperand::CreateImm(Offset));
- }
- OpIdx += 2;
-
- return true;
-}
-
-static bool DisassembleLdMiscFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
- unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
- return DisassembleLdStMiscFrm(MI, Opcode, insn, NumOps, NumOpsAdded, false,
- B);
-}
-
-static bool DisassembleStMiscFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
- unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
- return DisassembleLdStMiscFrm(MI, Opcode, insn, NumOps, NumOpsAdded, true, B);
-}
-
-// The algorithm for disassembly of LdStMulFrm is different from others because
-// it explicitly populates the two predicate operands after the base register.
-// After that, we need to populate the reglist with each affected register
-// encoded as an MCOperand.
-static bool DisassembleLdStMulFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
- unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
-
- assert(NumOps >= 4 && "LdStMulFrm expects NumOps >= 4");
- NumOpsAdded = 0;
-
- unsigned Base = getRegisterEnum(B, ARM::GPRRegClassID, decodeRn(insn));
-
- // Writeback to base, if necessary.
- if (Opcode == ARM::LDMIA_UPD || Opcode == ARM::STMIA_UPD ||
- Opcode == ARM::LDMDA_UPD || Opcode == ARM::STMDA_UPD ||
- Opcode == ARM::LDMDB_UPD || Opcode == ARM::STMDB_UPD ||
- Opcode == ARM::LDMIB_UPD || Opcode == ARM::STMIB_UPD) {
- MI.addOperand(MCOperand::CreateReg(Base));
- ++NumOpsAdded;
- }
-
- // Add the base register operand.
- MI.addOperand(MCOperand::CreateReg(Base));
-
- // Handling the two predicate operands before the reglist.
- int64_t CondVal = getCondField(insn);
- if (CondVal == 0xF)
- return false;
- MI.addOperand(MCOperand::CreateImm(CondVal));
- MI.addOperand(MCOperand::CreateReg(ARM::CPSR));
-
- NumOpsAdded += 3;
-
- // Fill the variadic part of reglist.
- unsigned RegListBits = insn & ((1 << 16) - 1);
- for (unsigned i = 0; i < 16; ++i) {
- if ((RegListBits >> i) & 1) {
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
- i)));
- ++NumOpsAdded;
- }
- }
-
- return true;
-}
-
-// LDREX, LDREXB, LDREXH: Rd Rn
-// LDREXD: Rd Rd+1 Rn
-// STREX, STREXB, STREXH: Rd Rm Rn
-// STREXD: Rd Rm Rm+1 Rn
-//
-// SWP, SWPB: Rd Rm Rn
-static bool DisassembleLdStExFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
- unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
-
- const MCOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo;
- if (!OpInfo) return false;
-
- unsigned &OpIdx = NumOpsAdded;
-
- OpIdx = 0;
-
- assert(NumOps >= 2
- && OpInfo[0].RegClass == ARM::GPRRegClassID
- && OpInfo[1].RegClass == ARM::GPRRegClassID
- && "Expect 2 reg operands");
-
- bool isStore = slice(insn, 20, 20) == 0;
- bool isDW = (Opcode == ARM::LDREXD || Opcode == ARM::STREXD);
-
- // Add the destination operand.
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
- decodeRd(insn))));
- ++OpIdx;
-
- // Store register Exclusive needs a source operand.
- if (isStore) {
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
- decodeRm(insn))));
- ++OpIdx;
-
- if (isDW) {
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
- decodeRm(insn)+1)));
- ++OpIdx;
- }
- } else if (isDW) {
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
- decodeRd(insn)+1)));
- ++OpIdx;
- }
-
- // Finally add the pointer operand.
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
- decodeRn(insn))));
- ++OpIdx;
-
- return true;
-}
-
-// Misc. Arithmetic Instructions.
-// CLZ: Rd Rm
-// PKHBT, PKHTB: Rd Rn Rm, LSL/ASR #imm5
-// RBIT, REV, REV16, REVSH: Rd Rm
-static bool DisassembleArithMiscFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
- unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
-
- const MCOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo;
- unsigned &OpIdx = NumOpsAdded;
-
- OpIdx = 0;
-
- assert(NumOps >= 2
- && OpInfo[0].RegClass == ARM::GPRRegClassID
- && OpInfo[1].RegClass == ARM::GPRRegClassID
- && "Expect 2 reg operands");
-
- bool ThreeReg = NumOps > 2 && OpInfo[2].RegClass == ARM::GPRRegClassID;
-
- // Sanity check the registers, which should not be 15.
- if (decodeRd(insn) == 15 || decodeRm(insn) == 15)
- return false;
- if (ThreeReg && decodeRn(insn) == 15)
- return false;
-
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
- decodeRd(insn))));
- ++OpIdx;
-
- if (ThreeReg) {
- assert(NumOps >= 4 && "Expect >= 4 operands");
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
- decodeRn(insn))));
- ++OpIdx;
- }
-
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
- decodeRm(insn))));
- ++OpIdx;
-
- // If there is still an operand info left which is an immediate operand, add
- // an additional imm5 LSL/ASR operand.
- if (ThreeReg && OpInfo[OpIdx].RegClass < 0
- && !OpInfo[OpIdx].isPredicate() && !OpInfo[OpIdx].isOptionalDef()) {
- // Extract the 5-bit immediate field Inst{11-7}.
- unsigned ShiftAmt = (insn >> ARMII::ShiftShift) & 0x1F;
- ARM_AM::ShiftOpc Opc = ARM_AM::no_shift;
- if (Opcode == ARM::PKHBT)
- Opc = ARM_AM::lsl;
- else if (Opcode == ARM::PKHTB)
- Opc = ARM_AM::asr;
- getImmShiftSE(Opc, ShiftAmt);
- MI.addOperand(MCOperand::CreateImm(ARM_AM::getSORegOpc(Opc, ShiftAmt)));
- ++OpIdx;
- }
-
- return true;
-}
-
-/// DisassembleSatFrm - Disassemble saturate instructions:
-/// SSAT, SSAT16, USAT, and USAT16.
-static bool DisassembleSatFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
- unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
-
- // A8.6.183 SSAT
- // if d == 15 || n == 15 then UNPREDICTABLE;
- if (decodeRd(insn) == 15 || decodeRm(insn) == 15)
- return false;
-
- const MCInstrDesc &MCID = ARMInsts[Opcode];
- NumOpsAdded = MCID.getNumOperands() - 2; // ignore predicate operands
-
- // Disassemble register def.
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
- decodeRd(insn))));
-
- unsigned Pos = slice(insn, 20, 16);
- if (Opcode == ARM::SSAT || Opcode == ARM::SSAT16)
- Pos += 1;
- MI.addOperand(MCOperand::CreateImm(Pos));
-
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
- decodeRm(insn))));
-
- if (NumOpsAdded == 4) {
- ARM_AM::ShiftOpc Opc = (slice(insn, 6, 6) != 0 ? ARM_AM::asr : ARM_AM::lsl);
- // Inst{11-7} encodes the imm5 shift amount.
- unsigned ShAmt = slice(insn, 11, 7);
- if (ShAmt == 0) {
- // A8.6.183. Possible ASR shift amount of 32...
- if (Opc == ARM_AM::asr)
- ShAmt = 32;
- else
- Opc = ARM_AM::no_shift;
- }
- MI.addOperand(MCOperand::CreateImm(ARM_AM::getSORegOpc(Opc, ShAmt)));
- }
- return true;
-}
-
-// Extend instructions.
-// SXT* and UXT*: Rd [Rn] Rm [rot_imm].
-// The 2nd operand register is Rn and the 3rd operand register is Rm for the
-// three register operand form. Otherwise, Rn=0b1111 and only Rm is used.
-static bool DisassembleExtFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
- unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
-
- // A8.6.220 SXTAB
- // if d == 15 || m == 15 then UNPREDICTABLE;
- if (decodeRd(insn) == 15 || decodeRm(insn) == 15)
- return false;
-
- const MCOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo;
- unsigned &OpIdx = NumOpsAdded;
-
- OpIdx = 0;
-
- assert(NumOps >= 2
- && OpInfo[0].RegClass == ARM::GPRRegClassID
- && OpInfo[1].RegClass == ARM::GPRRegClassID
- && "Expect 2 reg operands");
-
- bool ThreeReg = NumOps > 2 && OpInfo[2].RegClass == ARM::GPRRegClassID;
-
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
- decodeRd(insn))));
- ++OpIdx;
-
- if (ThreeReg) {
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
- decodeRn(insn))));
- ++OpIdx;
- }
-
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
- decodeRm(insn))));
- ++OpIdx;
-
- // If there is still an operand info left which is an immediate operand, add
- // an additional rotate immediate operand.
- if (OpIdx < NumOps && OpInfo[OpIdx].RegClass < 0
- && !OpInfo[OpIdx].isPredicate() && !OpInfo[OpIdx].isOptionalDef()) {
- // Extract the 2-bit rotate field Inst{11-10}.
- unsigned rot = (insn >> ARMII::ExtRotImmShift) & 3;
- // Rotation by 8, 16, or 24 bits.
- MI.addOperand(MCOperand::CreateImm(rot << 3));
- ++OpIdx;
- }
-
- return true;
-}
-
-/////////////////////////////////////
-// //
-// Utility Functions For VFP //
-// //
-/////////////////////////////////////
-
-// Extract/Decode Dd/Sd:
-//
-// SP => d = UInt(Vd:D)
-// DP => d = UInt(D:Vd)
-static unsigned decodeVFPRd(uint32_t insn, bool isSPVFP) {
- return isSPVFP ? (decodeRd(insn) << 1 | getDBit(insn))
- : (decodeRd(insn) | getDBit(insn) << 4);
-}
-
-// Extract/Decode Dn/Sn:
-//
-// SP => n = UInt(Vn:N)
-// DP => n = UInt(N:Vn)
-static unsigned decodeVFPRn(uint32_t insn, bool isSPVFP) {
- return isSPVFP ? (decodeRn(insn) << 1 | getNBit(insn))
- : (decodeRn(insn) | getNBit(insn) << 4);
-}
-
-// Extract/Decode Dm/Sm:
-//
-// SP => m = UInt(Vm:M)
-// DP => m = UInt(M:Vm)
-static unsigned decodeVFPRm(uint32_t insn, bool isSPVFP) {
- return isSPVFP ? (decodeRm(insn) << 1 | getMBit(insn))
- : (decodeRm(insn) | getMBit(insn) << 4);
-}
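-
-// Worked example (editorial, not in the original source): with Vm = 0b0101
-// and M = 1, decodeVFPRm(insn, true) == (5 << 1) | 1 == 11 (S11), while
-// decodeVFPRm(insn, false) == 5 | (1 << 4) == 21 (D21).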
-
-// A7.5.1
-static APInt VFPExpandImm(unsigned char byte, unsigned N) {
- assert(N == 32 || N == 64);
-
- uint64_t Result;
- unsigned bit6 = slice(byte, 6, 6);
- if (N == 32) {
- Result = slice(byte, 7, 7) << 31 | slice(byte, 5, 0) << 19;
- if (bit6)
- Result |= 0x1f << 25;
- else
- Result |= 0x1 << 30;
- } else {
- Result = (uint64_t)slice(byte, 7, 7) << 63 |
- (uint64_t)slice(byte, 5, 0) << 48;
- if (bit6)
- Result |= 0xffULL << 54;
- else
- Result |= 0x1ULL << 62;
- }
- return APInt(N, Result);
-}
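-
-// Worked example (editorial, not in the original source):
-// VFPExpandImm(0x70, 32) has sign = 0 and bit6 = 1, so bits 29-25 become
-// 11111 and bits 24-19 become 110000: exponent field 0x7F with a zero
-// fraction, i.e. the single-precision value 1.0f (FCONSTS #1.0).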
-
-// VFP Unary Format Instructions:
-//
-// VCMP[E]ZD, VCMP[E]ZS: compare one floating-point register with zero
-// VCVTDS, VCVTSD: convert between double-precision and single-precision
-// The rest of the instructions have homogeneous [VFP]Rd and [VFP]Rm registers.
-static bool DisassembleVFPUnaryFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
- unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
-
- assert(NumOps >= 1 && "VFPUnaryFrm expects NumOps >= 1");
-
- const MCOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo;
- unsigned &OpIdx = NumOpsAdded;
-
- OpIdx = 0;
-
- unsigned RegClass = OpInfo[OpIdx].RegClass;
- assert((RegClass == ARM::SPRRegClassID || RegClass == ARM::DPRRegClassID) &&
- "Reg operand expected");
- bool isSP = (RegClass == ARM::SPRRegClassID);
-
- MI.addOperand(MCOperand::CreateReg(
- getRegisterEnum(B, RegClass, decodeVFPRd(insn, isSP))));
- ++OpIdx;
-
- // Early return for compare with zero instructions.
- if (Opcode == ARM::VCMPEZD || Opcode == ARM::VCMPEZS
- || Opcode == ARM::VCMPZD || Opcode == ARM::VCMPZS)
- return true;
-
- RegClass = OpInfo[OpIdx].RegClass;
- assert((RegClass == ARM::SPRRegClassID || RegClass == ARM::DPRRegClassID) &&
- "Reg operand expected");
- isSP = (RegClass == ARM::SPRRegClassID);
-
- MI.addOperand(MCOperand::CreateReg(
- getRegisterEnum(B, RegClass, decodeVFPRm(insn, isSP))));
- ++OpIdx;
-
- return true;
-}
-
-// All the instructions have homogeneous [VFP]Rd, [VFP]Rn, and [VFP]Rm regs.
-// Some of them have operand constraints which tie the first operand in the
-// InOperandList to that of the dst. As far as asm printing is concerned, this
-// tied_to operand is simply skipped.
-static bool DisassembleVFPBinaryFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
- unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
-
- assert(NumOps >= 3 && "VFPBinaryFrm expects NumOps >= 3");
-
- const MCInstrDesc &MCID = ARMInsts[Opcode];
- const MCOperandInfo *OpInfo = MCID.OpInfo;
- unsigned &OpIdx = NumOpsAdded;
-
- OpIdx = 0;
-
- unsigned RegClass = OpInfo[OpIdx].RegClass;
- assert((RegClass == ARM::SPRRegClassID || RegClass == ARM::DPRRegClassID) &&
- "Reg operand expected");
- bool isSP = (RegClass == ARM::SPRRegClassID);
-
- MI.addOperand(MCOperand::CreateReg(
- getRegisterEnum(B, RegClass, decodeVFPRd(insn, isSP))));
- ++OpIdx;
-
- // Skip tied_to operand constraint.
- if (MCID.getOperandConstraint(OpIdx, MCOI::TIED_TO) != -1) {
- assert(NumOps >= 4 && "Expect >=4 operands");
- MI.addOperand(MCOperand::CreateReg(0));
- ++OpIdx;
- }
-
- MI.addOperand(MCOperand::CreateReg(
- getRegisterEnum(B, RegClass, decodeVFPRn(insn, isSP))));
- ++OpIdx;
-
- MI.addOperand(MCOperand::CreateReg(
- getRegisterEnum(B, RegClass, decodeVFPRm(insn, isSP))));
- ++OpIdx;
-
- return true;
-}
-
-// A8.6.295 vcvt (floating-point <-> integer)
-// Int to FP: VSITOD, VSITOS, VUITOD, VUITOS
-// FP to Int: VTOSI[Z|R]D, VTOSI[Z|R]S, VTOUI[Z|R]D, VTOUI[Z|R]S
-//
-// A8.6.297 vcvt (floating-point and fixed-point)
-// Dd|Sd Dd|Sd(TIED_TO) #fbits(= 16|32 - UInt(imm4:i))
-static bool DisassembleVFPConv1Frm(MCInst &MI, unsigned Opcode, uint32_t insn,
- unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
-
- assert(NumOps >= 2 && "VFPConv1Frm expects NumOps >= 2");
-
- const MCInstrDesc &MCID = ARMInsts[Opcode];
- const MCOperandInfo *OpInfo = MCID.OpInfo;
- if (!OpInfo) return false;
-
- bool SP = slice(insn, 8, 8) == 0; // A8.6.295 & A8.6.297
- bool fixed_point = slice(insn, 17, 17) == 1; // A8.6.297
- unsigned RegClassID = SP ? ARM::SPRRegClassID : ARM::DPRRegClassID;
-
- if (fixed_point) {
- // A8.6.297
- assert(NumOps >= 3 && "Expect >= 3 operands");
- int size = slice(insn, 7, 7) == 0 ? 16 : 32;
- int fbits = size - (slice(insn,3,0) << 1 | slice(insn,5,5));
- MI.addOperand(MCOperand::CreateReg(
- getRegisterEnum(B, RegClassID,
- decodeVFPRd(insn, SP))));
-
- assert(MCID.getOperandConstraint(1, MCOI::TIED_TO) != -1 &&
- "Tied to operand expected");
- MI.addOperand(MI.getOperand(0));
-
- assert(OpInfo[2].RegClass < 0 && !OpInfo[2].isPredicate() &&
- !OpInfo[2].isOptionalDef() && "Imm operand expected");
- MI.addOperand(MCOperand::CreateImm(fbits));
-
- NumOpsAdded = 3;
- } else {
- // A8.6.295
- // The Rd (destination) and Rm (source) bits have different interpretations
-    // depending on whether the operand is single precision.
- unsigned d, m;
- if (slice(insn, 18, 18) == 1) { // to_integer operation
- d = decodeVFPRd(insn, true /* Is Single Precision */);
- MI.addOperand(MCOperand::CreateReg(
- getRegisterEnum(B, ARM::SPRRegClassID, d)));
- m = decodeVFPRm(insn, SP);
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, RegClassID, m)));
- } else {
- d = decodeVFPRd(insn, SP);
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, RegClassID, d)));
- m = decodeVFPRm(insn, true /* Is Single Precision */);
- MI.addOperand(MCOperand::CreateReg(
- getRegisterEnum(B, ARM::SPRRegClassID, m)));
- }
- NumOpsAdded = 2;
- }
-
- return true;
-}
-
-// VMOVRS - A8.6.330
-// Rt => Rd; Sn => UInt(Vn:N)
-static bool DisassembleVFPConv2Frm(MCInst &MI, unsigned Opcode, uint32_t insn,
- unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
-
- assert(NumOps >= 2 && "VFPConv2Frm expects NumOps >= 2");
-
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
- decodeRd(insn))));
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::SPRRegClassID,
- decodeVFPRn(insn, true))));
- NumOpsAdded = 2;
- return true;
-}
-
-// VMOVRRD - A8.6.332
-// Rt => Rd; Rt2 => Rn; Dm => UInt(M:Vm)
-//
-// VMOVRRS - A8.6.331
-// Rt => Rd; Rt2 => Rn; Sm => UInt(Vm:M); Sm1 = Sm+1
-static bool DisassembleVFPConv3Frm(MCInst &MI, unsigned Opcode, uint32_t insn,
- unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
-
- assert(NumOps >= 3 && "VFPConv3Frm expects NumOps >= 3");
-
- const MCOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo;
- unsigned &OpIdx = NumOpsAdded;
-
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
- decodeRd(insn))));
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
- decodeRn(insn))));
- OpIdx = 2;
-
- if (OpInfo[OpIdx].RegClass == ARM::SPRRegClassID) {
- unsigned Sm = decodeVFPRm(insn, true);
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::SPRRegClassID,
- Sm)));
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::SPRRegClassID,
- Sm+1)));
- OpIdx += 2;
- } else {
- MI.addOperand(MCOperand::CreateReg(
- getRegisterEnum(B, ARM::DPRRegClassID,
- decodeVFPRm(insn, false))));
- ++OpIdx;
- }
- return true;
-}
-
-// VMOVSR - A8.6.330
-// Rt => Rd; Sn => UInt(Vn:N)
-static bool DisassembleVFPConv4Frm(MCInst &MI, unsigned Opcode, uint32_t insn,
- unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
-
- assert(NumOps >= 2 && "VFPConv4Frm expects NumOps >= 2");
-
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::SPRRegClassID,
- decodeVFPRn(insn, true))));
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
- decodeRd(insn))));
- NumOpsAdded = 2;
- return true;
-}
-
-// VMOVDRR - A8.6.332
-// Rt => Rd; Rt2 => Rn; Dm => UInt(M:Vm)
-//
-// VMOVRRS - A8.6.331
-// Rt => Rd; Rt2 => Rn; Sm => UInt(Vm:M); Sm1 = Sm+1
-static bool DisassembleVFPConv5Frm(MCInst &MI, unsigned Opcode, uint32_t insn,
- unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
-
- assert(NumOps >= 3 && "VFPConv5Frm expects NumOps >= 3");
-
- const MCOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo;
- unsigned &OpIdx = NumOpsAdded;
-
- OpIdx = 0;
-
- if (OpInfo[OpIdx].RegClass == ARM::SPRRegClassID) {
- unsigned Sm = decodeVFPRm(insn, true);
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::SPRRegClassID,
- Sm)));
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::SPRRegClassID,
- Sm+1)));
- OpIdx += 2;
- } else {
- MI.addOperand(MCOperand::CreateReg(
- getRegisterEnum(B, ARM::DPRRegClassID,
- decodeVFPRm(insn, false))));
- ++OpIdx;
- }
-
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
- decodeRd(insn))));
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
- decodeRn(insn))));
- OpIdx += 2;
- return true;
-}
-
-// VFP Load/Store Instructions.
-// VLDRD, VLDRS, VSTRD, VSTRS
-static bool DisassembleVFPLdStFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
- unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
-
- assert(NumOps >= 3 && "VFPLdStFrm expects NumOps >= 3");
-
- bool isSPVFP = (Opcode == ARM::VLDRS || Opcode == ARM::VSTRS);
- unsigned RegClassID = isSPVFP ? ARM::SPRRegClassID : ARM::DPRRegClassID;
-
- // Extract Dd/Sd for operand 0.
- unsigned RegD = decodeVFPRd(insn, isSPVFP);
-
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, RegClassID, RegD)));
-
- unsigned Base = getRegisterEnum(B, ARM::GPRRegClassID, decodeRn(insn));
- MI.addOperand(MCOperand::CreateReg(Base));
-
- // Next comes the AM5 Opcode.
- ARM_AM::AddrOpc AddrOpcode = getUBit(insn) ? ARM_AM::add : ARM_AM::sub;
- unsigned char Imm8 = insn & 0xFF;
- MI.addOperand(MCOperand::CreateImm(ARM_AM::getAM5Opc(AddrOpcode, Imm8)));
-
- NumOpsAdded = 3;
-
- return true;
-}
-
-// VFP Load/Store Multiple Instructions.
-// We have an optional write back reg, the base, and two predicate operands.
-// It is then followed by a reglist of either DPR(s) or SPR(s).
-//
-// VLDMD[_UPD], VLDMS[_UPD], VSTMD[_UPD], VSTMS[_UPD]
-static bool DisassembleVFPLdStMulFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
- unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
-
- assert(NumOps >= 4 && "VFPLdStMulFrm expects NumOps >= 4");
-
- unsigned &OpIdx = NumOpsAdded;
-
- OpIdx = 0;
-
- unsigned Base = getRegisterEnum(B, ARM::GPRRegClassID, decodeRn(insn));
-
- // Writeback to base, if necessary.
- if (Opcode == ARM::VLDMDIA_UPD || Opcode == ARM::VLDMSIA_UPD ||
- Opcode == ARM::VLDMDDB_UPD || Opcode == ARM::VLDMSDB_UPD ||
- Opcode == ARM::VSTMDIA_UPD || Opcode == ARM::VSTMSIA_UPD ||
- Opcode == ARM::VSTMDDB_UPD || Opcode == ARM::VSTMSDB_UPD) {
- MI.addOperand(MCOperand::CreateReg(Base));
- ++OpIdx;
- }
-
- MI.addOperand(MCOperand::CreateReg(Base));
-
- // Handling the two predicate operands before the reglist.
- int64_t CondVal = getCondField(insn);
- if (CondVal == 0xF)
- return false;
- MI.addOperand(MCOperand::CreateImm(CondVal));
- MI.addOperand(MCOperand::CreateReg(ARM::CPSR));
-
- OpIdx += 3;
-
- bool isSPVFP = (Opcode == ARM::VLDMSIA ||
- Opcode == ARM::VLDMSIA_UPD || Opcode == ARM::VLDMSDB_UPD ||
- Opcode == ARM::VSTMSIA ||
- Opcode == ARM::VSTMSIA_UPD || Opcode == ARM::VSTMSDB_UPD);
- unsigned RegClassID = isSPVFP ? ARM::SPRRegClassID : ARM::DPRRegClassID;
-
- // Extract Dd/Sd.
- unsigned RegD = decodeVFPRd(insn, isSPVFP);
-
- // Fill the variadic part of reglist.
- unsigned char Imm8 = insn & 0xFF;
- unsigned Regs = isSPVFP ? Imm8 : Imm8/2;
-
- // Apply some sanity checks before proceeding.
- if (Regs == 0 || (RegD + Regs) > 32 || (!isSPVFP && Regs > 16))
- return false;
-
- for (unsigned i = 0; i < Regs; ++i) {
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, RegClassID,
- RegD + i)));
- ++OpIdx;
- }
-
- return true;
-}
-
-// Misc. VFP Instructions.
-// FMSTAT (vmrs with Rt=0b1111, i.e., to apsr_nzcv and no register operand)
-// FCONSTD (DPR and a VFPf64Imm operand)
-// FCONSTS (SPR and a VFPf32Imm operand)
-// VMRS/VMSR (GPR operand)
-static bool DisassembleVFPMiscFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
- unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
-
- const MCOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo;
- unsigned &OpIdx = NumOpsAdded;
-
- OpIdx = 0;
-
- if (Opcode == ARM::FMSTAT)
- return true;
-
- assert(NumOps >= 2 && "VFPMiscFrm expects >=2 operands");
-
- unsigned RegEnum = 0;
- switch (OpInfo[0].RegClass) {
- case ARM::DPRRegClassID:
- RegEnum = getRegisterEnum(B, ARM::DPRRegClassID, decodeVFPRd(insn, false));
- break;
- case ARM::SPRRegClassID:
- RegEnum = getRegisterEnum(B, ARM::SPRRegClassID, decodeVFPRd(insn, true));
- break;
- case ARM::GPRRegClassID:
- RegEnum = getRegisterEnum(B, ARM::GPRRegClassID, decodeRd(insn));
- break;
- default:
- assert(0 && "Invalid reg class id");
- return false;
- }
-
- MI.addOperand(MCOperand::CreateReg(RegEnum));
- ++OpIdx;
-
- // Extract/decode the f64/f32 immediate.
- if (OpIdx < NumOps && OpInfo[OpIdx].RegClass < 0
- && !OpInfo[OpIdx].isPredicate() && !OpInfo[OpIdx].isOptionalDef()) {
- // The asm syntax specifies the floating point value, not the 8-bit literal.
- APInt immRaw = VFPExpandImm(slice(insn,19,16) << 4 | slice(insn, 3, 0),
- Opcode == ARM::FCONSTD ? 64 : 32);
- APFloat immFP = APFloat(immRaw, true);
- double imm = Opcode == ARM::FCONSTD ? immFP.convertToDouble() :
- immFP.convertToFloat();
- MI.addOperand(MCOperand::CreateFPImm(imm));
-
- ++OpIdx;
- }
-
- return true;
-}
-
-// DisassembleThumbFrm() is defined in ThumbDisassemblerCore.h file.
-#include "ThumbDisassemblerCore.h"
-
-/////////////////////////////////////////////////////
-// //
-// Utility Functions For ARM Advanced SIMD //
-// //
-/////////////////////////////////////////////////////
-
-// The following NEON namings are based on A8.6.266 VABA, VABAL. Notice that
-// A8.6.303 VDUP (ARM core register)'s D/Vd pair is the N/Vn pair of VABA/VABAL.
-
-// A7.3 Register encoding
-
-// Extract/Decode NEON D/Vd:
-//
-// Note that for quadword, Qd = UInt(D:Vd<3:1>) = Inst{22:15-13}, whereas for
-// doubleword, Dd = UInt(D:Vd). We compensate for this difference by
-// handling it in the getRegisterEnum() utility function.
-// D = Inst{22}, Vd = Inst{15-12}
-static unsigned decodeNEONRd(uint32_t insn) {
- return ((insn >> ARMII::NEON_D_BitShift) & 1) << 4
- | ((insn >> ARMII::NEON_RegRdShift) & ARMII::NEONRegMask);
-}
-
-// Extract/Decode NEON N/Vn:
-//
-// Note that for quadword, Qn = UInt(N:Vn<3:1>) = Inst{7:19-17}, whereas for
-// doubleword, Dn = UInt(N:Vn). We compensate for this difference by
-// handling it in the getRegisterEnum() utility function.
-// N = Inst{7}, Vn = Inst{19-16}
-static unsigned decodeNEONRn(uint32_t insn) {
- return ((insn >> ARMII::NEON_N_BitShift) & 1) << 4
- | ((insn >> ARMII::NEON_RegRnShift) & ARMII::NEONRegMask);
-}
-
-// Extract/Decode NEON M/Vm:
-//
-// Note that for quadword, Qm = UInt(M:Vm<3:1>) = Inst{5:3-1}, whereas for
-// doubleword, Dm = UInt(M:Vm). We compensate for this difference by
-// handling it in the getRegisterEnum() utility function.
-// M = Inst{5}, Vm = Inst{3-0}
-static unsigned decodeNEONRm(uint32_t insn) {
- return ((insn >> ARMII::NEON_M_BitShift) & 1) << 4
- | ((insn >> ARMII::NEON_RegRmShift) & ARMII::NEONRegMask);
-}
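-
-// Worked example (editorial, not in the original source): with D = 1 and
-// Vd = 0b0110, decodeNEONRd() returns (1 << 4) | 6 == 22, i.e. D22 for a
-// doubleword operand; for a quadword operand the same encoding denotes
-// Q11 (= 22/2), which getRegisterEnum() compensates for as noted above.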
-
-namespace {
-enum ElemSize {
- ESizeNA = 0,
- ESize8 = 8,
- ESize16 = 16,
- ESize32 = 32,
- ESize64 = 64
-};
-} // End of unnamed namespace
-
-// size field -> Inst{11-10}
-// index_align field -> Inst{7-4}
-//
-// The Lane Index interpretation depends on the Data Size:
-// 8 (encoded as size = 0b00) -> Index = index_align[3:1]
-// 16 (encoded as size = 0b01) -> Index = index_align[3:2]
-// 32 (encoded as size = 0b10) -> Index = index_align[3]
-//
-// Ref: A8.6.317 VLD4 (single 4-element structure to one lane).
-static unsigned decodeLaneIndex(uint32_t insn) {
- unsigned size = insn >> 10 & 3;
- assert((size == 0 || size == 1 || size == 2) &&
- "Encoding error: size should be either 0, 1, or 2");
-
- unsigned index_align = insn >> 4 & 0xF;
- return (index_align >> 1) >> size;
-}
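-
-// Worked example (editorial, not in the original source): for a 16-bit
-// element (size = 0b01) with index_align = 0b0100, the lane index is
-// (0b0100 >> 1) >> 1 == 1, matching Index = index_align[3:2] above.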
-
-// imm64 = AdvSIMDExpandImm(op, cmode, i:imm3:imm4)
-// op = Inst{5}, cmode = Inst{11-8}
-// i = Inst{24} (ARM architecture)
-// imm3 = Inst{18-16}, imm4 = Inst{3-0}
-// Ref: Table A7-15 Modified immediate values for Advanced SIMD instructions.
-static uint64_t decodeN1VImm(uint32_t insn, ElemSize esize) {
- unsigned char op = (insn >> 5) & 1;
- unsigned char cmode = (insn >> 8) & 0xF;
- unsigned char Imm8 = ((insn >> 24) & 1) << 7 |
- ((insn >> 16) & 7) << 4 |
- (insn & 0xF);
- return (op << 12) | (cmode << 8) | Imm8;
-}
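-
-// Editor's note (not in the original source): decodeN1VImm() does not expand
-// the immediate itself (the esize argument is unused); it packs op:cmode:imm8
-// into a 13-bit value and leaves the Table A7-15 expansion to the consumer.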
-
-// A8.6.339 VMUL, VMULL (by scalar)
-// ESize16 => m = Inst{2-0} (Vm<2:0>) D0-D7
-// ESize32 => m = Inst{3-0} (Vm<3:0>) D0-D15
-static unsigned decodeRestrictedDm(uint32_t insn, ElemSize esize) {
- switch (esize) {
- case ESize16:
- return insn & 7;
- case ESize32:
- return insn & 0xF;
- default:
- assert(0 && "Unreachable code!");
- return 0;
- }
-}
-
-// A8.6.339 VMUL, VMULL (by scalar)
-// ESize16 => index = Inst{5:3} (M:Vm<3>) D0-D7
-// ESize32 => index = Inst{5} (M) D0-D15
-static unsigned decodeRestrictedDmIndex(uint32_t insn, ElemSize esize) {
- switch (esize) {
- case ESize16:
- return (((insn >> 5) & 1) << 1) | ((insn >> 3) & 1);
- case ESize32:
- return (insn >> 5) & 1;
- default:
- assert(0 && "Unreachable code!");
- return 0;
- }
-}
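-
-// Worked example (editorial, not in the original source): for ESize16 with
-// M = 1 and Vm = 0b0101, decodeRestrictedDm() yields Dm = Vm<2:0> = 5 (D5)
-// and decodeRestrictedDmIndex() yields index = (1 << 1) | Vm<3> = 2.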
-
-// A8.6.296 VCVT (between floating-point and fixed-point, Advanced SIMD)
-// (64 - <fbits>) is encoded as imm6, i.e., Inst{21-16}.
-static unsigned decodeVCVTFractionBits(uint32_t insn) {
- return 64 - ((insn >> 16) & 0x3F);
-}
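-
-// E.g. (editorial, not in the original source): imm6 = Inst{21-16} = 0b111000
-// (56) encodes <fbits> = 64 - 56 = 8.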
-
-// A8.6.302 VDUP (scalar)
-// ESize8 => index = Inst{19-17}
-// ESize16 => index = Inst{19-18}
-// ESize32 => index = Inst{19}
-static unsigned decodeNVLaneDupIndex(uint32_t insn, ElemSize esize) {
- switch (esize) {
- case ESize8:
- return (insn >> 17) & 7;
- case ESize16:
- return (insn >> 18) & 3;
- case ESize32:
- return (insn >> 19) & 1;
- default:
- assert(0 && "Unspecified element size!");
- return 0;
- }
-}
-
-// A8.6.328 VMOV (ARM core register to scalar)
-// A8.6.329 VMOV (scalar to ARM core register)
-// ESize8 => index = Inst{21:6-5}
-// ESize16 => index = Inst{21:6}
-// ESize32 => index = Inst{21}
-static unsigned decodeNVLaneOpIndex(uint32_t insn, ElemSize esize) {
- switch (esize) {
- case ESize8:
- return ((insn >> 21) & 1) << 2 | ((insn >> 5) & 3);
- case ESize16:
- return ((insn >> 21) & 1) << 1 | ((insn >> 6) & 1);
- case ESize32:
- return ((insn >> 21) & 1);
- default:
- assert(0 && "Unspecified element size!");
- return 0;
- }
-}
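-
-// Worked example (editorial, not in the original source): for ESize8 with
-// Inst{21} = 1 and Inst{6-5} = 0b10, the index is (1 << 2) | 0b10 == 6.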
-
-// Imm6 = Inst{21-16}, L = Inst{7}
-//
-// LeftShift == true (A8.6.367 VQSHL, A8.6.387 VSLI):
-// case L:imm6 of
-// '0001xxx' => esize = 8; shift_amount = imm6 - 8
-// '001xxxx' => esize = 16; shift_amount = imm6 - 16
-// '01xxxxx' => esize = 32; shift_amount = imm6 - 32
-// '1xxxxxx' => esize = 64; shift_amount = imm6
-//
-// LeftShift == false (A8.6.376 VRSHR, A8.6.368 VQSHRN):
-// case L:imm6 of
-// '0001xxx' => esize = 8; shift_amount = 16 - imm6
-// '001xxxx' => esize = 16; shift_amount = 32 - imm6
-// '01xxxxx' => esize = 32; shift_amount = 64 - imm6
-// '1xxxxxx' => esize = 64; shift_amount = 64 - imm6
-//
-static unsigned decodeNVSAmt(uint32_t insn, bool LeftShift) {
- ElemSize esize = ESizeNA;
- unsigned L = (insn >> 7) & 1;
- unsigned imm6 = (insn >> 16) & 0x3F;
- if (L == 0) {
- if (imm6 >> 3 == 1)
- esize = ESize8;
- else if (imm6 >> 4 == 1)
- esize = ESize16;
- else if (imm6 >> 5 == 1)
- esize = ESize32;
- else
- assert(0 && "Wrong encoding of Inst{7:21-16}!");
- } else
- esize = ESize64;
-
- if (LeftShift)
- return esize == ESize64 ? imm6 : (imm6 - esize);
- else
- return esize == ESize64 ? (esize - imm6) : (2*esize - imm6);
-}
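-
-// Worked example (editorial, not in the original source): L = 0 with
-// imm6 = 0b001010 (10) selects esize = 8; a left shift returns 10 - 8 == 2
-// and a right shift returns 2*8 - 10 == 6, matching the '0001xxx' rows above.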
-
-// A8.6.305 VEXT
-// Imm4 = Inst{11-8}
-static unsigned decodeN3VImm(uint32_t insn) {
- return (insn >> 8) & 0xF;
-}
-
-// VLD*
-// D[d] D[d2] ... Rn [TIED_TO Rn] align [Rm]
-// VLD*LN*
-// D[d] D[d2] ... Rn [TIED_TO Rn] align [Rm] TIED_TO ... imm(idx)
-// VST*
-// Rn [TIED_TO Rn] align [Rm] D[d] D[d2] ...
-// VST*LN*
-// Rn [TIED_TO Rn] align [Rm] D[d] D[d2] ... [imm(idx)]
-//
-// Correctly set VLD*/VST*'s TIED_TO GPR, as the asm printer needs it.
-static bool DisassembleNLdSt0(MCInst &MI, unsigned Opcode, uint32_t insn,
- unsigned short NumOps, unsigned &NumOpsAdded, bool Store, bool DblSpaced,
- unsigned alignment, BO B) {
-
- const MCInstrDesc &MCID = ARMInsts[Opcode];
- const MCOperandInfo *OpInfo = MCID.OpInfo;
-
- // At least one DPR register plus addressing mode #6.
- assert(NumOps >= 3 && "Expect >= 3 operands");
-
- unsigned &OpIdx = NumOpsAdded;
-
- OpIdx = 0;
-
- // We have homogeneous NEON registers for Load/Store.
- unsigned RegClass = 0;
-
- // Double-spaced registers have increments of 2.
- unsigned Inc = DblSpaced ? 2 : 1;
-
- unsigned Rn = decodeRn(insn);
- unsigned Rm = decodeRm(insn);
- unsigned Rd = decodeNEONRd(insn);
-
- // A7.7.1 Advanced SIMD addressing mode.
- bool WB = Rm != 15;
-
- // LLVM Addressing Mode #6.
- unsigned RmEnum = 0;
- if (WB && Rm != 13)
- RmEnum = getRegisterEnum(B, ARM::GPRRegClassID, Rm);
-
- if (Store) {
- // Consume possible WB, AddrMode6, possible increment reg, the DPR/QPR's,
- // then possible lane index.
- assert(OpIdx < NumOps && OpInfo[0].RegClass == ARM::GPRRegClassID &&
- "Reg operand expected");
-
- if (WB) {
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
- Rn)));
- ++OpIdx;
- }
-
- assert((OpIdx+1) < NumOps && OpInfo[OpIdx].RegClass == ARM::GPRRegClassID &&
- OpInfo[OpIdx + 1].RegClass < 0 && "Addrmode #6 Operands expected");
- // addrmode6 := (ops GPR:$addr, i32imm)
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
- Rn)));
- MI.addOperand(MCOperand::CreateImm(alignment)); // Alignment
- OpIdx += 2;
-
- if (WB) {
- MI.addOperand(MCOperand::CreateReg(RmEnum));
- ++OpIdx;
- }
-
- assert(OpIdx < NumOps &&
- (OpInfo[OpIdx].RegClass == ARM::DPRRegClassID ||
- OpInfo[OpIdx].RegClass == ARM::QPRRegClassID) &&
- "Reg operand expected");
-
- RegClass = OpInfo[OpIdx].RegClass;
- while (OpIdx < NumOps && (unsigned)OpInfo[OpIdx].RegClass == RegClass) {
- MI.addOperand(MCOperand::CreateReg(
- getRegisterEnum(B, RegClass, Rd)));
- Rd += Inc;
- ++OpIdx;
- }
-
- // Handle possible lane index.
- if (OpIdx < NumOps && OpInfo[OpIdx].RegClass < 0
- && !OpInfo[OpIdx].isPredicate() && !OpInfo[OpIdx].isOptionalDef()) {
- MI.addOperand(MCOperand::CreateImm(decodeLaneIndex(insn)));
- ++OpIdx;
- }
-
- } else {
-    // Consume the DPR/QPR's, possible WB, AddrMode6, possible increment reg,
- // possible TIED_TO DPR/QPR's (ignored), then possible lane index.
- RegClass = OpInfo[0].RegClass;
-
- while (OpIdx < NumOps && (unsigned)OpInfo[OpIdx].RegClass == RegClass) {
- MI.addOperand(MCOperand::CreateReg(
- getRegisterEnum(B, RegClass, Rd)));
- Rd += Inc;
- ++OpIdx;
- }
-
- if (WB) {
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
- Rn)));
- ++OpIdx;
- }
-
- assert((OpIdx+1) < NumOps && OpInfo[OpIdx].RegClass == ARM::GPRRegClassID &&
- OpInfo[OpIdx + 1].RegClass < 0 && "Addrmode #6 Operands expected");
- // addrmode6 := (ops GPR:$addr, i32imm)
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
- Rn)));
- MI.addOperand(MCOperand::CreateImm(alignment)); // Alignment
- OpIdx += 2;
-
- if (WB) {
- MI.addOperand(MCOperand::CreateReg(RmEnum));
- ++OpIdx;
- }
-
- while (OpIdx < NumOps && (unsigned)OpInfo[OpIdx].RegClass == RegClass) {
- assert(MCID.getOperandConstraint(OpIdx, MCOI::TIED_TO) != -1 &&
- "Tied to operand expected");
- MI.addOperand(MCOperand::CreateReg(0));
- ++OpIdx;
- }
-
- // Handle possible lane index.
- if (OpIdx < NumOps && OpInfo[OpIdx].RegClass < 0
- && !OpInfo[OpIdx].isPredicate() && !OpInfo[OpIdx].isOptionalDef()) {
- MI.addOperand(MCOperand::CreateImm(decodeLaneIndex(insn)));
- ++OpIdx;
- }
- }
-
- // Accessing registers past the end of the NEON register file is not
- // defined.
- if (Rd > 32)
- return false;
-
- return true;
-}
-
-// A8.6.308, A8.6.311, A8.6.314, A8.6.317.
-static bool Align4OneLaneInst(unsigned elem, unsigned size,
-                              unsigned index_align, unsigned &alignment) {
- unsigned bits = 0;
- switch (elem) {
- default:
- return false;
- case 1:
- // A8.6.308
- if (size == 0)
- return slice(index_align, 0, 0) == 0;
- else if (size == 1) {
- bits = slice(index_align, 1, 0);
- if (bits != 0 && bits != 1)
- return false;
- if (bits == 1)
- alignment = 16;
- return true;
- } else if (size == 2) {
- bits = slice(index_align, 2, 0);
- if (bits != 0 && bits != 3)
- return false;
- if (bits == 3)
- alignment = 32;
-      return true;
- }
- return true;
- case 2:
- // A8.6.311
- if (size == 0) {
- if (slice(index_align, 0, 0) == 1)
- alignment = 16;
- return true;
-    } else if (size == 1) {
- if (slice(index_align, 0, 0) == 1)
- alignment = 32;
- return true;
- } else if (size == 2) {
- if (slice(index_align, 1, 1) != 0)
- return false;
- if (slice(index_align, 0, 0) == 1)
- alignment = 64;
-      return true;
- }
- return true;
- case 3:
- // A8.6.314
- if (size == 0) {
- if (slice(index_align, 0, 0) != 0)
- return false;
- return true;
-    } else if (size == 1) {
-      if (slice(index_align, 0, 0) != 0)
-        return false;
-      return true;
- } else if (size == 2) {
- if (slice(index_align, 1, 0) != 0)
- return false;
-      return true;
- }
- return true;
- case 4:
- // A8.6.317
- if (size == 0) {
- if (slice(index_align, 0, 0) == 1)
- alignment = 32;
- return true;
-    } else if (size == 1) {
- if (slice(index_align, 0, 0) == 1)
- alignment = 64;
- return true;
- } else if (size == 2) {
- bits = slice(index_align, 1, 0);
- if (bits == 3)
- return false;
- if (bits == 1)
- alignment = 64;
- else if (bits == 2)
- alignment = 128;
-      return true;
- }
- return true;
- }
-}
-
-// A7.7
-// If L (Inst{21}) == 0, store instructions.
-// Find out about double-spaced-ness of the Opcode and pass it on to
-// DisassembleNLdSt0().
-static bool DisassembleNLdSt(MCInst &MI, unsigned Opcode, uint32_t insn,
- unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
-
- const StringRef Name = ARMInsts[Opcode].Name;
- bool DblSpaced = false;
- // 0 represents standard alignment, i.e., unaligned data access.
- unsigned alignment = 0;
-
- unsigned elem = 0; // legal values: {1, 2, 3, 4}
- if (Name.startswith("VST1") || Name.startswith("VLD1"))
- elem = 1;
-
- if (Name.startswith("VST2") || Name.startswith("VLD2"))
- elem = 2;
-
- if (Name.startswith("VST3") || Name.startswith("VLD3"))
- elem = 3;
-
- if (Name.startswith("VST4") || Name.startswith("VLD4"))
- elem = 4;
-
- if (Name.find("LN") != std::string::npos) {
- // To one lane instructions.
- // See, for example, 8.6.317 VLD4 (single 4-element structure to one lane).
-
- // Utility function takes number of elements, size, and index_align.
- if (!Align4OneLaneInst(elem,
- slice(insn, 11, 10),
- slice(insn, 7, 4),
- alignment))
- return false;
-
- // <size> == 16 && Inst{5} == 1 --> DblSpaced = true
- if (Name.endswith("16") || Name.endswith("16_UPD"))
- DblSpaced = slice(insn, 5, 5) == 1;
-
- // <size> == 32 && Inst{6} == 1 --> DblSpaced = true
- if (Name.endswith("32") || Name.endswith("32_UPD"))
- DblSpaced = slice(insn, 6, 6) == 1;
- } else if (Name.find("DUP") != std::string::npos) {
- // Single element (or structure) to all lanes.
- // Inst{9-8} encodes the number of element(s) in the structure, with:
-    // 0b00 (VLD1DUP) (for this, the a bit makes sense only for data sizes 16
-    //                 and 32)
-    // 0b01 (VLD2DUP)
-    // 0b10 (VLD3DUP) (for this, the a bit must be encoded as 0)
- // 0b11 (VLD4DUP)
- //
- // Inst{7-6} encodes the data size, with:
- // 0b00 => 8, 0b01 => 16, 0b10 => 32
- //
- // Inst{4} (the a bit) encodes the align action (0: standard alignment)
- unsigned elem = slice(insn, 9, 8) + 1;
- unsigned a = slice(insn, 4, 4);
- if (elem != 3) {
- // 0b11 is not a valid encoding for Inst{7-6}.
- if (slice(insn, 7, 6) == 3)
- return false;
- unsigned data_size = 8 << slice(insn, 7, 6);
-      // For VLD1DUP, the a bit makes sense only for data sizes of 16 and 32.
- if (a && data_size == 8)
- return false;
-
- // Now we can calculate the alignment!
- if (a)
- alignment = elem * data_size;
- } else {
- if (a) {
- // A8.6.315 VLD3 (single 3-element structure to all lanes)
- // The a bit must be encoded as 0.
- return false;
- }
- }
- } else {
- // Multiple n-element structures with type encoded as Inst{11-8}.
- // See, for example, A8.6.316 VLD4 (multiple 4-element structures).
-
- // Inst{5-4} encodes alignment.
- unsigned align = slice(insn, 5, 4);
- switch (align) {
- default:
- break;
- case 1:
- alignment = 64; break;
- case 2:
- alignment = 128; break;
- case 3:
- alignment = 256; break;
- }
-
- unsigned type = slice(insn, 11, 8);
- // Reject UNDEFINED instructions based on type and align.
- // Plus set DblSpaced flag where appropriate.
- switch (elem) {
- default:
- break;
- case 1:
- // n == 1
- // A8.6.307 & A8.6.391
- if ((type == 7 && slice(align, 1, 1) == 1) ||
- (type == 10 && align == 3) ||
- (type == 6 && slice(align, 1, 1) == 1))
- return false;
- break;
- case 2:
- // n == 2 && type == 0b1001 -> DblSpaced = true
- // A8.6.310 & A8.6.393
- if ((type == 8 || type == 9) && align == 3)
- return false;
- DblSpaced = (type == 9);
- break;
- case 3:
- // n == 3 && type == 0b0101 -> DblSpaced = true
- // A8.6.313 & A8.6.395
- if (slice(insn, 7, 6) == 3 || slice(align, 1, 1) == 1)
- return false;
- DblSpaced = (type == 5);
- break;
- case 4:
- // n == 4 && type == 0b0001 -> DblSpaced = true
- // A8.6.316 & A8.6.397
- if (slice(insn, 7, 6) == 3)
- return false;
- DblSpaced = (type == 1);
- break;
- }
- }
- return DisassembleNLdSt0(MI, Opcode, insn, NumOps, NumOpsAdded,
- slice(insn, 21, 21) == 0, DblSpaced, alignment/8, B);
-}
-
-// VMOV (immediate)
-// Qd/Dd imm
-// VBIC (immediate)
-// VORR (immediate)
-// Qd/Dd imm src(=Qd/Dd)
-static bool DisassembleN1RegModImmFrm(MCInst &MI, unsigned Opcode,
- uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
-
- const MCInstrDesc &MCID = ARMInsts[Opcode];
- const MCOperandInfo *OpInfo = MCID.OpInfo;
-
- assert(NumOps >= 2 &&
- (OpInfo[0].RegClass == ARM::DPRRegClassID ||
- OpInfo[0].RegClass == ARM::QPRRegClassID) &&
- (OpInfo[1].RegClass < 0) &&
- "Expect 1 reg operand followed by 1 imm operand");
-
- // Qd/Dd = Inst{22:15-12} => NEON Rd
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, OpInfo[0].RegClass,
- decodeNEONRd(insn))));
-
- ElemSize esize = ESizeNA;
- switch (Opcode) {
- case ARM::VMOVv8i8:
- case ARM::VMOVv16i8:
- esize = ESize8;
- break;
- case ARM::VMOVv4i16:
- case ARM::VMOVv8i16:
- case ARM::VMVNv4i16:
- case ARM::VMVNv8i16:
- case ARM::VBICiv4i16:
- case ARM::VBICiv8i16:
- case ARM::VORRiv4i16:
- case ARM::VORRiv8i16:
- esize = ESize16;
- break;
- case ARM::VMOVv2i32:
- case ARM::VMOVv4i32:
- case ARM::VMVNv2i32:
- case ARM::VMVNv4i32:
- case ARM::VBICiv2i32:
- case ARM::VBICiv4i32:
- case ARM::VORRiv2i32:
- case ARM::VORRiv4i32:
- esize = ESize32;
- break;
- case ARM::VMOVv1i64:
- case ARM::VMOVv2i64:
- esize = ESize64;
- break;
- default:
- assert(0 && "Unexpected opcode!");
- return false;
- }
-
- // One register and a modified immediate value.
- // Add the imm operand.
- MI.addOperand(MCOperand::CreateImm(decodeN1VImm(insn, esize)));
-
- NumOpsAdded = 2;
-
- // VBIC/VORRiv*i* variants have an extra $src = $Vd to be filled in.
- if (NumOps >= 3 &&
- (OpInfo[2].RegClass == ARM::DPRRegClassID ||
- OpInfo[2].RegClass == ARM::QPRRegClassID)) {
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, OpInfo[0].RegClass,
- decodeNEONRd(insn))));
- NumOpsAdded += 1;
- }
-
- return true;
-}
-
-namespace {
-enum N2VFlag {
- N2V_None,
- N2V_VectorDupLane,
- N2V_VectorConvert_Between_Float_Fixed
-};
-} // End of unnamed namespace
-
-// Vector Convert [between floating-point and fixed-point]
-// Qd/Dd Qm/Dm [fbits]
-//
-// Vector Duplicate Lane (from scalar to all elements) Instructions.
-// VDUPLN16d, VDUPLN16q, VDUPLN32d, VDUPLN32q, VDUPLN8d, VDUPLN8q:
-// Qd/Dd Dm index
-//
-// Vector Move Long:
-// Qd Dm
-//
-// Vector Move Narrow:
-// Dd Qm
-//
-// Others
-static bool DisassembleNVdVmOptImm(MCInst &MI, unsigned Opc, uint32_t insn,
- unsigned short NumOps, unsigned &NumOpsAdded, N2VFlag Flag, BO B) {
-
- const MCInstrDesc &MCID = ARMInsts[Opc];
- const MCOperandInfo *OpInfo = MCID.OpInfo;
-
- assert(NumOps >= 2 &&
- (OpInfo[0].RegClass == ARM::DPRRegClassID ||
- OpInfo[0].RegClass == ARM::QPRRegClassID) &&
- (OpInfo[1].RegClass == ARM::DPRRegClassID ||
- OpInfo[1].RegClass == ARM::QPRRegClassID) &&
- "Expect >= 2 operands and first 2 as reg operands");
-
- unsigned &OpIdx = NumOpsAdded;
-
- OpIdx = 0;
-
- ElemSize esize = ESizeNA;
- if (Flag == N2V_VectorDupLane) {
- // VDUPLN has its index embedded. Its size can be inferred from the Opcode.
- assert(Opc >= ARM::VDUPLN16d && Opc <= ARM::VDUPLN8q &&
- "Unexpected Opcode");
- esize = (Opc == ARM::VDUPLN8d || Opc == ARM::VDUPLN8q) ? ESize8
- : ((Opc == ARM::VDUPLN16d || Opc == ARM::VDUPLN16q) ? ESize16
- : ESize32);
- }
-
- // Qd/Dd = Inst{22:15-12} => NEON Rd
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, OpInfo[OpIdx].RegClass,
- decodeNEONRd(insn))));
- ++OpIdx;
-
- // VPADAL...
- if (MCID.getOperandConstraint(OpIdx, MCOI::TIED_TO) != -1) {
- // TIED_TO operand.
- MI.addOperand(MCOperand::CreateReg(0));
- ++OpIdx;
- }
-
- // Dm = Inst{5:3-0} => NEON Rm
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, OpInfo[OpIdx].RegClass,
- decodeNEONRm(insn))));
- ++OpIdx;
-
- // VZIP and others have two TIED_TO reg operands.
- int Idx;
- while (OpIdx < NumOps &&
- (Idx = MCID.getOperandConstraint(OpIdx, MCOI::TIED_TO)) != -1) {
- // Add TIED_TO operand.
- MI.addOperand(MI.getOperand(Idx));
- ++OpIdx;
- }
-
- // Add the imm operand, if required.
- if (OpIdx < NumOps && OpInfo[OpIdx].RegClass < 0
- && !OpInfo[OpIdx].isPredicate() && !OpInfo[OpIdx].isOptionalDef()) {
-
- unsigned imm = 0xFFFFFFFF;
-
- if (Flag == N2V_VectorDupLane)
- imm = decodeNVLaneDupIndex(insn, esize);
- if (Flag == N2V_VectorConvert_Between_Float_Fixed)
- imm = decodeVCVTFractionBits(insn);
-
- assert(imm != 0xFFFFFFFF && "Internal error");
- MI.addOperand(MCOperand::CreateImm(imm));
- ++OpIdx;
- }
-
- return true;
-}
-
-static bool DisassembleN2RegFrm(MCInst &MI, unsigned Opc, uint32_t insn,
- unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
-
- return DisassembleNVdVmOptImm(MI, Opc, insn, NumOps, NumOpsAdded,
- N2V_None, B);
-}
-static bool DisassembleNVCVTFrm(MCInst &MI, unsigned Opc, uint32_t insn,
- unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
-
- return DisassembleNVdVmOptImm(MI, Opc, insn, NumOps, NumOpsAdded,
- N2V_VectorConvert_Between_Float_Fixed, B);
-}
-static bool DisassembleNVecDupLnFrm(MCInst &MI, unsigned Opc, uint32_t insn,
- unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
-
- return DisassembleNVdVmOptImm(MI, Opc, insn, NumOps, NumOpsAdded,
- N2V_VectorDupLane, B);
-}
-
-// Vector Shift [Accumulate] Instructions.
-// Qd/Dd [Qd/Dd (TIED_TO)] Qm/Dm ShiftAmt
-//
-// Vector Shift Left Long (with maximum shift count) Instructions.
-// VSHLLi16, VSHLLi32, VSHLLi8: Qd Dm imm (== size)
-//
-static bool DisassembleNVectorShift(MCInst &MI, unsigned Opcode, uint32_t insn,
- unsigned short NumOps, unsigned &NumOpsAdded, bool LeftShift, BO B) {
-
- const MCInstrDesc &MCID = ARMInsts[Opcode];
- const MCOperandInfo *OpInfo = MCID.OpInfo;
-
- assert(NumOps >= 3 &&
- (OpInfo[0].RegClass == ARM::DPRRegClassID ||
- OpInfo[0].RegClass == ARM::QPRRegClassID) &&
- (OpInfo[1].RegClass == ARM::DPRRegClassID ||
- OpInfo[1].RegClass == ARM::QPRRegClassID) &&
- "Expect >= 3 operands and first 2 as reg operands");
-
- unsigned &OpIdx = NumOpsAdded;
-
- OpIdx = 0;
-
- // Qd/Dd = Inst{22:15-12} => NEON Rd
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, OpInfo[OpIdx].RegClass,
- decodeNEONRd(insn))));
- ++OpIdx;
-
- if (MCID.getOperandConstraint(OpIdx, MCOI::TIED_TO) != -1) {
- // TIED_TO operand.
- MI.addOperand(MCOperand::CreateReg(0));
- ++OpIdx;
- }
-
- assert((OpInfo[OpIdx].RegClass == ARM::DPRRegClassID ||
- OpInfo[OpIdx].RegClass == ARM::QPRRegClassID) &&
- "Reg operand expected");
-
- // Qm/Dm = Inst{5:3-0} => NEON Rm
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, OpInfo[OpIdx].RegClass,
- decodeNEONRm(insn))));
- ++OpIdx;
-
- assert(OpInfo[OpIdx].RegClass < 0 && "Imm operand expected");
-
- // Add the imm operand.
-
- // VSHLL has maximum shift count as the imm, inferred from its size.
- unsigned Imm;
- switch (Opcode) {
- default:
- Imm = decodeNVSAmt(insn, LeftShift);
- break;
- case ARM::VSHLLi8:
- Imm = 8;
- break;
- case ARM::VSHLLi16:
- Imm = 16;
- break;
- case ARM::VSHLLi32:
- Imm = 32;
- break;
- }
- MI.addOperand(MCOperand::CreateImm(Imm));
- ++OpIdx;
-
- return true;
-}
-
-// Left shift instructions.
-static bool DisassembleN2RegVecShLFrm(MCInst &MI, unsigned Opcode,
- uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
-
- return DisassembleNVectorShift(MI, Opcode, insn, NumOps, NumOpsAdded, true,
- B);
-}
-// Right shift instructions have different shift amount interpretation.
-static bool DisassembleN2RegVecShRFrm(MCInst &MI, unsigned Opcode,
- uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
-
- return DisassembleNVectorShift(MI, Opcode, insn, NumOps, NumOpsAdded, false,
- B);
-}
-
-namespace {
-enum N3VFlag {
- N3V_None,
- N3V_VectorExtract,
- N3V_VectorShift,
- N3V_Multiply_By_Scalar
-};
-} // End of unnamed namespace
-
-// NEON Three Register Instructions with Optional Immediate Operand
-//
-// Vector Extract Instructions.
-// Qd/Dd Qn/Dn Qm/Dm imm4
-//
-// Vector Shift (Register) Instructions.
-// Qd/Dd Qm/Dm Qn/Dn (notice the order of m, n)
-//
-// Vector Multiply [Accumulate/Subtract] [Long] By Scalar Instructions.
-// Qd/Dd Qn/Dn RestrictedDm index
-//
-// Others
-static bool DisassembleNVdVnVmOptImm(MCInst &MI, unsigned Opcode, uint32_t insn,
- unsigned short NumOps, unsigned &NumOpsAdded, N3VFlag Flag, BO B) {
-
- const MCInstrDesc &MCID = ARMInsts[Opcode];
- const MCOperandInfo *OpInfo = MCID.OpInfo;
-
- // No checking for OpInfo[2] because of MOVDneon/MOVQ with only two regs.
- assert(NumOps >= 3 &&
- (OpInfo[0].RegClass == ARM::DPRRegClassID ||
- OpInfo[0].RegClass == ARM::QPRRegClassID) &&
- (OpInfo[1].RegClass == ARM::DPRRegClassID ||
- OpInfo[1].RegClass == ARM::QPRRegClassID) &&
- "Expect >= 3 operands and first 2 as reg operands");
-
- unsigned &OpIdx = NumOpsAdded;
-
- OpIdx = 0;
-
- bool VdVnVm = Flag != N3V_VectorShift;
- bool IsImm4 = Flag == N3V_VectorExtract;
- bool IsDmRestricted = Flag == N3V_Multiply_By_Scalar;
- ElemSize esize = ESizeNA;
- if (Flag == N3V_Multiply_By_Scalar) {
- unsigned size = (insn >> 20) & 3;
- if (size == 1) esize = ESize16;
- if (size == 2) esize = ESize32;
- assert (esize == ESize16 || esize == ESize32);
- }
-
- // Qd/Dd = Inst{22:15-12} => NEON Rd
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, OpInfo[OpIdx].RegClass,
- decodeNEONRd(insn))));
- ++OpIdx;
-
- // VABA, VABAL, VBSLd, VBSLq, ...
- if (MCID.getOperandConstraint(OpIdx, MCOI::TIED_TO) != -1) {
- // TIED_TO operand.
- MI.addOperand(MCOperand::CreateReg(0));
- ++OpIdx;
- }
-
- // Dn = Inst{7:19-16} => NEON Rn
- // or
- // Dm = Inst{5:3-0} => NEON Rm
- MI.addOperand(MCOperand::CreateReg(
- getRegisterEnum(B, OpInfo[OpIdx].RegClass,
- VdVnVm ? decodeNEONRn(insn)
- : decodeNEONRm(insn))));
- ++OpIdx;
-
- // Dm = Inst{5:3-0} => NEON Rm
- // or
- // Dm is restricted to D0-D7 if size is 16, D0-D15 otherwise
- // or
- // Dn = Inst{7:19-16} => NEON Rn
- unsigned m = VdVnVm ? (IsDmRestricted ? decodeRestrictedDm(insn, esize)
- : decodeNEONRm(insn))
- : decodeNEONRn(insn);
-
- MI.addOperand(MCOperand::CreateReg(
- getRegisterEnum(B, OpInfo[OpIdx].RegClass, m)));
- ++OpIdx;
-
- if (OpIdx < NumOps && OpInfo[OpIdx].RegClass < 0
- && !OpInfo[OpIdx].isPredicate() && !OpInfo[OpIdx].isOptionalDef()) {
- // Add the imm operand.
- unsigned Imm = 0;
- if (IsImm4)
- Imm = decodeN3VImm(insn);
- else if (IsDmRestricted)
- Imm = decodeRestrictedDmIndex(insn, esize);
- else {
- assert(0 && "Internal error: unreachable code!");
- return false;
- }
-
- MI.addOperand(MCOperand::CreateImm(Imm));
- ++OpIdx;
- }
-
- return true;
-}
-
-static bool DisassembleN3RegFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
- unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
-
- return DisassembleNVdVnVmOptImm(MI, Opcode, insn, NumOps, NumOpsAdded,
- N3V_None, B);
-}
-static bool DisassembleN3RegVecShFrm(MCInst &MI, unsigned Opcode,
- uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
-
- return DisassembleNVdVnVmOptImm(MI, Opcode, insn, NumOps, NumOpsAdded,
- N3V_VectorShift, B);
-}
-static bool DisassembleNVecExtractFrm(MCInst &MI, unsigned Opcode,
- uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
-
- return DisassembleNVdVnVmOptImm(MI, Opcode, insn, NumOps, NumOpsAdded,
- N3V_VectorExtract, B);
-}
-static bool DisassembleNVecMulScalarFrm(MCInst &MI, unsigned Opcode,
- uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
-
- return DisassembleNVdVnVmOptImm(MI, Opcode, insn, NumOps, NumOpsAdded,
- N3V_Multiply_By_Scalar, B);
-}
-
-// Vector Table Lookup
-//
-// VTBL1, VTBX1: Dd [Dd(TIED_TO)] Dn Dm
-// VTBL2, VTBX2: Dd [Dd(TIED_TO)] Dn Dn+1 Dm
-// VTBL3, VTBX3: Dd [Dd(TIED_TO)] Dn Dn+1 Dn+2 Dm
-// VTBL4, VTBX4: Dd [Dd(TIED_TO)] Dn Dn+1 Dn+2 Dn+3 Dm
-static bool DisassembleNVTBLFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
- unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
-
- const MCInstrDesc &MCID = ARMInsts[Opcode];
- const MCOperandInfo *OpInfo = MCID.OpInfo;
- if (!OpInfo) return false;
-
- assert(NumOps >= 3 &&
- OpInfo[0].RegClass == ARM::DPRRegClassID &&
- OpInfo[1].RegClass == ARM::DPRRegClassID &&
- OpInfo[2].RegClass == ARM::DPRRegClassID &&
- "Expect >= 3 operands and first 3 as reg operands");
-
- unsigned &OpIdx = NumOpsAdded;
-
- OpIdx = 0;
-
- unsigned Rn = decodeNEONRn(insn);
-
- // {Dn} encoded as len = 0b00
- // {Dn Dn+1} encoded as len = 0b01
- // {Dn Dn+1 Dn+2 } encoded as len = 0b10
- // {Dn Dn+1 Dn+2 Dn+3} encoded as len = 0b11
- unsigned Len = slice(insn, 9, 8) + 1;
-
- // Dd (the destination vector)
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::DPRRegClassID,
- decodeNEONRd(insn))));
- ++OpIdx;
-
- // Process tied_to operand constraint.
- int Idx;
- if ((Idx = MCID.getOperandConstraint(OpIdx, MCOI::TIED_TO)) != -1) {
- MI.addOperand(MI.getOperand(Idx));
- ++OpIdx;
- }
-
- // Do the <list> now.
- for (unsigned i = 0; i < Len; ++i) {
- assert(OpIdx < NumOps && OpInfo[OpIdx].RegClass == ARM::DPRRegClassID &&
- "Reg operand expected");
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::DPRRegClassID,
- Rn + i)));
- ++OpIdx;
- }
-
- // Dm (the index vector)
- assert(OpIdx < NumOps && OpInfo[OpIdx].RegClass == ARM::DPRRegClassID &&
- "Reg operand (index vector) expected");
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::DPRRegClassID,
- decodeNEONRm(insn))));
- ++OpIdx;
-
- return true;
-}
-
-// Vector Get Lane (move scalar to ARM core register) Instructions.
-// VGETLNi32, VGETLNs16, VGETLNs8, VGETLNu16, VGETLNu8: Rt Dn index
-static bool DisassembleNGetLnFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
- unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
-
- const MCInstrDesc &MCID = ARMInsts[Opcode];
- const MCOperandInfo *OpInfo = MCID.OpInfo;
- if (!OpInfo) return false;
-
- assert(MCID.getNumDefs() == 1 && NumOps >= 3 &&
- OpInfo[0].RegClass == ARM::GPRRegClassID &&
- OpInfo[1].RegClass == ARM::DPRRegClassID &&
- OpInfo[2].RegClass < 0 &&
- "Expect >= 3 operands with one dst operand");
-
- ElemSize esize =
- Opcode == ARM::VGETLNi32 ? ESize32
- : ((Opcode == ARM::VGETLNs16 || Opcode == ARM::VGETLNu16) ? ESize16
- : ESize8);
-
- // Rt = Inst{15-12} => ARM Rd
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
- decodeRd(insn))));
-
- // Dn = Inst{7:19-16} => NEON Rn
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::DPRRegClassID,
- decodeNEONRn(insn))));
-
- MI.addOperand(MCOperand::CreateImm(decodeNVLaneOpIndex(insn, esize)));
-
- NumOpsAdded = 3;
- return true;
-}
-
-// Vector Set Lane (move ARM core register to scalar) Instructions.
-// VSETLNi16, VSETLNi32, VSETLNi8: Dd Dd (TIED_TO) Rt index
-static bool DisassembleNSetLnFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
- unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
-
- const MCInstrDesc &MCID = ARMInsts[Opcode];
- const MCOperandInfo *OpInfo = MCID.OpInfo;
- if (!OpInfo) return false;
-
- assert(MCID.getNumDefs() == 1 && NumOps >= 4 &&
- OpInfo[0].RegClass == ARM::DPRRegClassID &&
- OpInfo[1].RegClass == ARM::DPRRegClassID &&
- MCID.getOperandConstraint(1, MCOI::TIED_TO) != -1 &&
- OpInfo[2].RegClass == ARM::GPRRegClassID &&
- OpInfo[3].RegClass < 0 &&
- "Expect >= 4 operands with one dst operand");
-
- ElemSize esize =
- Opcode == ARM::VSETLNi8 ? ESize8
- : (Opcode == ARM::VSETLNi16 ? ESize16
- : ESize32);
-
- // Dd = Inst{7:19-16} => NEON Rn
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::DPRRegClassID,
- decodeNEONRn(insn))));
-
- // TIED_TO operand.
- MI.addOperand(MCOperand::CreateReg(0));
-
- // Rt = Inst{15-12} => ARM Rd
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
- decodeRd(insn))));
-
- MI.addOperand(MCOperand::CreateImm(decodeNVLaneOpIndex(insn, esize)));
-
- NumOpsAdded = 4;
- return true;
-}
-
-// Vector Duplicate Instructions (from ARM core register to all elements).
-// VDUP8d, VDUP16d, VDUP32d, VDUP8q, VDUP16q, VDUP32q: Qd/Dd Rt
-static bool DisassembleNDupFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
- unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
-
- const MCOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo;
-
- assert(NumOps >= 2 &&
- (OpInfo[0].RegClass == ARM::DPRRegClassID ||
- OpInfo[0].RegClass == ARM::QPRRegClassID) &&
- OpInfo[1].RegClass == ARM::GPRRegClassID &&
- "Expect >= 2 operands and first 2 as reg operands");
-
- unsigned RegClass = OpInfo[0].RegClass;
-
- // Qd/Dd = Inst{7:19-16} => NEON Rn
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, RegClass,
- decodeNEONRn(insn))));
-
- // Rt = Inst{15-12} => ARM Rd
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
- decodeRd(insn))));
-
- NumOpsAdded = 2;
- return true;
-}
-
-static inline bool PreLoadOpcode(unsigned Opcode) {
- switch(Opcode) {
- case ARM::PLDi12: case ARM::PLDrs:
- case ARM::PLDWi12: case ARM::PLDWrs:
- case ARM::PLIi12: case ARM::PLIrs:
- return true;
- default:
- return false;
- }
-}
-
-static bool DisassemblePreLoadFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
- unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
-
- // Preload Data/Instruction requires either 2 or 3 operands.
- // PLDi12, PLDWi12, PLIi12: addrmode_imm12
- // PLDrs, PLDWrs, PLIrs: ldst_so_reg
-
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
- decodeRn(insn))));
-
- if (Opcode == ARM::PLDi12 || Opcode == ARM::PLDWi12
- || Opcode == ARM::PLIi12) {
- unsigned Imm12 = slice(insn, 11, 0);
- bool Negative = getUBit(insn) == 0;
-
- // A8.6.118 PLD (literal): PLDWi12 with Rn=PC is transformed to PLDi12.
- if (Opcode == ARM::PLDWi12 && slice(insn, 19, 16) == 0xF) {
- DEBUG(errs() << "Rn == '1111': PLDWi12 morphed to PLDi12\n");
- MI.setOpcode(ARM::PLDi12);
- }
-
- // -0 is represented specially. All other values are as normal.
- int Offset = Negative ? -1 * Imm12 : Imm12;
- if (Imm12 == 0 && Negative)
- Offset = INT32_MIN;
-
- MI.addOperand(MCOperand::CreateImm(Offset));
- NumOpsAdded = 2;
- } else {
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
- decodeRm(insn))));
-
- ARM_AM::AddrOpc AddrOpcode = getUBit(insn) ? ARM_AM::add : ARM_AM::sub;
-
- // Inst{6-5} encodes the shift opcode.
- ARM_AM::ShiftOpc ShOp = getShiftOpcForBits(slice(insn, 6, 5));
- // Inst{11-7} encodes the imm5 shift amount.
- unsigned ShImm = slice(insn, 11, 7);
-
- // A8.4.1. Possible rrx or shift amount of 32...
- getImmShiftSE(ShOp, ShImm);
- MI.addOperand(MCOperand::CreateImm(
- ARM_AM::getAM2Opc(AddrOpcode, ShImm, ShOp)));
- NumOpsAdded = 3;
- }
-
- return true;
-}
-
-static bool DisassembleMiscFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
- unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
-
- if (Opcode == ARM::DMB || Opcode == ARM::DSB || Opcode == ARM::ISB) {
- // Inst{3-0} encodes the memory barrier option for the variants.
- unsigned opt = slice(insn, 3, 0);
- switch (opt) {
- case ARM_MB::SY: case ARM_MB::ST:
- case ARM_MB::ISH: case ARM_MB::ISHST:
- case ARM_MB::NSH: case ARM_MB::NSHST:
- case ARM_MB::OSH: case ARM_MB::OSHST:
- MI.addOperand(MCOperand::CreateImm(opt));
- NumOpsAdded = 1;
- return true;
- default:
- return false;
- }
- }
-
- switch (Opcode) {
- case ARM::CLREX:
- case ARM::NOP:
- case ARM::TRAP:
- case ARM::YIELD:
- case ARM::WFE:
- case ARM::WFI:
- case ARM::SEV:
- return true;
- case ARM::SWP:
- case ARM::SWPB:
- // SWP, SWPB: Rd Rm Rn
- // Delegate to DisassembleLdStExFrm().
- return DisassembleLdStExFrm(MI, Opcode, insn, NumOps, NumOpsAdded, B);
- default:
- break;
- }
-
- if (Opcode == ARM::SETEND) {
- NumOpsAdded = 1;
- MI.addOperand(MCOperand::CreateImm(slice(insn, 9, 9)));
- return true;
- }
-
- // FIXME: To enable correct asm parsing and disasm of CPS we need 3 different
- // opcodes which match the same real instruction. This is needed since there's
- // no current handling of optional arguments. Fix here when a better handling
- // of optional arguments is implemented.
- if (Opcode == ARM::CPS3p) { // M = 1
- // Reject the impossible imod values by returning false:
- // 1. (imod=0b01)
- // 2. (imod=0b00): AsmPrinter cannot handle it, and (imod=0b00,M=1,iflags!=0)
- //    is an invalid combination anyway, so we reject imod=0b00 here as well.
- if (slice(insn, 19, 18) == 0 || slice(insn, 19, 18) == 1)
- return false;
- MI.addOperand(MCOperand::CreateImm(slice(insn, 19, 18))); // imod
- MI.addOperand(MCOperand::CreateImm(slice(insn, 8, 6))); // iflags
- MI.addOperand(MCOperand::CreateImm(slice(insn, 4, 0))); // mode
- NumOpsAdded = 3;
- return true;
- }
- if (Opcode == ARM::CPS2p) { // mode = 0, M = 0
- // Let's reject these impossible imod values by returning false:
- // 1. (imod=0b00,M=0)
- // 2. (imod=0b01)
- if (slice(insn, 19, 18) == 0 || slice(insn, 19, 18) == 1)
- return false;
- MI.addOperand(MCOperand::CreateImm(slice(insn, 19, 18))); // imod
- MI.addOperand(MCOperand::CreateImm(slice(insn, 8, 6))); // iflags
- NumOpsAdded = 2;
- return true;
- }
- if (Opcode == ARM::CPS1p) { // imod = 0, iflags = 0, M = 1
- MI.addOperand(MCOperand::CreateImm(slice(insn, 4, 0))); // mode
- NumOpsAdded = 1;
- return true;
- }
-
- // DBG has its option specified in Inst{3-0}.
- if (Opcode == ARM::DBG) {
- MI.addOperand(MCOperand::CreateImm(slice(insn, 3, 0)));
- NumOpsAdded = 1;
- return true;
- }
-
- // BKPT takes an imm32 val equal to ZeroExtend(Inst{19-8:3-0}).
- if (Opcode == ARM::BKPT) {
- MI.addOperand(MCOperand::CreateImm(slice(insn, 19, 8) << 4 |
- slice(insn, 3, 0)));
- NumOpsAdded = 1;
- return true;
- }
-
- if (PreLoadOpcode(Opcode))
- return DisassemblePreLoadFrm(MI, Opcode, insn, NumOps, NumOpsAdded, B);
-
- assert(0 && "Unexpected misc instruction!");
- return false;
-}
-
-/// FuncPtrs - FuncPtrs maps ARMFormat to its corresponding DisassembleFP.
-/// We divide the disassembly task into different categories, with each one
-/// corresponding to a specific instruction encoding format. There could be
-/// exceptions when handling a specific format, and that is why the Opcode is
-/// also present in the function prototype.
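-/// Note that the order of the entries below must match the numbering of the
-/// ARMFormat enum (ARM_FORMATS in ARMDisassemblerCore.h), since the builder
-/// constructor indexes into this array with the format value.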
-static const DisassembleFP FuncPtrs[] = {
- &DisassemblePseudo,
- &DisassembleMulFrm,
- &DisassembleBrFrm,
- &DisassembleBrMiscFrm,
- &DisassembleDPFrm,
- &DisassembleDPSoRegFrm,
- &DisassembleLdFrm,
- &DisassembleStFrm,
- &DisassembleLdMiscFrm,
- &DisassembleStMiscFrm,
- &DisassembleLdStMulFrm,
- &DisassembleLdStExFrm,
- &DisassembleArithMiscFrm,
- &DisassembleSatFrm,
- &DisassembleExtFrm,
- &DisassembleVFPUnaryFrm,
- &DisassembleVFPBinaryFrm,
- &DisassembleVFPConv1Frm,
- &DisassembleVFPConv2Frm,
- &DisassembleVFPConv3Frm,
- &DisassembleVFPConv4Frm,
- &DisassembleVFPConv5Frm,
- &DisassembleVFPLdStFrm,
- &DisassembleVFPLdStMulFrm,
- &DisassembleVFPMiscFrm,
- &DisassembleThumbFrm,
- &DisassembleMiscFrm,
- &DisassembleNGetLnFrm,
- &DisassembleNSetLnFrm,
- &DisassembleNDupFrm,
-
- // VLD and VST (including one lane) Instructions.
- &DisassembleNLdSt,
-
- // A7.4.6 One register and a modified immediate value
- // 1-Register Instructions with imm.
- // LLVM only defines VMOVv instructions.
- &DisassembleN1RegModImmFrm,
-
- // 2-Register Instructions with no imm.
- &DisassembleN2RegFrm,
-
- // 2-Register Instructions with imm (vector convert float/fixed point).
- &DisassembleNVCVTFrm,
-
- // 2-Register Instructions with imm (vector dup lane).
- &DisassembleNVecDupLnFrm,
-
- // Vector Shift Left Instructions.
- &DisassembleN2RegVecShLFrm,
-
- // Vector Shift Right Instructions, which have a different interpretation of
- // the shift amount from the imm6 field.
- &DisassembleN2RegVecShRFrm,
-
- // 3-Register Data-Processing Instructions.
- &DisassembleN3RegFrm,
-
- // Vector Shift (Register) Instructions.
- // D:Vd M:Vm N:Vn (notice that M:Vm is the first src operand)
- &DisassembleN3RegVecShFrm,
-
- // Vector Extract Instructions.
- &DisassembleNVecExtractFrm,
-
- // Vector [Saturating Rounding Doubling] Multiply [Accumulate/Subtract] [Long]
- // By Scalar Instructions.
- &DisassembleNVecMulScalarFrm,
-
- // Vector Table Lookup uses byte indexes in a control vector to look up byte
- // values in a table and generate a new vector.
- &DisassembleNVTBLFrm,
-
- NULL
-};
-
-/// BuildIt - BuildIt performs the build step for this ARM Basic MC Builder.
-/// The general idea is to set the Opcode for the MCInst, followed by adding
-/// the appropriate MCOperands to the MCInst. ARM Basic MC Builder delegates
-/// to the Format-specific disassemble function for disassembly, followed by
-/// TryPredicateAndSBitModifier() to process the PredicateOperand and
-/// OptionalDefOperand which follow the Dst/Src Operands.
-bool ARMBasicMCBuilder::BuildIt(MCInst &MI, uint32_t insn) {
- // Stage 1 sets the Opcode.
- MI.setOpcode(Opcode);
- // If the number of operands is zero, we're done!
- if (NumOps == 0)
- return true;
-
- // Stage 2 calls the format-specific disassemble function to build the operand
- // list.
- if (Disasm == NULL)
- return false;
- unsigned NumOpsAdded = 0;
- bool OK = (*Disasm)(MI, Opcode, insn, NumOps, NumOpsAdded, this);
-
- if (!OK || this->Err != 0) return false;
- if (NumOpsAdded >= NumOps)
- return true;
-
- // Stage 3 deals with operands unaccounted for after stage 2 is finished.
- // FIXME: Should this be done selectively?
- return TryPredicateAndSBitModifier(MI, Opcode, insn, NumOps - NumOpsAdded);
-}
-
-// A8.3 Conditional execution
-// A8.3.1 Pseudocode details of conditional execution
-// Condition bits '111x' indicate the instruction is always executed.
-static uint32_t CondCode(uint32_t CondField) {
- if (CondField == 0xF)
- return ARMCC::AL;
- return CondField;
-}
-
-/// DoPredicateOperands - DoPredicateOperands processes the predicate operands
-/// of some Thumb instructions which come before the reglist operands. It
-/// returns true if the two predicate operands have been processed.
-bool ARMBasicMCBuilder::DoPredicateOperands(MCInst& MI, unsigned Opcode,
- uint32_t /* insn */, unsigned short NumOpsRemaining) {
-
- assert(NumOpsRemaining > 0 && "Invalid argument");
-
- const MCOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo;
- unsigned Idx = MI.getNumOperands();
-
- // First, we check whether this instr specifies the PredicateOperand through
- // a pair of MCOperandInfos with isPredicate() property.
- if (NumOpsRemaining >= 2 &&
- OpInfo[Idx].isPredicate() && OpInfo[Idx+1].isPredicate() &&
- OpInfo[Idx].RegClass < 0 &&
- OpInfo[Idx+1].RegClass == ARM::CCRRegClassID)
- {
- // If we are inside an IT block, get the IT condition bits maintained via
- // ARMBasicMCBuilder::ITState[7:0], through ARMBasicMCBuilder::GetITCond().
- // See also A2.5.2.
- if (InITBlock())
- MI.addOperand(MCOperand::CreateImm(GetITCond()));
- else
- MI.addOperand(MCOperand::CreateImm(ARMCC::AL));
- MI.addOperand(MCOperand::CreateReg(ARM::CPSR));
- return true;
- }
-
- return false;
-}
-
-/// TryPredicateAndSBitModifier - TryPredicateAndSBitModifier tries to process
-/// the possible Predicate and SBitModifier, to build the remaining MCOperand
-/// constituents.
-bool ARMBasicMCBuilder::TryPredicateAndSBitModifier(MCInst& MI, unsigned Opcode,
- uint32_t insn, unsigned short NumOpsRemaining) {
-
- assert(NumOpsRemaining > 0 && "Invalid argument");
-
- const MCOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo;
- const std::string &Name = ARMInsts[Opcode].Name;
- unsigned Idx = MI.getNumOperands();
- uint64_t TSFlags = ARMInsts[Opcode].TSFlags;
-
- // First, we check whether this instr specifies the PredicateOperand through
- // a pair of MCOperandInfos with isPredicate() property.
- if (NumOpsRemaining >= 2 &&
- OpInfo[Idx].isPredicate() && OpInfo[Idx+1].isPredicate() &&
- OpInfo[Idx].RegClass < 0 &&
- OpInfo[Idx+1].RegClass == ARM::CCRRegClassID)
- {
- // If we are inside an IT block, get the IT condition bits maintained via
- // ARMBasicMCBuilder::ITState[7:0], through ARMBasicMCBuilder::GetITCond().
- // See also A2.5.2.
- if (InITBlock())
- MI.addOperand(MCOperand::CreateImm(GetITCond()));
- else {
- if (Name.length() > 1 && Name[0] == 't') {
- // Thumb conditional branch instructions have their cond field embedded,
- // like ARM.
- //
- // A8.6.16 B
- // Check for undefined encodings.
- unsigned cond;
- if (Name == "t2Bcc") {
- if ((cond = slice(insn, 25, 22)) >= 14)
- return false;
- MI.addOperand(MCOperand::CreateImm(CondCode(cond)));
- } else if (Name == "tBcc") {
- if ((cond = slice(insn, 11, 8)) == 14)
- return false;
- MI.addOperand(MCOperand::CreateImm(CondCode(cond)));
- } else
- MI.addOperand(MCOperand::CreateImm(ARMCC::AL));
- } else {
- // ARM instructions get their condition field from Inst{31-28}.
- // We should reject Inst{31-28} = 0b1111 as invalid encoding.
- if (!isNEONDomain(TSFlags) && getCondField(insn) == 0xF)
- return false;
- MI.addOperand(MCOperand::CreateImm(CondCode(getCondField(insn))));
- }
- }
- MI.addOperand(MCOperand::CreateReg(ARM::CPSR));
- Idx += 2;
- NumOpsRemaining -= 2;
- }
-
- if (NumOpsRemaining == 0)
- return true;
-
- // Next, if OptionalDefOperand exists, we check whether the 'S' bit is set.
- if (OpInfo[Idx].isOptionalDef() && OpInfo[Idx].RegClass==ARM::CCRRegClassID) {
- MI.addOperand(MCOperand::CreateReg(getSBit(insn) == 1 ? ARM::CPSR : 0));
- --NumOpsRemaining;
- }
-
- if (NumOpsRemaining == 0)
- return true;
- else
- return false;
-}
-
-/// RunBuildAfterHook - RunBuildAfterHook performs operations deemed necessary
-/// after BuildIt is finished.
-bool ARMBasicMCBuilder::RunBuildAfterHook(bool Status, MCInst &MI,
- uint32_t insn) {
-
- if (!SP) return Status;
-
- if (Opcode == ARM::t2IT)
- Status = SP->InitIT(slice(insn, 7, 0)) ? Status : false;
- else if (InITBlock())
- SP->UpdateIT();
-
- return Status;
-}
-
-/// Opcode, Format, and NumOperands make up an ARM Basic MCBuilder.
-ARMBasicMCBuilder::ARMBasicMCBuilder(unsigned opc, ARMFormat format,
- unsigned short num)
- : Opcode(opc), Format(format), NumOps(num), SP(0), Err(0) {
- unsigned Idx = (unsigned)format;
- assert(Idx < (array_lengthof(FuncPtrs) - 1) && "Unknown format");
- Disasm = FuncPtrs[Idx];
-}
-
-/// CreateMCBuilder - Return an ARMBasicMCBuilder that can build up the MC
-/// infrastructure of an MCInst given the Opcode and Format of the instr.
-/// Return NULL if it fails to create/return a proper builder. API clients
-/// are responsible for freeing the allocated memory. Caching can be
-/// performed by the API clients to improve performance.
-ARMBasicMCBuilder *llvm::CreateMCBuilder(unsigned Opcode, ARMFormat Format) {
- // For "Unknown format", fail by returning a NULL pointer.
- if ((unsigned)Format >= (array_lengthof(FuncPtrs) - 1)) {
- DEBUG(errs() << "Unknown format\n");
- return 0;
- }
-
- return new ARMBasicMCBuilder(Opcode, Format,
- ARMInsts[Opcode].getNumOperands());
-}
-
-/// tryAddingSymbolicOperand - tryAddingSymbolicOperand tries to add a symbolic
-/// operand in place of the immediate Value in the MCInst. The immediate
-/// Value has had any PC adjustment made by the caller. If the getOpInfo()
-/// function was set as part of the setupBuilderForSymbolicDisassembly() call
-/// then that function is called to get any symbolic information at the
-/// builder's Address for this instruction. If that returns non-zero then the
-/// symbolic information it returns is used to create an MCExpr and that is
-/// added as an operand to the MCInst. This function returns true if it adds
-/// an operand to the MCInst and false otherwise.
-bool ARMBasicMCBuilder::tryAddingSymbolicOperand(uint64_t Value,
- uint64_t InstSize,
- MCInst &MI) {
- if (!GetOpInfo)
- return false;
-
- struct LLVMOpInfo1 SymbolicOp;
- SymbolicOp.Value = Value;
- if (!GetOpInfo(DisInfo, Address, 0 /* Offset */, InstSize, 1, &SymbolicOp))
- return false;
-
- const MCExpr *Add = NULL;
- if (SymbolicOp.AddSymbol.Present) {
- if (SymbolicOp.AddSymbol.Name) {
- StringRef Name(SymbolicOp.AddSymbol.Name);
- MCSymbol *Sym = Ctx->GetOrCreateSymbol(Name);
- Add = MCSymbolRefExpr::Create(Sym, *Ctx);
- } else {
- Add = MCConstantExpr::Create(SymbolicOp.AddSymbol.Value, *Ctx);
- }
- }
-
- const MCExpr *Sub = NULL;
- if (SymbolicOp.SubtractSymbol.Present) {
- if (SymbolicOp.SubtractSymbol.Name) {
- StringRef Name(SymbolicOp.SubtractSymbol.Name);
- MCSymbol *Sym = Ctx->GetOrCreateSymbol(Name);
- Sub = MCSymbolRefExpr::Create(Sym, *Ctx);
- } else {
- Sub = MCConstantExpr::Create(SymbolicOp.SubtractSymbol.Value, *Ctx);
- }
- }
-
- const MCExpr *Off = NULL;
- if (SymbolicOp.Value != 0)
- Off = MCConstantExpr::Create(SymbolicOp.Value, *Ctx);
-
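- // Compose Expr = Add - Sub + Off, omitting whichever pieces are absent;
- // if all three are absent, fall back to the constant 0.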
- const MCExpr *Expr;
- if (Sub) {
- const MCExpr *LHS;
- if (Add)
- LHS = MCBinaryExpr::CreateSub(Add, Sub, *Ctx);
- else
- LHS = MCUnaryExpr::CreateMinus(Sub, *Ctx);
- if (Off != 0)
- Expr = MCBinaryExpr::CreateAdd(LHS, Off, *Ctx);
- else
- Expr = LHS;
- } else if (Add) {
- if (Off != 0)
- Expr = MCBinaryExpr::CreateAdd(Add, Off, *Ctx);
- else
- Expr = Add;
- } else {
- if (Off != 0)
- Expr = Off;
- else
- Expr = MCConstantExpr::Create(0, *Ctx);
- }
-
- if (SymbolicOp.VariantKind == LLVMDisassembler_VariantKind_ARM_HI16)
- MI.addOperand(MCOperand::CreateExpr(ARMMCExpr::CreateUpper16(Expr, *Ctx)));
- else if (SymbolicOp.VariantKind == LLVMDisassembler_VariantKind_ARM_LO16)
- MI.addOperand(MCOperand::CreateExpr(ARMMCExpr::CreateLower16(Expr, *Ctx)));
- else if (SymbolicOp.VariantKind == LLVMDisassembler_VariantKind_None)
- MI.addOperand(MCOperand::CreateExpr(Expr));
- else
- assert(0 && "bad SymbolicOp.VariantKind");
-
- return true;
-}
diff --git a/lib/Target/ARM/Disassembler/ARMDisassemblerCore.h b/lib/Target/ARM/Disassembler/ARMDisassemblerCore.h
deleted file mode 100644
index a7ba141..0000000
--- a/lib/Target/ARM/Disassembler/ARMDisassemblerCore.h
+++ /dev/null
@@ -1,336 +0,0 @@
-//===- ARMDisassemblerCore.h - ARM disassembler helpers ---------*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file is part of the ARM Disassembler.
-//
-// The first part defines the enumeration type of ARM instruction format, which
-// specifies the encoding used by the instruction, as well as a helper function
-// to convert the enums to printable char strings.
-//
-// It also contains code to represent the concepts of Builder and DisassembleFP
-// to solve the problem of disassembling an ARM instr.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef ARMDISASSEMBLERCORE_H
-#define ARMDISASSEMBLERCORE_H
-
-#include "llvm/MC/MCInst.h"
-#include "llvm/MC/MCSymbol.h"
-#include "llvm/MC/MCExpr.h"
-#include "llvm/MC/MCContext.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm-c/Disassembler.h"
-#include "ARMBaseInstrInfo.h"
-#include "ARMRegisterInfo.h"
-#include "ARMDisassembler.h"
-
-namespace llvm {
-class MCContext;
-
-class ARMUtils {
-public:
- static const char *OpcodeName(unsigned Opcode);
-};
-
-/////////////////////////////////////////////////////
-// //
-// Enums and Utilities for ARM Instruction Format //
-// //
-/////////////////////////////////////////////////////
-
-#define ARM_FORMATS \
- ENTRY(ARM_FORMAT_PSEUDO, 0) \
- ENTRY(ARM_FORMAT_MULFRM, 1) \
- ENTRY(ARM_FORMAT_BRFRM, 2) \
- ENTRY(ARM_FORMAT_BRMISCFRM, 3) \
- ENTRY(ARM_FORMAT_DPFRM, 4) \
- ENTRY(ARM_FORMAT_DPSOREGFRM, 5) \
- ENTRY(ARM_FORMAT_LDFRM, 6) \
- ENTRY(ARM_FORMAT_STFRM, 7) \
- ENTRY(ARM_FORMAT_LDMISCFRM, 8) \
- ENTRY(ARM_FORMAT_STMISCFRM, 9) \
- ENTRY(ARM_FORMAT_LDSTMULFRM, 10) \
- ENTRY(ARM_FORMAT_LDSTEXFRM, 11) \
- ENTRY(ARM_FORMAT_ARITHMISCFRM, 12) \
- ENTRY(ARM_FORMAT_SATFRM, 13) \
- ENTRY(ARM_FORMAT_EXTFRM, 14) \
- ENTRY(ARM_FORMAT_VFPUNARYFRM, 15) \
- ENTRY(ARM_FORMAT_VFPBINARYFRM, 16) \
- ENTRY(ARM_FORMAT_VFPCONV1FRM, 17) \
- ENTRY(ARM_FORMAT_VFPCONV2FRM, 18) \
- ENTRY(ARM_FORMAT_VFPCONV3FRM, 19) \
- ENTRY(ARM_FORMAT_VFPCONV4FRM, 20) \
- ENTRY(ARM_FORMAT_VFPCONV5FRM, 21) \
- ENTRY(ARM_FORMAT_VFPLDSTFRM, 22) \
- ENTRY(ARM_FORMAT_VFPLDSTMULFRM, 23) \
- ENTRY(ARM_FORMAT_VFPMISCFRM, 24) \
- ENTRY(ARM_FORMAT_THUMBFRM, 25) \
- ENTRY(ARM_FORMAT_MISCFRM, 26) \
- ENTRY(ARM_FORMAT_NEONGETLNFRM, 27) \
- ENTRY(ARM_FORMAT_NEONSETLNFRM, 28) \
- ENTRY(ARM_FORMAT_NEONDUPFRM, 29) \
- ENTRY(ARM_FORMAT_NLdSt, 30) \
- ENTRY(ARM_FORMAT_N1RegModImm, 31) \
- ENTRY(ARM_FORMAT_N2Reg, 32) \
- ENTRY(ARM_FORMAT_NVCVT, 33) \
- ENTRY(ARM_FORMAT_NVecDupLn, 34) \
- ENTRY(ARM_FORMAT_N2RegVecShL, 35) \
- ENTRY(ARM_FORMAT_N2RegVecShR, 36) \
- ENTRY(ARM_FORMAT_N3Reg, 37) \
- ENTRY(ARM_FORMAT_N3RegVecSh, 38) \
- ENTRY(ARM_FORMAT_NVecExtract, 39) \
- ENTRY(ARM_FORMAT_NVecMulScalar, 40) \
- ENTRY(ARM_FORMAT_NVTBL, 41)
-
-// ARM instruction format specifies the encoding used by the instruction.
-#define ENTRY(n, v) n = v,
-typedef enum {
- ARM_FORMATS
- ARM_FORMAT_NA
-} ARMFormat;
-#undef ENTRY
-
-// Converts enum to const char*.
-static inline const char *stringForARMFormat(ARMFormat form) {
-#define ENTRY(n, v) case n: return #n;
- switch(form) {
- ARM_FORMATS
- case ARM_FORMAT_NA:
- default:
- return "";
- }
-#undef ENTRY
-}
-
-/// Expands on the enum definitions from ARMBaseInstrInfo.h.
-/// They are being used by the disassembler implementation.
-namespace ARMII {
- enum {
- NEONRegMask = 15,
- GPRRegMask = 15,
- NEON_RegRdShift = 12,
- NEON_D_BitShift = 22,
- NEON_RegRnShift = 16,
- NEON_N_BitShift = 7,
- NEON_RegRmShift = 0,
- NEON_M_BitShift = 5
- };
-}
-
-/// Utility function for extracting [From, To] bits from a uint32_t.
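-/// For example, slice(0xABCD, 15, 12) == 0xA and slice(0xABCD, 7, 0) == 0xCD.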
-static inline unsigned slice(uint32_t Bits, unsigned From, unsigned To) {
- assert(From < 32 && To < 32 && From >= To);
- return (Bits >> To) & ((1 << (From - To + 1)) - 1);
-}
-
-/// Utility function for setting [From, To] bits to Val for a uint32_t.
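-/// For example, with Bits == 0, setSlice(Bits, 7, 4, 0xA) leaves Bits == 0xA0.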
-static inline void setSlice(unsigned &Bits, unsigned From, unsigned To,
- unsigned Val) {
- assert(From < 32 && To < 32 && From >= To);
- uint32_t Mask = ((1 << (From - To + 1)) - 1);
- Bits &= ~(Mask << To);
- Bits |= (Val & Mask) << To;
-}
-
-// Return an integer result equal to the number of bits of x that are ones.
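-// Uses Kernighan's method: each iteration clears the lowest set bit, so the
-// loop body executes once per set bit; e.g. BitCount(0xF0F0) == 8.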
-static inline uint32_t BitCount(uint64_t x) {
- // c accumulates the total bits set in x.
- uint32_t c;
- for (c = 0; x; ++c)
- x &= x - 1; // Clear the least significant bit set.
- return c;
-}
-
-static inline bool BitIsSet(const uint64_t value, const uint64_t bit) {
- return (value & (1ull << bit)) != 0;
-}
-
-static inline bool BitIsClear(const uint64_t value, const uint64_t bit) {
- return (value & (1ull << bit)) == 0;
-}
-
-/// Various utilities for checking the target specific flags.
-
-/// A unary data processing instruction doesn't have an Rn operand.
-static inline bool isUnaryDP(uint64_t TSFlags) {
- return (TSFlags & ARMII::UnaryDP);
-}
-
-/// A NEON Domain instruction has cond field (Inst{31-28}) as 0b1111.
-static inline bool isNEONDomain(uint64_t TSFlags) {
- return (TSFlags & ARMII::DomainNEON) ||
- (TSFlags & ARMII::DomainNEONA8);
-}
-
-/// This four-bit field describes the addressing mode used.
-/// See also ARMBaseInstrInfo.h.
-static inline unsigned getAddrMode(uint64_t TSFlags) {
- return (TSFlags & ARMII::AddrModeMask);
-}
-
-/// {IndexModePre, IndexModePost}
-/// Only valid for load and store ops.
-/// See also ARMBaseInstrInfo.h.
-static inline unsigned getIndexMode(uint64_t TSFlags) {
- return (TSFlags & ARMII::IndexModeMask) >> ARMII::IndexModeShift;
-}
-
-/// Pre-/post-indexed operations define an extra $base_wb in the OutOperandList.
-static inline bool isPrePostLdSt(uint64_t TSFlags) {
- return (TSFlags & ARMII::IndexModeMask) != 0;
-}
-
-// Forward declaration.
-class ARMBasicMCBuilder;
-
-// Builder Object is mostly ignored except in some Thumb disassemble functions.
-typedef ARMBasicMCBuilder *BO;
-
-/// DisassembleFP - DisassembleFP points to a function that disassembles an insn
-/// and builds the MCOperand list upon disassembly. It returns false on failure
-/// or true on success. The number of operands added is updated upon success.
-typedef bool (*DisassembleFP)(MCInst &MI, unsigned Opcode, uint32_t insn,
- unsigned short NumOps, unsigned &NumOpsAdded, BO Builder);
-
-/// CreateMCBuilder - Return an ARMBasicMCBuilder that can build up the MC
-/// infrastructure of an MCInst given the Opcode and Format of the instr.
-/// Return NULL if it fails to create/return a proper builder. API clients
-/// are responsible for freeing the allocated memory. Caching can be
-/// performed by the API clients to improve performance.
-extern ARMBasicMCBuilder *CreateMCBuilder(unsigned Opcode, ARMFormat Format);
-
-/// ARMBasicMCBuilder - ARMBasicMCBuilder represents an ARM MCInst builder that
-/// knows how to build up the MCOperand list.
-class ARMBasicMCBuilder {
- friend ARMBasicMCBuilder *CreateMCBuilder(unsigned Opcode, ARMFormat Format);
- unsigned Opcode;
- ARMFormat Format;
- unsigned short NumOps;
- DisassembleFP Disasm;
- Session *SP;
- int Err; // !=0 if the builder encounters some error condition during build.
-
-private:
- /// Opcode, Format, and NumOperands make up an ARM Basic MCBuilder.
- ARMBasicMCBuilder(unsigned opc, ARMFormat format, unsigned short num);
-
-public:
- ARMBasicMCBuilder(ARMBasicMCBuilder &B)
- : Opcode(B.Opcode), Format(B.Format), NumOps(B.NumOps), Disasm(B.Disasm),
- SP(B.SP), GetOpInfo(0), DisInfo(0), Ctx(0) {
- Err = 0;
- }
-
- virtual ~ARMBasicMCBuilder() {}
-
- void SetSession(Session *sp) {
- SP = sp;
- }
-
- void SetErr(int ErrCode) {
- Err = ErrCode;
- }
-
- /// DoPredicateOperands - DoPredicateOperands processes the predicate operands
- /// of some Thumb instructions which come before the reglist operands. It
- /// returns true if the two predicate operands have been processed.
- bool DoPredicateOperands(MCInst& MI, unsigned Opcode,
- uint32_t insn, unsigned short NumOpsRemaining);
-
- /// TryPredicateAndSBitModifier - TryPredicateAndSBitModifier tries to process
- /// the possible Predicate and SBitModifier, to build the remaining MCOperand
- /// constituents.
- bool TryPredicateAndSBitModifier(MCInst& MI, unsigned Opcode,
- uint32_t insn, unsigned short NumOpsRemaining);
-
- /// InITBlock - InITBlock returns true if we are inside an IT block.
- bool InITBlock() {
- if (SP)
- return SP->ITCounter > 0;
-
- return false;
- }
-
- /// Build - Build delegates to BuildIt to perform the heavy lifting. After
- /// that, it invokes RunBuildAfterHook where some housekeeping can be done.
- virtual bool Build(MCInst &MI, uint32_t insn) {
- bool Status = BuildIt(MI, insn);
- return RunBuildAfterHook(Status, MI, insn);
- }
-
- /// BuildIt - BuildIt performs the build step for this ARM Basic MC Builder.
- /// The general idea is to set the Opcode for the MCInst, followed by adding
- /// the appropriate MCOperands to the MCInst. ARM Basic MC Builder delegates
- /// to the Format-specific disassemble function for disassembly, followed by
- /// TryPredicateAndSBitModifier() for PredicateOperand and OptionalDefOperand
- /// which follow the Dst/Src Operands.
- virtual bool BuildIt(MCInst &MI, uint32_t insn);
-
- /// RunBuildAfterHook - RunBuildAfterHook performs operations deemed necessary
- /// after BuildIt is finished.
- virtual bool RunBuildAfterHook(bool Status, MCInst &MI, uint32_t insn);
-
-private:
- /// Get condition of the current IT instruction.
- unsigned GetITCond() {
- assert(SP);
- return slice(SP->ITState, 7, 4);
- }
-
-private:
- //
- // Hooks for symbolic disassembly via the public 'C' interface.
- //
- // The function to get the symbolic information for operands.
- LLVMOpInfoCallback GetOpInfo;
- // The pointer to the block of symbolic information for above call back.
- void *DisInfo;
- // The assembly context for creating symbols and MCExprs in place of
- // immediate operands when there is symbolic information.
- MCContext *Ctx;
- // The address of the instruction being disassembled.
- uint64_t Address;
-
-public:
- void setupBuilderForSymbolicDisassembly(LLVMOpInfoCallback getOpInfo,
- void *disInfo, MCContext *ctx,
- uint64_t address) {
- GetOpInfo = getOpInfo;
- DisInfo = disInfo;
- Ctx = ctx;
- Address = address;
- }
-
- uint64_t getBuilderAddress() const { return Address; }
-
- /// tryAddingSymbolicOperand - tryAddingSymbolicOperand tries to add a symbolic
- /// operand in place of the immediate Value in the MCInst. The immediate
- /// Value has had any PC adjustment made by the caller. If the getOpInfo()
- /// function was set as part of the setupBuilderForSymbolicDisassembly() call
- /// then that function is called to get any symbolic information at the
- /// builder's Address for this instruction. If that returns non-zero then the
- /// symbolic information it returns is used to create an MCExpr and that is
- /// added as an operand to the MCInst. This function returns true if it adds
- /// an operand to the MCInst and false otherwise.
- bool tryAddingSymbolicOperand(uint64_t Value, uint64_t InstSize, MCInst &MI);
-
-};
-
-} // namespace llvm
-
-#endif
diff --git a/lib/Target/ARM/Disassembler/CMakeLists.txt b/lib/Target/ARM/Disassembler/CMakeLists.txt
index b23dd6b..da87751 100644
--- a/lib/Target/ARM/Disassembler/CMakeLists.txt
+++ b/lib/Target/ARM/Disassembler/CMakeLists.txt
@@ -2,7 +2,6 @@ include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/
add_llvm_library(LLVMARMDisassembler
ARMDisassembler.cpp
- ARMDisassemblerCore.cpp
)
# workaround for hanging compilation on MSVC8, 9 and 10
if( MSVC_VERSION EQUAL 1400 OR MSVC_VERSION EQUAL 1500 OR MSVC_VERSION EQUAL 1600 )
@@ -11,4 +10,12 @@ set_property(
PROPERTY COMPILE_FLAGS "/Od"
)
endif()
-add_dependencies(LLVMARMDisassembler ARMCodeGenTable_gen)
+add_dependencies(LLVMARMDisassembler ARMCommonTableGen)
+
+add_llvm_library_dependencies(LLVMARMDisassembler
+ LLVMARMCodeGen
+ LLVMARMDesc
+ LLVMARMInfo
+ LLVMMC
+ LLVMSupport
+ )
diff --git a/lib/Target/ARM/Disassembler/ThumbDisassemblerCore.h b/lib/Target/ARM/Disassembler/ThumbDisassemblerCore.h
deleted file mode 100644
index 834c6f6..0000000
--- a/lib/Target/ARM/Disassembler/ThumbDisassemblerCore.h
+++ /dev/null
@@ -1,2459 +0,0 @@
-//===- ThumbDisassemblerCore.h - Thumb disassembler helpers -----*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file is part of the ARM Disassembler.
-// It contains code for disassembling a Thumb instr. It is to be included by
-// ARMDisassemblerCore.cpp because it contains the static DisassembleThumbFrm()
-// function which acts as the dispatcher to disassemble a Thumb instruction.
-//
-//===----------------------------------------------------------------------===//
-
-///////////////////////////////
-// //
-// Utility Functions //
-// //
-///////////////////////////////
-
-// Utilities for 16-bit Thumb instructions.
-/*
-15 14 13 12 11 10 9 8 7 6 5 4 3 2 1 0
- [ tRt ]
- [ tRm ] [ tRn ] [ tRd ]
- D [ Rm ] [ Rd ]
-
- [ imm3]
- [ imm5 ]
- i [ imm5 ]
- [ imm7 ]
- [ imm8 ]
- [ imm11 ]
-
- [ cond ]
-*/
-
-// Extract tRt: Inst{10-8}.
-static inline unsigned getT1tRt(uint32_t insn) {
- return slice(insn, 10, 8);
-}
-
-// Extract tRm: Inst{8-6}.
-static inline unsigned getT1tRm(uint32_t insn) {
- return slice(insn, 8, 6);
-}
-
-// Extract tRn: Inst{5-3}.
-static inline unsigned getT1tRn(uint32_t insn) {
- return slice(insn, 5, 3);
-}
-
-// Extract tRd: Inst{2-0}.
-static inline unsigned getT1tRd(uint32_t insn) {
- return slice(insn, 2, 0);
-}
-
-// Extract [D:Rd]: Inst{7:2-0}.
-static inline unsigned getT1Rd(uint32_t insn) {
- return slice(insn, 7, 7) << 3 | slice(insn, 2, 0);
-}
-
-// Extract Rm: Inst{6-3}.
-static inline unsigned getT1Rm(uint32_t insn) {
- return slice(insn, 6, 3);
-}
-
-// Extract imm3: Inst{8-6}.
-static inline unsigned getT1Imm3(uint32_t insn) {
- return slice(insn, 8, 6);
-}
-
-// Extract imm5: Inst{10-6}.
-static inline unsigned getT1Imm5(uint32_t insn) {
- return slice(insn, 10, 6);
-}
-
-// Extract i:imm5: Inst{9:7-3}.
-static inline unsigned getT1Imm6(uint32_t insn) {
- return slice(insn, 9, 9) << 5 | slice(insn, 7, 3);
-}
-
-// Extract imm7: Inst{6-0}.
-static inline unsigned getT1Imm7(uint32_t insn) {
- return slice(insn, 6, 0);
-}
-
-// Extract imm8: Inst{7-0}.
-static inline unsigned getT1Imm8(uint32_t insn) {
- return slice(insn, 7, 0);
-}
-
-// Extract imm11: Inst{10-0}.
-static inline unsigned getT1Imm11(uint32_t insn) {
- return slice(insn, 10, 0);
-}
-
-// Extract cond: Inst{11-8}.
-static inline unsigned getT1Cond(uint32_t insn) {
- return slice(insn, 11, 8);
-}
-
-static inline bool IsGPR(unsigned RegClass) {
- return RegClass == ARM::GPRRegClassID || RegClass == ARM::rGPRRegClassID;
-}
-
-// Utilities for 32-bit Thumb instructions.
-
-static inline bool BadReg(uint32_t n) { return n == 13 || n == 15; }
-
-// Extract imm4: Inst{19-16}.
-static inline unsigned getImm4(uint32_t insn) {
- return slice(insn, 19, 16);
-}
-
-// Extract imm3: Inst{14-12}.
-static inline unsigned getImm3(uint32_t insn) {
- return slice(insn, 14, 12);
-}
-
-// Extract imm8: Inst{7-0}.
-static inline unsigned getImm8(uint32_t insn) {
- return slice(insn, 7, 0);
-}
-
-// A8.6.61 LDRB (immediate, Thumb) and friends
-// +/-: Inst{9}
-// imm8: Inst{7-0}
-static inline int decodeImm8(uint32_t insn) {
- int Offset = getImm8(insn);
- return slice(insn, 9, 9) ? Offset : -Offset;
-}
-
-// Extract imm12: Inst{11-0}.
-static inline unsigned getImm12(uint32_t insn) {
- return slice(insn, 11, 0);
-}
-
-// A8.6.63 LDRB (literal) and friends
-// +/-: Inst{23}
-// imm12: Inst{11-0}
-static inline int decodeImm12(uint32_t insn) {
- int Offset = getImm12(insn);
- return slice(insn, 23, 23) ? Offset : -Offset;
-}
-
-// Extract imm2: Inst{7-6}.
-static inline unsigned getImm2(uint32_t insn) {
- return slice(insn, 7, 6);
-}
-
-// For BFI, BFC, t2SBFX, and t2UBFX.
-// Extract lsb: Inst{14-12:7-6}.
-static inline unsigned getLsb(uint32_t insn) {
- return getImm3(insn) << 2 | getImm2(insn);
-}
-
-// For BFI and BFC.
-// Extract msb: Inst{4-0}.
-static inline unsigned getMsb(uint32_t insn) {
- return slice(insn, 4, 0);
-}
-
-// For t2SBFX and t2UBFX.
-// Extract widthminus1: Inst{4-0}.
-static inline unsigned getWidthMinus1(uint32_t insn) {
- return slice(insn, 4, 0);
-}
-
-// For t2ADDri12 and t2SUBri12.
-// imm12 = i:imm3:imm8;
-static inline unsigned getIImm3Imm8(uint32_t insn) {
- return slice(insn, 26, 26) << 11 | getImm3(insn) << 8 | getImm8(insn);
-}
-
-// For t2MOVi16 and t2MOVTi16.
-// imm16 = imm4:i:imm3:imm8;
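-// For example, imm4 = 0xA, i = 1, imm3 = 0b010, imm8 = 0x34 yields 0xAA34.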
-static inline unsigned getImm16(uint32_t insn) {
- return getImm4(insn) << 12 | slice(insn, 26, 26) << 11 |
- getImm3(insn) << 8 | getImm8(insn);
-}
-
-// Inst{5-4} encodes the shift type.
-static inline unsigned getShiftTypeBits(uint32_t insn) {
- return slice(insn, 5, 4);
-}
-
-// Inst{14-12}:Inst{7-6} encodes the imm5 shift amount.
-static inline unsigned getShiftAmtBits(uint32_t insn) {
- return getImm3(insn) << 2 | getImm2(insn);
-}
-
-// A8.6.17 BFC
-// Encoding T1 ARMv6T2, ARMv7
-// LLVM-specific encoding for #<lsb> and #<width>
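-// For example, lsb = 8 and msb = 15 give mask = ~0x0000FF00 == 0xFFFF00FF.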
-static inline bool getBitfieldInvMask(uint32_t insn, uint32_t &mask) {
- uint32_t lsb = getImm3(insn) << 2 | getImm2(insn);
- uint32_t msb = getMsb(insn);
- uint32_t Val = 0;
- if (msb < lsb) {
- DEBUG(errs() << "Encoding error: msb < lsb\n");
- return false;
- }
- for (uint32_t i = lsb; i <= msb; ++i)
- Val |= (1 << i);
- mask = ~Val;
- return true;
-}
-
-// A8.4 Shifts applied to a register
-// A8.4.1 Constant shifts
-// A8.4.3 Pseudocode details of instruction-specified shifts and rotates
-//
-// decodeImmShift() returns the shift amount and the shift opcode.
-// Note that, as of Jan-06-2010, LLVM does not support rrx shifted operands yet.
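-//
-// For example, decodeImmShift(0b01, 0, ShOp) sets ShOp = ARM_AM::lsr and
-// returns 32 (imm5 == 0 encodes LSR #32), while decodeImmShift(0b00, 0, ShOp)
-// sets ShOp = ARM_AM::no_shift and returns 0.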
-static inline unsigned decodeImmShift(unsigned bits2, unsigned imm5,
- ARM_AM::ShiftOpc &ShOp) {
-
- assert(imm5 < 32 && "Invalid imm5 argument");
- switch (bits2) {
- default: assert(0 && "No such value");
- case 0:
- ShOp = (imm5 == 0 ? ARM_AM::no_shift : ARM_AM::lsl);
- return imm5;
- case 1:
- ShOp = ARM_AM::lsr;
- return (imm5 == 0 ? 32 : imm5);
- case 2:
- ShOp = ARM_AM::asr;
- return (imm5 == 0 ? 32 : imm5);
- case 3:
- ShOp = (imm5 == 0 ? ARM_AM::rrx : ARM_AM::ror);
- return (imm5 == 0 ? 1 : imm5);
- }
-}
-
-// A6.3.2 Modified immediate constants in Thumb instructions
-//
-// ThumbExpandImm() returns the modified immediate constant given an imm12 for
-// Thumb data-processing instructions with modified immediate.
-// See also A6.3.1 Data-processing (modified immediate).
-static inline unsigned ThumbExpandImm(unsigned imm12) {
- assert(imm12 <= 0xFFF && "Invalid imm12 argument");
-
- // If the leading two bits are 0b00, the modified immediate constant is
- // obtained by splatting the low 8 bits into the first byte, every other byte,
- // or every byte of a 32-bit value.
- //
- // Otherwise, a rotate right of '1':imm12<6:0> by the amount imm12<11:7> is
- // performed.
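- //
- // For example, imm12 = 0x1FF (control bits 0b01, imm8 = 0xFF) expands to
- // 0x00FF00FF, and imm12 = 0x3FF (control bits 0b11) expands to 0xFFFFFFFF.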
-
- if (slice(imm12, 11, 10) == 0) {
- unsigned short control = slice(imm12, 9, 8);
- unsigned imm8 = slice(imm12, 7, 0);
- switch (control) {
- default:
- assert(0 && "No such value");
- return 0;
- case 0:
- return imm8;
- case 1:
- return imm8 << 16 | imm8;
- case 2:
- return imm8 << 24 | imm8 << 8;
- case 3:
- return imm8 << 24 | imm8 << 16 | imm8 << 8 | imm8;
- }
- } else {
- // A rotate is required.
- unsigned Val = 1 << 7 | slice(imm12, 6, 0);
- unsigned Amt = slice(imm12, 11, 7);
- return ARM_AM::rotr32(Val, Amt);
- }
-}
-
-static inline int decodeImm32_B_EncodingT3(uint32_t insn) {
- bool S = slice(insn, 26, 26);
- bool J1 = slice(insn, 13, 13);
- bool J2 = slice(insn, 11, 11);
- unsigned Imm21 = slice(insn, 21, 16) << 12 | slice(insn, 10, 0) << 1;
- if (S) Imm21 |= 1 << 20;
- if (J2) Imm21 |= 1 << 19;
- if (J1) Imm21 |= 1 << 18;
-
- return SignExtend32<21>(Imm21);
-}
-
-static inline int decodeImm32_B_EncodingT4(uint32_t insn) {
- unsigned S = slice(insn, 26, 26);
- bool I1 = slice(insn, 13, 13) == S;
- bool I2 = slice(insn, 11, 11) == S;
- unsigned Imm25 = slice(insn, 25, 16) << 12 | slice(insn, 10, 0) << 1;
- if (S) Imm25 |= 1 << 24;
- if (I1) Imm25 |= 1 << 23;
- if (I2) Imm25 |= 1 << 22;
-
- return SignExtend32<25>(Imm25);
-}
-
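-// decodeImm32_BL is identical to decodeImm32_B_EncodingT4: BL uses the same
-// S/I1/I2-based 25-bit immediate scheme as B encoding T4.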
-static inline int decodeImm32_BL(uint32_t insn) {
- unsigned S = slice(insn, 26, 26);
- bool I1 = slice(insn, 13, 13) == S;
- bool I2 = slice(insn, 11, 11) == S;
- unsigned Imm25 = slice(insn, 25, 16) << 12 | slice(insn, 10, 0) << 1;
- if (S) Imm25 |= 1 << 24;
- if (I1) Imm25 |= 1 << 23;
- if (I2) Imm25 |= 1 << 22;
-
- return SignExtend32<25>(Imm25);
-}
-
-static inline int decodeImm32_BLX(uint32_t insn) {
- unsigned S = slice(insn, 26, 26);
- bool I1 = slice(insn, 13, 13) == S;
- bool I2 = slice(insn, 11, 11) == S;
- unsigned Imm25 = slice(insn, 25, 16) << 12 | slice(insn, 10, 1) << 2;
- if (S) Imm25 |= 1 << 24;
- if (I1) Imm25 |= 1 << 23;
- if (I2) Imm25 |= 1 << 22;
-
- return SignExtend32<25>(Imm25);
-}
-
-// See, for example, A8.6.221 SXTAB16.
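-// The two-bit rotate field selects a rotation amount of 0, 8, 16, or 24 bits.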
-static inline unsigned decodeRotate(uint32_t insn) {
- unsigned rotate = slice(insn, 5, 4);
- return rotate << 3;
-}
-
-///////////////////////////////////////////////
-// //
-// Thumb1 instruction disassembly functions. //
-// //
-///////////////////////////////////////////////
-
-// See "Utilities for 16-bit Thumb instructions" for register naming convention.
-
-// A6.2.1 Shift (immediate), add, subtract, move, and compare
-//
-// shift immediate: tRd CPSR tRn imm5
-// add/sub register: tRd CPSR tRn tRm
-// add/sub 3-bit immediate: tRd CPSR tRn imm3
-// add/sub 8-bit immediate: tRt CPSR tRt(TIED_TO) imm8
-// mov/cmp immediate: tRt [CPSR] imm8 (CPSR present for mov)
-//
-// Special case:
-// tMOVSr: tRd tRn
-static bool DisassembleThumb1General(MCInst &MI, unsigned Opcode, uint32_t insn,
- unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
-
- const MCOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo;
- unsigned &OpIdx = NumOpsAdded;
-
- OpIdx = 0;
-
- assert(NumOps >= 2 && OpInfo[0].RegClass == ARM::tGPRRegClassID
- && "Invalid arguments");
-
- bool Imm3 = (Opcode == ARM::tADDi3 || Opcode == ARM::tSUBi3);
-
- // UseRt implies the use of imm8.
- bool UseRt = (Opcode == ARM::tADDi8 || Opcode == ARM::tSUBi8 ||
- Opcode == ARM::tMOVi8 || Opcode == ARM::tCMPi8);
-
- // Add the destination operand.
- MI.addOperand(MCOperand::CreateReg(
- getRegisterEnum(B, ARM::tGPRRegClassID,
- UseRt ? getT1tRt(insn) : getT1tRd(insn))));
- ++OpIdx;
-
- // Check whether the next operand to be added is a CCR Register.
- if (OpInfo[OpIdx].RegClass == ARM::CCRRegClassID) {
- assert(OpInfo[OpIdx].isOptionalDef() && "Optional def operand expected");
- MI.addOperand(MCOperand::CreateReg(B->InITBlock() ? 0 : ARM::CPSR));
- ++OpIdx;
- }
-
- // Check whether the next operand to be added is a Thumb1 Register.
- assert(OpIdx < NumOps && "More operands expected");
- if (OpInfo[OpIdx].RegClass == ARM::tGPRRegClassID) {
- // For UseRt, the reg operand is tied to the first reg operand.
- MI.addOperand(MCOperand::CreateReg(
- getRegisterEnum(B, ARM::tGPRRegClassID,
- UseRt ? getT1tRt(insn) : getT1tRn(insn))));
- ++OpIdx;
- }
-
- // Special case for tMOVSr.
- if (OpIdx == NumOps)
- return true;
-
- // The next available operand is either a reg operand or an imm operand.
- if (OpInfo[OpIdx].RegClass == ARM::tGPRRegClassID) {
- // Three register operand instructions.
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::tGPRRegClassID,
- getT1tRm(insn))));
- } else {
- assert(OpInfo[OpIdx].RegClass < 0 &&
- !OpInfo[OpIdx].isPredicate() && !OpInfo[OpIdx].isOptionalDef()
- && "Pure imm operand expected");
- unsigned Imm = 0;
- if (UseRt)
- Imm = getT1Imm8(insn);
- else if (Imm3)
- Imm = getT1Imm3(insn);
- else {
- Imm = getT1Imm5(insn);
- ARM_AM::ShiftOpc ShOp = getShiftOpcForBits(slice(insn, 12, 11));
- getImmShiftSE(ShOp, Imm);
- }
- MI.addOperand(MCOperand::CreateImm(Imm));
- }
- ++OpIdx;
-
- return true;
-}
-
-// A6.2.2 Data-processing
-//
-// tCMPr, tTST, tCMN: tRd tRn
-// tMVN, tRSB: tRd CPSR tRn
-// Others: tRd CPSR tRd(TIED_TO) tRn
-static bool DisassembleThumb1DP(MCInst &MI, unsigned Opcode, uint32_t insn,
- unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
-
- const MCInstrDesc &MCID = ARMInsts[Opcode];
- const MCOperandInfo *OpInfo = MCID.OpInfo;
- unsigned &OpIdx = NumOpsAdded;
-
- OpIdx = 0;
-
- assert(NumOps >= 2 && OpInfo[0].RegClass == ARM::tGPRRegClassID &&
- (OpInfo[1].RegClass == ARM::CCRRegClassID
- || OpInfo[1].RegClass == ARM::tGPRRegClassID)
- && "Invalid arguments");
-
- // Add the destination operand.
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::tGPRRegClassID,
- getT1tRd(insn))));
- ++OpIdx;
-
- // Check whether the next operand to be added is a CCR Register.
- if (OpInfo[OpIdx].RegClass == ARM::CCRRegClassID) {
- assert(OpInfo[OpIdx].isOptionalDef() && "Optional def operand expected");
- MI.addOperand(MCOperand::CreateReg(B->InITBlock() ? 0 : ARM::CPSR));
- ++OpIdx;
- }
-
- // We have either { tRd(TIED_TO), tRn } or { tRn } remaining.
- // Process the TIED_TO operand first.
-
- assert(OpIdx < NumOps && OpInfo[OpIdx].RegClass == ARM::tGPRRegClassID
- && "Thumb reg operand expected");
- int Idx;
- if ((Idx = MCID.getOperandConstraint(OpIdx, MCOI::TIED_TO)) != -1) {
- // The reg operand is tied to the first reg operand.
- MI.addOperand(MI.getOperand(Idx));
- ++OpIdx;
- }
-
- // Process possible next reg operand.
- if (OpIdx < NumOps && OpInfo[OpIdx].RegClass == ARM::tGPRRegClassID) {
- // Add tRn operand.
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::tGPRRegClassID,
- getT1tRn(insn))));
- ++OpIdx;
- }
-
- return true;
-}
-
-// A6.2.3 Special data instructions and branch and exchange
-//
-// tADDhirr: Rd Rd(TIED_TO) Rm
-// tCMPhir: Rd Rm
-// tMOVr, tMOVgpr2gpr, tMOVgpr2tgpr, tMOVtgpr2gpr: Rd|tRd Rm|tRn
-// tBX: Rm
-// tBX_RET: 0 operand
-// tBX_RET_vararg: Rm
-// tBLXr_r9: Rm
-// tBRIND: Rm
-static bool DisassembleThumb1Special(MCInst &MI, unsigned Opcode, uint32_t insn,
- unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
-
- // tBX_RET has no operands.
- if (NumOps == 0)
- return true;
-
- // BX/BLX/tBRIND (indirect branch, i.e., mov pc, Rm) has 1 reg operand: Rm.
- if (Opcode==ARM::tBLXr_r9 || Opcode==ARM::tBX || Opcode==ARM::tBRIND) {
- if (Opcode == ARM::tBLXr_r9) {
- // Handling the two predicate operands before the reg operand.
- if (!B->DoPredicateOperands(MI, Opcode, insn, NumOps))
- return false;
- NumOpsAdded += 2;
- }
-
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
- getT1Rm(insn))));
- NumOpsAdded += 1;
-
- if (Opcode == ARM::tBX) {
- // Handling the two predicate operands after the reg operand.
- if (!B->DoPredicateOperands(MI, Opcode, insn, NumOps))
- return false;
- NumOpsAdded += 2;
- }
-
- return true;
- }
-
- const MCInstrDesc &MCID = ARMInsts[Opcode];
- const MCOperandInfo *OpInfo = MCID.OpInfo;
- unsigned &OpIdx = NumOpsAdded;
-
- OpIdx = 0;
-
- // Add the destination operand.
- unsigned RegClass = OpInfo[OpIdx].RegClass;
- MI.addOperand(MCOperand::CreateReg(
- getRegisterEnum(B, RegClass,
- IsGPR(RegClass) ? getT1Rd(insn)
- : getT1tRd(insn))));
- ++OpIdx;
-
- // We have either { Rd(TIED_TO), Rm } or { Rm|tRn } remaining.
- // Process the TIED_TO operand first.
-
- assert(OpIdx < NumOps && "More operands expected");
- int Idx;
- if ((Idx = MCID.getOperandConstraint(OpIdx, MCOI::TIED_TO)) != -1) {
- // The reg operand is tied to the first reg operand.
- MI.addOperand(MI.getOperand(Idx));
- ++OpIdx;
- }
-
- // The next reg operand is either Rm or tRn.
- assert(OpIdx < NumOps && "More operands expected");
- RegClass = OpInfo[OpIdx].RegClass;
- MI.addOperand(MCOperand::CreateReg(
- getRegisterEnum(B, RegClass,
- IsGPR(RegClass) ? getT1Rm(insn)
- : getT1tRn(insn))));
- ++OpIdx;
-
- return true;
-}
-
-// A8.6.59 LDR (literal)
-//
-// tLDRpci: tRt imm8*4
-static bool DisassembleThumb1LdPC(MCInst &MI, unsigned Opcode, uint32_t insn,
- unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
-
- const MCOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo;
- if (!OpInfo) return false;
-
- assert(NumOps >= 2 && OpInfo[0].RegClass == ARM::tGPRRegClassID &&
- (OpInfo[1].RegClass < 0 &&
- !OpInfo[1].isPredicate() &&
- !OpInfo[1].isOptionalDef())
- && "Invalid arguments");
-
- // Add the destination operand.
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::tGPRRegClassID,
- getT1tRt(insn))));
-
- // And the (imm8 << 2) operand.
- MI.addOperand(MCOperand::CreateImm(getT1Imm8(insn) << 2));
-
- NumOpsAdded = 2;
-
- return true;
-}
-
-// Thumb specific addressing modes (see ARMInstrThumb.td):
-//
-// t_addrmode_rr := reg + reg
-//
-// t_addrmode_s4 := reg + reg
-// reg + imm5 * 4
-//
-// t_addrmode_s2 := reg + reg
-// reg + imm5 * 2
-//
-// t_addrmode_s1 := reg + reg
-// reg + imm5
-//
-// t_addrmode_sp := sp + imm8 * 4
-//
-
-// A8.6.63 LDRB (literal)
-// A8.6.79 LDRSB (literal)
-// A8.6.75 LDRH (literal)
-// A8.6.83 LDRSH (literal)
-// A8.6.59 LDR (literal)
-//
-// These instrs calculate an address from the PC value and an immediate offset.
-// Rd Rn=PC (+/-)imm12 (+ if Inst{23} == 0b1)
-static bool DisassembleThumb2Ldpci(MCInst &MI, unsigned Opcode,
- uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
-
- const MCOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo;
- if (!OpInfo) return false;
-
- assert(NumOps >= 2 &&
- OpInfo[0].RegClass == ARM::GPRRegClassID &&
- OpInfo[1].RegClass < 0 &&
- "Expect >= 2 operands, first as reg, and second as imm operand");
-
- // Build the register operand, followed by the (+/-)imm12 immediate.
-
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
- decodeRd(insn))));
-
- MI.addOperand(MCOperand::CreateImm(decodeImm12(insn)));
-
- NumOpsAdded = 2;
-
- return true;
-}
-
-
-// A6.2.4 Load/store single data item
-//
-// Load/Store Register (reg|imm): tRd tRn imm5|tRm
-// Load Register Signed Byte|Halfword: tRd tRn tRm
-static bool DisassembleThumb1LdSt(unsigned opA, MCInst &MI, unsigned Opcode,
- uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
-
- const MCInstrDesc &MCID = ARMInsts[Opcode];
- const MCOperandInfo *OpInfo = MCID.OpInfo;
- unsigned &OpIdx = NumOpsAdded;
-
- assert(NumOps >= 2
- && OpInfo[0].RegClass == ARM::tGPRRegClassID
- && OpInfo[1].RegClass == ARM::tGPRRegClassID
- && "Expect >= 2 operands and first two as thumb reg operands");
-
- // Add the destination reg and the base reg.
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::tGPRRegClassID,
- getT1tRd(insn))));
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::tGPRRegClassID,
- getT1tRn(insn))));
- OpIdx = 2;
-
- // We have either { imm5 } or { tRm } remaining.
- // Note that STR/LDR (register) should skip the imm5 offset operand for
- // t_addrmode_s[1|2|4].
-
- assert(OpIdx < NumOps && "More operands expected");
-
- if (OpInfo[OpIdx].RegClass < 0 && !OpInfo[OpIdx].isPredicate() &&
- !OpInfo[OpIdx].isOptionalDef()) {
- // Table A6-5 16-bit Thumb Load/store instructions
- // opA = 0b0101 for STR/LDR (register) and friends.
- // Otherwise, we have STR/LDR (immediate) and friends.
- assert(opA != 5 && "Immediate operand expected for this opcode");
- MI.addOperand(MCOperand::CreateImm(getT1Imm5(insn)));
- ++OpIdx;
- } else {
- // The next reg operand is tRm, the offset.
- assert(OpIdx < NumOps && OpInfo[OpIdx].RegClass == ARM::tGPRRegClassID
- && "Thumb reg operand expected");
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::tGPRRegClassID,
- getT1tRm(insn))));
- ++OpIdx;
- }
- return true;
-}
-
-// A6.2.4 Load/store single data item
-//
-// Load/Store Register SP relative: tRt ARM::SP imm8
-static bool DisassembleThumb1LdStSP(MCInst &MI, unsigned Opcode, uint32_t insn,
- unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
-
- assert((Opcode == ARM::tLDRspi || Opcode == ARM::tSTRspi)
- && "Unexpected opcode");
-
- const MCOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo;
- if (!OpInfo) return false;
-
- assert(NumOps >= 3 &&
- OpInfo[0].RegClass == ARM::tGPRRegClassID &&
- OpInfo[1].RegClass == ARM::GPRRegClassID &&
- (OpInfo[2].RegClass < 0 &&
- !OpInfo[2].isPredicate() &&
- !OpInfo[2].isOptionalDef())
- && "Invalid arguments");
-
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::tGPRRegClassID,
- getT1tRt(insn))));
- MI.addOperand(MCOperand::CreateReg(ARM::SP));
- MI.addOperand(MCOperand::CreateImm(getT1Imm8(insn)));
- NumOpsAdded = 3;
- return true;
-}
-
-// Table A6-1 16-bit Thumb instruction encoding
-// A8.6.10 ADR
-//
-// tADDrPCi: tRt imm8
-static bool DisassembleThumb1AddPCi(MCInst &MI, unsigned Opcode, uint32_t insn,
- unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
-
- assert(Opcode == ARM::tADDrPCi && "Unexpected opcode");
-
- const MCOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo;
- if (!OpInfo) return false;
-
- assert(NumOps >= 2 && OpInfo[0].RegClass == ARM::tGPRRegClassID &&
- (OpInfo[1].RegClass < 0 &&
- !OpInfo[1].isPredicate() &&
- !OpInfo[1].isOptionalDef())
- && "Invalid arguments");
-
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::tGPRRegClassID,
- getT1tRt(insn))));
- MI.addOperand(MCOperand::CreateImm(getT1Imm8(insn)));
- NumOpsAdded = 2;
- return true;
-}
-
-// Table A6-1 16-bit Thumb instruction encoding
-// A8.6.8 ADD (SP plus immediate)
-//
-// tADDrSPi: tRt ARM::SP imm8
-static bool DisassembleThumb1AddSPi(MCInst &MI, unsigned Opcode, uint32_t insn,
- unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
-
- assert(Opcode == ARM::tADDrSPi && "Unexpected opcode");
-
- const MCOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo;
- if (!OpInfo) return false;
-
- assert(NumOps >= 3 &&
- OpInfo[0].RegClass == ARM::tGPRRegClassID &&
- OpInfo[1].RegClass == ARM::GPRRegClassID &&
- (OpInfo[2].RegClass < 0 &&
- !OpInfo[2].isPredicate() &&
- !OpInfo[2].isOptionalDef())
- && "Invalid arguments");
-
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::tGPRRegClassID,
- getT1tRt(insn))));
- MI.addOperand(MCOperand::CreateReg(ARM::SP));
- MI.addOperand(MCOperand::CreateImm(getT1Imm8(insn)));
- NumOpsAdded = 3;
- return true;
-}
-
-// tPUSH, tPOP: Pred-Imm Pred-CCR register_list
-//
-// where register_list = low registers + [lr] for PUSH or
-// low registers + [pc] for POP
-//
-// "low registers" is specified by Inst{7-0}
-// lr|pc is specified by Inst{8}
-static bool DisassembleThumb1PushPop(MCInst &MI, unsigned Opcode, uint32_t insn,
- unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
-
- assert((Opcode == ARM::tPUSH || Opcode == ARM::tPOP) && "Unexpected opcode");
-
- unsigned &OpIdx = NumOpsAdded;
-
- // Handling the two predicate operands before the reglist.
- if (B->DoPredicateOperands(MI, Opcode, insn, NumOps))
- OpIdx += 2;
- else {
- DEBUG(errs() << "Expected predicate operands not found.\n");
- return false;
- }
-
- unsigned RegListBits = slice(insn, 8, 8) << (Opcode == ARM::tPUSH ? 14 : 15)
- | slice(insn, 7, 0);
-
- // Fill the variadic part of reglist.
- for (unsigned i = 0; i < 16; ++i) {
- if ((RegListBits >> i) & 1) {
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
- i)));
- ++OpIdx;
- }
- }
-
- return true;
-}
-
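A worked example of the reglist expansion above, with slice() re-implemented locally and PUSH {r0, r2, lr} as the (assumed) sample encoding:

    #include <cassert>
    #include <cstdint>

    // Local re-implementation of slice(insn, msb, lsb) for illustration.
    static unsigned slice(uint32_t v, unsigned msb, unsigned lsb) {
      return (v >> lsb) & ((1u << (msb - lsb + 1)) - 1);
    }

    int main() {
      uint32_t insn = 0xB505; // PUSH {r0, r2, lr}: Inst{8} = 1 selects LR
      bool isPush = true;
      unsigned RegListBits = slice(insn, 8, 8) << (isPush ? 14 : 15)
                           | slice(insn, 7, 0);
      assert(RegListBits == 0x4005); // bit 0 (r0), bit 2 (r2), bit 14 (lr)
      return 0;
    }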
-// A6.2.5 Miscellaneous 16-bit instructions
-// Delegate to DisassembleThumb1PushPop() for tPUSH & tPOP.
-//
-// tADDspi, tSUBspi: ARM::SP ARM::SP(TIED_TO) imm7
-// t2IT: firstcond=Inst{7-4} mask=Inst{3-0}
-// tCBNZ, tCBZ: tRd imm6*2
-// tBKPT: imm8
-// tNOP, tSEV, tYIELD, tWFE, tWFI:
-// no operand (except predicate pair)
- // tSETENDBE, tSETENDLE:
-// no operand
-// Others: tRd tRn
-static bool DisassembleThumb1Misc(MCInst &MI, unsigned Opcode, uint32_t insn,
- unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
-
- if (NumOps == 0)
- return true;
-
- if (Opcode == ARM::tPUSH || Opcode == ARM::tPOP)
- return DisassembleThumb1PushPop(MI, Opcode, insn, NumOps, NumOpsAdded, B);
-
- const MCOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo;
-
- // Predicate operands are handled elsewhere.
- if (NumOps == 2 &&
- OpInfo[0].isPredicate() && OpInfo[1].isPredicate() &&
- OpInfo[0].RegClass < 0 && OpInfo[1].RegClass == ARM::CCRRegClassID) {
- return true;
- }
-
- if (Opcode == ARM::tADDspi || Opcode == ARM::tSUBspi) {
- // Special case handling for tADDspi and tSUBspi.
- // A8.6.8 ADD (SP plus immediate) & A8.6.215 SUB (SP minus immediate)
- MI.addOperand(MCOperand::CreateReg(ARM::SP));
- MI.addOperand(MCOperand::CreateReg(ARM::SP));
- MI.addOperand(MCOperand::CreateImm(getT1Imm7(insn)));
- NumOpsAdded = 3;
- return true;
- }
-
- if (Opcode == ARM::t2IT) {
- // Special case handling for If-Then.
- // A8.6.50 IT
- // Tag the (firstcond[0] bit << 4) along with mask.
-
- // firstcond
- MI.addOperand(MCOperand::CreateImm(slice(insn, 7, 4)));
-
- // firstcond[0] and mask
- MI.addOperand(MCOperand::CreateImm(slice(insn, 4, 0)));
- NumOpsAdded = 2;
- return true;
- }
-
- if (Opcode == ARM::tBKPT) {
- MI.addOperand(MCOperand::CreateImm(getT1Imm8(insn))); // breakpoint value
- NumOpsAdded = 1;
- return true;
- }
-
- // CPS has a singleton $opt operand that contains the following information:
- // The first op would be 0b10 for enable and 0b11 for disable in regular ARM,
- // but in Thumb it is 0 for enable and 1 for disable. So map it to ARM's
- // default encoding. The second op gets the AIF flags from Inst{2-0}.
- if (Opcode == ARM::tCPS) {
- MI.addOperand(MCOperand::CreateImm(2 + slice(insn, 4, 4)));
- MI.addOperand(MCOperand::CreateImm(slice(insn, 2, 0)));
- NumOpsAdded = 2;
- return true;
- }
-
- assert(NumOps >= 2 && OpInfo[0].RegClass == ARM::tGPRRegClassID &&
- (OpInfo[1].RegClass < 0 || OpInfo[1].RegClass==ARM::tGPRRegClassID)
- && "Expect >=2 operands");
-
- // Add the destination operand.
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::tGPRRegClassID,
- getT1tRd(insn))));
-
- if (OpInfo[1].RegClass == ARM::tGPRRegClassID) {
- // Two register instructions.
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::tGPRRegClassID,
- getT1tRn(insn))));
- } else {
- // CBNZ, CBZ
- assert((Opcode == ARM::tCBNZ || Opcode == ARM::tCBZ) &&"Unexpected opcode");
- MI.addOperand(MCOperand::CreateImm(getT1Imm6(insn) * 2));
- }
-
- NumOpsAdded = 2;
-
- return true;
-}
-
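The t2IT special case above deliberately overlaps its two immediates at bit 4, so that firstcond[0] rides along with the 4-bit mask. A small sketch, reusing a local slice() and IT NE (0xBF18) as the assumed sample encoding:

    #include <cassert>
    #include <cstdint>

    static unsigned slice(uint32_t v, unsigned msb, unsigned lsb) {
      return (v >> lsb) & ((1u << (msb - lsb + 1)) - 1);
    }

    int main() {
      uint32_t insn = 0xBF18;                 // IT NE: firstcond=0b0001 mask=0b1000
      unsigned firstcond = slice(insn, 7, 4); // 0x1
      unsigned mask      = slice(insn, 4, 0); // firstcond[0] tagged in bit 4
      assert(firstcond == 0x1 && mask == 0x18);
      return 0;
    }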
-// A8.6.53 LDM / LDMIA
-// A8.6.189 STM / STMIA
-//
-// tLDMIA_UPD/tSTMIA_UPD: tRt tRt AM4ModeImm Pred-Imm Pred-CCR register_list
-// tLDMIA: tRt AM4ModeImm Pred-Imm Pred-CCR register_list
-static bool DisassembleThumb1LdStMul(bool Ld, MCInst &MI, unsigned Opcode,
- uint32_t insn, unsigned short NumOps,
- unsigned &NumOpsAdded, BO B) {
- assert((Opcode == ARM::tLDMIA || Opcode == ARM::tLDMIA_UPD ||
- Opcode == ARM::tSTMIA_UPD) && "Unexpected opcode");
-
- unsigned tRt = getT1tRt(insn);
- NumOpsAdded = 0;
-
- // WB register, if necessary.
- if (Opcode == ARM::tLDMIA_UPD || Opcode == ARM::tSTMIA_UPD) {
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
- tRt)));
- ++NumOpsAdded;
- }
-
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
- tRt)));
- ++NumOpsAdded;
-
- // Handling the two predicate operands before the reglist.
- if (B->DoPredicateOperands(MI, Opcode, insn, NumOps)) {
- NumOpsAdded += 2;
- } else {
- DEBUG(errs() << "Expected predicate operands not found.\n");
- return false;
- }
-
- unsigned RegListBits = slice(insn, 7, 0);
- if (BitCount(RegListBits) < 1) {
- DEBUG(errs() << "if BitCount(registers) < 1 then UNPREDICTABLE\n");
- return false;
- }
-
- // Fill the variadic part of reglist.
- for (unsigned i = 0; i < 8; ++i)
- if ((RegListBits >> i) & 1) {
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::tGPRRegClassID,
- i)));
- ++NumOpsAdded;
- }
-
- return true;
-}
-
-static bool DisassembleThumb1LdMul(MCInst &MI, unsigned Opcode, uint32_t insn,
- unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
- return DisassembleThumb1LdStMul(true, MI, Opcode, insn, NumOps, NumOpsAdded,
- B);
-}
-
-static bool DisassembleThumb1StMul(MCInst &MI, unsigned Opcode, uint32_t insn,
- unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
- return DisassembleThumb1LdStMul(false, MI, Opcode, insn, NumOps, NumOpsAdded,
- B);
-}
-
-// A8.6.16 B Encoding T1
-// cond = Inst{11-8} & imm8 = Inst{7-0}
-// imm32 = SignExtend(imm8:'0', 32)
-//
-// tBcc: offset Pred-Imm Pred-CCR
-// tSVC: imm8 Pred-Imm Pred-CCR
-// tTRAP: 0 operand (early return)
-static bool DisassembleThumb1CondBr(MCInst &MI, unsigned Opcode, uint32_t insn,
- unsigned short NumOps, unsigned &NumOpsAdded, BO) {
-
- if (Opcode == ARM::tTRAP)
- return true;
-
- const MCOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo;
- if (!OpInfo) return false;
-
- assert(NumOps == 3 && OpInfo[0].RegClass < 0 &&
- OpInfo[1].isPredicate() && OpInfo[2].RegClass == ARM::CCRRegClassID
- && "Exactly 3 operands expected");
-
- unsigned Imm8 = getT1Imm8(insn);
- MI.addOperand(MCOperand::CreateImm(
- Opcode == ARM::tBcc ? SignExtend32<9>(Imm8 << 1)
- : (int)Imm8));
-
- // The predicate operands are handled by
- // ARMBasicMCBuilder::TryPredicateAndSBitModifier().
- // But note that for tBcc, if cond = '1110' then UNDEFINED.
- if (Opcode == ARM::tBcc && slice(insn, 11, 8) == 14) {
- DEBUG(errs() << "if cond = '1110' then UNDEFINED\n");
- return false;
- }
- NumOpsAdded = 1;
-
- return true;
-}
-
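SignExtend32<9>(Imm8 << 1) above widens the 9-bit, halfword-aligned branch offset to a signed 32-bit value; a sketch of that template, matching the shape of llvm::SignExtend32 in Support/MathExtras.h:

    #include <cassert>
    #include <cstdint>

    // Sketch of llvm::SignExtend32<B>: shift left up to the sign bit, then
    // arithmetic-shift back down.
    template <unsigned B> int32_t SignExtend32(uint32_t X) {
      return int32_t(X << (32 - B)) >> (32 - B);
    }

    int main() {
      unsigned Imm8 = 0xFE;                     // tBcc offset field
      assert(SignExtend32<9>(Imm8 << 1) == -4); // imm32 = SignExtend(imm8:'0', 32)
      return 0;
    }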
-// A8.6.16 B Encoding T2
-// imm11 = Inst{10-0}
-// imm32 = SignExtend(imm11:'0', 32)
-//
-// tB: offset
-static bool DisassembleThumb1Br(MCInst &MI, unsigned Opcode, uint32_t insn,
- unsigned short NumOps, unsigned &NumOpsAdded, BO) {
-
- const MCOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo;
- if (!OpInfo) return false;
-
- assert(NumOps == 1 && OpInfo[0].RegClass < 0 && "1 imm operand expected");
-
- unsigned Imm11 = getT1Imm11(insn);
-
- MI.addOperand(MCOperand::CreateImm(SignExtend32<12>(Imm11 << 1)));
-
- NumOpsAdded = 1;
-
- return true;
-
-}
-
-// See A6.2 16-bit Thumb instruction encoding for instruction classes
-// corresponding to op.
-//
-// Table A6-1 16-bit Thumb instruction encoding (abridged)
-// op Instruction or instruction class
-// ------ --------------------------------------------------------------------
-// 00xxxx Shift (immediate), add, subtract, move, and compare on page A6-7
-// 010000 Data-processing on page A6-8
-// 010001 Special data instructions and branch and exchange on page A6-9
-// 01001x Load from Literal Pool, see LDR (literal) on page A8-122
-// 0101xx Load/store single data item on page A6-10
- // 011xxx Load/store single data item on page A6-10
- // 100xxx Load/store single data item on page A6-10
-// 10100x Generate PC-relative address, see ADR on page A8-32
-// 10101x Generate SP-relative address, see ADD (SP plus immediate) on
-// page A8-28
-// 1011xx Miscellaneous 16-bit instructions on page A6-11
-// 11000x Store multiple registers, see STM / STMIA / STMEA on page A8-374
- // 11001x Load multiple registers, see LDM / LDMIA / LDMFD on page A8-110
-// 1101xx Conditional branch, and Supervisor Call on page A6-13
-// 11100x Unconditional Branch, see B on page A8-44
-//
-static bool DisassembleThumb1(uint16_t op, MCInst &MI, unsigned Opcode,
- uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
-
- unsigned op1 = slice(op, 5, 4);
- unsigned op2 = slice(op, 3, 2);
- unsigned op3 = slice(op, 1, 0);
- unsigned opA = slice(op, 5, 2);
- switch (op1) {
- case 0:
- // A6.2.1 Shift (immediate), add, subtract, move, and compare
- return DisassembleThumb1General(MI, Opcode, insn, NumOps, NumOpsAdded, B);
- case 1:
- switch (op2) {
- case 0:
- switch (op3) {
- case 0:
- // A6.2.2 Data-processing
- return DisassembleThumb1DP(MI, Opcode, insn, NumOps, NumOpsAdded, B);
- case 1:
- // A6.2.3 Special data instructions and branch and exchange
- return DisassembleThumb1Special(MI, Opcode, insn, NumOps, NumOpsAdded,
- B);
- default:
- // A8.6.59 LDR (literal)
- return DisassembleThumb1LdPC(MI, Opcode, insn, NumOps, NumOpsAdded, B);
- }
- break;
- default:
- // A6.2.4 Load/store single data item
- return DisassembleThumb1LdSt(opA, MI, Opcode, insn, NumOps, NumOpsAdded,
- B);
- break;
- }
- break;
- case 2:
- switch (op2) {
- case 0:
- // A6.2.4 Load/store single data item
- return DisassembleThumb1LdSt(opA, MI, Opcode, insn, NumOps, NumOpsAdded,
- B);
- case 1:
- // A6.2.4 Load/store single data item
- return DisassembleThumb1LdStSP(MI, Opcode, insn, NumOps, NumOpsAdded, B);
- case 2:
- if (op3 <= 1) {
- // A8.6.10 ADR
- return DisassembleThumb1AddPCi(MI, Opcode, insn, NumOps, NumOpsAdded,
- B);
- } else {
- // A8.6.8 ADD (SP plus immediate)
- return DisassembleThumb1AddSPi(MI, Opcode, insn, NumOps, NumOpsAdded,
- B);
- }
- default:
- // A6.2.5 Miscellaneous 16-bit instructions
- return DisassembleThumb1Misc(MI, Opcode, insn, NumOps, NumOpsAdded, B);
- }
- break;
- case 3:
- switch (op2) {
- case 0:
- if (op3 <= 1) {
- // A8.6.189 STM / STMIA / STMEA
- return DisassembleThumb1StMul(MI, Opcode, insn, NumOps, NumOpsAdded, B);
- } else {
- // A8.6.53 LDM / LDMIA / LDMFD
- return DisassembleThumb1LdMul(MI, Opcode, insn, NumOps, NumOpsAdded, B);
- }
- case 1:
- // A6.2.6 Conditional branch, and Supervisor Call
- return DisassembleThumb1CondBr(MI, Opcode, insn, NumOps, NumOpsAdded, B);
- case 2:
- // Unconditional Branch, see B on page A8-44
- return DisassembleThumb1Br(MI, Opcode, insn, NumOps, NumOpsAdded, B);
- default:
- assert(0 && "Unreachable code");
- break;
- }
- break;
- default:
- assert(0 && "Unreachable code");
- break;
- }
-
- return false;
-}
-
-///////////////////////////////////////////////
-// //
-// Thumb2 instruction disassembly functions. //
-// //
-///////////////////////////////////////////////
-
-///////////////////////////////////////////////////////////
-// //
-// Note: the register naming follows the ARM convention! //
-// //
-///////////////////////////////////////////////////////////
-
-static inline bool Thumb2SRSOpcode(unsigned Opcode) {
- switch (Opcode) {
- default:
- return false;
- case ARM::t2SRSDBW: case ARM::t2SRSDB:
- case ARM::t2SRSIAW: case ARM::t2SRSIA:
- return true;
- }
-}
-
-static inline bool Thumb2RFEOpcode(unsigned Opcode) {
- switch (Opcode) {
- default:
- return false;
- case ARM::t2RFEDBW: case ARM::t2RFEDB:
- case ARM::t2RFEIAW: case ARM::t2RFEIA:
- return true;
- }
-}
-
-// t2SRS[IA|DB]W/t2SRS[IA|DB]: mode_imm = Inst{4-0}
-static bool DisassembleThumb2SRS(MCInst &MI, unsigned Opcode, uint32_t insn,
- unsigned short NumOps, unsigned &NumOpsAdded) {
- MI.addOperand(MCOperand::CreateImm(slice(insn, 4, 0)));
- NumOpsAdded = 1;
- return true;
-}
-
-// t2RFE[IA|DB]W/t2RFE[IA|DB]: Rn
-static bool DisassembleThumb2RFE(MCInst &MI, unsigned Opcode, uint32_t insn,
- unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
- unsigned Rn = decodeRn(insn);
- if (Rn == 15) {
- DEBUG(errs() << "if n == 15 then UNPREDICTABLE\n");
- return false;
- }
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B,ARM::GPRRegClassID,Rn)));
- NumOpsAdded = 1;
- return true;
-}
-
-static bool DisassembleThumb2LdStMul(MCInst &MI, unsigned Opcode, uint32_t insn,
- unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
-
- if (Thumb2SRSOpcode(Opcode))
- return DisassembleThumb2SRS(MI, Opcode, insn, NumOps, NumOpsAdded);
-
- if (Thumb2RFEOpcode(Opcode))
- return DisassembleThumb2RFE(MI, Opcode, insn, NumOps, NumOpsAdded, B);
-
- assert((Opcode == ARM::t2LDMIA || Opcode == ARM::t2LDMIA_UPD ||
- Opcode == ARM::t2LDMDB || Opcode == ARM::t2LDMDB_UPD ||
- Opcode == ARM::t2STMIA || Opcode == ARM::t2STMIA_UPD ||
- Opcode == ARM::t2STMDB || Opcode == ARM::t2STMDB_UPD)
- && "Unexpected opcode");
- assert(NumOps >= 4 && "Thumb2 LdStMul expects NumOps >= 4");
-
- NumOpsAdded = 0;
-
- unsigned Base = getRegisterEnum(B, ARM::GPRRegClassID, decodeRn(insn));
-
- // Writeback to base.
- if (Opcode == ARM::t2LDMIA_UPD || Opcode == ARM::t2LDMDB_UPD ||
- Opcode == ARM::t2STMIA_UPD || Opcode == ARM::t2STMDB_UPD) {
- MI.addOperand(MCOperand::CreateReg(Base));
- ++NumOpsAdded;
- }
-
- MI.addOperand(MCOperand::CreateReg(Base));
- ++NumOpsAdded;
-
- // Handling the two predicate operands before the reglist.
- if (B->DoPredicateOperands(MI, Opcode, insn, NumOps)) {
- NumOpsAdded += 2;
- } else {
- DEBUG(errs() << "Expected predicate operands not found.\n");
- return false;
- }
-
- unsigned RegListBits = insn & ((1 << 16) - 1);
-
- // Fill the variadic part of reglist.
- for (unsigned i = 0; i < 16; ++i)
- if ((RegListBits >> i) & 1) {
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
- i)));
- ++NumOpsAdded;
- }
-
- return true;
-}
-
-// t2LDREX: Rd Rn
-// t2LDREXD: Rd Rs Rn
-// t2LDREXB, t2LDREXH: Rd Rn
-// t2STREX: Rs Rd Rn
-// t2STREXD: Rm Rd Rs Rn
-// t2STREXB, t2STREXH: Rm Rd Rn
-static bool DisassembleThumb2LdStEx(MCInst &MI, unsigned Opcode, uint32_t insn,
- unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
-
- const MCOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo;
- if (!OpInfo) return false;
-
- unsigned &OpIdx = NumOpsAdded;
-
- OpIdx = 0;
-
- assert(NumOps >= 2
- && OpInfo[0].RegClass > 0
- && OpInfo[1].RegClass > 0
- && "Expect >=2 operands and first two as reg operands");
-
- bool isStore = (ARM::t2STREX <= Opcode && Opcode <= ARM::t2STREXH);
- bool isSW = (Opcode == ARM::t2LDREX || Opcode == ARM::t2STREX);
- bool isDW = (Opcode == ARM::t2LDREXD || Opcode == ARM::t2STREXD);
-
- unsigned Rt = decodeRd(insn);
- unsigned Rt2 = decodeRs(insn); // But note that this is Rd for t2STREX.
- unsigned Rd = decodeRm(insn);
- unsigned Rn = decodeRn(insn);
-
- // Some sanity checking first.
- if (isStore) {
- // if d == n || d == t then UNPREDICTABLE
- // if d == n || d == t || d == t2 then UNPREDICTABLE
- if (isDW) {
- if (Rd == Rn || Rd == Rt || Rd == Rt2) {
- DEBUG(errs() << "if d == n || d == t || d == t2 then UNPREDICTABLE\n");
- return false;
- }
- } else {
- if (isSW) {
- if (Rt2 == Rn || Rt2 == Rt) {
- DEBUG(errs() << "if d == n || d == t then UNPREDICTABLE\n");
- return false;
- }
- } else {
- if (Rd == Rn || Rd == Rt) {
- DEBUG(errs() << "if d == n || d == t then UNPREDICTABLE\n");
- return false;
- }
- }
- }
- } else {
- // Load
- // A8.6.71 LDREXD
- // if t == t2 then UNPREDICTABLE
- if (isDW && Rt == Rt2) {
- DEBUG(errs() << "if t == t2 then UNPREDICTABLE\n");
- return false;
- }
- }
-
- // Add the destination operand for store.
- if (isStore) {
- MI.addOperand(MCOperand::CreateReg(
- getRegisterEnum(B, OpInfo[OpIdx].RegClass,
- isSW ? Rt2 : Rd)));
- ++OpIdx;
- }
-
- // Source operand for store and destination operand for load.
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, OpInfo[OpIdx].RegClass,
- Rt)));
- ++OpIdx;
-
- // Thumb2 doubleword complication: there is an extra source/destination operand.
- if (isDW) {
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B,OpInfo[OpIdx].RegClass,
- Rt2)));
- ++OpIdx;
- }
-
- // Finally add the pointer operand.
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, OpInfo[OpIdx].RegClass,
- Rn)));
- ++OpIdx;
-
- return true;
-}
-
-// t2LDRDi8: Rd Rs Rn imm8s4 (offset mode)
-// t2LDRDpci: Rd Rs imm8s4 (Not decoded, prefer the generic t2LDRDi8 version)
-// t2STRDi8: Rd Rs Rn imm8s4 (offset mode)
-//
-// Ditto for t2LDRD_PRE, t2LDRD_POST, t2STRD_PRE, t2STRD_POST, which are for
-// disassembly only and do not have a tied_to writeback base register operand.
-static bool DisassembleThumb2LdStDual(MCInst &MI, unsigned Opcode,
- uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
-
- const MCOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo;
- if (!OpInfo) return false;
-
- assert(NumOps >= 4
- && OpInfo[0].RegClass > 0
- && OpInfo[0].RegClass == OpInfo[1].RegClass
- && OpInfo[2].RegClass > 0
- && OpInfo[3].RegClass < 0
- && "Expect >= 4 operands and first 3 as reg operands");
-
- // Thumb allows specifying Rt and Rt2, unlike ARM (which has Rt2 == Rt+1).
- unsigned Rt = decodeRd(insn);
- unsigned Rt2 = decodeRs(insn);
- unsigned Rn = decodeRn(insn);
-
- // Some sanity checking first.
-
- // A8.6.67 LDRD (literal) has its W bit as (0).
- if (Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2LDRD_PRE || Opcode == ARM::t2LDRD_POST) {
- if (Rn == 15 && slice(insn, 21, 21) != 0)
- return false;
- } else {
- // For Dual Store, PC cannot be used as the base register.
- if (Rn == 15) {
- DEBUG(errs() << "if n == 15 then UNPREDICTABLE\n");
- return false;
- }
- }
- if (Rt == Rt2) {
- DEBUG(errs() << "if t == t2 then UNPREDICTABLE\n");
- return false;
- }
- if (Opcode != ARM::t2LDRDi8 && Opcode != ARM::t2STRDi8) {
- if (Rn == Rt || Rn == Rt2) {
- DEBUG(errs() << "if wback && (n == t || n == t2) then UNPREDICTABLE\n");
- return false;
- }
- }
-
- // Add the <Rt> <Rt2> operands.
- unsigned RegClassPair = OpInfo[0].RegClass;
- unsigned RegClassBase = OpInfo[2].RegClass;
-
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, RegClassPair,
- decodeRd(insn))));
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, RegClassPair,
- decodeRs(insn))));
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, RegClassBase,
- decodeRn(insn))));
-
- // Finally add (+/-)imm8*4, depending on the U bit.
- int Offset = getImm8(insn) * 4;
- if (getUBit(insn) == 0)
- Offset = -Offset;
- MI.addOperand(MCOperand::CreateImm(Offset));
- NumOpsAdded = 4;
-
- return true;
-}
-
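The final (+/-)imm8*4 step above keys off the U bit; a minimal sketch, assuming getImm8() returns Inst{7-0} and getUBit() returns Inst{23}, with LDRD r2, r3, [r0, #-12] as the sample encoding:

    #include <cassert>
    #include <cstdint>

    static unsigned getImm8(uint32_t insn) { return insn & 0xFF; }      // Inst{7-0}
    static unsigned getUBit(uint32_t insn) { return (insn >> 23) & 1; } // Inst{23}

    int main() {
      uint32_t insn = 0xE9502303; // LDRD r2, r3, [r0, #-12]: U = 0, imm8 = 3
      int Offset = getImm8(insn) * 4;
      if (getUBit(insn) == 0)
        Offset = -Offset;
      assert(Offset == -12);
      return 0;
    }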
-// t2TBB, t2TBH: Rn Rm Pred-Imm Pred-CCR
-static bool DisassembleThumb2TB(MCInst &MI, unsigned Opcode,
- uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
-
- assert(NumOps >= 2 && "Expect >= 2 operands");
-
- // The generic version of TBB/TBH needs a base register.
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
- decodeRn(insn))));
- // Add the index register.
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
- decodeRm(insn))));
- NumOpsAdded = 2;
-
- return true;
-}
-
-static inline bool Thumb2ShiftOpcode(unsigned Opcode) {
- switch (Opcode) {
- default:
- return false;
- case ARM::t2MOVCClsl: case ARM::t2MOVCClsr:
- case ARM::t2MOVCCasr: case ARM::t2MOVCCror:
- case ARM::t2LSLri: case ARM::t2LSRri:
- case ARM::t2ASRri: case ARM::t2RORri:
- return true;
- }
-}
-
-// A6.3.11 Data-processing (shifted register)
-//
-// Two register operands (Rn=0b1111 no 1st operand reg): Rs Rm
-// Two register operands (Rs=0b1111 no dst operand reg): Rn Rm
-// Three register operands: Rs Rn Rm
-// Three register operands: (Rn=0b1111 Conditional Move) Rs Ro(TIED_TO) Rm
-//
- // For constant shifts, t2_so_reg is a 2-operand unit corresponding to the
- // Thumb2 register-with-shift forms: (Rm, ConstantShiftSpecifier).
- // Constant shift specifier: Imm = (ShOp | ShAmt<<3).
-//
-// There are special instructions, like t2MOVsra_flag and t2MOVsrl_flag, which
-// only require two register operands: Rd, Rm in ARM Reference Manual terms, and
-// nothing else, because the shift amount is already specified.
- // A similar case holds for t2MOVrx, t2ADDrr, etc.
-static bool DisassembleThumb2DPSoReg(MCInst &MI, unsigned Opcode, uint32_t insn,
- unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
-
- const MCInstrDesc &MCID = ARMInsts[Opcode];
- const MCOperandInfo *OpInfo = MCID.OpInfo;
- unsigned &OpIdx = NumOpsAdded;
-
- // Special case handling.
- if (Opcode == ARM::t2BR_JT) {
- assert(NumOps == 4
- && OpInfo[0].RegClass == ARM::GPRRegClassID
- && OpInfo[1].RegClass == ARM::GPRRegClassID
- && OpInfo[2].RegClass < 0
- && OpInfo[3].RegClass < 0
- && "Exactly 4 operands expected and first two as reg operands");
- // Only need to populate the src reg operand.
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
- decodeRm(insn))));
- MI.addOperand(MCOperand::CreateReg(0));
- MI.addOperand(MCOperand::CreateImm(0));
- MI.addOperand(MCOperand::CreateImm(0));
- NumOpsAdded = 4;
- return true;
- }
-
- OpIdx = 0;
-
- assert(NumOps >= 2
- && (OpInfo[0].RegClass == ARM::GPRRegClassID ||
- OpInfo[0].RegClass == ARM::rGPRRegClassID)
- && (OpInfo[1].RegClass == ARM::GPRRegClassID ||
- OpInfo[1].RegClass == ARM::rGPRRegClassID)
- && "Expect >= 2 operands and first two as reg operands");
-
- bool ThreeReg = (NumOps > 2 && (OpInfo[2].RegClass == ARM::GPRRegClassID ||
- OpInfo[2].RegClass == ARM::rGPRRegClassID));
- bool NoDstReg = (decodeRs(insn) == 0xF);
-
- // Build the register operands, followed by the constant shift specifier.
-
- MI.addOperand(MCOperand::CreateReg(
- getRegisterEnum(B, OpInfo[0].RegClass,
- NoDstReg ? decodeRn(insn) : decodeRs(insn))));
- ++OpIdx;
-
- if (ThreeReg) {
- int Idx;
- if ((Idx = MCID.getOperandConstraint(OpIdx, MCOI::TIED_TO)) != -1) {
- // Process tied_to operand constraint.
- MI.addOperand(MI.getOperand(Idx));
- ++OpIdx;
- } else if (!NoDstReg) {
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, OpInfo[1].RegClass,
- decodeRn(insn))));
- ++OpIdx;
- } else {
- DEBUG(errs() << "Thumb2 encoding error: d==15 for three-reg operands.\n");
- return false;
- }
- }
-
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, OpInfo[OpIdx].RegClass,
- decodeRm(insn))));
- ++OpIdx;
-
- if (NumOps == OpIdx)
- return true;
-
- if (OpInfo[OpIdx].RegClass < 0 && !OpInfo[OpIdx].isPredicate()
- && !OpInfo[OpIdx].isOptionalDef()) {
-
- if (Thumb2ShiftOpcode(Opcode)) {
- unsigned Imm = getShiftAmtBits(insn);
- ARM_AM::ShiftOpc ShOp = getShiftOpcForBits(slice(insn, 5, 4));
- getImmShiftSE(ShOp, Imm);
- MI.addOperand(MCOperand::CreateImm(Imm));
- } else {
- // Build the constant shift specifier operand.
- unsigned bits2 = getShiftTypeBits(insn);
- unsigned imm5 = getShiftAmtBits(insn);
- ARM_AM::ShiftOpc ShOp = ARM_AM::no_shift;
- unsigned ShAmt = decodeImmShift(bits2, imm5, ShOp);
- MI.addOperand(MCOperand::CreateImm(ARM_AM::getSORegOpc(ShOp, ShAmt)));
- }
- ++OpIdx;
- }
-
- return true;
-}
-
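The constant shift specifier built above packs the shift kind into the low three bits and the amount above them, per the Imm = (ShOp | ShAmt<<3) convention in the header comment. A self-contained pack/unpack sketch (the enum ordering here is illustrative, not a claim about ARM_AM):

    #include <cassert>

    enum ShiftOpc { no_shift = 0, asr, lsl, lsr, ror, rrx };

    static unsigned getSORegOpc(ShiftOpc ShOp, unsigned ShAmt) {
      return ShOp | (ShAmt << 3); // amount in the high bits, kind in the low 3
    }
    static ShiftOpc getSORegShOp(unsigned Opc) { return ShiftOpc(Opc & 7); }
    static unsigned getSORegOffset(unsigned Opc) { return Opc >> 3; }

    int main() {
      unsigned Imm = getSORegOpc(lsl, 12);
      assert(getSORegShOp(Imm) == lsl && getSORegOffset(Imm) == 12);
      return 0;
    }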
-// A6.3.1 Data-processing (modified immediate)
-//
-// Two register operands: Rs Rn ModImm
- // One register operand (Rs=0b1111 no explicit dest reg): Rn ModImm
- // One register operand (Rn=0b1111 no explicit src reg): Rs ModImm -
-// {t2MOVi, t2MVNi}
-//
-// ModImm = ThumbExpandImm(i:imm3:imm8)
-static bool DisassembleThumb2DPModImm(MCInst &MI, unsigned Opcode,
- uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
-
- const MCInstrDesc &MCID = ARMInsts[Opcode];
- const MCOperandInfo *OpInfo = MCID.OpInfo;
- unsigned &OpIdx = NumOpsAdded;
-
- OpIdx = 0;
-
- unsigned RdRegClassID = OpInfo[0].RegClass;
- assert(NumOps >= 2 && (RdRegClassID == ARM::GPRRegClassID ||
- RdRegClassID == ARM::rGPRRegClassID)
- && "Expect >= 2 operands and first one as reg operand");
-
- unsigned RnRegClassID = OpInfo[1].RegClass;
- bool TwoReg = (RnRegClassID == ARM::GPRRegClassID
- || RnRegClassID == ARM::rGPRRegClassID);
- bool NoDstReg = (decodeRs(insn) == 0xF);
-
- // Build the register operands, followed by the modified immediate.
-
- MI.addOperand(MCOperand::CreateReg(
- getRegisterEnum(B, RdRegClassID,
- NoDstReg ? decodeRn(insn) : decodeRs(insn))));
- ++OpIdx;
-
- if (TwoReg) {
- if (NoDstReg) {
- DEBUG(errs()<<"Thumb2 encoding error: d==15 for DPModImm 2-reg instr.\n");
- return false;
- }
- int Idx;
- if ((Idx = MCID.getOperandConstraint(OpIdx, MCOI::TIED_TO)) != -1) {
- // The reg operand is tied to the first reg operand.
- MI.addOperand(MI.getOperand(Idx));
- } else {
- // Add second reg operand.
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, RnRegClassID,
- decodeRn(insn))));
- }
- ++OpIdx;
- }
-
- // The modified immediate operand should come next.
- assert(OpIdx < NumOps && OpInfo[OpIdx].RegClass < 0 &&
- !OpInfo[OpIdx].isPredicate() && !OpInfo[OpIdx].isOptionalDef()
- && "Pure imm operand expected");
-
- // i:imm3:imm8
- // A6.3.2 Modified immediate constants in Thumb instructions
- unsigned imm12 = getIImm3Imm8(insn);
- MI.addOperand(MCOperand::CreateImm(ThumbExpandImm(imm12)));
- ++OpIdx;
-
- return true;
-}
-
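ThumbExpandImm above is the A6.3.2 expansion; the following standalone sketch is reconstructed from the architecture manual (an assumption about the helper's behavior, not a copy of it):

    #include <cassert>
    #include <cstdint>

    // A6.3.2: expand a 12-bit i:imm3:imm8 field to a 32-bit constant.
    static uint32_t ThumbExpandImm(unsigned imm12) {
      unsigned imm8 = imm12 & 0xFF;
      if (((imm12 >> 10) & 3) == 0) {
        switch ((imm12 >> 8) & 3) {
        case 0:  return imm8;                       // 00000000 00000000 00000000 abcdefgh
        case 1:  return (imm8 << 16) | imm8;        // 00000000 abcdefgh 00000000 abcdefgh
        case 2:  return (imm8 << 24) | (imm8 << 8); // abcdefgh 00000000 abcdefgh 00000000
        default: return imm8 * 0x01010101u;         // abcdefgh replicated in all 4 bytes
        }
      }
      // Rotated case: an 8-bit value 1bcdefgh rotated right by imm12<11:7>.
      uint32_t unrotated = 0x80 | (imm12 & 0x7F);
      unsigned rot = (imm12 >> 7) & 0x1F;           // >= 8 here, so no 0-shift UB
      return (unrotated >> rot) | (unrotated << (32 - rot));
    }

    int main() {
      assert(ThumbExpandImm(0x0AB) == 0xAB);
      assert(ThumbExpandImm(0x1AB) == 0x00AB00ABu);
      return 0;
    }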
-static inline bool Thumb2SaturateOpcode(unsigned Opcode) {
- switch (Opcode) {
- case ARM::t2SSAT: case ARM::t2SSAT16:
- case ARM::t2USAT: case ARM::t2USAT16:
- return true;
- default:
- return false;
- }
-}
-
-/// DisassembleThumb2Sat - Disassemble Thumb2 saturate instructions:
-/// o t2SSAT, t2USAT: Rs sat_pos Rn shamt
-/// o t2SSAT16, t2USAT16: Rs sat_pos Rn
-static bool DisassembleThumb2Sat(MCInst &MI, unsigned Opcode, uint32_t insn,
- unsigned &NumOpsAdded, BO B) {
- const MCInstrDesc &MCID = ARMInsts[Opcode];
- NumOpsAdded = MCID.getNumOperands() - 2; // ignore predicate operands
-
- // Disassemble the register def.
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::rGPRRegClassID,
- decodeRs(insn))));
-
- unsigned Pos = slice(insn, 4, 0);
- if (Opcode == ARM::t2SSAT || Opcode == ARM::t2SSAT16)
- Pos += 1;
- MI.addOperand(MCOperand::CreateImm(Pos));
-
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::rGPRRegClassID,
- decodeRn(insn))));
-
- if (NumOpsAdded == 4) {
- ARM_AM::ShiftOpc Opc = (slice(insn, 21, 21) != 0 ?
- ARM_AM::asr : ARM_AM::lsl);
- // Inst{14-12:7-6} encodes the imm5 shift amount.
- unsigned ShAmt = slice(insn, 14, 12) << 2 | slice(insn, 7, 6);
- if (ShAmt == 0) {
- if (Opc == ARM_AM::asr)
- ShAmt = 32;
- else
- Opc = ARM_AM::no_shift;
- }
- MI.addOperand(MCOperand::CreateImm(ARM_AM::getSORegOpc(Opc, ShAmt)));
- }
- return true;
-}
-
-// A6.3.3 Data-processing (plain binary immediate)
-//
-// o t2ADDri12, t2SUBri12: Rs Rn imm12
-// o t2LEApcrel (ADR): Rs imm12
-// o t2BFC (BFC): Rs Ro(TIED_TO) bf_inv_mask_imm
-// o t2BFI (BFI): Rs Ro(TIED_TO) Rn bf_inv_mask_imm
-// o t2MOVi16: Rs imm16
-// o t2MOVTi16: Rs imm16
-// o t2SBFX (SBFX): Rs Rn lsb width
-// o t2UBFX (UBFX): Rs Rn lsb width
-// o t2BFI (BFI): Rs Rn lsb width
-static bool DisassembleThumb2DPBinImm(MCInst &MI, unsigned Opcode,
- uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
-
- const MCInstrDesc &MCID = ARMInsts[Opcode];
- const MCOperandInfo *OpInfo = MCID.OpInfo;
- unsigned &OpIdx = NumOpsAdded;
-
- OpIdx = 0;
-
- unsigned RdRegClassID = OpInfo[0].RegClass;
- assert(NumOps >= 2 && (RdRegClassID == ARM::GPRRegClassID ||
- RdRegClassID == ARM::rGPRRegClassID)
- && "Expect >= 2 operands and first one as reg operand");
-
- unsigned RnRegClassID = OpInfo[1].RegClass;
- bool TwoReg = (RnRegClassID == ARM::GPRRegClassID
- || RnRegClassID == ARM::rGPRRegClassID);
-
- // Build the register operand(s), followed by the immediate(s).
-
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, RdRegClassID,
- decodeRs(insn))));
- ++OpIdx;
-
- if (TwoReg) {
- assert(NumOps >= 3 && "Expect >= 3 operands");
- int Idx;
- if ((Idx = MCID.getOperandConstraint(OpIdx, MCOI::TIED_TO)) != -1) {
- // Process tied_to operand constraint.
- MI.addOperand(MI.getOperand(Idx));
- } else {
- // Add src reg operand.
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, RnRegClassID,
- decodeRn(insn))));
- }
- ++OpIdx;
- }
-
- if (Opcode == ARM::t2BFI) {
- // Add val reg operand.
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, RnRegClassID,
- decodeRn(insn))));
- ++OpIdx;
- }
-
- assert(OpInfo[OpIdx].RegClass < 0 && !OpInfo[OpIdx].isPredicate()
- && !OpInfo[OpIdx].isOptionalDef()
- && "Pure imm operand expected");
-
- // Pre-increment OpIdx.
- ++OpIdx;
-
- if (Opcode == ARM::t2ADDri12 || Opcode == ARM::t2SUBri12
- || Opcode == ARM::t2LEApcrel)
- MI.addOperand(MCOperand::CreateImm(getIImm3Imm8(insn)));
- else if (Opcode == ARM::t2MOVi16 || Opcode == ARM::t2MOVTi16) {
- if (!B->tryAddingSymbolicOperand(getImm16(insn), 4, MI))
- MI.addOperand(MCOperand::CreateImm(getImm16(insn)));
- } else if (Opcode == ARM::t2BFC || Opcode == ARM::t2BFI) {
- uint32_t mask = 0;
- if (getBitfieldInvMask(insn, mask))
- MI.addOperand(MCOperand::CreateImm(mask));
- else
- return false;
- } else {
- // Handle the case of: lsb width
- assert((Opcode == ARM::t2SBFX || Opcode == ARM::t2UBFX)
- && "Unexpected opcode");
- MI.addOperand(MCOperand::CreateImm(getLsb(insn)));
- MI.addOperand(MCOperand::CreateImm(getWidthMinus1(insn) + 1));
-
- ++OpIdx;
- }
-
- return true;
-}
-
-// A6.3.4 Table A6-15 Miscellaneous control instructions
-// A8.6.41 DMB
-// A8.6.42 DSB
-// A8.6.49 ISB
-static inline bool t2MiscCtrlInstr(uint32_t insn) {
- if (slice(insn, 31, 20) == 0xf3b && slice(insn, 15, 14) == 2 &&
- slice(insn, 12, 12) == 0)
- return true;
-
- return false;
-}
-
-// A6.3.4 Branches and miscellaneous control
-//
-// A8.6.16 B
-// Branches: t2B, t2Bcc -> imm operand
-//
-// Branches: t2TPsoft -> no operand
-//
-// A8.6.23 BL, BLX (immediate)
-// Branches (defined in ARMInstrThumb.td): tBLr9, tBLXi_r9 -> imm operand
-//
-// A8.6.26
-// t2BXJ -> Rn
-//
-// Miscellaneous control:
-// -> no operand (except pred-imm pred-ccr for CLREX, memory barrier variants)
-//
-// Hint: t2NOP, t2YIELD, t2WFE, t2WFI, t2SEV
-// -> no operand (except pred-imm pred-ccr)
-//
-// t2DBG -> imm4 = Inst{3-0}
-//
-// t2MRS/t2MRSsys -> Rs
-// t2MSR/t2MSRsys -> Rn mask=Inst{11-8}
-// t2SMC -> imm4 = Inst{19-16}
-static bool DisassembleThumb2BrMiscCtrl(MCInst &MI, unsigned Opcode,
- uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
-
- if (NumOps == 0)
- return true;
-
- if (Opcode == ARM::t2DMB || Opcode == ARM::t2DSB) {
- // Inst{3-0} encodes the memory barrier option for the variants.
- unsigned opt = slice(insn, 3, 0);
- switch (opt) {
- case ARM_MB::SY: case ARM_MB::ST:
- case ARM_MB::ISH: case ARM_MB::ISHST:
- case ARM_MB::NSH: case ARM_MB::NSHST:
- case ARM_MB::OSH: case ARM_MB::OSHST:
- MI.addOperand(MCOperand::CreateImm(opt));
- NumOpsAdded = 1;
- return true;
- default:
- return false;
- }
- }
-
- if (t2MiscCtrlInstr(insn))
- return true;
-
- switch (Opcode) {
- case ARM::t2CLREX:
- case ARM::t2NOP:
- case ARM::t2YIELD:
- case ARM::t2WFE:
- case ARM::t2WFI:
- case ARM::t2SEV:
- return true;
- default:
- break;
- }
-
- // FIXME: To enable correct asm parsing and disasm of CPS we need 3 different
- // opcodes which match the same real instruction. This is needed since there's
- // no current handling of optional arguments. Fix here when a better handling
- // of optional arguments is implemented.
- if (Opcode == ARM::t2CPS3p) {
- MI.addOperand(MCOperand::CreateImm(slice(insn, 10, 9))); // imod
- MI.addOperand(MCOperand::CreateImm(slice(insn, 7, 5))); // iflags
- MI.addOperand(MCOperand::CreateImm(slice(insn, 4, 0))); // mode
- NumOpsAdded = 3;
- return true;
- }
- if (Opcode == ARM::t2CPS2p) {
- MI.addOperand(MCOperand::CreateImm(slice(insn, 10, 9))); // imod
- MI.addOperand(MCOperand::CreateImm(slice(insn, 7, 5))); // iflags
- NumOpsAdded = 2;
- return true;
- }
- if (Opcode == ARM::t2CPS1p) {
- MI.addOperand(MCOperand::CreateImm(slice(insn, 4, 0))); // mode
- NumOpsAdded = 1;
- return true;
- }
-
- // DBG has its option specified in Inst{3-0}.
- if (Opcode == ARM::t2DBG) {
- MI.addOperand(MCOperand::CreateImm(slice(insn, 3, 0)));
- NumOpsAdded = 1;
- return true;
- }
-
- // MRS and MRSsys take one GPR reg Rs.
- if (Opcode == ARM::t2MRS || Opcode == ARM::t2MRSsys) {
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
- decodeRs(insn))));
- NumOpsAdded = 1;
- return true;
- }
- // BXJ takes one GPR reg Rn.
- if (Opcode == ARM::t2BXJ) {
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
- decodeRn(insn))));
- NumOpsAdded = 1;
- return true;
- }
- // MSR takes a mask, followed by one GPR reg Rn. The mask contains the R Bit in
- // bit 4, and the special register fields in bits 3-0.
- if (Opcode == ARM::t2MSR) {
- MI.addOperand(MCOperand::CreateImm(slice(insn, 20, 20) << 4 /* R Bit */ |
- slice(insn, 11, 8) /* Special Reg */));
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
- decodeRn(insn))));
- NumOpsAdded = 2;
- return true;
- }
- // SMC takes imm4.
- if (Opcode == ARM::t2SMC) {
- MI.addOperand(MCOperand::CreateImm(slice(insn, 19, 16)));
- NumOpsAdded = 1;
- return true;
- }
-
- // Some instructions have predicate operands before the immediate.
- if (Opcode == ARM::tBLXi_r9 || Opcode == ARM::tBLr9) {
- // Handling the two predicate operands before the imm operand.
- if (B->DoPredicateOperands(MI, Opcode, insn, NumOps))
- NumOpsAdded += 2;
- else {
- DEBUG(errs() << "Expected predicate operands not found.\n");
- return false;
- }
- }
-
- // Add the imm operand.
- int Offset = 0;
-
- switch (Opcode) {
- default:
- assert(0 && "Unexpected opcode");
- return false;
- case ARM::t2B:
- Offset = decodeImm32_B_EncodingT4(insn);
- break;
- case ARM::t2Bcc:
- Offset = decodeImm32_B_EncodingT3(insn);
- break;
- case ARM::tBLr9:
- Offset = decodeImm32_BL(insn);
- break;
- case ARM::tBLXi_r9:
- Offset = decodeImm32_BLX(insn);
- break;
- }
-
- if (!B->tryAddingSymbolicOperand(Offset + B->getBuilderAddress() + 4, 4, MI))
- MI.addOperand(MCOperand::CreateImm(Offset));
-
- // This is an increment as some predicate operands may have been added first.
- NumOpsAdded += 1;
-
- return true;
-}
-
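The symbolic-operand call above biases the decoded offset by the instruction address plus 4, since a Thumb branch is relative to PC, which reads as the current instruction's address + 4. A tiny sketch with a hypothetical builder address:

    #include <cassert>
    #include <cstdint>

    int main() {
      uint64_t BuilderAddress = 0x8000; // hypothetical address of the branch
      int Offset = -16;                 // e.g., from decodeImm32_B_EncodingT4
      uint64_t Target = Offset + BuilderAddress + 4;
      assert(Target == 0x7FF4);
      return 0;
    }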
-static inline bool Thumb2PreloadOpcode(unsigned Opcode) {
- switch (Opcode) {
- default:
- return false;
- case ARM::t2PLDi12: case ARM::t2PLDi8:
- case ARM::t2PLDs:
- case ARM::t2PLDWi12: case ARM::t2PLDWi8:
- case ARM::t2PLDWs:
- case ARM::t2PLIi12: case ARM::t2PLIi8:
- case ARM::t2PLIs:
- return true;
- }
-}
-
-static bool DisassembleThumb2PreLoad(MCInst &MI, unsigned Opcode, uint32_t insn,
- unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
-
- // Preload Data/Instruction requires either 2 or 3 operands.
- // t2PLDi12, t2PLDi8, t2PLDpci: Rn [+/-]imm12/imm8
- // t2PLDr: Rn Rm
- // t2PLDs: Rn Rm imm2=Inst{5-4}
- // Same pattern applies for t2PLDW* and t2PLI*.
-
- const MCInstrDesc &MCID = ARMInsts[Opcode];
- const MCOperandInfo *OpInfo = MCID.OpInfo;
- unsigned &OpIdx = NumOpsAdded;
-
- OpIdx = 0;
-
- assert(NumOps >= 2 &&
- OpInfo[0].RegClass == ARM::GPRRegClassID &&
- "Expect >= 2 operands and first one as reg operand");
-
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
- decodeRn(insn))));
- ++OpIdx;
-
- if (OpInfo[OpIdx].RegClass == ARM::rGPRRegClassID) {
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
- decodeRm(insn))));
- } else {
- assert(OpInfo[OpIdx].RegClass < 0 && !OpInfo[OpIdx].isPredicate()
- && !OpInfo[OpIdx].isOptionalDef()
- && "Pure imm operand expected");
- int Offset = 0;
- if (Opcode == ARM::t2PLDi8 || Opcode == ARM::t2PLDWi8 ||
- Opcode == ARM::t2PLIi8) {
- // A8.6.117 Encoding T2: add = FALSE
- unsigned Imm8 = getImm8(insn);
- Offset = -1 * Imm8;
- } else {
- // The i12 forms. See, for example, A8.6.117 Encoding T1.
- // Note that currently t2PLDi12 also handles the previously named t2PLDpci
- // opcode; that's why we use decodeImm12(insn), which returns +/- imm12.
- Offset = decodeImm12(insn);
- }
- MI.addOperand(MCOperand::CreateImm(Offset));
- }
- ++OpIdx;
-
- if (OpIdx < NumOps && OpInfo[OpIdx].RegClass < 0 &&
- !OpInfo[OpIdx].isPredicate() && !OpInfo[OpIdx].isOptionalDef()) {
- // Fills in the shift amount for t2PLDs, t2PLDWs, t2PLIs.
- MI.addOperand(MCOperand::CreateImm(slice(insn, 5, 4)));
- ++OpIdx;
- }
-
- return true;
-}
-
-static bool BadRegsThumb2LdSt(unsigned Opcode, uint32_t insn, bool Load,
- unsigned R0, unsigned R1, unsigned R2, bool UseRm, bool WB) {
-
- // Inst{22-21} encodes the data item transferred for load/store.
- // For single word, it is encoded as ob10.
- bool Word = (slice(insn, 22, 21) == 2);
- bool Half = (slice(insn, 22, 21) == 1);
- bool Byte = (slice(insn, 22, 21) == 0);
-
- if (UseRm && BadReg(R2)) {
- DEBUG(errs() << "if BadReg(m) then UNPREDICTABLE\n");
- return true;
- }
-
- if (Load) {
- if (!Word && R0 == 13) {
- DEBUG(errs() << "if t == 13 then UNPREDICTABLE\n");
- return true;
- }
- if (Byte) {
- if (WB && R0 == 15 && slice(insn, 10, 8) == 3) {
- // A8.6.78 LDRSB (immediate) Encoding T2 (errata markup 8.0)
- DEBUG(errs() << "if t == 15 && PUW == '011' then UNPREDICTABLE\n");
- return true;
- }
- }
- // A6.3.8 Load halfword, memory hints
- if (Half) {
- if (WB) {
- if (R0 == R1) {
- // A8.6.82 LDRSH (immediate) Encoding T2
- DEBUG(errs() << "if WB && n == t then UNPREDICTABLE\n");
- return true;
- }
- if (R0 == 15 && slice(insn, 10, 8) == 3) {
- // A8.6.82 LDRSH (immediate) Encoding T2 (errata markup 8.0)
- DEBUG(errs() << "if t == 15 && PUW == '011' then UNPREDICTABLE\n");
- return true;
- }
- } else {
- if (Opcode == ARM::t2LDRHi8 || Opcode == ARM::t2LDRSHi8) {
- if (R0 == 15 && slice(insn, 10, 8) == 4) {
- // A8.6.82 LDRSH (immediate) Encoding T2
- DEBUG(errs() << "if Rt == '1111' and PUW == '100' then SEE"
- << " \"Unallocated memory hints\"\n");
- return true;
- }
- } else {
- if (R0 == 15) {
- // A8.6.82 LDRSH (immediate) Encoding T1
- DEBUG(errs() << "if Rt == '1111' then SEE"
- << " \"Unallocated memory hints\"\n");
- return true;
- }
- }
- }
- }
- } else {
- if (WB && R0 == R1) {
- DEBUG(errs() << "if wback && n == t then UNPREDICTABLE\n");
- return true;
- }
- if ((WB && R0 == 15) || (!WB && R1 == 15)) {
- DEBUG(errs() << "if Rn == '1111' then UNDEFINED\n");
- return true;
- }
- if (Word) {
- if ((WB && R1 == 15) || (!WB && R0 == 15)) {
- DEBUG(errs() << "if t == 15 then UNPREDICTABLE\n");
- return true;
- }
- } else {
- if ((WB && BadReg(R1)) || (!WB && BadReg(R0))) {
- DEBUG(errs() << "if BadReg(t) then UNPREDICTABLE\n");
- return true;
- }
- }
- }
- return false;
-}
-
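BadReg() used above is the ARM ARM pseudocode predicate that rejects SP and PC; a one-line sketch under that assumption (the real helper is defined elsewhere in this disassembler):

    #include <cassert>

    // Sketch of BadReg(n): r13 (SP) and r15 (PC) are disallowed wherever
    // the ARM ARM pseudocode says BadReg(n).
    static bool BadReg(unsigned R) { return R == 13 || R == 15; }

    int main() {
      assert(BadReg(13) && BadReg(15) && !BadReg(2));
      return 0;
    }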
-// A6.3.10 Store single data item
-// A6.3.9 Load byte, memory hints
-// A6.3.8 Load halfword, memory hints
-// A6.3.7 Load word
-//
-// For example,
-//
-// t2LDRi12: Rd Rn (+)imm12
-// t2LDRi8: Rd Rn (+/-)imm8 (+ if Inst{9} == 0b1)
-// t2LDRs: Rd Rn Rm ConstantShiftSpecifier (see also
-// DisassembleThumb2DPSoReg)
-// t2LDR_POST: Rd Rn Rn(TIED_TO) (+/-)imm8 (+ if Inst{9} == 0b1)
-// t2LDR_PRE: Rd Rn Rn(TIED_TO) (+/-)imm8 (+ if Inst{9} == 0b1)
-//
-// t2STRi12: Rd Rn (+)imm12
-// t2STRi8: Rd Rn (+/-)imm8 (+ if Inst{9} == 0b1)
-// t2STRs: Rd Rn Rm ConstantShiftSpecifier (see also
-// DisassembleThumb2DPSoReg)
-// t2STR_POST: Rn Rd Rn(TIED_TO) (+/-)imm8 (+ if Inst{9} == 0b1)
-// t2STR_PRE: Rn Rd Rn(TIED_TO) (+/-)imm8 (+ if Inst{9} == 0b1)
-//
-// Note that for indexed modes, the Rn(TIED_TO) operand needs to be populated
-// correctly, as LLVM AsmPrinter depends on it. For indexed stores, the first
-// operand is Rn; for all the other instructions, Rd is the first operand.
-//
-// Delegates to DisassembleThumb2PreLoad() for preload data/instruction.
-// Delegates to DisassembleThumb2Ldpci() for load * literal operations.
-static bool DisassembleThumb2LdSt(bool Load, MCInst &MI, unsigned Opcode,
- uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
-
- unsigned Rn = decodeRn(insn);
-
- if (Thumb2PreloadOpcode(Opcode))
- return DisassembleThumb2PreLoad(MI, Opcode, insn, NumOps, NumOpsAdded, B);
-
- // See, for example, A6.3.7 Load word: Table A6-18 Load word.
- if (Load && Rn == 15)
- return DisassembleThumb2Ldpci(MI, Opcode, insn, NumOps, NumOpsAdded, B);
- const MCInstrDesc &MCID = ARMInsts[Opcode];
- const MCOperandInfo *OpInfo = MCID.OpInfo;
- unsigned &OpIdx = NumOpsAdded;
-
- OpIdx = 0;
-
- assert(NumOps >= 3 &&
- OpInfo[0].RegClass > 0 &&
- OpInfo[1].RegClass > 0 &&
- "Expect >= 3 operands and first two as reg operands");
-
- bool ThreeReg = (OpInfo[2].RegClass > 0);
- bool TIED_TO = ThreeReg && MCID.getOperandConstraint(2, MCOI::TIED_TO) != -1;
- bool Imm12 = !ThreeReg && slice(insn, 23, 23) == 1; // ARMInstrThumb2.td
-
- // Build the register operands, followed by the immediate.
- unsigned R0 = 0, R1 = 0, R2 = 0;
- unsigned Rd = decodeRd(insn);
- int Imm = 0;
-
- if (!Load && TIED_TO) {
- R0 = Rn;
- R1 = Rd;
- } else {
- R0 = Rd;
- R1 = Rn;
- }
- if (ThreeReg) {
- if (TIED_TO) {
- R2 = Rn;
- Imm = decodeImm8(insn);
- } else {
- R2 = decodeRm(insn);
- // See, for example, A8.6.64 LDRB (register).
- // And ARMAsmPrinter::printT2AddrModeSoRegOperand().
- // LSL is the default shift opc, and LLVM does not expect it to be encoded
- // as part of the immediate operand.
- // Imm = ARM_AM::getSORegOpc(ARM_AM::lsl, slice(insn, 5, 4));
- Imm = slice(insn, 5, 4);
- }
- } else {
- if (Imm12)
- Imm = getImm12(insn);
- else
- Imm = decodeImm8(insn);
- }
-
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, OpInfo[OpIdx].RegClass,
- R0)));
- ++OpIdx;
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, OpInfo[OpIdx].RegClass,
- R1)));
- ++OpIdx;
-
- if (ThreeReg) {
- // This could be an offset register or a TIED_TO register.
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B,OpInfo[OpIdx].RegClass,
- R2)));
- ++OpIdx;
- }
-
- if (BadRegsThumb2LdSt(Opcode, insn, Load, R0, R1, R2, ThreeReg && !TIED_TO,
- TIED_TO))
- return false;
-
- assert(OpInfo[OpIdx].RegClass < 0 && !OpInfo[OpIdx].isPredicate()
- && !OpInfo[OpIdx].isOptionalDef()
- && "Pure imm operand expected");
-
- MI.addOperand(MCOperand::CreateImm(Imm));
- ++OpIdx;
-
- return true;
-}
-
-// A6.3.12 Data-processing (register)
-//
-// Two register operands [rotate]: Rs Rm [rotation(= (rotate:'000'))]
-// Three register operands only: Rs Rn Rm
-// Three register operands [rotate]: Rs Rn Rm [rotation(= (rotate:'000'))]
-//
-// Parallel addition and subtraction 32-bit Thumb instructions: Rs Rn Rm
-//
-// Miscellaneous operations: Rs [Rn] Rm
-static bool DisassembleThumb2DPReg(MCInst &MI, unsigned Opcode, uint32_t insn,
- unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
-
- const MCInstrDesc &MCID = ARMInsts[Opcode];
- const MCOperandInfo *OpInfo = MCID.OpInfo;
- unsigned &OpIdx = NumOpsAdded;
-
- OpIdx = 0;
-
- assert(NumOps >= 2 &&
- OpInfo[0].RegClass > 0 &&
- OpInfo[1].RegClass > 0 &&
- "Expect >= 2 operands and first two as reg operands");
-
- // Build the register operands, followed by the optional rotation amount.
-
- bool ThreeReg = NumOps > 2 && OpInfo[2].RegClass > 0;
-
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, OpInfo[OpIdx].RegClass,
- decodeRs(insn))));
- ++OpIdx;
-
- if (ThreeReg) {
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B,OpInfo[OpIdx].RegClass,
- decodeRn(insn))));
- ++OpIdx;
- }
-
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, OpInfo[OpIdx].RegClass,
- decodeRm(insn))));
- ++OpIdx;
-
- if (OpIdx < NumOps && OpInfo[OpIdx].RegClass < 0
- && !OpInfo[OpIdx].isPredicate() && !OpInfo[OpIdx].isOptionalDef()) {
- // Add the rotation amount immediate.
- MI.addOperand(MCOperand::CreateImm(decodeRotate(insn)));
- ++OpIdx;
- }
-
- return true;
-}
-
-// A6.3.16 Multiply, multiply accumulate, and absolute difference
-//
-// t2MLA, t2MLS, t2SMMLA, t2SMMLS: Rs Rn Rm Ra=Inst{15-12}
-// t2MUL, t2SMMUL: Rs Rn Rm
-// t2SMLA[BB|BT|TB|TT|WB|WT]: Rs Rn Rm Ra=Inst{15-12}
-// t2SMUL[BB|BT|TB|TT|WB|WT]: Rs Rn Rm
-//
-// Dual halfword multiply: t2SMUAD[X], t2SMUSD[X], t2SMLAD[X], t2SMLSD[X]:
-// Rs Rn Rm Ra=Inst{15-12}
-//
-// Unsigned Sum of Absolute Differences [and Accumulate]
-// Rs Rn Rm [Ra=Inst{15-12}]
-static bool DisassembleThumb2Mul(MCInst &MI, unsigned Opcode, uint32_t insn,
- unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
-
- const MCOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo;
-
- assert(NumOps >= 3 &&
- OpInfo[0].RegClass == ARM::rGPRRegClassID &&
- OpInfo[1].RegClass == ARM::rGPRRegClassID &&
- OpInfo[2].RegClass == ARM::rGPRRegClassID &&
- "Expect >= 3 operands and first three as reg operands");
-
- // Build the register operands.
-
- bool FourReg = NumOps > 3 && OpInfo[3].RegClass == ARM::rGPRRegClassID;
-
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::rGPRRegClassID,
- decodeRs(insn))));
-
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::rGPRRegClassID,
- decodeRn(insn))));
-
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::rGPRRegClassID,
- decodeRm(insn))));
-
- if (FourReg)
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::rGPRRegClassID,
- decodeRd(insn))));
-
- NumOpsAdded = FourReg ? 4 : 3;
-
- return true;
-}
-
-// A6.3.17 Long multiply, long multiply accumulate, and divide
-//
-// t2SMULL, t2UMULL, t2SMLAL, t2UMLAL, t2UMAAL: RdLo RdHi Rn Rm
-// where RdLo = Inst{15-12} and RdHi = Inst{11-8}
-//
-// Halfword multiple accumulate long: t2SMLAL<x><y>: RdLo RdHi Rn Rm
-// where RdLo = Inst{15-12} and RdHi = Inst{11-8}
-//
-// Dual halfword multiple: t2SMLALD[X], t2SMLSLD[X]: RdLo RdHi Rn Rm
-// where RdLo = Inst{15-12} and RdHi = Inst{11-8}
-//
-// Signed/Unsigned divide: t2SDIV, t2UDIV: Rs Rn Rm
-static bool DisassembleThumb2LongMul(MCInst &MI, unsigned Opcode, uint32_t insn,
- unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
-
- const MCOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo;
-
- assert(NumOps >= 3 &&
- OpInfo[0].RegClass == ARM::rGPRRegClassID &&
- OpInfo[1].RegClass == ARM::rGPRRegClassID &&
- OpInfo[2].RegClass == ARM::rGPRRegClassID &&
- "Expect >= 3 operands and first three as reg operands");
-
- bool FourReg = NumOps > 3 && OpInfo[3].RegClass == ARM::rGPRRegClassID;
-
- // Build the register operands.
-
- if (FourReg)
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::rGPRRegClassID,
- decodeRd(insn))));
-
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::rGPRRegClassID,
- decodeRs(insn))));
-
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::rGPRRegClassID,
- decodeRn(insn))));
-
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::rGPRRegClassID,
- decodeRm(insn))));
-
- if (FourReg)
- NumOpsAdded = 4;
- else
- NumOpsAdded = 3;
-
- return true;
-}
-
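The decodeR* helpers used throughout follow the ARM field positions spelled out in the comments above (RdLo = Inst{15-12}, RdHi = Inst{11-8}, plus Rn = Inst{19-16} and Rm = Inst{3-0}); a local sketch under those assumptions, using a UMULL-style bit pattern:

    #include <cassert>
    #include <cstdint>

    // Assumed field positions, per the RdLo/RdHi comments above.
    static unsigned decodeRn(uint32_t insn) { return (insn >> 16) & 0xF; }
    static unsigned decodeRd(uint32_t insn) { return (insn >> 12) & 0xF; }
    static unsigned decodeRs(uint32_t insn) { return (insn >>  8) & 0xF; }
    static unsigned decodeRm(uint32_t insn) { return insn & 0xF; }

    int main() {
      uint32_t insn = 0xFBA12302; // UMULL r2, r3, r1, r2 (t2UMULL pattern)
      assert(decodeRd(insn) == 2 && decodeRs(insn) == 3 &&  // RdLo, RdHi
             decodeRn(insn) == 1 && decodeRm(insn) == 2);
      return 0;
    }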
-// See A6.3 32-bit Thumb instruction encoding for instruction classes
-// corresponding to (op1, op2, op).
-//
-// Table A6-9 32-bit Thumb instruction encoding
-// op1 op2 op Instruction class, see
-// --- ------- -- -----------------------------------------------------------
-// 01 00xx0xx - Load/store multiple on page A6-23
-// 00xx1xx - Load/store dual, load/store exclusive, table branch on
-// page A6-24
-// 01xxxxx - Data-processing (shifted register) on page A6-31
-// 1xxxxxx - Coprocessor instructions on page A6-40
-// 10 x0xxxxx 0 Data-processing (modified immediate) on page A6-15
-// x1xxxxx 0 Data-processing (plain binary immediate) on page A6-19
-// - 1 Branches and miscellaneous control on page A6-20
-// 11 000xxx0 - Store single data item on page A6-30
-// 001xxx0 - Advanced SIMD element or structure load/store instructions
-// on page A7-27
-// 00xx001 - Load byte, memory hints on page A6-28
-// 00xx011 - Load halfword, memory hints on page A6-26
-// 00xx101 - Load word on page A6-25
-// 00xx111 - UNDEFINED
-// 010xxxx - Data-processing (register) on page A6-33
-// 0110xxx - Multiply, multiply accumulate, and absolute difference on
-// page A6-38
-// 0111xxx - Long multiply, long multiply accumulate, and divide on
-// page A6-39
-// 1xxxxxx - Coprocessor instructions on page A6-40
-//
-static bool DisassembleThumb2(uint16_t op1, uint16_t op2, uint16_t op,
- MCInst &MI, unsigned Opcode, uint32_t insn, unsigned short NumOps,
- unsigned &NumOpsAdded, BO B) {
-
- switch (op1) {
- case 1:
- if (slice(op2, 6, 5) == 0) {
- if (slice(op2, 2, 2) == 0) {
- // Load/store multiple.
- return DisassembleThumb2LdStMul(MI, Opcode, insn, NumOps, NumOpsAdded,
- B);
- }
-
- // Load/store dual, load/store exclusive, table branch, otherwise.
- assert(slice(op2, 2, 2) == 1 && "Thumb2 encoding error!");
- if ((ARM::t2LDREX <= Opcode && Opcode <= ARM::t2LDREXH) ||
- (ARM::t2STREX <= Opcode && Opcode <= ARM::t2STREXH)) {
- // Load/store exclusive.
- return DisassembleThumb2LdStEx(MI, Opcode, insn, NumOps, NumOpsAdded,
- B);
- }
- if (Opcode == ARM::t2LDRDi8 ||
- Opcode == ARM::t2LDRD_PRE || Opcode == ARM::t2LDRD_POST ||
- Opcode == ARM::t2STRDi8 ||
- Opcode == ARM::t2STRD_PRE || Opcode == ARM::t2STRD_POST) {
- // Load/store dual.
- return DisassembleThumb2LdStDual(MI, Opcode, insn, NumOps, NumOpsAdded,
- B);
- }
- if (Opcode == ARM::t2TBB || Opcode == ARM::t2TBH) {
- // Table branch.
- return DisassembleThumb2TB(MI, Opcode, insn, NumOps, NumOpsAdded, B);
- }
- } else if (slice(op2, 6, 5) == 1) {
- // Data-processing (shifted register).
- return DisassembleThumb2DPSoReg(MI, Opcode, insn, NumOps, NumOpsAdded, B);
- }
-
- // FIXME: A6.3.18 Coprocessor instructions
- // But see ThumbDisassembler::getInstruction().
-
- break;
- case 2:
- if (op == 0) {
- if (slice(op2, 5, 5) == 0)
- // Data-processing (modified immediate)
- return DisassembleThumb2DPModImm(MI, Opcode, insn, NumOps, NumOpsAdded,
- B);
- if (Thumb2SaturateOpcode(Opcode))
- return DisassembleThumb2Sat(MI, Opcode, insn, NumOpsAdded, B);
-
- // Data-processing (plain binary immediate)
- return DisassembleThumb2DPBinImm(MI, Opcode, insn, NumOps, NumOpsAdded,
- B);
- }
- // Branches and miscellaneous control on page A6-20.
- return DisassembleThumb2BrMiscCtrl(MI, Opcode, insn, NumOps, NumOpsAdded,
- B);
- case 3:
- switch (slice(op2, 6, 5)) {
- case 0:
- // Load/store instructions...
- if (slice(op2, 0, 0) == 0) {
- if (slice(op2, 4, 4) == 0) {
- // Store single data item on page A6-30
- return DisassembleThumb2LdSt(false, MI,Opcode,insn,NumOps,NumOpsAdded,
- B);
- } else {
- // FIXME: Advanced SIMD element or structure load/store instructions.
- // But see ThumbDisassembler::getInstruction().
- ;
- }
- } else {
- // Table A6-9 32-bit Thumb instruction encoding: Load byte|halfword|word
- return DisassembleThumb2LdSt(true, MI, Opcode, insn, NumOps,
- NumOpsAdded, B);
- }
- break;
- case 1:
- if (slice(op2, 4, 4) == 0) {
- // A6.3.12 Data-processing (register)
- return DisassembleThumb2DPReg(MI, Opcode, insn, NumOps, NumOpsAdded, B);
- } else if (slice(op2, 3, 3) == 0) {
- // A6.3.16 Multiply, multiply accumulate, and absolute difference
- return DisassembleThumb2Mul(MI, Opcode, insn, NumOps, NumOpsAdded, B);
- } else {
- // A6.3.17 Long multiply, long multiply accumulate, and divide
- return DisassembleThumb2LongMul(MI, Opcode, insn, NumOps, NumOpsAdded,
- B);
- }
- break;
- default:
- // FIXME: A6.3.18 Coprocessor instructions
- // But see ThumbDisassembler::getInstruction().
- ;
- break;
- }
-
- break;
- default:
- assert(0 && "Thumb2 encoding error!");
- break;
- }
-
- return false;
-}
-
-static bool DisassembleThumbFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
- unsigned short NumOps, unsigned &NumOpsAdded, BO Builder) {
-
- uint16_t HalfWord = slice(insn, 31, 16);
-
- if (HalfWord == 0) {
- // A6.2 16-bit Thumb instruction encoding
- // op = bits[15:10]
- uint16_t op = slice(insn, 15, 10);
- return DisassembleThumb1(op, MI, Opcode, insn, NumOps, NumOpsAdded,
- Builder);
- }
-
- unsigned bits15_11 = slice(HalfWord, 15, 11);
-
- // A6.1 Thumb instruction set encoding
- if (!(bits15_11 == 0x1D || bits15_11 == 0x1E || bits15_11 == 0x1F)) {
- assert(0 && "Bits[15:11] of first halfword of Thumb2 instruction out of range");
- return false;
- }
-
- // A6.3 32-bit Thumb instruction encoding
-
- uint16_t op1 = slice(HalfWord, 12, 11);
- uint16_t op2 = slice(HalfWord, 10, 4);
- uint16_t op = slice(insn, 15, 15);
-
- return DisassembleThumb2(op1, op2, op, MI, Opcode, insn, NumOps, NumOpsAdded,
- Builder);
-}
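For reference, the bits15_11 test above is the whole of the A6.1 length rule: a first halfword of 0b11101, 0b11110, or 0b11111 starts a 32-bit encoding, anything else is a 16-bit instruction. A standalone sketch:

    #include <cassert>
    #include <cstdint>

    // A6.1: first-halfword values 0x1D/0x1E/0x1F in bits[15:11] mark a
    // 32-bit Thumb2 encoding; everything else is a 16-bit Thumb1 encoding.
    static bool isThumb2(uint16_t HalfWord) {
      unsigned bits15_11 = HalfWord >> 11;
      return bits15_11 == 0x1D || bits15_11 == 0x1E || bits15_11 == 0x1F;
    }

    int main() {
      assert(!isThumb2(0x4C10)); // LDR (literal), 16-bit
      assert(isThumb2(0xF000));  // first halfword of a BL/B.W-style encoding
      return 0;
    }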
diff --git a/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp b/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp
index 78d3e47..ccdac3e 100644
--- a/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp
+++ b/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp
@@ -12,9 +12,9 @@
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "asm-printer"
-#include "ARMBaseInfo.h"
#include "ARMInstPrinter.h"
-#include "ARMAddressingModes.h"
+#include "MCTargetDesc/ARMBaseInfo.h"
+#include "MCTargetDesc/ARMAddressingModes.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCExpr.h"
@@ -25,6 +25,23 @@ using namespace llvm;
#define GET_INSTRUCTION_NAME
#include "ARMGenAsmWriter.inc"
+/// translateShiftImm - Convert shift immediate from 0-31 to 1-32 for printing.
+///
+/// getSORegOffset returns an integer from 0-31, representing '32' as 0.
+static unsigned translateShiftImm(unsigned imm) {
+ if (imm == 0)
+ return 32;
+ return imm;
+}
+
+
+ARMInstPrinter::ARMInstPrinter(const MCAsmInfo &MAI,
+ const MCSubtargetInfo &STI) :
+ MCInstPrinter(MAI) {
+ // Initialize the set of available features.
+ setAvailableFeatures(STI.getFeatureBits());
+}
+
StringRef ARMInstPrinter::getOpcodeName(unsigned Opcode) const {
return getInstructionName(Opcode);
}
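
The 0-means-32 convention above can be exercised in isolation. A freestanding sketch (illustrative only, not part of the patch) that mirrors translateShiftImm:

#include <cassert>

// The hardware encodes a shift amount of 32 as 0, so the printer maps an
// encoded 0 back to 32 before emitting "lsr #<n>" / "asr #<n>".
static unsigned translateShiftImm(unsigned imm) {
  return imm == 0 ? 32 : imm;
}

int main() {
  assert(translateShiftImm(0) == 32); // encoded 0 prints as #32
  assert(translateShiftImm(7) == 7);  // all other amounts are unchanged
  return 0;
}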
@@ -33,11 +50,12 @@ void ARMInstPrinter::printRegName(raw_ostream &OS, unsigned RegNo) const {
OS << getRegisterName(RegNo);
}
-void ARMInstPrinter::printInst(const MCInst *MI, raw_ostream &O) {
+void ARMInstPrinter::printInst(const MCInst *MI, raw_ostream &O,
+ StringRef Annot) {
unsigned Opcode = MI->getOpcode();
// Check for MOVs and print canonical forms, instead.
- if (Opcode == ARM::MOVs) {
+ if (Opcode == ARM::MOVsr) {
// FIXME: Thumb variants?
const MCOperand &Dst = MI->getOperand(0);
const MCOperand &MO1 = MI->getOperand(1);
@@ -51,20 +69,36 @@ void ARMInstPrinter::printInst(const MCInst *MI, raw_ostream &O) {
O << '\t' << getRegisterName(Dst.getReg())
<< ", " << getRegisterName(MO1.getReg());
- if (ARM_AM::getSORegShOp(MO3.getImm()) == ARM_AM::rrx)
- return;
+ O << ", " << getRegisterName(MO2.getReg());
+ assert(ARM_AM::getSORegOffset(MO3.getImm()) == 0);
+ printAnnotation(O, Annot);
+ return;
+ }
- O << ", ";
+ if (Opcode == ARM::MOVsi) {
+ // FIXME: Thumb variants?
+ const MCOperand &Dst = MI->getOperand(0);
+ const MCOperand &MO1 = MI->getOperand(1);
+ const MCOperand &MO2 = MI->getOperand(2);
+
+ O << '\t' << ARM_AM::getShiftOpcStr(ARM_AM::getSORegShOp(MO2.getImm()));
+ printSBitModifierOperand(MI, 5, O);
+ printPredicateOperand(MI, 3, O);
- if (MO2.getReg()) {
- O << getRegisterName(MO2.getReg());
- assert(ARM_AM::getSORegOffset(MO3.getImm()) == 0);
- } else {
- O << "#" << ARM_AM::getSORegOffset(MO3.getImm());
+ O << '\t' << getRegisterName(Dst.getReg())
+ << ", " << getRegisterName(MO1.getReg());
+
+ if (ARM_AM::getSORegShOp(MO2.getImm()) == ARM_AM::rrx) {
+ printAnnotation(O, Annot);
+ return;
}
+
+ O << ", #" << translateShiftImm(ARM_AM::getSORegOffset(MO2.getImm()));
+ printAnnotation(O, Annot);
return;
}
+
// A8.6.123 PUSH
if ((Opcode == ARM::STMDB_UPD || Opcode == ARM::t2STMDB_UPD) &&
MI->getOperand(0).getReg() == ARM::SP) {
@@ -74,6 +108,15 @@ void ARMInstPrinter::printInst(const MCInst *MI, raw_ostream &O) {
O << ".w";
O << '\t';
printRegisterList(MI, 4, O);
+ printAnnotation(O, Annot);
+ return;
+ }
+ if (Opcode == ARM::STR_PRE_IMM && MI->getOperand(2).getReg() == ARM::SP &&
+ MI->getOperand(3).getImm() == -4) {
+ O << '\t' << "push";
+ printPredicateOperand(MI, 4, O);
+ O << "\t{" << getRegisterName(MI->getOperand(1).getReg()) << "}";
+ printAnnotation(O, Annot);
return;
}
@@ -86,8 +129,18 @@ void ARMInstPrinter::printInst(const MCInst *MI, raw_ostream &O) {
O << ".w";
O << '\t';
printRegisterList(MI, 4, O);
+ printAnnotation(O, Annot);
return;
}
+ if (Opcode == ARM::LDR_POST_IMM && MI->getOperand(2).getReg() == ARM::SP &&
+ MI->getOperand(4).getImm() == 4) {
+ O << '\t' << "pop";
+ printPredicateOperand(MI, 5, O);
+ O << "\t{" << getRegisterName(MI->getOperand(0).getReg()) << "}";
+ printAnnotation(O, Annot);
+ return;
+ }
+
// A8.6.355 VPUSH
if ((Opcode == ARM::VSTMSDB_UPD || Opcode == ARM::VSTMDDB_UPD) &&
@@ -96,6 +149,7 @@ void ARMInstPrinter::printInst(const MCInst *MI, raw_ostream &O) {
printPredicateOperand(MI, 2, O);
O << '\t';
printRegisterList(MI, 4, O);
+ printAnnotation(O, Annot);
return;
}
@@ -106,10 +160,40 @@ void ARMInstPrinter::printInst(const MCInst *MI, raw_ostream &O) {
printPredicateOperand(MI, 2, O);
O << '\t';
printRegisterList(MI, 4, O);
+ printAnnotation(O, Annot);
+ return;
+ }
+
+ if (Opcode == ARM::tLDMIA) {
+ bool Writeback = true;
+ unsigned BaseReg = MI->getOperand(0).getReg();
+ for (unsigned i = 3; i < MI->getNumOperands(); ++i) {
+ if (MI->getOperand(i).getReg() == BaseReg)
+ Writeback = false;
+ }
+
+ O << "\tldm";
+
+ printPredicateOperand(MI, 1, O);
+ O << '\t' << getRegisterName(BaseReg);
+ if (Writeback) O << "!";
+ O << ", ";
+ printRegisterList(MI, 3, O);
+ printAnnotation(O, Annot);
+ return;
+ }
+
+ // Thumb1 NOP
+ if (Opcode == ARM::tMOVr && MI->getOperand(0).getReg() == ARM::R8 &&
+ MI->getOperand(1).getReg() == ARM::R8) {
+ O << "\tnop";
+ printPredicateOperand(MI, 2, O);
+ printAnnotation(O, Annot);
return;
}
printInstruction(MI, O);
+ printAnnotation(O, Annot);
}
void ARMInstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
@@ -122,16 +206,38 @@ void ARMInstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
O << '#' << Op.getImm();
} else {
assert(Op.isExpr() && "unknown operand kind in printOperand");
- O << *Op.getExpr();
+      // If a symbolic branch target was added as a constant expression,
+      // print that address in hex.
+ const MCConstantExpr *BranchTarget = dyn_cast<MCConstantExpr>(Op.getExpr());
+ int64_t Address;
+ if (BranchTarget && BranchTarget->EvaluateAsAbsolute(Address)) {
+ O << "0x";
+ O.write_hex(Address);
+ }
+ else {
+ // Otherwise, just print the expression.
+ O << *Op.getExpr();
+ }
}
}
+void ARMInstPrinter::printT2LdrLabelOperand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O) {
+ const MCOperand &MO1 = MI->getOperand(OpNum);
+ if (MO1.isExpr())
+ O << *MO1.getExpr();
+ else if (MO1.isImm())
+ O << "[pc, #" << MO1.getImm() << "]";
+ else
+ llvm_unreachable("Unknown LDR label operand?");
+}
+
// so_reg is a 4-operand unit corresponding to register forms of the A5.1
// "Addressing Mode 1 - Data-processing operands" forms. This includes:
// REG 0 0 - e.g. R5
// REG REG 0,SH_OPC - e.g. R5, ROR R3
// REG 0 IMM,SH_OPC - e.g. R5, LSL #3
-void ARMInstPrinter::printSORegOperand(const MCInst *MI, unsigned OpNum,
+void ARMInstPrinter::printSORegRegOperand(const MCInst *MI, unsigned OpNum,
raw_ostream &O) {
const MCOperand &MO1 = MI->getOperand(OpNum);
const MCOperand &MO2 = MI->getOperand(OpNum+1);
@@ -144,14 +250,27 @@ void ARMInstPrinter::printSORegOperand(const MCInst *MI, unsigned OpNum,
O << ", " << ARM_AM::getShiftOpcStr(ShOpc);
if (ShOpc == ARM_AM::rrx)
return;
- if (MO2.getReg()) {
- O << ' ' << getRegisterName(MO2.getReg());
- assert(ARM_AM::getSORegOffset(MO3.getImm()) == 0);
- } else if (ShOpc != ARM_AM::rrx) {
- O << " #" << ARM_AM::getSORegOffset(MO3.getImm());
- }
+
+ O << ' ' << getRegisterName(MO2.getReg());
+ assert(ARM_AM::getSORegOffset(MO3.getImm()) == 0);
+}
+
+void ARMInstPrinter::printSORegImmOperand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O) {
+ const MCOperand &MO1 = MI->getOperand(OpNum);
+ const MCOperand &MO2 = MI->getOperand(OpNum+1);
+
+ O << getRegisterName(MO1.getReg());
+
+ // Print the shift opc.
+ ARM_AM::ShiftOpc ShOpc = ARM_AM::getSORegShOp(MO2.getImm());
+ O << ", " << ARM_AM::getShiftOpcStr(ShOpc);
+ if (ShOpc == ARM_AM::rrx)
+ return;
+ O << " #" << translateShiftImm(ARM_AM::getSORegOffset(MO2.getImm()));
}
+
//===--------------------------------------------------------------------===//
// Addressing Mode #2
//===--------------------------------------------------------------------===//
@@ -209,6 +328,22 @@ void ARMInstPrinter::printAM2PostIndexOp(const MCInst *MI, unsigned Op,
<< " #" << ShImm;
}
+void ARMInstPrinter::printAddrModeTBB(const MCInst *MI, unsigned Op,
+ raw_ostream &O) {
+ const MCOperand &MO1 = MI->getOperand(Op);
+ const MCOperand &MO2 = MI->getOperand(Op+1);
+ O << "[" << getRegisterName(MO1.getReg()) << ", "
+ << getRegisterName(MO2.getReg()) << "]";
+}
+
+void ARMInstPrinter::printAddrModeTBH(const MCInst *MI, unsigned Op,
+ raw_ostream &O) {
+ const MCOperand &MO1 = MI->getOperand(Op);
+ const MCOperand &MO2 = MI->getOperand(Op+1);
+ O << "[" << getRegisterName(MO1.getReg()) << ", "
+ << getRegisterName(MO2.getReg()) << ", lsl #1]";
+}
+
void ARMInstPrinter::printAddrMode2Operand(const MCInst *MI, unsigned Op,
raw_ostream &O) {
const MCOperand &MO1 = MI->getOperand(Op);
@@ -284,7 +419,7 @@ void ARMInstPrinter::printAM3PreOrOffsetIndexOp(const MCInst *MI, unsigned Op,
O << '[' << getRegisterName(MO1.getReg());
if (MO2.getReg()) {
- O << ", " << (char)ARM_AM::getAM3Op(MO3.getImm())
+ O << ", " << getAddrOpcStr(ARM_AM::getAM3Op(MO3.getImm()))
<< getRegisterName(MO2.getReg()) << ']';
return;
}
@@ -315,8 +450,8 @@ void ARMInstPrinter::printAddrMode3OffsetOperand(const MCInst *MI,
const MCOperand &MO2 = MI->getOperand(OpNum+1);
if (MO1.getReg()) {
- O << (char)ARM_AM::getAM3Op(MO2.getImm())
- << getRegisterName(MO1.getReg());
+ O << getAddrOpcStr(ARM_AM::getAM3Op(MO2.getImm()))
+ << getRegisterName(MO1.getReg());
return;
}
@@ -326,6 +461,31 @@ void ARMInstPrinter::printAddrMode3OffsetOperand(const MCInst *MI,
<< ImmOffs;
}
+void ARMInstPrinter::printPostIdxImm8Operand(const MCInst *MI,
+ unsigned OpNum,
+ raw_ostream &O) {
+ const MCOperand &MO = MI->getOperand(OpNum);
+ unsigned Imm = MO.getImm();
+ O << '#' << ((Imm & 256) ? "" : "-") << (Imm & 0xff);
+}
+
+void ARMInstPrinter::printPostIdxRegOperand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O) {
+ const MCOperand &MO1 = MI->getOperand(OpNum);
+ const MCOperand &MO2 = MI->getOperand(OpNum+1);
+
+ O << (MO2.getImm() ? "" : "-") << getRegisterName(MO1.getReg());
+}
+
+void ARMInstPrinter::printPostIdxImm8s4Operand(const MCInst *MI,
+ unsigned OpNum,
+ raw_ostream &O) {
+ const MCOperand &MO = MI->getOperand(OpNum);
+ unsigned Imm = MO.getImm();
+ O << '#' << ((Imm & 256) ? "" : "-") << ((Imm & 0xff) << 2);
+}
+
+
void ARMInstPrinter::printLdStmModeOperand(const MCInst *MI, unsigned OpNum,
raw_ostream &O) {
ARM_AM::AMSubMode Mode = ARM_AM::getAM4SubMode(MI->getOperand(OpNum)
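
The post-index immediates added above pack the U (add) flag into bit 8, with the magnitude in bits 7:0 (scaled by 4 in the s4 variant). A freestanding sketch of the decode; the helper name is illustrative:

#include <cassert>

// Bit 8 set means add, clear means subtract; bits 7:0 hold the magnitude.
static int decodePostIdxImm8(unsigned Imm) {
  int Mag = Imm & 0xff;
  return (Imm & 256) ? Mag : -Mag;
}

int main() {
  assert(decodePostIdxImm8(0x104) == 4);  // U set   -> prints "#4"
  assert(decodePostIdxImm8(0x004) == -4); // U clear -> prints "#-4"
  return 0;
}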
@@ -345,7 +505,9 @@ void ARMInstPrinter::printAddrMode5Operand(const MCInst *MI, unsigned OpNum,
O << "[" << getRegisterName(MO1.getReg());
- if (unsigned ImmOffs = ARM_AM::getAM5Offset(MO2.getImm())) {
+ unsigned ImmOffs = ARM_AM::getAM5Offset(MO2.getImm());
+ unsigned Op = ARM_AM::getAM5Op(MO2.getImm());
+ if (ImmOffs || Op == ARM_AM::sub) {
O << ", #"
<< ARM_AM::getAddrOpcStr(ARM_AM::getAM5Op(MO2.getImm()))
<< ImmOffs * 4;
@@ -402,20 +564,31 @@ void ARMInstPrinter::printMemBOption(const MCInst *MI, unsigned OpNum,
void ARMInstPrinter::printShiftImmOperand(const MCInst *MI, unsigned OpNum,
raw_ostream &O) {
unsigned ShiftOp = MI->getOperand(OpNum).getImm();
- ARM_AM::ShiftOpc Opc = ARM_AM::getSORegShOp(ShiftOp);
- switch (Opc) {
- case ARM_AM::no_shift:
+ bool isASR = (ShiftOp & (1 << 5)) != 0;
+ unsigned Amt = ShiftOp & 0x1f;
+ if (isASR)
+ O << ", asr #" << (Amt == 0 ? 32 : Amt);
+ else if (Amt)
+ O << ", lsl #" << Amt;
+}
+
+void ARMInstPrinter::printPKHLSLShiftImm(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O) {
+ unsigned Imm = MI->getOperand(OpNum).getImm();
+ if (Imm == 0)
return;
- case ARM_AM::lsl:
- O << ", lsl #";
- break;
- case ARM_AM::asr:
- O << ", asr #";
- break;
- default:
- assert(0 && "unexpected shift opcode for shift immediate operand");
- }
- O << ARM_AM::getSORegOffset(ShiftOp);
+ assert(Imm > 0 && Imm < 32 && "Invalid PKH shift immediate value!");
+ O << ", lsl #" << Imm;
+}
+
+void ARMInstPrinter::printPKHASRShiftImm(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O) {
+ unsigned Imm = MI->getOperand(OpNum).getImm();
+ // A shift amount of 32 is encoded as 0.
+ if (Imm == 0)
+ Imm = 32;
+ assert(Imm > 0 && Imm <= 32 && "Invalid PKH shift immediate value!");
+ O << ", asr #" << Imm;
}
void ARMInstPrinter::printRegisterList(const MCInst *MI, unsigned OpNum,
@@ -450,6 +623,9 @@ void ARMInstPrinter::printCPSIFlag(const MCInst *MI, unsigned OpNum,
for (int i=2; i >= 0; --i)
if (IFlags & (1 << i))
O << ARM_PROC::IFlagsToString(1 << i);
+
+ if (IFlags == 0)
+ O << "none";
}
void ARMInstPrinter::printMSRMaskOperand(const MCInst *MI, unsigned OpNum,
@@ -458,10 +634,43 @@ void ARMInstPrinter::printMSRMaskOperand(const MCInst *MI, unsigned OpNum,
unsigned SpecRegRBit = Op.getImm() >> 4;
unsigned Mask = Op.getImm() & 0xf;
+ if (getAvailableFeatures() & ARM::FeatureMClass) {
+ switch (Op.getImm()) {
+ default: assert(0 && "Unexpected mask value!");
+ case 0: O << "apsr"; return;
+ case 1: O << "iapsr"; return;
+ case 2: O << "eapsr"; return;
+ case 3: O << "xpsr"; return;
+ case 5: O << "ipsr"; return;
+ case 6: O << "epsr"; return;
+ case 7: O << "iepsr"; return;
+ case 8: O << "msp"; return;
+ case 9: O << "psp"; return;
+ case 16: O << "primask"; return;
+ case 17: O << "basepri"; return;
+ case 18: O << "basepri_max"; return;
+ case 19: O << "faultmask"; return;
+ case 20: O << "control"; return;
+ }
+ }
+
+ // As special cases, CPSR_f, CPSR_s and CPSR_fs prefer printing as
+  // APSR_nzcvq, APSR_g and APSR_nzcvqg, respectively.
+ if (!SpecRegRBit && (Mask == 8 || Mask == 4 || Mask == 12)) {
+ O << "APSR_";
+ switch (Mask) {
+ default: assert(0);
+ case 4: O << "g"; return;
+ case 8: O << "nzcvq"; return;
+ case 12: O << "nzcvqg"; return;
+ }
+ llvm_unreachable("Unexpected mask value!");
+ }
+
if (SpecRegRBit)
- O << "spsr";
+ O << "SPSR";
else
- O << "cpsr";
+ O << "CPSR";
if (Mask) {
O << '_';
@@ -501,15 +710,20 @@ void ARMInstPrinter::printNoHashImmediate(const MCInst *MI, unsigned OpNum,
}
void ARMInstPrinter::printPImmediate(const MCInst *MI, unsigned OpNum,
- raw_ostream &O) {
+ raw_ostream &O) {
O << "p" << MI->getOperand(OpNum).getImm();
}
void ARMInstPrinter::printCImmediate(const MCInst *MI, unsigned OpNum,
- raw_ostream &O) {
+ raw_ostream &O) {
O << "c" << MI->getOperand(OpNum).getImm();
}
+void ARMInstPrinter::printCoprocOptionImm(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O) {
+ O << "{" << MI->getOperand(OpNum).getImm() << "}";
+}
+
void ARMInstPrinter::printPCLabel(const MCInst *MI, unsigned OpNum,
raw_ostream &O) {
llvm_unreachable("Unhandled PC-relative pseudo-instruction!");
@@ -517,7 +731,13 @@ void ARMInstPrinter::printPCLabel(const MCInst *MI, unsigned OpNum,
void ARMInstPrinter::printThumbS4ImmOperand(const MCInst *MI, unsigned OpNum,
raw_ostream &O) {
- O << "#" << MI->getOperand(OpNum).getImm() * 4;
+ O << "#" << MI->getOperand(OpNum).getImm() * 4;
+}
+
+void ARMInstPrinter::printThumbSRImm(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O) {
+ unsigned Imm = MI->getOperand(OpNum).getImm();
+ O << "#" << (Imm == 0 ? 32 : Imm);
}
void ARMInstPrinter::printThumbITMask(const MCInst *MI, unsigned OpNum,
@@ -610,7 +830,7 @@ void ARMInstPrinter::printT2SOOperand(const MCInst *MI, unsigned OpNum,
ARM_AM::ShiftOpc ShOpc = ARM_AM::getSORegShOp(MO2.getImm());
O << ", " << ARM_AM::getShiftOpcStr(ShOpc);
if (ShOpc != ARM_AM::rrx)
- O << " #" << ARM_AM::getSORegOffset(MO2.getImm());
+ O << " #" << translateShiftImm(ARM_AM::getSORegOffset(MO2.getImm()));
}
void ARMInstPrinter::printAddrModeImm12Operand(const MCInst *MI, unsigned OpNum,
@@ -647,7 +867,9 @@ void ARMInstPrinter::printT2AddrModeImm8Operand(const MCInst *MI,
int32_t OffImm = (int32_t)MO2.getImm();
// Don't print +0.
- if (OffImm < 0)
+ if (OffImm == INT32_MIN)
+ O << ", #-0";
+ else if (OffImm < 0)
O << ", #-" << -OffImm;
else if (OffImm > 0)
O << ", #" << OffImm;
@@ -671,6 +893,18 @@ void ARMInstPrinter::printT2AddrModeImm8s4Operand(const MCInst *MI,
O << "]";
}
+void ARMInstPrinter::printT2AddrModeImm0_1020s4Operand(const MCInst *MI,
+ unsigned OpNum,
+ raw_ostream &O) {
+ const MCOperand &MO1 = MI->getOperand(OpNum);
+ const MCOperand &MO2 = MI->getOperand(OpNum+1);
+
+ O << "[" << getRegisterName(MO1.getReg());
+ if (MO2.getImm())
+ O << ", #" << MO2.getImm() * 4;
+ O << "]";
+}
+
void ARMInstPrinter::printT2AddrModeImm8OffsetOperand(const MCInst *MI,
unsigned OpNum,
raw_ostream &O) {
@@ -678,9 +912,9 @@ void ARMInstPrinter::printT2AddrModeImm8OffsetOperand(const MCInst *MI,
int32_t OffImm = (int32_t)MO1.getImm();
// Don't print +0.
if (OffImm < 0)
- O << "#-" << -OffImm;
- else if (OffImm > 0)
- O << "#" << OffImm;
+ O << ", #-" << -OffImm;
+ else
+ O << ", #" << OffImm;
}
void ARMInstPrinter::printT2AddrModeImm8s4OffsetOperand(const MCInst *MI,
@@ -689,10 +923,13 @@ void ARMInstPrinter::printT2AddrModeImm8s4OffsetOperand(const MCInst *MI,
const MCOperand &MO1 = MI->getOperand(OpNum);
int32_t OffImm = (int32_t)MO1.getImm() / 4;
// Don't print +0.
- if (OffImm < 0)
- O << "#-" << -OffImm * 4;
- else if (OffImm > 0)
- O << "#" << OffImm * 4;
+ if (OffImm != 0) {
+ O << ", ";
+ if (OffImm < 0)
+ O << "#-" << -OffImm * 4;
+ else if (OffImm > 0)
+ O << "#" << OffImm * 4;
+ }
}
void ARMInstPrinter::printT2AddrModeSoRegOperand(const MCInst *MI,
@@ -715,39 +952,10 @@ void ARMInstPrinter::printT2AddrModeSoRegOperand(const MCInst *MI,
O << "]";
}
-void ARMInstPrinter::printVFPf32ImmOperand(const MCInst *MI, unsigned OpNum,
- raw_ostream &O) {
- const MCOperand &MO = MI->getOperand(OpNum);
- O << '#';
- if (MO.isFPImm()) {
- O << (float)MO.getFPImm();
- } else {
- union {
- uint32_t I;
- float F;
- } FPUnion;
-
- FPUnion.I = MO.getImm();
- O << FPUnion.F;
- }
-}
-
-void ARMInstPrinter::printVFPf64ImmOperand(const MCInst *MI, unsigned OpNum,
- raw_ostream &O) {
+void ARMInstPrinter::printFPImmOperand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O) {
const MCOperand &MO = MI->getOperand(OpNum);
- O << '#';
- if (MO.isFPImm()) {
- O << MO.getFPImm();
- } else {
- // We expect the binary encoding of a floating point number here.
- union {
- uint64_t I;
- double D;
- } FPUnion;
-
- FPUnion.I = MO.getImm();
- O << FPUnion.D;
- }
+ O << '#' << ARM_AM::getFPImmFloat(MO.getImm());
}
void ARMInstPrinter::printNEONModImmOperand(const MCInst *MI, unsigned OpNum,
@@ -757,3 +965,28 @@ void ARMInstPrinter::printNEONModImmOperand(const MCInst *MI, unsigned OpNum,
uint64_t Val = ARM_AM::decodeNEONModImm(EncodedImm, EltBits);
O << "#0x" << utohexstr(Val);
}
+
+void ARMInstPrinter::printImmPlusOneOperand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O) {
+ unsigned Imm = MI->getOperand(OpNum).getImm();
+ O << "#" << Imm + 1;
+}
+
+void ARMInstPrinter::printRotImmOperand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O) {
+ unsigned Imm = MI->getOperand(OpNum).getImm();
+ if (Imm == 0)
+ return;
+ O << ", ror #";
+ switch (Imm) {
+ default: assert (0 && "illegal ror immediate!");
+ case 1: O << "8"; break;
+ case 2: O << "16"; break;
+ case 3: O << "24"; break;
+ }
+}
+
+void ARMInstPrinter::printVectorIndex(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O) {
+ O << "[" << MI->getOperand(OpNum).getImm() << "]";
+}
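
Several of the hunks above rely on INT32_MIN as an in-band sentinel for the distinct "#-0" offset, which a plain sign test cannot express. A freestanding sketch of that printing convention; the helper name is illustrative:

#include <cstdint>
#include <cstdio>

// Mirrors the printT2AddrModeImm8Operand change: INT32_MIN means "#-0",
// negative values print with an explicit minus, and +0 is suppressed.
static void printOffImm(int32_t OffImm) {
  if (OffImm == INT32_MIN)
    std::printf(", #-0");
  else if (OffImm < 0)
    std::printf(", #-%d", -OffImm);
  else if (OffImm > 0)
    std::printf(", #%d", OffImm);
}

int main() {
  printOffImm(INT32_MIN); // ", #-0"
  printOffImm(-4);        // ", #-4"
  printOffImm(0);         // prints nothing
  std::printf("\n");
  return 0;
}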
diff --git a/lib/Target/ARM/InstPrinter/ARMInstPrinter.h b/lib/Target/ARM/InstPrinter/ARMInstPrinter.h
index d5f238b..5c2173f 100644
--- a/lib/Target/ARM/InstPrinter/ARMInstPrinter.h
+++ b/lib/Target/ARM/InstPrinter/ARMInstPrinter.h
@@ -15,6 +15,7 @@
#define ARMINSTPRINTER_H
#include "llvm/MC/MCInstPrinter.h"
+#include "llvm/MC/MCSubtargetInfo.h"
namespace llvm {
@@ -22,10 +23,9 @@ class MCOperand;
class ARMInstPrinter : public MCInstPrinter {
public:
- ARMInstPrinter(const MCAsmInfo &MAI)
- : MCInstPrinter(MAI) {}
+ ARMInstPrinter(const MCAsmInfo &MAI, const MCSubtargetInfo &STI);
- virtual void printInst(const MCInst *MI, raw_ostream &O);
+ virtual void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot);
virtual StringRef getOpcodeName(unsigned Opcode) const;
virtual void printRegName(raw_ostream &OS, unsigned RegNo) const;
@@ -38,8 +38,11 @@ public:
void printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
- void printSORegOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+ void printSORegRegOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+ void printSORegImmOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+ void printAddrModeTBB(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+ void printAddrModeTBH(const MCInst *MI, unsigned OpNum, raw_ostream &O);
void printAddrMode2Operand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
void printAM2PostIndexOp(const MCInst *MI, unsigned OpNum, raw_ostream &O);
void printAM2PreOrOffsetIndexOp(const MCInst *MI, unsigned OpNum,
@@ -48,11 +51,15 @@ public:
raw_ostream &O);
void printAddrMode3Operand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
- void printAM3PostIndexOp(const MCInst *MI, unsigned OpNum, raw_ostream &O);
- void printAM3PreOrOffsetIndexOp(const MCInst *MI, unsigned OpNum,
- raw_ostream &O);
void printAddrMode3OffsetOperand(const MCInst *MI, unsigned OpNum,
raw_ostream &O);
+ void printAM3PostIndexOp(const MCInst *MI, unsigned Op, raw_ostream &O);
+ void printAM3PreOrOffsetIndexOp(const MCInst *MI, unsigned Op,raw_ostream &O);
+ void printPostIdxImm8Operand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O);
+ void printPostIdxRegOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+ void printPostIdxImm8s4Operand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O);
void printLdStmModeOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
void printAddrMode5Operand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
@@ -65,8 +72,11 @@ public:
raw_ostream &O);
void printMemBOption(const MCInst *MI, unsigned OpNum, raw_ostream &O);
void printShiftImmOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+ void printPKHLSLShiftImm(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+ void printPKHASRShiftImm(const MCInst *MI, unsigned OpNum, raw_ostream &O);
void printThumbS4ImmOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+ void printThumbSRImm(const MCInst *MI, unsigned OpNum, raw_ostream &O);
void printThumbITMask(const MCInst *MI, unsigned OpNum, raw_ostream &O);
void printThumbAddrModeRROperand(const MCInst *MI, unsigned OpNum,
raw_ostream &O);
@@ -88,6 +98,8 @@ public:
raw_ostream &O);
void printT2AddrModeImm8s4Operand(const MCInst *MI, unsigned OpNum,
raw_ostream &O);
+ void printT2AddrModeImm0_1020s4Operand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O);
void printT2AddrModeImm8OffsetOperand(const MCInst *MI, unsigned OpNum,
raw_ostream &O);
void printT2AddrModeImm8s4OffsetOperand(const MCInst *MI, unsigned OpNum,
@@ -108,11 +120,15 @@ public:
void printNoHashImmediate(const MCInst *MI, unsigned OpNum, raw_ostream &O);
void printPImmediate(const MCInst *MI, unsigned OpNum, raw_ostream &O);
void printCImmediate(const MCInst *MI, unsigned OpNum, raw_ostream &O);
- void printVFPf32ImmOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
- void printVFPf64ImmOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+ void printCoprocOptionImm(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+ void printFPImmOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
void printNEONModImmOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+ void printImmPlusOneOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+ void printRotImmOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
void printPCLabel(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+ void printT2LdrLabelOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+ void printVectorIndex(const MCInst *MI, unsigned OpNum, raw_ostream &O);
};
} // end namespace llvm
diff --git a/lib/Target/ARM/InstPrinter/CMakeLists.txt b/lib/Target/ARM/InstPrinter/CMakeLists.txt
index 18645c0..fa0b495 100644
--- a/lib/Target/ARM/InstPrinter/CMakeLists.txt
+++ b/lib/Target/ARM/InstPrinter/CMakeLists.txt
@@ -3,4 +3,10 @@ include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/
add_llvm_library(LLVMARMAsmPrinter
ARMInstPrinter.cpp
)
-add_dependencies(LLVMARMAsmPrinter ARMCodeGenTable_gen)
+
+add_dependencies(LLVMARMAsmPrinter ARMCommonTableGen)
+
+add_llvm_library_dependencies(LLVMARMAsmPrinter
+ LLVMMC
+ LLVMSupport
+ )
diff --git a/lib/Target/ARM/ARMAddressingModes.h b/lib/Target/ARM/MCTargetDesc/ARMAddressingModes.h
index 595708f..9982fa6 100644
--- a/lib/Target/ARM/ARMAddressingModes.h
+++ b/lib/Target/ARM/MCTargetDesc/ARMAddressingModes.h
@@ -14,7 +14,8 @@
#ifndef LLVM_TARGET_ARM_ARMADDRESSINGMODES_H
#define LLVM_TARGET_ARM_ARMADDRESSINGMODES_H
-#include "llvm/CodeGen/SelectionDAGNodes.h"
+#include "llvm/ADT/APFloat.h"
+#include "llvm/ADT/APInt.h"
#include "llvm/Support/MathExtras.h"
#include <cassert>
@@ -32,7 +33,8 @@ namespace ARM_AM {
};
enum AddrOpc {
- add = '+', sub = '-'
+ sub = 0,
+ add
};
static inline const char *getAddrOpcStr(AddrOpc Op) {
@@ -60,20 +62,6 @@ namespace ARM_AM {
}
}
- static inline ShiftOpc getShiftOpcForNode(SDValue N) {
- switch (N.getOpcode()) {
- default: return ARM_AM::no_shift;
- case ISD::SHL: return ARM_AM::lsl;
- case ISD::SRL: return ARM_AM::lsr;
- case ISD::SRA: return ARM_AM::asr;
- case ISD::ROTR: return ARM_AM::ror;
- //case ISD::ROTL: // Only if imm -> turn into ROTR.
- // Can't handle RRX here, because it would require folding a flag into
- // the addressing mode. :( This causes us to miss certain things.
- //case ARMISD::RRX: return ARM_AM::rrx;
- }
- }
-
enum AMSubMode {
bad_am_submode = 0,
ia,
@@ -588,6 +576,90 @@ namespace ARM_AM {
AMSubMode getLoadStoreMultipleSubMode(int Opcode);
+ //===--------------------------------------------------------------------===//
+ // Floating-point Immediates
+ //
+ static inline float getFPImmFloat(unsigned Imm) {
+ // We expect an 8-bit binary encoding of a floating-point number here.
+ union {
+ uint32_t I;
+ float F;
+ } FPUnion;
+
+ uint8_t Sign = (Imm >> 7) & 0x1;
+ uint8_t Exp = (Imm >> 4) & 0x7;
+ uint8_t Mantissa = Imm & 0xf;
+
+    // 8-bit FP IEEE Float Encoding
+ // abcd efgh aBbbbbbc defgh000 00000000 00000000
+ //
+ // where B = NOT(b);
+
+ FPUnion.I = 0;
+ FPUnion.I |= Sign << 31;
+ FPUnion.I |= ((Exp & 0x4) != 0 ? 0 : 1) << 30;
+ FPUnion.I |= ((Exp & 0x4) != 0 ? 0x1f : 0) << 25;
+ FPUnion.I |= (Exp & 0x3) << 23;
+ FPUnion.I |= Mantissa << 19;
+ return FPUnion.F;
+ }
+
+ /// getFP32Imm - Return an 8-bit floating-point version of the 32-bit
+ /// floating-point value. If the value cannot be represented as an 8-bit
+ /// floating-point value, then return -1.
+ static inline int getFP32Imm(const APInt &Imm) {
+ uint32_t Sign = Imm.lshr(31).getZExtValue() & 1;
+ int32_t Exp = (Imm.lshr(23).getSExtValue() & 0xff) - 127; // -126 to 127
+ int64_t Mantissa = Imm.getZExtValue() & 0x7fffff; // 23 bits
+
+ // We can handle 4 bits of mantissa.
+ // mantissa = (16+UInt(e:f:g:h))/16.
+ if (Mantissa & 0x7ffff)
+ return -1;
+ Mantissa >>= 19;
+ if ((Mantissa & 0xf) != Mantissa)
+ return -1;
+
+ // We can handle 3 bits of exponent: exp == UInt(NOT(b):c:d)-3
+ if (Exp < -3 || Exp > 4)
+ return -1;
+ Exp = ((Exp+3) & 0x7) ^ 4;
+
+ return ((int)Sign << 7) | (Exp << 4) | Mantissa;
+ }
+
+ static inline int getFP32Imm(const APFloat &FPImm) {
+ return getFP32Imm(FPImm.bitcastToAPInt());
+ }
+
+ /// getFP64Imm - Return an 8-bit floating-point version of the 64-bit
+ /// floating-point value. If the value cannot be represented as an 8-bit
+ /// floating-point value, then return -1.
+ static inline int getFP64Imm(const APInt &Imm) {
+ uint64_t Sign = Imm.lshr(63).getZExtValue() & 1;
+ int64_t Exp = (Imm.lshr(52).getSExtValue() & 0x7ff) - 1023; // -1022 to 1023
+ uint64_t Mantissa = Imm.getZExtValue() & 0xfffffffffffffULL;
+
+ // We can handle 4 bits of mantissa.
+ // mantissa = (16+UInt(e:f:g:h))/16.
+ if (Mantissa & 0xffffffffffffULL)
+ return -1;
+ Mantissa >>= 48;
+ if ((Mantissa & 0xf) != Mantissa)
+ return -1;
+
+ // We can handle 3 bits of exponent: exp == UInt(NOT(b):c:d)-3
+ if (Exp < -3 || Exp > 4)
+ return -1;
+ Exp = ((Exp+3) & 0x7) ^ 4;
+
+ return ((int)Sign << 7) | (Exp << 4) | Mantissa;
+ }
+
+ static inline int getFP64Imm(const APFloat &FPImm) {
+ return getFP64Imm(FPImm.bitcastToAPInt());
+ }
+
} // end namespace ARM_AM
} // end namespace llvm
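
The 8-bit VFP immediate round-trips: for example imm8 = 0x70 (sign 0, exponent field 0b111, mantissa 0) decodes to 1.0f, and getFP32Imm maps the bits of 1.0f back to 0x70. A self-contained sketch of the bit assembly in getFPImmFloat, using memcpy in place of the union purely for illustration:

#include <cassert>
#include <cstdint>
#include <cstring>

static float fpImmFloat(unsigned Imm) {
  uint8_t Sign = (Imm >> 7) & 0x1;
  uint8_t Exp = (Imm >> 4) & 0x7;
  uint8_t Mantissa = Imm & 0xf;
  uint32_t I = 0;
  I |= uint32_t(Sign) << 31;
  I |= ((Exp & 0x4) != 0 ? 0u : 1u) << 30;    // NOT(b)
  I |= ((Exp & 0x4) != 0 ? 0x1fu : 0u) << 25; // b replicated five times
  I |= uint32_t(Exp & 0x3) << 23;             // c:d
  I |= uint32_t(Mantissa) << 19;              // e:f:g:h
  float F;
  std::memcpy(&F, &I, sizeof(F));
  return F;
}

int main() {
  assert(fpImmFloat(0x70) == 1.0f);  // bits 0x3f800000
  assert(fpImmFloat(0xf0) == -1.0f); // sign bit set
  return 0;
}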
diff --git a/lib/Target/ARM/ARMAsmBackend.cpp b/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp
index 5e438a9..c31c5e6 100644
--- a/lib/Target/ARM/ARMAsmBackend.cpp
+++ b/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp
@@ -7,9 +7,10 @@
//
//===----------------------------------------------------------------------===//
-#include "ARM.h"
-#include "ARMAddressingModes.h"
-#include "ARMFixupKinds.h"
+#include "MCTargetDesc/ARMMCTargetDesc.h"
+#include "MCTargetDesc/ARMBaseInfo.h"
+#include "MCTargetDesc/ARMFixupKinds.h"
+#include "MCTargetDesc/ARMAddressingModes.h"
#include "llvm/ADT/Twine.h"
#include "llvm/MC/MCAssembler.h"
#include "llvm/MC/MCDirectives.h"
@@ -19,12 +20,12 @@
#include "llvm/MC/MCObjectWriter.h"
#include "llvm/MC/MCSectionELF.h"
#include "llvm/MC/MCSectionMachO.h"
+#include "llvm/MC/MCAsmBackend.h"
+#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/Object/MachOFormat.h"
#include "llvm/Support/ELF.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetAsmBackend.h"
-#include "llvm/Target/TargetRegistry.h"
using namespace llvm;
namespace {
@@ -35,13 +36,24 @@ public:
/*HasRelocationAddend*/ false) {}
};
-class ARMAsmBackend : public TargetAsmBackend {
+class ARMAsmBackend : public MCAsmBackend {
+ const MCSubtargetInfo* STI;
bool isThumbMode; // Currently emitting Thumb code.
public:
- ARMAsmBackend(const Target &T) : TargetAsmBackend(), isThumbMode(false) {}
+ ARMAsmBackend(const Target &T, const StringRef TT)
+ : MCAsmBackend(), STI(ARM_MC::createARMMCSubtargetInfo(TT, "", "")),
+ isThumbMode(TT.startswith("thumb")) {}
+
+ ~ARMAsmBackend() {
+ delete STI;
+ }
unsigned getNumFixupKinds() const { return ARM::NumTargetFixupKinds; }
+ bool hasNOP() const {
+ return (STI->getFeatureBits() & ARM::HasV6T2Ops) != 0;
+ }
+
const MCFixupKindInfo &getFixupKindInfo(MCFixupKind Kind) const {
const static MCFixupKindInfo Infos[ARM::NumTargetFixupKinds] = {
// This table *must* be in the order that the fixup_* kinds are defined in
@@ -65,9 +77,9 @@ public:
{ "fixup_t2_uncondbranch", 0, 32, MCFixupKindInfo::FKF_IsPCRel },
{ "fixup_arm_thumb_br", 0, 16, MCFixupKindInfo::FKF_IsPCRel },
{ "fixup_arm_thumb_bl", 0, 32, MCFixupKindInfo::FKF_IsPCRel },
-{ "fixup_arm_thumb_blx", 7, 21, MCFixupKindInfo::FKF_IsPCRel },
+{ "fixup_arm_thumb_blx", 0, 32, MCFixupKindInfo::FKF_IsPCRel },
{ "fixup_arm_thumb_cb", 0, 16, MCFixupKindInfo::FKF_IsPCRel },
-{ "fixup_arm_thumb_cp", 1, 8, MCFixupKindInfo::FKF_IsPCRel },
+{ "fixup_arm_thumb_cp", 0, 8, MCFixupKindInfo::FKF_IsPCRel },
{ "fixup_arm_thumb_bcc", 0, 8, MCFixupKindInfo::FKF_IsPCRel },
// movw / movt: 16-bits immediate but scattered into two chunks 0 - 12, 16 - 19.
{ "fixup_arm_movt_hi16", 0, 20, 0 },
@@ -81,7 +93,7 @@ public:
};
if (Kind < FirstTargetFixupKind)
- return TargetAsmBackend::getFixupKindInfo(Kind);
+ return MCAsmBackend::getFixupKindInfo(Kind);
assert(unsigned(Kind - FirstTargetFixupKind) < getNumFixupKinds() &&
"Invalid kind!");
@@ -123,20 +135,28 @@ void ARMAsmBackend::RelaxInstruction(const MCInst &Inst, MCInst &Res) const {
}
bool ARMAsmBackend::WriteNopData(uint64_t Count, MCObjectWriter *OW) const {
+ const uint16_t Thumb1_16bitNopEncoding = 0x46c0; // using MOV r8,r8
+ const uint16_t Thumb2_16bitNopEncoding = 0xbf00; // NOP
+  const uint32_t ARMv4_NopEncoding = 0xe1a00000; // using MOV r0,r0
+ const uint32_t ARMv6T2_NopEncoding = 0xe3207800; // NOP
if (isThumb()) {
- // FIXME: 0xbf00 is the ARMv7 value. For v6 and before, we'll need to
- // use 0x46c0 (which is a 'mov r8, r8' insn).
+ const uint16_t nopEncoding = hasNOP() ? Thumb2_16bitNopEncoding
+ : Thumb1_16bitNopEncoding;
uint64_t NumNops = Count / 2;
for (uint64_t i = 0; i != NumNops; ++i)
- OW->Write16(0xbf00);
+ OW->Write16(nopEncoding);
if (Count & 1)
OW->Write8(0);
return true;
}
// ARM mode
+ const uint32_t nopEncoding = hasNOP() ? ARMv6T2_NopEncoding
+ : ARMv4_NopEncoding;
uint64_t NumNops = Count / 4;
for (uint64_t i = 0; i != NumNops; ++i)
- OW->Write32(0xe1a00000);
+ OW->Write32(nopEncoding);
+ // FIXME: should this function return false when unable to write exactly
+ // 'Count' bytes with NOP encodings?
switch (Count % 4) {
default: break; // No leftover bytes to write
case 1: OW->Write8(0); break;
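
The padding logic above reduces to choosing one of four encodings by mode and feature level. A freestanding sketch with the constants copied from the hunk; the selection helper is hypothetical:

#include <cassert>
#include <cstdint>

static uint32_t pickNop(bool isThumb, bool hasNOP) {
  if (isThumb)
    return hasNOP ? 0xbf00u    // Thumb2 NOP
                  : 0x46c0u;   // Thumb1 "mov r8, r8"
  return hasNOP ? 0xe3207800u  // v6T2 NOP encoding, as given above
                : 0xe1a00000u; // ARMv4 "mov r0, r0"
}

int main() {
  assert(pickNop(false, false) == 0xe1a00000u); // pre-v6T2 ARM padding
  assert(pickNop(true, true) == 0xbf00u);       // v6T2+ Thumb padding
  return 0;
}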
@@ -163,8 +183,6 @@ static unsigned adjustFixupValue(unsigned Kind, uint64_t Value) {
case ARM::fixup_arm_movw_lo16_pcrel: {
unsigned Hi4 = (Value & 0xF000) >> 12;
unsigned Lo12 = Value & 0x0FFF;
- assert ((((int64_t)Value) >= -0x8000) && (((int64_t)Value) <= 0x7fff) &&
- "Out of range pc-relative fixup value!");
// inst{19-16} = Hi4;
// inst{11-0} = Lo12;
Value = (Hi4 << 16) | (Lo12);
@@ -185,10 +203,6 @@ static unsigned adjustFixupValue(unsigned Kind, uint64_t Value) {
// inst{26} = i;
// inst{14-12} = Mid3;
// inst{7-0} = Lo8;
- // The value comes in as the whole thing, not just the portion required
- // for this fixup, so we need to mask off the bits not handled by this
- // portion (lo vs. hi).
- Value &= 0xffff;
Value = (Hi4 << 16) | (i << 26) | (Mid3 << 12) | (Lo8);
uint64_t swapped = (Value & 0xFFFF0000) >> 16;
swapped |= (Value & 0x0000FFFF) << 16;
@@ -382,8 +396,9 @@ namespace {
class ELFARMAsmBackend : public ARMAsmBackend {
public:
Triple::OSType OSType;
- ELFARMAsmBackend(const Target &T, Triple::OSType _OSType)
- : ARMAsmBackend(T), OSType(_OSType) { }
+ ELFARMAsmBackend(const Target &T, const StringRef TT,
+ Triple::OSType _OSType)
+ : ARMAsmBackend(T, TT), OSType(_OSType) { }
void ApplyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize,
uint64_t Value) const;
@@ -414,8 +429,9 @@ void ELFARMAsmBackend::ApplyFixup(const MCFixup &Fixup, char *Data,
class DarwinARMAsmBackend : public ARMAsmBackend {
public:
const object::mach::CPUSubtypeARM Subtype;
- DarwinARMAsmBackend(const Target &T, object::mach::CPUSubtypeARM st)
- : ARMAsmBackend(T), Subtype(st) { }
+ DarwinARMAsmBackend(const Target &T, const StringRef TT,
+ object::mach::CPUSubtypeARM st)
+ : ARMAsmBackend(T, TT), Subtype(st) { }
MCObjectWriter *createObjectWriter(raw_ostream &OS) const {
return createARMMachObjectWriter(OS, /*Is64Bit=*/false,
@@ -492,25 +508,24 @@ void DarwinARMAsmBackend::ApplyFixup(const MCFixup &Fixup, char *Data,
} // end anonymous namespace
-TargetAsmBackend *llvm::createARMAsmBackend(const Target &T,
- const std::string &TT) {
+MCAsmBackend *llvm::createARMAsmBackend(const Target &T, StringRef TT) {
Triple TheTriple(TT);
if (TheTriple.isOSDarwin()) {
if (TheTriple.getArchName() == "armv4t" ||
TheTriple.getArchName() == "thumbv4t")
- return new DarwinARMAsmBackend(T, object::mach::CSARM_V4T);
+ return new DarwinARMAsmBackend(T, TT, object::mach::CSARM_V4T);
else if (TheTriple.getArchName() == "armv5e" ||
TheTriple.getArchName() == "thumbv5e")
- return new DarwinARMAsmBackend(T, object::mach::CSARM_V5TEJ);
+ return new DarwinARMAsmBackend(T, TT, object::mach::CSARM_V5TEJ);
else if (TheTriple.getArchName() == "armv6" ||
TheTriple.getArchName() == "thumbv6")
- return new DarwinARMAsmBackend(T, object::mach::CSARM_V6);
- return new DarwinARMAsmBackend(T, object::mach::CSARM_V7);
+ return new DarwinARMAsmBackend(T, TT, object::mach::CSARM_V6);
+ return new DarwinARMAsmBackend(T, TT, object::mach::CSARM_V7);
}
if (TheTriple.isOSWindows())
assert(0 && "Windows not supported on ARM");
- return new ELFARMAsmBackend(T, Triple(TT).getOS());
+ return new ELFARMAsmBackend(T, TT, Triple(TT).getOS());
}
diff --git a/lib/Target/ARM/ARMBaseInfo.h b/lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h
index 458f7dd..ec4b6ff 100644
--- a/lib/Target/ARM/ARMBaseInfo.h
+++ b/lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h
@@ -17,12 +17,9 @@
#ifndef ARMBASEINFO_H
#define ARMBASEINFO_H
-#include "MCTargetDesc/ARMMCTargetDesc.h"
+#include "ARMMCTargetDesc.h"
#include "llvm/Support/ErrorHandling.h"
-// Note that the following auto-generated files only defined enum types, and
-// so are safe to include here.
-
namespace llvm {
// Enums corresponding to ARM condition codes
@@ -191,6 +188,22 @@ inline static unsigned getARMRegisterNumbering(unsigned Reg) {
}
}
+/// isARMLowRegister - Returns true if the register is a low register (r0-r7).
+///
+static inline bool isARMLowRegister(unsigned Reg) {
+ using namespace ARM;
+ switch (Reg) {
+ case R0: case R1: case R2: case R3:
+ case R4: case R5: case R6: case R7:
+ return true;
+ default:
+ return false;
+ }
+}
+
+/// ARMII - This namespace holds all of the target specific flags that
+/// instruction info tracks.
+///
namespace ARMII {
/// ARM Index Modes
@@ -287,6 +300,148 @@ namespace ARMII {
/// call operand.
MO_PLT
};
+
+ enum {
+ //===------------------------------------------------------------------===//
+ // Instruction Flags.
+
+ //===------------------------------------------------------------------===//
+    // This five-bit field describes the addressing mode used.
+ AddrModeMask = 0x1f, // The AddrMode enums are declared in ARMBaseInfo.h
+
+    // IndexMode - Unindexed, pre-indexed, and post-indexed modes are valid
+    // for load and store ops only. A generic "updating" flag is used for
+    // ld/st multiple.
+ // The index mode enums are declared in ARMBaseInfo.h
+ IndexModeShift = 5,
+ IndexModeMask = 3 << IndexModeShift,
+
+ //===------------------------------------------------------------------===//
+ // Instruction encoding formats.
+ //
+ FormShift = 7,
+ FormMask = 0x3f << FormShift,
+
+ // Pseudo instructions
+ Pseudo = 0 << FormShift,
+
+ // Multiply instructions
+ MulFrm = 1 << FormShift,
+
+ // Branch instructions
+ BrFrm = 2 << FormShift,
+ BrMiscFrm = 3 << FormShift,
+
+ // Data Processing instructions
+ DPFrm = 4 << FormShift,
+ DPSoRegFrm = 5 << FormShift,
+
+ // Load and Store
+ LdFrm = 6 << FormShift,
+ StFrm = 7 << FormShift,
+ LdMiscFrm = 8 << FormShift,
+ StMiscFrm = 9 << FormShift,
+ LdStMulFrm = 10 << FormShift,
+
+ LdStExFrm = 11 << FormShift,
+
+ // Miscellaneous arithmetic instructions
+ ArithMiscFrm = 12 << FormShift,
+ SatFrm = 13 << FormShift,
+
+ // Extend instructions
+ ExtFrm = 14 << FormShift,
+
+ // VFP formats
+ VFPUnaryFrm = 15 << FormShift,
+ VFPBinaryFrm = 16 << FormShift,
+ VFPConv1Frm = 17 << FormShift,
+ VFPConv2Frm = 18 << FormShift,
+ VFPConv3Frm = 19 << FormShift,
+ VFPConv4Frm = 20 << FormShift,
+ VFPConv5Frm = 21 << FormShift,
+ VFPLdStFrm = 22 << FormShift,
+ VFPLdStMulFrm = 23 << FormShift,
+ VFPMiscFrm = 24 << FormShift,
+
+ // Thumb format
+ ThumbFrm = 25 << FormShift,
+
+    // Miscellaneous format
+ MiscFrm = 26 << FormShift,
+
+ // NEON formats
+ NGetLnFrm = 27 << FormShift,
+ NSetLnFrm = 28 << FormShift,
+ NDupFrm = 29 << FormShift,
+ NLdStFrm = 30 << FormShift,
+ N1RegModImmFrm= 31 << FormShift,
+ N2RegFrm = 32 << FormShift,
+ NVCVTFrm = 33 << FormShift,
+ NVDupLnFrm = 34 << FormShift,
+ N2RegVShLFrm = 35 << FormShift,
+ N2RegVShRFrm = 36 << FormShift,
+ N3RegFrm = 37 << FormShift,
+ N3RegVShFrm = 38 << FormShift,
+ NVExtFrm = 39 << FormShift,
+ NVMulSLFrm = 40 << FormShift,
+ NVTBLFrm = 41 << FormShift,
+
+ //===------------------------------------------------------------------===//
+ // Misc flags.
+
+ // UnaryDP - Indicates this is a unary data processing instruction, i.e.
+ // it doesn't have a Rn operand.
+ UnaryDP = 1 << 13,
+
+ // Xform16Bit - Indicates this Thumb2 instruction may be transformed into
+ // a 16-bit Thumb instruction if certain conditions are met.
+ Xform16Bit = 1 << 14,
+
+ // ThumbArithFlagSetting - The instruction is a 16-bit flag setting Thumb
+ // instruction. Used by the parser to determine whether to require the 'S'
+ // suffix on the mnemonic (when not in an IT block) or preclude it (when
+ // in an IT block).
+ ThumbArithFlagSetting = 1 << 18,
+
+ //===------------------------------------------------------------------===//
+ // Code domain.
+ DomainShift = 15,
+ DomainMask = 7 << DomainShift,
+ DomainGeneral = 0 << DomainShift,
+ DomainVFP = 1 << DomainShift,
+ DomainNEON = 2 << DomainShift,
+ DomainNEONA8 = 4 << DomainShift,
+
+ //===------------------------------------------------------------------===//
+    // Field shifts - such shifts are used to set fields while generating
+    // machine instructions.
+ //
+ // FIXME: This list will need adjusting/fixing as the MC code emitter
+ // takes shape and the ARMCodeEmitter.cpp bits go away.
+ ShiftTypeShift = 4,
+
+ M_BitShift = 5,
+ ShiftImmShift = 5,
+ ShiftShift = 7,
+ N_BitShift = 7,
+ ImmHiShift = 8,
+ SoRotImmShift = 8,
+ RegRsShift = 8,
+ ExtRotImmShift = 10,
+ RegRdLoShift = 12,
+ RegRdShift = 12,
+ RegRdHiShift = 16,
+ RegRnShift = 16,
+ S_BitShift = 20,
+ W_BitShift = 21,
+ AM3_I_BitShift = 22,
+ D_BitShift = 22,
+ U_BitShift = 23,
+ P_BitShift = 24,
+ I_BitShift = 25,
+ CondShift = 28
+ };
+
} // end namespace ARMII
} // end namespace llvm;
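
The ARMII masks and shifts above are meant to be applied to an instruction's TSFlags word. A freestanding sketch with a synthetic TSFlags value, reproducing only the enumerators it needs:

#include <cassert>
#include <cstdint>

namespace ARMII {
  enum {
    AddrModeMask = 0x1f,
    IndexModeShift = 5,
    IndexModeMask = 3 << IndexModeShift,
    FormShift = 7,
    FormMask = 0x3f << FormShift,
    MulFrm = 1 << FormShift
  };
}

int main() {
  // A made-up multiply instruction using addressing mode 3, no indexing.
  uint64_t TSFlags = uint64_t(ARMII::MulFrm) | 0x3;
  assert((TSFlags & ARMII::FormMask) == ARMII::MulFrm);
  assert((TSFlags & ARMII::AddrModeMask) == 0x3);
  assert(((TSFlags & ARMII::IndexModeMask) >> ARMII::IndexModeShift) == 0);
  return 0;
}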
diff --git a/lib/Target/ARM/ARMFixupKinds.h b/lib/Target/ARM/MCTargetDesc/ARMFixupKinds.h
index 350c92d..350c92d 100644
--- a/lib/Target/ARM/ARMFixupKinds.h
+++ b/lib/Target/ARM/MCTargetDesc/ARMFixupKinds.h
diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp b/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp
index 53b4c95..1c109e0 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp
+++ b/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp
@@ -52,6 +52,9 @@ ARMMCAsmInfoDarwin::ARMMCAsmInfoDarwin() {
AsmTransCBE = arm_asm_table;
Data64bitsDirective = 0;
CommentString = "@";
+ Code16Directive = ".code\t16";
+ Code32Directive = ".code\t32";
+
SupportsDebugInformation = true;
// Exceptions handling
@@ -64,12 +67,14 @@ ARMELFMCAsmInfo::ARMELFMCAsmInfo() {
Data64bitsDirective = 0;
CommentString = "@";
-
- HasLEB128 = true;
PrivateGlobalPrefix = ".L";
+ Code16Directive = ".code\t16";
+ Code32Directive = ".code\t32";
+
WeakRefDirective = "\t.weak\t";
- HasLCOMMDirective = true;
+ LCOMMDirectiveType = LCOMM::NoAlignment;
+ HasLEB128 = true;
SupportsDebugInformation = true;
// Exceptions handling
diff --git a/lib/Target/ARM/ARMMCCodeEmitter.cpp b/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp
index 39be3f0..865c3e2 100644
--- a/lib/Target/ARM/ARMMCCodeEmitter.cpp
+++ b/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp
@@ -12,17 +12,18 @@
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "mccodeemitter"
-#include "ARM.h"
-#include "ARMAddressingModes.h"
-#include "ARMFixupKinds.h"
-#include "ARMInstrInfo.h"
-#include "ARMMCExpr.h"
-#include "ARMSubtarget.h"
+#include "MCTargetDesc/ARMAddressingModes.h"
+#include "MCTargetDesc/ARMBaseInfo.h"
+#include "MCTargetDesc/ARMFixupKinds.h"
+#include "MCTargetDesc/ARMMCExpr.h"
+#include "MCTargetDesc/ARMMCTargetDesc.h"
#include "llvm/MC/MCCodeEmitter.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Support/raw_ostream.h"
@@ -112,11 +113,13 @@ public:
/// immediate Thumb2 direct branch target.
uint32_t getUnconditionalBranchTargetOpValue(const MCInst &MI, unsigned OpIdx,
SmallVectorImpl<MCFixup> &Fixups) const;
-
+
/// getARMBranchTargetOpValue - Return encoding info for 24-bit immediate
/// branch target.
uint32_t getARMBranchTargetOpValue(const MCInst &MI, unsigned OpIdx,
SmallVectorImpl<MCFixup> &Fixups) const;
+ uint32_t getARMBLXTargetOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const;
/// getAdrLabelOpValue - Return encoding info for 12-bit immediate
/// ADR label target.
@@ -142,6 +145,16 @@ public:
uint32_t getT2AddrModeImm8s4OpValue(const MCInst &MI, unsigned OpIdx,
SmallVectorImpl<MCFixup> &Fixups) const;
+ /// getT2AddrModeImm0_1020s4OpValue - Return encoding info for 'reg + imm8<<2'
+ /// operand.
+ uint32_t getT2AddrModeImm0_1020s4OpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+
+ /// getT2Imm8s4OpValue - Return encoding info for '+/- imm8<<2'
+ /// operand.
+ uint32_t getT2Imm8s4OpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+
/// getLdStSORegOpValue - Return encoding info for 'reg +/- reg shop imm'
/// operand as needed by load/store instructions.
@@ -183,6 +196,10 @@ public:
uint32_t getAddrMode2OffsetOpValue(const MCInst &MI, unsigned OpIdx,
SmallVectorImpl<MCFixup> &Fixups) const;
+ /// getPostIdxRegOpValue - Return encoding for postidx_reg operands.
+ uint32_t getPostIdxRegOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+
/// getAddrMode3OffsetOpValue - Return encoding for am3offset operands.
uint32_t getAddrMode3OffsetOpValue(const MCInst &MI, unsigned OpIdx,
SmallVectorImpl<MCFixup> &Fixups) const;
@@ -251,27 +268,13 @@ public:
SmallVectorImpl<MCFixup> &Fixups) const;
/// getSORegOpValue - Return an encoded so_reg shifted register value.
- unsigned getSORegOpValue(const MCInst &MI, unsigned Op,
+ unsigned getSORegRegOpValue(const MCInst &MI, unsigned Op,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+ unsigned getSORegImmOpValue(const MCInst &MI, unsigned Op,
SmallVectorImpl<MCFixup> &Fixups) const;
unsigned getT2SORegOpValue(const MCInst &MI, unsigned Op,
SmallVectorImpl<MCFixup> &Fixups) const;
- unsigned getRotImmOpValue(const MCInst &MI, unsigned Op,
- SmallVectorImpl<MCFixup> &Fixups) const {
- switch (MI.getOperand(Op).getImm()) {
- default: assert (0 && "Not a valid rot_imm value!");
- case 0: return 0;
- case 8: return 1;
- case 16: return 2;
- case 24: return 3;
- }
- }
-
- unsigned getImmMinusOneOpValue(const MCInst &MI, unsigned Op,
- SmallVectorImpl<MCFixup> &Fixups) const {
- return MI.getOperand(Op).getImm() - 1;
- }
-
unsigned getNEONVcvtImm32OpValue(const MCInst &MI, unsigned Op,
SmallVectorImpl<MCFixup> &Fixups) const {
return 64 - MI.getOperand(Op).getImm();
@@ -280,12 +283,6 @@ public:
unsigned getBitfieldInvertedMaskOpValue(const MCInst &MI, unsigned Op,
SmallVectorImpl<MCFixup> &Fixups) const;
- unsigned getMsbOpValue(const MCInst &MI, unsigned Op,
- SmallVectorImpl<MCFixup> &Fixups) const;
-
- unsigned getSsatBitPosValue(const MCInst &MI, unsigned Op,
- SmallVectorImpl<MCFixup> &Fixups) const;
-
unsigned getRegisterListOpValue(const MCInst &MI, unsigned Op,
SmallVectorImpl<MCFixup> &Fixups) const;
unsigned getAddrMode6AddressOpValue(const MCInst &MI, unsigned Op,
@@ -306,6 +303,9 @@ public:
unsigned getShiftRight64Imm(const MCInst &MI, unsigned Op,
SmallVectorImpl<MCFixup> &Fixups) const;
+ unsigned getThumbSRImmOpValue(const MCInst &MI, unsigned Op,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+
unsigned NEONThumb2DataIPostEncoder(const MCInst &MI,
unsigned EncodedValue) const;
unsigned NEONThumb2LoadStorePostEncoder(const MCInst &MI,
@@ -439,8 +439,10 @@ EncodeAddrModeOpValues(const MCInst &MI, unsigned OpIdx, unsigned &Reg,
bool isAdd = true;
// Special value for #-0
- if (SImm == INT32_MIN)
+ if (SImm == INT32_MIN) {
SImm = 0;
+ isAdd = false;
+ }
// Immediate is always encoded as positive. The 'U' bit controls add vs sub.
if (SImm < 0) {
@@ -470,11 +472,34 @@ static uint32_t getBranchTargetOpValue(const MCInst &MI, unsigned OpIdx,
return 0;
}
+// Thumb BL and BLX use an unusual offset encoding in which bits 22 and 21
+// (J1 and J2) are derived by inverting them and XOR'ing the result with
+// bit 23 (the sign bit, S).
+static int32_t encodeThumbBLOffset(int32_t offset) {
+ offset >>= 1;
+ uint32_t S = (offset & 0x800000) >> 23;
+ uint32_t J1 = (offset & 0x400000) >> 22;
+ uint32_t J2 = (offset & 0x200000) >> 21;
+ J1 = (~J1 & 0x1);
+ J2 = (~J2 & 0x1);
+ J1 ^= S;
+ J2 ^= S;
+
+ offset &= ~0x600000;
+ offset |= J1 << 22;
+ offset |= J2 << 21;
+
+ return offset;
+}
+
/// getThumbBLTargetOpValue - Return encoding info for immediate branch target.
uint32_t ARMMCCodeEmitter::
getThumbBLTargetOpValue(const MCInst &MI, unsigned OpIdx,
SmallVectorImpl<MCFixup> &Fixups) const {
- return ::getBranchTargetOpValue(MI, OpIdx, ARM::fixup_arm_thumb_bl, Fixups);
+ const MCOperand MO = MI.getOperand(OpIdx);
+ if (MO.isExpr())
+ return ::getBranchTargetOpValue(MI, OpIdx, ARM::fixup_arm_thumb_bl,
+ Fixups);
+ return encodeThumbBLOffset(MO.getImm());
}
/// getThumbBLXTargetOpValue - Return encoding info for Thumb immediate
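
Tracing encodeThumbBLOffset on a small forward branch makes the J1/J2 rewrite concrete. A freestanding sketch (like the original, it assumes an arithmetic right shift for negative offsets):

#include <cassert>
#include <cstdint>

static int32_t encodeThumbBLOffset(int32_t offset) {
  offset >>= 1; // byte offset to halfword offset
  uint32_t S  = (offset & 0x800000) >> 23;
  uint32_t J1 = ((~((offset & 0x400000) >> 22)) & 0x1) ^ S;
  uint32_t J2 = ((~((offset & 0x200000) >> 21)) & 0x1) ^ S;
  offset &= ~0x600000;
  offset |= J1 << 22;
  offset |= J2 << 21;
  return offset;
}

int main() {
  // Branch forward 4 bytes: imm = 2, S = 0, so J1 = J2 = NOT(0) ^ 0 = 1.
  assert(encodeThumbBLOffset(4) == 0x600002);
  return 0;
}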
@@ -482,28 +507,43 @@ getThumbBLTargetOpValue(const MCInst &MI, unsigned OpIdx,
uint32_t ARMMCCodeEmitter::
getThumbBLXTargetOpValue(const MCInst &MI, unsigned OpIdx,
SmallVectorImpl<MCFixup> &Fixups) const {
- return ::getBranchTargetOpValue(MI, OpIdx, ARM::fixup_arm_thumb_blx, Fixups);
+ const MCOperand MO = MI.getOperand(OpIdx);
+ if (MO.isExpr())
+ return ::getBranchTargetOpValue(MI, OpIdx, ARM::fixup_arm_thumb_blx,
+ Fixups);
+ return encodeThumbBLOffset(MO.getImm());
}
/// getThumbBRTargetOpValue - Return encoding info for Thumb branch target.
uint32_t ARMMCCodeEmitter::
getThumbBRTargetOpValue(const MCInst &MI, unsigned OpIdx,
SmallVectorImpl<MCFixup> &Fixups) const {
- return ::getBranchTargetOpValue(MI, OpIdx, ARM::fixup_arm_thumb_br, Fixups);
+ const MCOperand MO = MI.getOperand(OpIdx);
+ if (MO.isExpr())
+ return ::getBranchTargetOpValue(MI, OpIdx, ARM::fixup_arm_thumb_br,
+ Fixups);
+ return (MO.getImm() >> 1);
}
/// getThumbBCCTargetOpValue - Return encoding info for Thumb branch target.
uint32_t ARMMCCodeEmitter::
getThumbBCCTargetOpValue(const MCInst &MI, unsigned OpIdx,
SmallVectorImpl<MCFixup> &Fixups) const {
- return ::getBranchTargetOpValue(MI, OpIdx, ARM::fixup_arm_thumb_bcc, Fixups);
+ const MCOperand MO = MI.getOperand(OpIdx);
+ if (MO.isExpr())
+ return ::getBranchTargetOpValue(MI, OpIdx, ARM::fixup_arm_thumb_bcc,
+ Fixups);
+ return (MO.getImm() >> 1);
}
/// getThumbCBTargetOpValue - Return encoding info for Thumb branch target.
uint32_t ARMMCCodeEmitter::
getThumbCBTargetOpValue(const MCInst &MI, unsigned OpIdx,
SmallVectorImpl<MCFixup> &Fixups) const {
- return ::getBranchTargetOpValue(MI, OpIdx, ARM::fixup_arm_thumb_cb, Fixups);
+ const MCOperand MO = MI.getOperand(OpIdx);
+ if (MO.isExpr())
+ return ::getBranchTargetOpValue(MI, OpIdx, ARM::fixup_arm_thumb_cb, Fixups);
+ return (MO.getImm() >> 1);
}
/// Return true if this branch has a non-always predication
@@ -513,9 +553,9 @@ static bool HasConditionalBranch(const MCInst &MI) {
for (int i = 0; i < NumOp-1; ++i) {
const MCOperand &MCOp1 = MI.getOperand(i);
const MCOperand &MCOp2 = MI.getOperand(i + 1);
- if (MCOp1.isImm() && MCOp2.isReg() &&
+ if (MCOp1.isImm() && MCOp2.isReg() &&
(MCOp2.getReg() == 0 || MCOp2.getReg() == ARM::CPSR)) {
- if (ARMCC::CondCodes(MCOp1.getImm()) != ARMCC::AL)
+ if (ARMCC::CondCodes(MCOp1.getImm()) != ARMCC::AL)
return true;
}
}
@@ -541,15 +581,32 @@ getBranchTargetOpValue(const MCInst &MI, unsigned OpIdx,
uint32_t ARMMCCodeEmitter::
getARMBranchTargetOpValue(const MCInst &MI, unsigned OpIdx,
SmallVectorImpl<MCFixup> &Fixups) const {
- if (HasConditionalBranch(MI))
+ const MCOperand MO = MI.getOperand(OpIdx);
+ if (MO.isExpr()) {
+ if (HasConditionalBranch(MI))
+ return ::getBranchTargetOpValue(MI, OpIdx,
+ ARM::fixup_arm_condbranch, Fixups);
return ::getBranchTargetOpValue(MI, OpIdx,
- ARM::fixup_arm_condbranch, Fixups);
- return ::getBranchTargetOpValue(MI, OpIdx,
- ARM::fixup_arm_uncondbranch, Fixups);
-}
+ ARM::fixup_arm_uncondbranch, Fixups);
+ }
+ return MO.getImm() >> 2;
+}
+uint32_t ARMMCCodeEmitter::
+getARMBLXTargetOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ const MCOperand MO = MI.getOperand(OpIdx);
+ if (MO.isExpr()) {
+ if (HasConditionalBranch(MI))
+ return ::getBranchTargetOpValue(MI, OpIdx,
+ ARM::fixup_arm_condbranch, Fixups);
+ return ::getBranchTargetOpValue(MI, OpIdx,
+ ARM::fixup_arm_uncondbranch, Fixups);
+ }
+ return MO.getImm() >> 1;
+}
/// getUnconditionalBranchTargetOpValue - Return encoding info for 24-bit
/// immediate branch target.
@@ -579,9 +636,18 @@ getUnconditionalBranchTargetOpValue(const MCInst &MI, unsigned OpIdx,
uint32_t ARMMCCodeEmitter::
getAdrLabelOpValue(const MCInst &MI, unsigned OpIdx,
SmallVectorImpl<MCFixup> &Fixups) const {
- assert(MI.getOperand(OpIdx).isExpr() && "Unexpected adr target type!");
- return ::getBranchTargetOpValue(MI, OpIdx, ARM::fixup_arm_adr_pcrel_12,
- Fixups);
+ const MCOperand MO = MI.getOperand(OpIdx);
+ if (MO.isExpr())
+ return ::getBranchTargetOpValue(MI, OpIdx, ARM::fixup_arm_adr_pcrel_12,
+ Fixups);
+ int32_t offset = MO.getImm();
+ uint32_t Val = 0x2000;
+ if (offset < 0) {
+ Val = 0x1000;
+ offset *= -1;
+ }
+ Val |= offset;
+ return Val;
}
/// getAdrLabelOpValue - Return encoding info for 12-bit immediate ADR label
@@ -589,9 +655,16 @@ getAdrLabelOpValue(const MCInst &MI, unsigned OpIdx,
uint32_t ARMMCCodeEmitter::
getT2AdrLabelOpValue(const MCInst &MI, unsigned OpIdx,
SmallVectorImpl<MCFixup> &Fixups) const {
- assert(MI.getOperand(OpIdx).isExpr() && "Unexpected adr target type!");
- return ::getBranchTargetOpValue(MI, OpIdx, ARM::fixup_t2_adr_pcrel_12,
- Fixups);
+ const MCOperand MO = MI.getOperand(OpIdx);
+ if (MO.isExpr())
+ return ::getBranchTargetOpValue(MI, OpIdx, ARM::fixup_t2_adr_pcrel_12,
+ Fixups);
+ int32_t Val = MO.getImm();
+ if (Val < 0) {
+ Val *= -1;
+ Val |= 0x1000;
+ }
+ return Val;
}
/// getAdrLabelOpValue - Return encoding info for 8-bit immediate ADR label
@@ -599,9 +672,11 @@ getT2AdrLabelOpValue(const MCInst &MI, unsigned OpIdx,
uint32_t ARMMCCodeEmitter::
getThumbAdrLabelOpValue(const MCInst &MI, unsigned OpIdx,
SmallVectorImpl<MCFixup> &Fixups) const {
- assert(MI.getOperand(OpIdx).isExpr() && "Unexpected adr target type!");
- return ::getBranchTargetOpValue(MI, OpIdx, ARM::fixup_thumb_adr_pcrel_10,
- Fixups);
+ const MCOperand MO = MI.getOperand(OpIdx);
+ if (MO.isExpr())
+ return ::getBranchTargetOpValue(MI, OpIdx, ARM::fixup_thumb_adr_pcrel_10,
+ Fixups);
+ return MO.getImm();
}
/// getThumbAddrModeRegRegOpValue - Return encoding info for 'reg + reg'
@@ -635,17 +710,26 @@ getAddrModeImm12OpValue(const MCInst &MI, unsigned OpIdx,
Imm12 = 0;
isAdd = false ; // 'U' bit is set as part of the fixup.
- assert(MO.isExpr() && "Unexpected machine operand type!");
- const MCExpr *Expr = MO.getExpr();
-
- MCFixupKind Kind;
- if (isThumb2())
- Kind = MCFixupKind(ARM::fixup_t2_ldst_pcrel_12);
- else
- Kind = MCFixupKind(ARM::fixup_arm_ldst_pcrel_12);
- Fixups.push_back(MCFixup::Create(0, Expr, Kind));
+ if (MO.isExpr()) {
+ const MCExpr *Expr = MO.getExpr();
- ++MCNumCPRelocations;
+ MCFixupKind Kind;
+ if (isThumb2())
+ Kind = MCFixupKind(ARM::fixup_t2_ldst_pcrel_12);
+ else
+ Kind = MCFixupKind(ARM::fixup_arm_ldst_pcrel_12);
+ Fixups.push_back(MCFixup::Create(0, Expr, Kind));
+
+ ++MCNumCPRelocations;
+ } else {
+ Reg = ARM::PC;
+ int32_t Offset = MO.getImm();
+ if (Offset < 0) {
+ Offset *= -1;
+ isAdd = false;
+ }
+ Imm12 = Offset;
+ }
} else
isAdd = EncodeAddrModeOpValues(MI, OpIdx, Reg, Imm12, Fixups);
@@ -657,6 +741,37 @@ getAddrModeImm12OpValue(const MCInst &MI, unsigned OpIdx,
return Binary;
}
+/// getT2Imm8s4OpValue - Return encoding info for
+/// '+/- imm8<<2' operand.
+uint32_t ARMMCCodeEmitter::
+getT2Imm8s4OpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ // FIXME: The immediate operand should have already been encoded like this
+ // before ever getting here. The encoder method should just need to combine
+ // the MI operands for the register and the offset into a single
+ // representation for the complex operand in the .td file. This isn't just
+ // style, unfortunately. As-is, we can't represent the distinct encoding
+ // for #-0.
+
+ // {8} = (U)nsigned (add == '1', sub == '0')
+ // {7-0} = imm8
+ int32_t Imm8 = MI.getOperand(OpIdx).getImm();
+ bool isAdd = Imm8 >= 0;
+
+ // Immediate is always encoded as positive. The 'U' bit controls add vs sub.
+ if (Imm8 < 0)
+ Imm8 = -Imm8;
+
+ // Scaled by 4.
+ Imm8 /= 4;
+
+ uint32_t Binary = Imm8 & 0xff;
+ // Immediate is always encoded as positive. The 'U' bit controls add vs sub.
+ if (isAdd)
+ Binary |= (1 << 8);
+ return Binary;
+}
+
/// getT2AddrModeImm8s4OpValue - Return encoding info for
/// 'reg +/- imm8<<2' operand.
uint32_t ARMMCCodeEmitter::
@@ -683,6 +798,12 @@ getT2AddrModeImm8s4OpValue(const MCInst &MI, unsigned OpIdx,
} else
isAdd = EncodeAddrModeOpValues(MI, OpIdx, Reg, Imm8, Fixups);
+ // FIXME: The immediate operand should have already been encoded like this
+ // before ever getting here. The encoder method should just need to combine
+ // the MI operands for the register and the offset into a single
+ // representation for the complex operand in the .td file. This isn't just
+ // style, unfortunately. As-is, we can't represent the distinct encoding
+ // for #-0.
uint32_t Binary = (Imm8 >> 2) & 0xff;
// Immediate is always encoded as positive. The 'U' bit controls add vs sub.
if (isAdd)
@@ -691,6 +812,20 @@ getT2AddrModeImm8s4OpValue(const MCInst &MI, unsigned OpIdx,
return Binary;
}
+/// getT2AddrModeImm0_1020s4OpValue - Return encoding info for
+/// 'reg + imm8<<2' operand.
+uint32_t ARMMCCodeEmitter::
+getT2AddrModeImm0_1020s4OpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ // {11-8} = reg
+ // {7-0} = imm8
+ const MCOperand &MO = MI.getOperand(OpIdx);
+ const MCOperand &MO1 = MI.getOperand(OpIdx + 1);
+ unsigned Reg = getARMRegisterNumbering(MO.getReg());
+ unsigned Imm8 = MO1.getImm();
+ return (Reg << 8) | Imm8;
+}
+
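The 'reg + imm8<<2' form above is a straight pack of the two sub-operands. A sketch with made-up operand values:

    #include <cassert>
    #include <cstdint>

    // Sketch: {11-8} = reg number, {7-0} = imm8.
    static uint32_t packRegImm8(unsigned Reg, unsigned Imm8) {
      return (Reg << 8) | (Imm8 & 0xff);
    }

    int main() {
      assert(packRegImm8(3, 5) == 0x305); // reg 3, imm8 5
    }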
// FIXME: This routine assumes that a binary
// expression will always result in a PCRel expression
// In reality, its only true if one or more subexpressions
@@ -818,6 +953,17 @@ getAddrMode2OffsetOpValue(const MCInst &MI, unsigned OpIdx,
}
uint32_t ARMMCCodeEmitter::
+getPostIdxRegOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ // {4} isAdd
+ // {3-0} Rm
+ const MCOperand &MO = MI.getOperand(OpIdx);
+ const MCOperand &MO1 = MI.getOperand(OpIdx+1);
+ bool isAdd = MO1.getImm() != 0;
+ return getARMRegisterNumbering(MO.getReg()) | (isAdd << 4);
+}
+
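getPostIdxRegOpValue packs the post-indexed register and its direction into five bits. A worked sketch (register numbers are illustrative):

    #include <cassert>
    #include <cstdint>

    // Sketch: {4} = U (post-index add), {3-0} = Rm register number.
    static uint32_t encodePostIdxReg(unsigned RmNum, bool isAdd) {
      return (RmNum & 0xf) | (static_cast<uint32_t>(isAdd) << 4);
    }

    int main() {
      assert(encodePostIdxReg(2, true)  == 0x12); // r2, add
      assert(encodePostIdxReg(2, false) == 0x02); // r2, subtract
    }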
+uint32_t ARMMCCodeEmitter::
getAddrMode3OffsetOpValue(const MCInst &MI, unsigned OpIdx,
SmallVectorImpl<MCFixup> &Fixups) const {
// {9} 1 == imm8, 0 == Rm
@@ -891,7 +1037,10 @@ getAddrModeISOpValue(const MCInst &MI, unsigned OpIdx,
uint32_t ARMMCCodeEmitter::
getAddrModePCOpValue(const MCInst &MI, unsigned OpIdx,
SmallVectorImpl<MCFixup> &Fixups) const {
- return ::getBranchTargetOpValue(MI, OpIdx, ARM::fixup_arm_thumb_cp, Fixups);
+ const MCOperand MO = MI.getOperand(OpIdx);
+ if (MO.isExpr())
+ return ::getBranchTargetOpValue(MI, OpIdx, ARM::fixup_arm_thumb_cp, Fixups);
+ return (MO.getImm() >> 2);
}
/// getAddrMode5OpValue - Return encoding info for 'reg +/- imm10' operand.
@@ -934,20 +1083,17 @@ getAddrMode5OpValue(const MCInst &MI, unsigned OpIdx,
}
unsigned ARMMCCodeEmitter::
-getSORegOpValue(const MCInst &MI, unsigned OpIdx,
+getSORegRegOpValue(const MCInst &MI, unsigned OpIdx,
SmallVectorImpl<MCFixup> &Fixups) const {
// Sub-operands are [reg, reg, imm]. The first register is Rm, the reg to be
- // shifted. The second is either Rs, the amount to shift by, or reg0 in which
- // case the imm contains the amount to shift by.
+ // shifted. The second is Rs, the amount to shift by, and the third specifies
+ // the type of the shift.
//
// {3-0} = Rm.
- // {4} = 1 if reg shift, 0 if imm shift
+ // {4} = 1
// {6-5} = type
- // If reg shift:
- // {11-8} = Rs
- // {7} = 0
- // else (imm shift)
- // {11-7} = imm
+ // {11-8} = Rs
+ // {7} = 0
const MCOperand &MO = MI.getOperand(OpIdx);
const MCOperand &MO1 = MI.getOperand(OpIdx + 1);
@@ -966,45 +1112,70 @@ getSORegOpValue(const MCInst &MI, unsigned OpIdx,
// LSR - 0011
// ASR - 0101
// ROR - 0111
- // RRX - 0110 and bit[11:8] clear.
switch (SOpc) {
default: llvm_unreachable("Unknown shift opc!");
case ARM_AM::lsl: SBits = 0x1; break;
case ARM_AM::lsr: SBits = 0x3; break;
case ARM_AM::asr: SBits = 0x5; break;
case ARM_AM::ror: SBits = 0x7; break;
- case ARM_AM::rrx: SBits = 0x6; break;
- }
- } else {
- // Set shift operand (bit[6:4]).
- // LSL - 000
- // LSR - 010
- // ASR - 100
- // ROR - 110
- switch (SOpc) {
- default: llvm_unreachable("Unknown shift opc!");
- case ARM_AM::lsl: SBits = 0x0; break;
- case ARM_AM::lsr: SBits = 0x2; break;
- case ARM_AM::asr: SBits = 0x4; break;
- case ARM_AM::ror: SBits = 0x6; break;
}
}
Binary |= SBits << 4;
- if (SOpc == ARM_AM::rrx)
- return Binary;
- // Encode the shift operation Rs or shift_imm (except rrx).
- if (Rs) {
- // Encode Rs bit[11:8].
- assert(ARM_AM::getSORegOffset(MO2.getImm()) == 0);
- return Binary | (getARMRegisterNumbering(Rs) << ARMII::RegRsShift);
+ // Encode the shift operation Rs.
+ // Encode Rs bit[11:8].
+ assert(ARM_AM::getSORegOffset(MO2.getImm()) == 0);
+ return Binary | (getARMRegisterNumbering(Rs) << ARMII::RegRsShift);
+}
+
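A worked instance of the register-shift encoding above, with bit 4 set to mark the register form; the register numbers are made up, and the shift table mirrors the switch above.

    #include <cassert>
    #include <cstdint>

    // Sketch of the register-shift form: {3-0} = Rm, {4} = 1,
    // {6-5} = shift type, {7} = 0, {11-8} = Rs.
    enum ShiftOp { LSL, LSR, ASR, ROR };

    static uint32_t encodeSORegReg(unsigned Rm, ShiftOp Op, unsigned Rs) {
      static const uint32_t SBits[] = {0x1, 0x3, 0x5, 0x7}; // lsl/lsr/asr/ror
      return (Rm & 0xf) | (SBits[Op] << 4) | ((Rs & 0xf) << 8);
    }

    int main() {
      // r2 shifted left by the amount in r3 encodes as 0x312.
      assert(encodeSORegReg(2, LSL, 3) == 0x312);
    }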
+unsigned ARMMCCodeEmitter::
+getSORegImmOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ // Sub-operands are [reg, imm]. The first register is Rm, the reg to be
+ // shifted. The second is the amount to shift by.
+ //
+ // {3-0} = Rm.
+ // {4} = 0
+ // {6-5} = type
+ // {11-7} = imm
+
+ const MCOperand &MO = MI.getOperand(OpIdx);
+ const MCOperand &MO1 = MI.getOperand(OpIdx + 1);
+ ARM_AM::ShiftOpc SOpc = ARM_AM::getSORegShOp(MO1.getImm());
+
+ // Encode Rm.
+ unsigned Binary = getARMRegisterNumbering(MO.getReg());
+
+ // Encode the shift opcode.
+ unsigned SBits = 0;
+
+ // Set shift operand (bit[6:4]).
+ // LSL - 000
+ // LSR - 010
+ // ASR - 100
+ // ROR - 110
+ // RRX - 110 and bit[11:8] clear.
+ switch (SOpc) {
+ default: llvm_unreachable("Unknown shift opc!");
+ case ARM_AM::lsl: SBits = 0x0; break;
+ case ARM_AM::lsr: SBits = 0x2; break;
+ case ARM_AM::asr: SBits = 0x4; break;
+ case ARM_AM::ror: SBits = 0x6; break;
+ case ARM_AM::rrx:
+ Binary |= 0x60;
+ return Binary;
}
// Encode shift_imm bit[11:7].
- return Binary | ARM_AM::getSORegOffset(MO2.getImm()) << 7;
+ Binary |= SBits << 4;
+ unsigned Offset = ARM_AM::getSORegOffset(MO1.getImm());
+ assert(Offset && "Offset must be in range 1-32!");
+ if (Offset == 32) Offset = 0;
+ return Binary | (Offset << 7);
}
+
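The immediate-shift form above has two quirks worth making explicit: a shift amount of 32 is encoded as 0, and RRX reuses the ROR type bits with the immediate field clear. A hedged sketch:

    #include <cassert>
    #include <cstdint>

    // Sketch: {3-0} = Rm, {4} = 0, {6-5} = type, {11-7} = imm.
    enum ShiftOp { LSL, LSR, ASR, ROR, RRX };

    static uint32_t encodeSORegImm(unsigned Rm, ShiftOp Op, unsigned Imm) {
      uint32_t Binary = Rm & 0xf;
      if (Op == RRX)                 // RRX: type bits 110, imm field clear.
        return Binary | 0x60;
      static const uint32_t SBits[] = {0x0, 0x2, 0x4, 0x6};
      if (Imm == 32)                 // Shift-by-32 is encoded as 0.
        Imm = 0;
      return Binary | (SBits[Op] << 4) | ((Imm & 0x1f) << 7);
    }

    int main() {
      assert(encodeSORegImm(1, ASR, 32) == 0x041); // asr #32 -> imm field 0
      assert(encodeSORegImm(1, LSR, 4)  == 0x221);
      assert(encodeSORegImm(1, RRX, 0)  == 0x061);
    }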
unsigned ARMMCCodeEmitter::
getT2AddrModeSORegOpValue(const MCInst &MI, unsigned OpNum,
SmallVectorImpl<MCFixup> &Fixups) const {
@@ -1106,6 +1277,7 @@ getT2SORegOpValue(const MCInst &MI, unsigned OpIdx,
case ARM_AM::lsl: SBits = 0x0; break;
case ARM_AM::lsr: SBits = 0x2; break;
case ARM_AM::asr: SBits = 0x4; break;
+ case ARM_AM::rrx: // FALLTHROUGH
case ARM_AM::ror: SBits = 0x6; break;
}
@@ -1131,24 +1303,6 @@ getBitfieldInvertedMaskOpValue(const MCInst &MI, unsigned Op,
}
unsigned ARMMCCodeEmitter::
-getMsbOpValue(const MCInst &MI, unsigned Op,
- SmallVectorImpl<MCFixup> &Fixups) const {
- // MSB - 5 bits.
- uint32_t lsb = MI.getOperand(Op-1).getImm();
- uint32_t width = MI.getOperand(Op).getImm();
- uint32_t msb = lsb+width-1;
- assert (width != 0 && msb < 32 && "Illegal bit width!");
- return msb;
-}
-
-unsigned ARMMCCodeEmitter::
-getSsatBitPosValue(const MCInst &MI, unsigned Op,
- SmallVectorImpl<MCFixup> &Fixups) const {
- // For ssat instructions, the bit position should be encoded decremented by 1
- return MI.getOperand(Op).getImm()-1;
-}
-
-unsigned ARMMCCodeEmitter::
getRegisterListOpValue(const MCInst &MI, unsigned Op,
SmallVectorImpl<MCFixup> &Fixups) const {
// VLDM/VSTM:
@@ -1158,8 +1312,8 @@ getRegisterListOpValue(const MCInst &MI, unsigned Op,
// LDM/STM:
// {15-0} = Bitfield of GPRs.
unsigned Reg = MI.getOperand(Op).getReg();
- bool SPRRegs = ARM::SPRRegClass.contains(Reg);
- bool DPRRegs = ARM::DPRRegClass.contains(Reg);
+ bool SPRRegs = llvm::ARMMCRegisterClasses[ARM::SPRRegClassID].contains(Reg);
+ bool DPRRegs = llvm::ARMMCRegisterClasses[ARM::DPRRegClassID].contains(Reg);
unsigned Binary = 0;
@@ -1299,7 +1453,7 @@ EncodeInstruction(const MCInst &MI, raw_ostream &OS,
Size = Desc.getSize();
else
llvm_unreachable("Unexpected instruction size!");
-
+
uint32_t Binary = getBinaryCodeForInstr(MI, Fixups);
// Thumb 32-bit wide instructions need to emit the high order halfword
// first.
diff --git a/lib/Target/ARM/ARMMCExpr.cpp b/lib/Target/ARM/MCTargetDesc/ARMMCExpr.cpp
index 2727ba8..2727ba8 100644
--- a/lib/Target/ARM/ARMMCExpr.cpp
+++ b/lib/Target/ARM/MCTargetDesc/ARMMCExpr.cpp
diff --git a/lib/Target/ARM/ARMMCExpr.h b/lib/Target/ARM/MCTargetDesc/ARMMCExpr.h
index 0a2e883..0a2e883 100644
--- a/lib/Target/ARM/ARMMCExpr.h
+++ b/lib/Target/ARM/MCTargetDesc/ARMMCExpr.h
diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp b/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp
index f8fcf2b..a55c410 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp
+++ b/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp
@@ -13,10 +13,16 @@
#include "ARMMCTargetDesc.h"
#include "ARMMCAsmInfo.h"
+#include "ARMBaseInfo.h"
+#include "InstPrinter/ARMInstPrinter.h"
+#include "llvm/MC/MCCodeGenInfo.h"
+#include "llvm/MC/MCInstrAnalysis.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSubtargetInfo.h"
-#include "llvm/Target/TargetRegistry.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/TargetRegistry.h"
#define GET_REGINFO_MC_DESC
#include "ARMGenRegisterInfo.inc"
@@ -35,7 +41,7 @@ std::string ARM_MC::ParseARMTriple(StringRef TT) {
unsigned Len = TT.size();
unsigned Idx = 0;
- // FIXME: Enahnce Triple helper class to extract ARM version.
+ // FIXME: Enhance Triple helper class to extract ARM version.
bool isThumb = false;
if (Len >= 5 && TT.substr(0, 4) == "armv")
Idx = 4;
@@ -50,18 +56,21 @@ std::string ARM_MC::ParseARMTriple(StringRef TT) {
unsigned SubVer = TT[Idx];
if (SubVer >= '7' && SubVer <= '9') {
if (Len >= Idx+2 && TT[Idx+1] == 'm') {
- // v7m: FeatureNoARM, FeatureDB, FeatureHWDiv
- ARMArchFeature = "+v7,+noarm,+db,+hwdiv";
+ // v7m: FeatureNoARM, FeatureDB, FeatureHWDiv, FeatureMClass
+ ARMArchFeature = "+v7,+noarm,+db,+hwdiv,+mclass";
} else if (Len >= Idx+3 && TT[Idx+1] == 'e'&& TT[Idx+2] == 'm') {
// v7em: FeatureNoARM, FeatureDB, FeatureHWDiv, FeatureDSPThumb2,
- // FeatureT2XtPk
- ARMArchFeature = "+v7,+noarm,+db,+hwdiv,+t2dsp,t2xtpk";
+ // FeatureT2XtPk, FeatureMClass
+      ARMArchFeature = "+v7,+noarm,+db,+hwdiv,+t2dsp,+t2xtpk,+mclass";
} else
- // v7a: FeatureNEON, FeatureDB, FeatureDSPThumb2
- ARMArchFeature = "+v7,+neon,+db,+t2dsp";
+ // v7a: FeatureNEON, FeatureDB, FeatureDSPThumb2, FeatureT2XtPk
+ ARMArchFeature = "+v7,+neon,+db,+t2dsp,+t2xtpk";
} else if (SubVer == '6') {
if (Len >= Idx+3 && TT[Idx+1] == 't' && TT[Idx+2] == '2')
ARMArchFeature = "+v6t2";
+ else if (Len >= Idx+2 && TT[Idx+1] == 'm')
+ // v6m: FeatureNoARM, FeatureMClass
+ ARMArchFeature = "+v6t2,+noarm,+mclass";
else
ARMArchFeature = "+v6";
} else if (SubVer == '5') {
@@ -80,6 +89,14 @@ std::string ARM_MC::ParseARMTriple(StringRef TT) {
ARMArchFeature += ",+thumb-mode";
}
+ Triple TheTriple(TT);
+ if (TheTriple.getOS() == Triple::NativeClient) {
+ if (ARMArchFeature.empty())
+ ARMArchFeature = "+nacl-mode";
+ else
+ ARMArchFeature += ",+nacl-mode";
+ }
+
return ARMArchFeature;
}
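Tracing the branches above gives the following triple-to-feature mapping; the strings are derived by reading the code as shown, so treat them as expected values rather than documented guarantees:

    #include <string>
    #include "ARMMCTargetDesc.h"

    // Expected results of ParseARMTriple for a few triples, per the logic
    // above (illustrative; derived from the code, not from a test suite):
    //   "armv7m-none-eabi"  -> "+v7,+noarm,+db,+hwdiv,+mclass"
    //   "armv6m-none-eabi"  -> "+v6t2,+noarm,+mclass"
    //   "thumbv7-apple-ios" -> "+v7,+neon,+db,+t2dsp,+t2xtpk,+thumb-mode"
    std::string Features = llvm::ARM_MC::ParseARMTriple("armv7m-none-eabi");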
@@ -98,36 +115,18 @@ MCSubtargetInfo *ARM_MC::createARMMCSubtargetInfo(StringRef TT, StringRef CPU,
return X;
}
-// Force static initialization.
-extern "C" void LLVMInitializeARMMCSubtargetInfo() {
- TargetRegistry::RegisterMCSubtargetInfo(TheARMTarget,
- ARM_MC::createARMMCSubtargetInfo);
- TargetRegistry::RegisterMCSubtargetInfo(TheThumbTarget,
- ARM_MC::createARMMCSubtargetInfo);
-}
-
static MCInstrInfo *createARMMCInstrInfo() {
MCInstrInfo *X = new MCInstrInfo();
InitARMMCInstrInfo(X);
return X;
}
-extern "C" void LLVMInitializeARMMCInstrInfo() {
- TargetRegistry::RegisterMCInstrInfo(TheARMTarget, createARMMCInstrInfo);
- TargetRegistry::RegisterMCInstrInfo(TheThumbTarget, createARMMCInstrInfo);
-}
-
-static MCRegisterInfo *createARMMCRegisterInfo() {
+static MCRegisterInfo *createARMMCRegisterInfo(StringRef Triple) {
MCRegisterInfo *X = new MCRegisterInfo();
- InitARMMCRegisterInfo(X);
+ InitARMMCRegisterInfo(X, ARM::LR);
return X;
}
-extern "C" void LLVMInitializeARMMCRegInfo() {
- TargetRegistry::RegisterMCRegInfo(TheARMTarget, createARMMCRegisterInfo);
- TargetRegistry::RegisterMCRegInfo(TheThumbTarget, createARMMCRegisterInfo);
-}
-
static MCAsmInfo *createARMMCAsmInfo(const Target &T, StringRef TT) {
Triple TheTriple(TT);
@@ -137,8 +136,128 @@ static MCAsmInfo *createARMMCAsmInfo(const Target &T, StringRef TT) {
return new ARMELFMCAsmInfo();
}
-extern "C" void LLVMInitializeARMMCAsmInfo() {
- // Register the target asm info.
+static MCCodeGenInfo *createARMMCCodeGenInfo(StringRef TT, Reloc::Model RM,
+ CodeModel::Model CM) {
+ MCCodeGenInfo *X = new MCCodeGenInfo();
+ if (RM == Reloc::Default) {
+ Triple TheTriple(TT);
+ // Default relocation model on Darwin is PIC, not DynamicNoPIC.
+ RM = TheTriple.isOSDarwin() ? Reloc::PIC_ : Reloc::DynamicNoPIC;
+ }
+ X->InitMCCodeGenInfo(RM, CM);
+ return X;
+}
+
+// This is duplicated code. Refactor this.
+static MCStreamer *createMCStreamer(const Target &T, StringRef TT,
+ MCContext &Ctx, MCAsmBackend &MAB,
+ raw_ostream &OS,
+ MCCodeEmitter *Emitter,
+ bool RelaxAll,
+ bool NoExecStack) {
+ Triple TheTriple(TT);
+
+ if (TheTriple.isOSDarwin())
+ return createMachOStreamer(Ctx, MAB, OS, Emitter, RelaxAll);
+
+ if (TheTriple.isOSWindows()) {
+ llvm_unreachable("ARM does not support Windows COFF format");
+ return NULL;
+ }
+
+ return createELFStreamer(Ctx, MAB, OS, Emitter, RelaxAll, NoExecStack);
+}
+
+static MCInstPrinter *createARMMCInstPrinter(const Target &T,
+ unsigned SyntaxVariant,
+ const MCAsmInfo &MAI,
+ const MCSubtargetInfo &STI) {
+ if (SyntaxVariant == 0)
+ return new ARMInstPrinter(MAI, STI);
+ return 0;
+}
+
+namespace {
+
+class ARMMCInstrAnalysis : public MCInstrAnalysis {
+public:
+ ARMMCInstrAnalysis(const MCInstrInfo *Info) : MCInstrAnalysis(Info) {}
+
+ virtual bool isUnconditionalBranch(const MCInst &Inst) const {
+ // BCCs with the "always" predicate are unconditional branches.
+ if (Inst.getOpcode() == ARM::Bcc && Inst.getOperand(1).getImm()==ARMCC::AL)
+ return true;
+ return MCInstrAnalysis::isUnconditionalBranch(Inst);
+ }
+
+ virtual bool isConditionalBranch(const MCInst &Inst) const {
+ // BCCs with the "always" predicate are unconditional branches.
+ if (Inst.getOpcode() == ARM::Bcc && Inst.getOperand(1).getImm()==ARMCC::AL)
+ return false;
+ return MCInstrAnalysis::isConditionalBranch(Inst);
+ }
+
+ uint64_t evaluateBranch(const MCInst &Inst, uint64_t Addr,
+ uint64_t Size) const {
+ // We only handle PCRel branches for now.
+ if (Info->get(Inst.getOpcode()).OpInfo[0].OperandType!=MCOI::OPERAND_PCREL)
+ return -1ULL;
+
+ int64_t Imm = Inst.getOperand(0).getImm();
+ // FIXME: This is not right for thumb.
+ return Addr+Imm+8; // In ARM mode the PC is always off by 8 bytes.
+ }
+};
+
+}
+
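evaluateBranch above leans on the ARM-mode PC bias: a PC-relative immediate is relative to the instruction address plus 8. A worked sketch with made-up addresses:

    #include <cassert>
    #include <cstdint>

    // Sketch of the ARM-mode branch target computation used above.
    static uint64_t evalARMBranch(uint64_t Addr, int64_t Imm) {
      return Addr + Imm + 8; // PC reads 8 bytes past the instruction in ARM mode.
    }

    int main() {
      assert(evalARMBranch(0x8000, 0x10) == 0x8018);
      assert(evalARMBranch(0x8000, -16) == 0x7ff8);
    }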
+static MCInstrAnalysis *createARMMCInstrAnalysis(const MCInstrInfo *Info) {
+ return new ARMMCInstrAnalysis(Info);
+}
+
+// Force static initialization.
+extern "C" void LLVMInitializeARMTargetMC() {
+ // Register the MC asm info.
RegisterMCAsmInfoFn A(TheARMTarget, createARMMCAsmInfo);
RegisterMCAsmInfoFn B(TheThumbTarget, createARMMCAsmInfo);
+
+ // Register the MC codegen info.
+ TargetRegistry::RegisterMCCodeGenInfo(TheARMTarget, createARMMCCodeGenInfo);
+ TargetRegistry::RegisterMCCodeGenInfo(TheThumbTarget, createARMMCCodeGenInfo);
+
+ // Register the MC instruction info.
+ TargetRegistry::RegisterMCInstrInfo(TheARMTarget, createARMMCInstrInfo);
+ TargetRegistry::RegisterMCInstrInfo(TheThumbTarget, createARMMCInstrInfo);
+
+ // Register the MC register info.
+ TargetRegistry::RegisterMCRegInfo(TheARMTarget, createARMMCRegisterInfo);
+ TargetRegistry::RegisterMCRegInfo(TheThumbTarget, createARMMCRegisterInfo);
+
+ // Register the MC subtarget info.
+ TargetRegistry::RegisterMCSubtargetInfo(TheARMTarget,
+ ARM_MC::createARMMCSubtargetInfo);
+ TargetRegistry::RegisterMCSubtargetInfo(TheThumbTarget,
+ ARM_MC::createARMMCSubtargetInfo);
+
+ // Register the MC instruction analyzer.
+ TargetRegistry::RegisterMCInstrAnalysis(TheARMTarget,
+ createARMMCInstrAnalysis);
+ TargetRegistry::RegisterMCInstrAnalysis(TheThumbTarget,
+ createARMMCInstrAnalysis);
+
+ // Register the MC Code Emitter
+ TargetRegistry::RegisterMCCodeEmitter(TheARMTarget, createARMMCCodeEmitter);
+ TargetRegistry::RegisterMCCodeEmitter(TheThumbTarget, createARMMCCodeEmitter);
+
+ // Register the asm backend.
+ TargetRegistry::RegisterMCAsmBackend(TheARMTarget, createARMAsmBackend);
+ TargetRegistry::RegisterMCAsmBackend(TheThumbTarget, createARMAsmBackend);
+
+ // Register the object streamer.
+ TargetRegistry::RegisterMCObjectStreamer(TheARMTarget, createMCStreamer);
+ TargetRegistry::RegisterMCObjectStreamer(TheThumbTarget, createMCStreamer);
+
+ // Register the MCInstPrinter.
+ TargetRegistry::RegisterMCInstPrinter(TheARMTarget, createARMMCInstPrinter);
+ TargetRegistry::RegisterMCInstPrinter(TheThumbTarget, createARMMCInstPrinter);
}
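Once LLVMInitializeARMTargetMC has run, clients reach these factories through the registry rather than calling them directly. A minimal sketch, assuming the LLVM 3.0-era TargetRegistry API:

    #include <string>
    #include "llvm/MC/MCInstrInfo.h"
    #include "llvm/Support/TargetRegistry.h"

    const llvm::MCInstrInfo *getARMInstrInfo() {
      std::string Err;
      const llvm::Target *T =
        llvm::TargetRegistry::lookupTarget("arm-unknown-linux-gnueabi", Err);
      if (!T)
        return 0; // ARM target not linked in / not initialized.
      // Dispatches to createARMMCInstrInfo registered above.
      return T->createMCInstrInfo();
    }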
diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h b/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h
index 74701e3..9b3d3bd 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h
+++ b/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h
@@ -14,12 +14,19 @@
#ifndef ARMMCTARGETDESC_H
#define ARMMCTARGETDESC_H
+#include "llvm/Support/DataTypes.h"
#include <string>
namespace llvm {
+class MCAsmBackend;
+class MCCodeEmitter;
+class MCContext;
+class MCInstrInfo;
+class MCObjectWriter;
class MCSubtargetInfo;
-class Target;
class StringRef;
+class Target;
+class raw_ostream;
extern Target TheARMTarget, TheThumbTarget;
@@ -33,6 +40,18 @@ namespace ARM_MC {
StringRef FS);
}
+MCCodeEmitter *createARMMCCodeEmitter(const MCInstrInfo &MCII,
+ const MCSubtargetInfo &STI,
+ MCContext &Ctx);
+
+MCAsmBackend *createARMAsmBackend(const Target &T, StringRef TT);
+
+/// createARMMachObjectWriter - Construct an ARM Mach-O object writer.
+MCObjectWriter *createARMMachObjectWriter(raw_ostream &OS,
+ bool Is64Bit,
+ uint32_t CPUType,
+ uint32_t CPUSubtype);
+
} // End llvm namespace
// Defines symbolic names for ARM registers. This defines a mapping from
diff --git a/lib/Target/ARM/ARMMachObjectWriter.cpp b/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp
index a36e47d..352c73e 100644
--- a/lib/Target/ARM/ARMMachObjectWriter.cpp
+++ b/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp
@@ -7,8 +7,8 @@
//
//===----------------------------------------------------------------------===//
-#include "ARM.h"
-#include "ARMFixupKinds.h"
+#include "MCTargetDesc/ARMBaseInfo.h"
+#include "MCTargetDesc/ARMFixupKinds.h"
#include "llvm/ADT/Twine.h"
#include "llvm/MC/MCAssembler.h"
#include "llvm/MC/MCAsmLayout.h"
@@ -19,7 +19,6 @@
#include "llvm/MC/MCValue.h"
#include "llvm/Object/MachOFormat.h"
#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Target/TargetAsmBackend.h"
using namespace llvm;
using namespace llvm::object;
diff --git a/lib/Target/ARM/MCTargetDesc/CMakeLists.txt b/lib/Target/ARM/MCTargetDesc/CMakeLists.txt
index 68daf42..adc37cb 100644
--- a/lib/Target/ARM/MCTargetDesc/CMakeLists.txt
+++ b/lib/Target/ARM/MCTargetDesc/CMakeLists.txt
@@ -1,7 +1,19 @@
add_llvm_library(LLVMARMDesc
- ARMMCTargetDesc.cpp
+ ARMAsmBackend.cpp
ARMMCAsmInfo.cpp
+ ARMMCCodeEmitter.cpp
+ ARMMCExpr.cpp
+ ARMMCTargetDesc.cpp
+ ARMMachObjectWriter.cpp
)
+add_dependencies(LLVMARMDesc ARMCommonTableGen)
# Hack: we need to include 'main' target directory to grab private headers
include_directories(${CMAKE_CURRENT_SOURCE_DIR}/.. ${CMAKE_CURRENT_BINARY_DIR}/..)
+
+add_llvm_library_dependencies(LLVMARMDesc
+ LLVMARMInfo
+ LLVMARMAsmPrinter
+ LLVMMC
+ LLVMSupport
+ )
diff --git a/lib/Target/ARM/Makefile b/lib/Target/ARM/Makefile
index eb8c603..3e48ed1 100644
--- a/lib/Target/ARM/Makefile
+++ b/lib/Target/ARM/Makefile
@@ -16,9 +16,8 @@ BUILT_SOURCES = ARMGenRegisterInfo.inc ARMGenInstrInfo.inc \
ARMGenAsmWriter.inc ARMGenAsmMatcher.inc \
ARMGenDAGISel.inc ARMGenSubtargetInfo.inc \
ARMGenCodeEmitter.inc ARMGenCallingConv.inc \
- ARMGenDecoderTables.inc ARMGenEDInfo.inc \
- ARMGenFastISel.inc ARMGenMCCodeEmitter.inc \
- ARMGenMCPseudoLowering.inc
+ ARMGenEDInfo.inc ARMGenFastISel.inc ARMGenMCCodeEmitter.inc \
+ ARMGenMCPseudoLowering.inc ARMGenDisassemblerTables.inc
DIRS = InstPrinter AsmParser Disassembler TargetInfo MCTargetDesc
diff --git a/lib/Target/ARM/NEONMoveFix.cpp b/lib/Target/ARM/NEONMoveFix.cpp
deleted file mode 100644
index c85d1e9..0000000
--- a/lib/Target/ARM/NEONMoveFix.cpp
+++ /dev/null
@@ -1,149 +0,0 @@
-//===-- NEONMoveFix.cpp - Convert vfp reg-reg moves into neon ---*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-#define DEBUG_TYPE "neon-mov-fix"
-#include "ARM.h"
-#include "ARMMachineFunctionInfo.h"
-#include "ARMInstrInfo.h"
-#include "llvm/CodeGen/MachineInstr.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/CodeGen/MachineFunctionPass.h"
-#include "llvm/ADT/Statistic.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/raw_ostream.h"
-using namespace llvm;
-
-STATISTIC(NumVMovs, "Number of reg-reg moves converted");
-
-namespace {
- struct NEONMoveFixPass : public MachineFunctionPass {
- static char ID;
- NEONMoveFixPass() : MachineFunctionPass(ID) {}
-
- virtual bool runOnMachineFunction(MachineFunction &Fn);
-
- virtual const char *getPassName() const {
- return "NEON reg-reg move conversion";
- }
-
- private:
- const TargetRegisterInfo *TRI;
- const ARMBaseInstrInfo *TII;
- bool isA8;
-
- typedef DenseMap<unsigned, const MachineInstr*> RegMap;
-
- bool InsertMoves(MachineBasicBlock &MBB);
- };
- char NEONMoveFixPass::ID = 0;
-}
-
-static bool inNEONDomain(unsigned Domain, bool isA8) {
- return (Domain & ARMII::DomainNEON) ||
- (isA8 && (Domain & ARMII::DomainNEONA8));
-}
-
-bool NEONMoveFixPass::InsertMoves(MachineBasicBlock &MBB) {
- RegMap Defs;
- bool Modified = false;
-
- // Walk over MBB tracking the def points of the registers.
- MachineBasicBlock::iterator MII = MBB.begin(), E = MBB.end();
- MachineBasicBlock::iterator NextMII;
- for (; MII != E; MII = NextMII) {
- NextMII = llvm::next(MII);
- MachineInstr *MI = &*MII;
-
- if (MI->getOpcode() == ARM::VMOVD &&
- !TII->isPredicated(MI)) {
- unsigned SrcReg = MI->getOperand(1).getReg();
- // If we do not find an instruction defining the reg, this means the
- // register should be live-in for this BB. It's always to better to use
-    // register should be live-in for this BB. It's always better to use
- unsigned Domain = ARMII::DomainNEON;
- RegMap::iterator DefMI = Defs.find(SrcReg);
- if (DefMI != Defs.end()) {
- Domain = DefMI->second->getDesc().TSFlags & ARMII::DomainMask;
- // Instructions in general domain are subreg accesses.
- // Map them to NEON reg-reg moves.
- if (Domain == ARMII::DomainGeneral)
- Domain = ARMII::DomainNEON;
- }
-
- if (inNEONDomain(Domain, isA8)) {
- // Convert VMOVD to VORRd
- unsigned DestReg = MI->getOperand(0).getReg();
-
- DEBUG({errs() << "vmov convert: "; MI->dump();});
-
- // It's safe to ignore imp-defs / imp-uses here, since:
-      //   - We're running late, no intelligent codegen passes should be run
- // afterwards
- // - The imp-defs / imp-uses are superregs only, we don't care about
- // them.
- AddDefaultPred(BuildMI(MBB, *MI, MI->getDebugLoc(),
- TII->get(ARM::VORRd), DestReg)
- .addReg(SrcReg).addReg(SrcReg));
- MBB.erase(MI);
- MachineBasicBlock::iterator I = prior(NextMII);
- MI = &*I;
-
- DEBUG({errs() << " into: "; MI->dump();});
-
- Modified = true;
- ++NumVMovs;
- } else {
- assert((Domain & ARMII::DomainVFP) && "Invalid domain!");
- // Do nothing.
- }
- }
-
- // Update def information.
- for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
- const MachineOperand& MO = MI->getOperand(i);
- if (!MO.isReg() || !MO.isDef())
- continue;
- unsigned MOReg = MO.getReg();
-
- Defs[MOReg] = MI;
- // Catch aliases as well.
- for (const unsigned *R = TRI->getAliasSet(MOReg); *R; ++R)
- Defs[*R] = MI;
- }
- }
-
- return Modified;
-}
-
-bool NEONMoveFixPass::runOnMachineFunction(MachineFunction &Fn) {
- ARMFunctionInfo *AFI = Fn.getInfo<ARMFunctionInfo>();
- const TargetMachine &TM = Fn.getTarget();
-
- if (AFI->isThumb1OnlyFunction())
- return false;
-
- TRI = TM.getRegisterInfo();
- TII = static_cast<const ARMBaseInstrInfo*>(TM.getInstrInfo());
- isA8 = TM.getSubtarget<ARMSubtarget>().isCortexA8();
-
- bool Modified = false;
- for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
- ++MFI) {
- MachineBasicBlock &MBB = *MFI;
- Modified |= InsertMoves(MBB);
- }
-
- return Modified;
-}
-
-/// createNEONMoveFixPass - Returns an instance of the NEON reg-reg moves fix
-/// pass.
-FunctionPass *llvm::createNEONMoveFixPass() {
- return new NEONMoveFixPass();
-}
diff --git a/lib/Target/ARM/TargetInfo/ARMTargetInfo.cpp b/lib/Target/ARM/TargetInfo/ARMTargetInfo.cpp
index 163a0a9..500e3de 100644
--- a/lib/Target/ARM/TargetInfo/ARMTargetInfo.cpp
+++ b/lib/Target/ARM/TargetInfo/ARMTargetInfo.cpp
@@ -9,7 +9,7 @@
#include "ARM.h"
#include "llvm/Module.h"
-#include "llvm/Target/TargetRegistry.h"
+#include "llvm/Support/TargetRegistry.h"
using namespace llvm;
Target llvm::TheARMTarget, llvm::TheThumbTarget;
diff --git a/lib/Target/ARM/TargetInfo/CMakeLists.txt b/lib/Target/ARM/TargetInfo/CMakeLists.txt
index 3910bb0..8b38b13 100644
--- a/lib/Target/ARM/TargetInfo/CMakeLists.txt
+++ b/lib/Target/ARM/TargetInfo/CMakeLists.txt
@@ -4,4 +4,10 @@ add_llvm_library(LLVMARMInfo
ARMTargetInfo.cpp
)
-add_dependencies(LLVMARMInfo ARMCodeGenTable_gen)
+add_dependencies(LLVMARMInfo ARMCommonTableGen)
+
+add_llvm_library_dependencies(LLVMARMInfo
+ LLVMMC
+ LLVMSupport
+ LLVMTarget
+ )
diff --git a/lib/Target/ARM/Thumb1FrameLowering.cpp b/lib/Target/ARM/Thumb1FrameLowering.cpp
index c258870..d848177 100644
--- a/lib/Target/ARM/Thumb1FrameLowering.cpp
+++ b/lib/Target/ARM/Thumb1FrameLowering.cpp
@@ -21,7 +21,7 @@
using namespace llvm;
-bool Thumb1FrameLowering::hasReservedCallFrame(const MachineFunction &MF) const {
+bool Thumb1FrameLowering::hasReservedCallFrame(const MachineFunction &MF) const{
const MachineFrameInfo *FFI = MF.getFrameInfo();
unsigned CFSize = FFI->getMaxCallFrameSize();
// It's not always a good idea to include the call frame as part of the
@@ -133,9 +133,9 @@ void Thumb1FrameLowering::emitPrologue(MachineFunction &MF) const {
  // Adjust FP so it points to the stack slot that contains the previous FP.
if (hasFP(MF)) {
- BuildMI(MBB, MBBI, dl, TII.get(ARM::tADDrSPi), FramePtr)
+ AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tADDrSPi), FramePtr)
.addFrameIndex(FramePtrSpillFI).addImm(0)
- .setMIFlags(MachineInstr::FrameSetup);
+ .setMIFlags(MachineInstr::FrameSetup));
if (NumBytes > 508)
// If offset is > 508 then sp cannot be adjusted in a single instruction,
// try restoring from fp instead.
@@ -155,6 +155,11 @@ void Thumb1FrameLowering::emitPrologue(MachineFunction &MF) const {
AFI->setGPRCalleeSavedArea2Size(GPRCS2Size);
AFI->setDPRCalleeSavedAreaSize(DPRCSSize);
+ // Thumb1 does not currently support dynamic stack realignment. Report a
+  // fatal error rather than silently generating bad code.
+ if (RegInfo->needsStackRealignment(MF))
+ report_fatal_error("Dynamic stack realignment not supported for thumb1.");
+
// If we need a base pointer, set it up here. It's whatever the value
// of the stack pointer is at this point. Any variable size objects
// will be allocated after this, so we can still use the base pointer
diff --git a/lib/Target/ARM/Thumb1RegisterInfo.cpp b/lib/Target/ARM/Thumb1RegisterInfo.cpp
index 4eb0b6c..e8ed482 100644
--- a/lib/Target/ARM/Thumb1RegisterInfo.cpp
+++ b/lib/Target/ARM/Thumb1RegisterInfo.cpp
@@ -13,12 +13,12 @@
//===----------------------------------------------------------------------===//
#include "ARM.h"
-#include "ARMAddressingModes.h"
#include "ARMBaseInstrInfo.h"
#include "ARMMachineFunctionInfo.h"
#include "ARMSubtarget.h"
#include "Thumb1InstrInfo.h"
#include "Thumb1RegisterInfo.h"
+#include "MCTargetDesc/ARMAddressingModes.h"
#include "llvm/Constants.h"
#include "llvm/DerivedTypes.h"
#include "llvm/Function.h"
@@ -27,7 +27,6 @@
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/CodeGen/MachineLocation.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/Target/TargetFrameLowering.h"
#include "llvm/Target/TargetMachine.h"
@@ -182,7 +181,6 @@ void llvm::emitThumbRegPlusImmediate(MachineBasicBlock &MBB,
int Opc = 0;
int ExtraOpc = 0;
bool NeedCC = false;
- bool NeedPred = false;
if (DestReg == BaseReg && BaseReg == ARM::SP) {
assert(isMul4 && "Thumb sp inc / dec size must be multiple of 4!");
@@ -217,7 +215,7 @@ void llvm::emitThumbRegPlusImmediate(MachineBasicBlock &MBB,
} else {
Opc = isSub ? ARM::tSUBi8 : ARM::tADDi8;
NumBits = 8;
- NeedPred = NeedCC = true;
+ NeedCC = true;
}
isTwoAddr = true;
}
@@ -241,7 +239,8 @@ void llvm::emitThumbRegPlusImmediate(MachineBasicBlock &MBB,
Bytes -= ThisVal;
const MCInstrDesc &MCID = TII.get(isSub ? ARM::tSUBi3 : ARM::tADDi3);
const MachineInstrBuilder MIB =
- AddDefaultT1CC(BuildMI(MBB, MBBI, dl, MCID, DestReg).setMIFlags(MIFlags));
+ AddDefaultT1CC(BuildMI(MBB, MBBI, dl, MCID, DestReg)
+ .setMIFlags(MIFlags));
AddDefaultPred(MIB.addReg(BaseReg, RegState::Kill).addImm(ThisVal));
} else {
AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), DestReg)
@@ -262,18 +261,15 @@ void llvm::emitThumbRegPlusImmediate(MachineBasicBlock &MBB,
if (NeedCC)
MIB = AddDefaultT1CC(MIB);
MIB.addReg(DestReg).addImm(ThisVal);
- if (NeedPred)
- MIB = AddDefaultPred(MIB);
+ MIB = AddDefaultPred(MIB);
MIB.setMIFlags(MIFlags);
- }
- else {
+ } else {
bool isKill = BaseReg != ARM::SP;
MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII.get(Opc), DestReg);
if (NeedCC)
MIB = AddDefaultT1CC(MIB);
MIB.addReg(BaseReg, getKillRegState(isKill)).addImm(ThisVal);
- if (NeedPred)
- MIB = AddDefaultPred(MIB);
+ MIB = AddDefaultPred(MIB);
MIB.setMIFlags(MIFlags);
BaseReg = DestReg;
@@ -285,7 +281,7 @@ void llvm::emitThumbRegPlusImmediate(MachineBasicBlock &MBB,
Scale = 1;
Chunk = ((1 << NumBits) - 1) * Scale;
Opc = isSub ? ARM::tSUBi8 : ARM::tADDi8;
- NeedPred = NeedCC = isTwoAddr = true;
+ NeedCC = isTwoAddr = true;
}
}
}
@@ -405,7 +401,6 @@ rewriteFrameIndex(MachineBasicBlock::iterator II, unsigned FrameRegIdx,
unsigned Scale = 1;
if (FrameReg != ARM::SP) {
Opcode = ARM::tADDi3;
- MI.setDesc(TII.get(Opcode));
NumBits = 3;
} else {
NumBits = 8;
@@ -419,10 +414,9 @@ rewriteFrameIndex(MachineBasicBlock::iterator II, unsigned FrameRegIdx,
// Turn it into a move.
MI.setDesc(TII.get(ARM::tMOVr));
MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false);
- // Remove offset and add predicate operands.
+    // Remove the offset operand.
MI.RemoveOperand(FrameRegIdx+1);
MachineInstrBuilder MIB(&MI);
- AddDefaultPred(MIB);
return true;
}
@@ -431,6 +425,7 @@ rewriteFrameIndex(MachineBasicBlock::iterator II, unsigned FrameRegIdx,
if (((Offset / Scale) & ~Mask) == 0) {
// Replace the FrameIndex with sp / fp
if (Opcode == ARM::tADDi3) {
+ MI.setDesc(TII.get(Opcode));
removeOperands(MI, FrameRegIdx);
MachineInstrBuilder MIB(&MI);
AddDefaultPred(AddDefaultT1CC(MIB).addReg(FrameReg)
@@ -459,6 +454,7 @@ rewriteFrameIndex(MachineBasicBlock::iterator II, unsigned FrameRegIdx,
// r0 = add sp, 255*4
// r0 = add r0, (imm - 255*4)
if (Opcode == ARM::tADDi3) {
+ MI.setDesc(TII.get(Opcode));
removeOperands(MI, FrameRegIdx);
MachineInstrBuilder MIB(&MI);
AddDefaultPred(AddDefaultT1CC(MIB).addReg(FrameReg).addImm(Mask));
@@ -479,10 +475,6 @@ rewriteFrameIndex(MachineBasicBlock::iterator II, unsigned FrameRegIdx,
MI.setDesc(TII.get(ARM::tADDhirr));
MI.getOperand(FrameRegIdx).ChangeToRegister(DestReg, false, false, true);
MI.getOperand(FrameRegIdx+1).ChangeToRegister(FrameReg, false);
- if (Opcode == ARM::tADDi3) {
- MachineInstrBuilder MIB(&MI);
- AddDefaultPred(MIB);
- }
}
return true;
} else {
@@ -545,9 +537,9 @@ Thumb1RegisterInfo::resolveFrameIndex(MachineBasicBlock::iterator I,
++i;
assert(i < MI.getNumOperands() && "Instr doesn't have FrameIndex operand!");
}
- bool Done = false;
- Done = rewriteFrameIndex(MI, i, BaseReg, Off, TII);
+ bool Done = rewriteFrameIndex(MI, i, BaseReg, Off, TII);
assert (Done && "Unable to resolve frame index!");
+ (void)Done;
}
/// saveScavengerRegister - Spill the register so it can be used by the
diff --git a/lib/Target/ARM/Thumb2ITBlockPass.cpp b/lib/Target/ARM/Thumb2ITBlockPass.cpp
index 360ec00..b627400 100644
--- a/lib/Target/ARM/Thumb2ITBlockPass.cpp
+++ b/lib/Target/ARM/Thumb2ITBlockPass.cpp
@@ -124,6 +124,27 @@ Thumb2ITBlockPass::MoveCopyOutOfITBlock(MachineInstr *MI,
if (Uses.count(DstReg) || Defs.count(SrcReg))
return false;
+ // If the CPSR is defined by this copy, then we don't want to move it. E.g.,
+ // if we have:
+ //
+ // movs r1, r1
+ // rsb r1, 0
+ // movs r2, r2
+ // rsb r2, 0
+ //
+ // we don't want this to be converted to:
+ //
+ // movs r1, r1
+ // movs r2, r2
+ // itt mi
+ // rsb r1, 0
+ // rsb r2, 0
+ //
+ const MCInstrDesc &MCID = MI->getDesc();
+ if (MCID.hasOptionalDef() &&
+ MI->getOperand(MCID.getNumOperands() - 1).getReg() == ARM::CPSR)
+ return false;
+
// Then peek at the next instruction to see if it's predicated on CC or OCC.
// If not, then there is nothing to be gained by moving the copy.
MachineBasicBlock::iterator I = MI; ++I;
diff --git a/lib/Target/ARM/Thumb2InstrInfo.cpp b/lib/Target/ARM/Thumb2InstrInfo.cpp
index 51b56aa..cf040c82 100644
--- a/lib/Target/ARM/Thumb2InstrInfo.cpp
+++ b/lib/Target/ARM/Thumb2InstrInfo.cpp
@@ -14,9 +14,9 @@
#include "Thumb2InstrInfo.h"
#include "ARM.h"
#include "ARMConstantPoolValue.h"
-#include "ARMAddressingModes.h"
#include "ARMMachineFunctionInfo.h"
#include "Thumb2InstrInfo.h"
+#include "MCTargetDesc/ARMAddressingModes.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineMemOperand.h"
@@ -122,7 +122,8 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
const TargetRegisterClass *RC,
const TargetRegisterInfo *TRI) const {
if (RC == ARM::GPRRegisterClass || RC == ARM::tGPRRegisterClass ||
- RC == ARM::tcGPRRegisterClass || RC == ARM::rGPRRegisterClass) {
+ RC == ARM::tcGPRRegisterClass || RC == ARM::rGPRRegisterClass ||
+ RC == ARM::GPRnopcRegisterClass) {
DebugLoc DL;
if (I != MBB.end()) DL = I->getDebugLoc();
@@ -149,7 +150,8 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
const TargetRegisterClass *RC,
const TargetRegisterInfo *TRI) const {
if (RC == ARM::GPRRegisterClass || RC == ARM::tGPRRegisterClass ||
- RC == ARM::tcGPRRegisterClass || RC == ARM::rGPRRegisterClass) {
+ RC == ARM::tcGPRRegisterClass || RC == ARM::rGPRRegisterClass ||
+ RC == ARM::GPRnopcRegisterClass) {
DebugLoc DL;
if (I != MBB.end()) DL = I->getDebugLoc();
@@ -233,9 +235,8 @@ void llvm::emitT2RegPlusImmediate(MachineBasicBlock &MBB,
if (DestReg == ARM::SP && (ThisVal < ((1 << 7)-1) * 4)) {
assert((ThisVal & 3) == 0 && "Stack update is not multiple of 4?");
Opc = isSub ? ARM::tSUBspi : ARM::tADDspi;
- // FIXME: Fix Thumb1 immediate encoding.
- BuildMI(MBB, MBBI, dl, TII.get(Opc), DestReg)
- .addReg(BaseReg).addImm(ThisVal/4).setMIFlags(MIFlags);
+ AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(Opc), DestReg)
+ .addReg(BaseReg).addImm(ThisVal/4).setMIFlags(MIFlags));
NumBytes = 0;
continue;
}
diff --git a/lib/Target/ARM/Thumb2SizeReduction.cpp b/lib/Target/ARM/Thumb2SizeReduction.cpp
index c741a6e..89a155c 100644
--- a/lib/Target/ARM/Thumb2SizeReduction.cpp
+++ b/lib/Target/ARM/Thumb2SizeReduction.cpp
@@ -9,11 +9,11 @@
#define DEBUG_TYPE "t2-reduce-size"
#include "ARM.h"
-#include "ARMAddressingModes.h"
#include "ARMBaseRegisterInfo.h"
#include "ARMBaseInstrInfo.h"
#include "ARMSubtarget.h"
#include "Thumb2InstrInfo.h"
+#include "MCTargetDesc/ARMAddressingModes.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
@@ -97,11 +97,11 @@ namespace {
{ ARM::t2SUBrr, ARM::tSUBrr, 0, 0, 0, 1, 0, 0,0, 0,0 },
{ ARM::t2SUBSri,ARM::tSUBi3, ARM::tSUBi8, 3, 8, 1, 1, 2,2, 0,0 },
{ ARM::t2SUBSrr,ARM::tSUBrr, 0, 0, 0, 1, 0, 2,0, 0,0 },
- { ARM::t2SXTBr, ARM::tSXTB, 0, 0, 0, 1, 0, 1,0, 0,0 },
- { ARM::t2SXTHr, ARM::tSXTH, 0, 0, 0, 1, 0, 1,0, 0,0 },
+ { ARM::t2SXTB, ARM::tSXTB, 0, 0, 0, 1, 0, 1,0, 0,1 },
+ { ARM::t2SXTH, ARM::tSXTH, 0, 0, 0, 1, 0, 1,0, 0,1 },
{ ARM::t2TSTrr, ARM::tTST, 0, 0, 0, 1, 0, 2,0, 0,0 },
- { ARM::t2UXTBr, ARM::tUXTB, 0, 0, 0, 1, 0, 1,0, 0,0 },
- { ARM::t2UXTHr, ARM::tUXTH, 0, 0, 0, 1, 0, 1,0, 0,0 },
+ { ARM::t2UXTB, ARM::tUXTB, 0, 0, 0, 1, 0, 1,0, 0,1 },
+ { ARM::t2UXTH, ARM::tUXTH, 0, 0, 0, 1, 0, 1,0, 0,1 },
// FIXME: Clean this up after splitting each Thumb load / store opcode
// into multiple ones.
@@ -507,6 +507,7 @@ Thumb2SizeReduce::ReduceSpecial(MachineBasicBlock &MBB, MachineInstr *MI,
.addOperand(MI->getOperand(0))
.addOperand(MI->getOperand(1))
.addImm(Imm / 4); // The tADDrSPi has an implied scale by four.
+ AddDefaultPred(MIB);
// Transfer MI flags.
MIB.setMIFlags(MI->getFlags());
@@ -546,6 +547,10 @@ Thumb2SizeReduce::ReduceSpecial(MachineBasicBlock &MBB, MachineInstr *MI,
}
case ARM::t2RSBri:
case ARM::t2RSBSri:
+ case ARM::t2SXTB:
+ case ARM::t2SXTH:
+ case ARM::t2UXTB:
+ case ARM::t2UXTH:
if (MI->getOperand(2).getImm() == 0)
return ReduceToNarrow(MBB, MI, Entry, LiveCPSR, CPSRDef);
break;
@@ -742,7 +747,11 @@ Thumb2SizeReduce::ReduceToNarrow(MachineBasicBlock &MBB, MachineInstr *MI,
if (i < NumOps && MCID.OpInfo[i].isOptionalDef())
continue;
if ((MCID.getOpcode() == ARM::t2RSBSri ||
- MCID.getOpcode() == ARM::t2RSBri) && i == 2)
+ MCID.getOpcode() == ARM::t2RSBri ||
+ MCID.getOpcode() == ARM::t2SXTB ||
+ MCID.getOpcode() == ARM::t2SXTH ||
+ MCID.getOpcode() == ARM::t2UXTB ||
+ MCID.getOpcode() == ARM::t2UXTH) && i == 2)
// Skip the zero immediate operand, it's now implicit.
continue;
bool isPred = (i < NumOps && MCID.OpInfo[i].isPredicate());
diff --git a/lib/Target/Alpha/AlphaAsmPrinter.cpp b/lib/Target/Alpha/AlphaAsmPrinter.cpp
index 46ae286..5dce06a 100644
--- a/lib/Target/Alpha/AlphaAsmPrinter.cpp
+++ b/lib/Target/Alpha/AlphaAsmPrinter.cpp
@@ -26,8 +26,8 @@
#include "llvm/Target/Mangler.h"
#include "llvm/Target/TargetLoweringObjectFile.h"
#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetRegistry.h"
#include "llvm/ADT/SmallString.h"
+#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
diff --git a/lib/Target/Alpha/AlphaISelDAGToDAG.cpp b/lib/Target/Alpha/AlphaISelDAGToDAG.cpp
index 7b91fea..f877c65 100644
--- a/lib/Target/Alpha/AlphaISelDAGToDAG.cpp
+++ b/lib/Target/Alpha/AlphaISelDAGToDAG.cpp
@@ -80,7 +80,7 @@ namespace {
        // Otherwise we don't know that it's okay to zapnot this entire
// byte. Only do this iff we can prove that the missing bits are
// already null, so the bytezap doesn't need to really null them.
- BitsToCheck |= ~Constant & (0xFF << 8*i);
+ BitsToCheck |= ~Constant & (0xFFULL << 8*i);
}
}
}
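The new ULL suffix above is the substance of the change: 0xFF is a plain int, so for larger i the shift count meets or exceeds the width of int, which is undefined behavior; widening to 64 bits first keeps the mask well defined. A minimal illustration:

    #include <cassert>
    #include <cstdint>

    int main() {
      unsigned i = 7;
      // 0xFF << 8*i would shift a 32-bit int by 56 bits: undefined behavior.
      uint64_t Mask = 0xFFULL << (8 * i); // Widen to 64 bits before shifting.
      assert(Mask == 0xFF00000000000000ULL);
    }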
@@ -114,9 +114,8 @@ namespace {
if (!x) return 0;
unsigned at = CountLeadingZeros_64(x);
uint64_t complow = 1ULL << (63 - at);
- uint64_t comphigh = 1ULL << (64 - at);
- //cerr << x << ":" << complow << ":" << comphigh << "\n";
- if (abs64(complow - x) <= abs64(comphigh - x))
+ uint64_t comphigh = complow << 1;
+ if (x - complow <= comphigh - x)
return complow;
else
return comphigh;
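The rewrite above computes comphigh as complow << 1 (avoiding a shift by 64 when the top bit is set) and drops abs64, since complow <= x <= comphigh makes both differences nonnegative. A self-contained sketch of the same computation; the clz helper is an illustrative stand-in for CountLeadingZeros_64:

    #include <cassert>
    #include <cstdint>

    // Illustrative stand-in for CountLeadingZeros_64.
    static unsigned clz64(uint64_t x) {
      unsigned n = 0;
      for (uint64_t bit = 1ULL << 63; bit && !(x & bit); bit >>= 1)
        ++n;
      return n;
    }

    // Round x (nonzero) to whichever neighboring power of two is nearer.
    static uint64_t nearestPow2(uint64_t x) {
      uint64_t complow = 1ULL << (63 - clz64(x)); // Highest power of two <= x.
      uint64_t comphigh = complow << 1;           // The next one up.
      return (x - complow <= comphigh - x) ? complow : comphigh;
    }

    int main() {
      assert(nearestPow2(100) == 128); // 100-64 = 36 > 128-100 = 28
      assert(nearestPow2(68) == 64);   // 68-64 = 4 <= 128-68 = 60
    }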
diff --git a/lib/Target/Alpha/AlphaISelLowering.cpp b/lib/Target/Alpha/AlphaISelLowering.cpp
index de003fb..3057eb8 100644
--- a/lib/Target/Alpha/AlphaISelLowering.cpp
+++ b/lib/Target/Alpha/AlphaISelLowering.cpp
@@ -49,6 +49,7 @@ AlphaTargetLowering::AlphaTargetLowering(TargetMachine &TM)
// Set up the TargetLowering object.
  // I am having problems with shr n i8 1
setBooleanContents(ZeroOrOneBooleanContent);
+ setBooleanVectorContents(ZeroOrOneBooleanContent); // FIXME: Is this correct?
addRegisterClass(MVT::i64, Alpha::GPRCRegisterClass);
addRegisterClass(MVT::f64, Alpha::F8RCRegisterClass);
@@ -153,6 +154,9 @@ AlphaTargetLowering::AlphaTargetLowering(TargetMachine &TM)
setOperationAction(ISD::JumpTable, MVT::i64, Custom);
setOperationAction(ISD::JumpTable, MVT::i32, Custom);
+ setOperationAction(ISD::ATOMIC_LOAD, MVT::i32, Expand);
+ setOperationAction(ISD::ATOMIC_STORE, MVT::i32, Expand);
+
setStackPointerRegisterToSaveRestore(Alpha::R30);
setJumpBufSize(272);
@@ -160,10 +164,12 @@ AlphaTargetLowering::AlphaTargetLowering(TargetMachine &TM)
setMinFunctionAlignment(4);
+ setInsertFencesForAtomic(true);
+
computeRegisterProperties();
}
-MVT::SimpleValueType AlphaTargetLowering::getSetCCResultType(EVT VT) const {
+EVT AlphaTargetLowering::getSetCCResultType(EVT VT) const {
return MVT::i64;
}
diff --git a/lib/Target/Alpha/AlphaISelLowering.h b/lib/Target/Alpha/AlphaISelLowering.h
index 13383f4..80f8efa 100644
--- a/lib/Target/Alpha/AlphaISelLowering.h
+++ b/lib/Target/Alpha/AlphaISelLowering.h
@@ -66,7 +66,7 @@ namespace llvm {
virtual MVT getShiftAmountTy(EVT LHSTy) const { return MVT::i64; }
/// getSetCCResultType - Get the SETCC result ValueType
- virtual MVT::SimpleValueType getSetCCResultType(EVT VT) const;
+ virtual EVT getSetCCResultType(EVT VT) const;
/// LowerOperation - Provide custom lowering hooks for some operations.
///
diff --git a/lib/Target/Alpha/AlphaInstrInfo.cpp b/lib/Target/Alpha/AlphaInstrInfo.cpp
index 4dcec8f..8df2ed7 100644
--- a/lib/Target/Alpha/AlphaInstrInfo.cpp
+++ b/lib/Target/Alpha/AlphaInstrInfo.cpp
@@ -16,7 +16,6 @@
#include "AlphaMachineFunctionInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/Target/TargetRegistry.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Support/ErrorHandling.h"
diff --git a/lib/Target/Alpha/AlphaInstrInfo.td b/lib/Target/Alpha/AlphaInstrInfo.td
index b201712..c8c9377 100644
--- a/lib/Target/Alpha/AlphaInstrInfo.td
+++ b/lib/Target/Alpha/AlphaInstrInfo.td
@@ -607,6 +607,8 @@ def : Pat<(membarrier (i64 imm), (i64 imm), (i64 imm), (i64 1), (i64 imm)),
def : Pat<(membarrier (i64 imm), (i64 imm), (i64 imm), (i64 imm), (i64 imm)),
(MB)>;
+def : Pat<(atomic_fence (imm), (imm)), (MB)>;
+
//Basic Floating point ops
//Floats
diff --git a/lib/Target/Alpha/AlphaRegisterInfo.cpp b/lib/Target/Alpha/AlphaRegisterInfo.cpp
index df8f157..8b6230f 100644
--- a/lib/Target/Alpha/AlphaRegisterInfo.cpp
+++ b/lib/Target/Alpha/AlphaRegisterInfo.cpp
@@ -21,7 +21,6 @@
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
-#include "llvm/CodeGen/MachineLocation.h"
#include "llvm/Target/TargetFrameLowering.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
@@ -40,8 +39,7 @@
using namespace llvm;
AlphaRegisterInfo::AlphaRegisterInfo(const TargetInstrInfo &tii)
- : AlphaGenRegisterInfo(),
- TII(tii) {
+ : AlphaGenRegisterInfo(Alpha::R26), TII(tii) {
}
static long getUpper16(long l) {
@@ -178,10 +176,6 @@ AlphaRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
}
}
-unsigned AlphaRegisterInfo::getRARegister() const {
- return Alpha::R26;
-}
-
unsigned AlphaRegisterInfo::getFrameRegister(const MachineFunction &MF) const {
const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
@@ -198,16 +192,6 @@ unsigned AlphaRegisterInfo::getEHHandlerRegister() const {
return 0;
}
-int AlphaRegisterInfo::getDwarfRegNum(unsigned RegNum, bool isEH) const {
- llvm_unreachable("What is the dwarf register number");
- return -1;
-}
-
-int AlphaRegisterInfo::getLLVMRegNum(unsigned DwarfRegNum, bool isEH) const {
- llvm_unreachable("What is the dwarf register number");
- return -1;
-}
-
std::string AlphaRegisterInfo::getPrettyName(unsigned reg)
{
std::string s(AlphaRegDesc[reg].Name);
diff --git a/lib/Target/Alpha/AlphaRegisterInfo.h b/lib/Target/Alpha/AlphaRegisterInfo.h
index 1072bf7..e35be27 100644
--- a/lib/Target/Alpha/AlphaRegisterInfo.h
+++ b/lib/Target/Alpha/AlphaRegisterInfo.h
@@ -42,16 +42,12 @@ struct AlphaRegisterInfo : public AlphaGenRegisterInfo {
int SPAdj, RegScavenger *RS = NULL) const;
// Debug information queries.
- unsigned getRARegister() const;
unsigned getFrameRegister(const MachineFunction &MF) const;
// Exception handling queries.
unsigned getEHExceptionRegister() const;
unsigned getEHHandlerRegister() const;
- int getDwarfRegNum(unsigned RegNum, bool isEH) const;
- int getLLVMRegNum(unsigned RegNum, bool isEH) const;
-
static std::string getPrettyName(unsigned reg);
};
diff --git a/lib/Target/Alpha/AlphaSubtarget.cpp b/lib/Target/Alpha/AlphaSubtarget.cpp
index 624a5e2..bd55ce9 100644
--- a/lib/Target/Alpha/AlphaSubtarget.cpp
+++ b/lib/Target/Alpha/AlphaSubtarget.cpp
@@ -13,7 +13,6 @@
#include "AlphaSubtarget.h"
#include "Alpha.h"
-#include "llvm/Target/TargetRegistry.h"
#define GET_SUBTARGETINFO_TARGET_DESC
#define GET_SUBTARGETINFO_CTOR
diff --git a/lib/Target/Alpha/AlphaTargetMachine.cpp b/lib/Target/Alpha/AlphaTargetMachine.cpp
index 3b65d41..fc9a677 100644
--- a/lib/Target/Alpha/AlphaTargetMachine.cpp
+++ b/lib/Target/Alpha/AlphaTargetMachine.cpp
@@ -14,7 +14,7 @@
#include "AlphaTargetMachine.h"
#include "llvm/PassManager.h"
#include "llvm/Support/FormattedStream.h"
-#include "llvm/Target/TargetRegistry.h"
+#include "llvm/Support/TargetRegistry.h"
using namespace llvm;
extern "C" void LLVMInitializeAlphaTarget() {
@@ -22,19 +22,17 @@ extern "C" void LLVMInitializeAlphaTarget() {
RegisterTargetMachine<AlphaTargetMachine> X(TheAlphaTarget);
}
-AlphaTargetMachine::AlphaTargetMachine(const Target &T, const std::string &TT,
- const std::string &CPU,
- const std::string &FS)
- : LLVMTargetMachine(T, TT, CPU, FS),
+AlphaTargetMachine::AlphaTargetMachine(const Target &T, StringRef TT,
+ StringRef CPU, StringRef FS,
+ Reloc::Model RM, CodeModel::Model CM)
+ : LLVMTargetMachine(T, TT, CPU, FS, RM, CM),
DataLayout("e-f128:128:128-n64"),
FrameLowering(Subtarget),
Subtarget(TT, CPU, FS),
TLInfo(*this),
TSInfo(*this) {
- setRelocationModel(Reloc::PIC_);
}
-
//===----------------------------------------------------------------------===//
// Pass Pipeline Configuration
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/Alpha/AlphaTargetMachine.h b/lib/Target/Alpha/AlphaTargetMachine.h
index cf00e58..48bb948 100644
--- a/lib/Target/Alpha/AlphaTargetMachine.h
+++ b/lib/Target/Alpha/AlphaTargetMachine.h
@@ -36,8 +36,9 @@ class AlphaTargetMachine : public LLVMTargetMachine {
AlphaSelectionDAGInfo TSInfo;
public:
- AlphaTargetMachine(const Target &T, const std::string &TT,
- const std::string &CPU, const std::string &FS);
+ AlphaTargetMachine(const Target &T, StringRef TT,
+ StringRef CPU, StringRef FS,
+ Reloc::Model RM, CodeModel::Model CM);
virtual const AlphaInstrInfo *getInstrInfo() const { return &InstrInfo; }
virtual const TargetFrameLowering *getFrameLowering() const {
diff --git a/lib/Target/Alpha/CMakeLists.txt b/lib/Target/Alpha/CMakeLists.txt
index a6027bb..a6d5516 100644
--- a/lib/Target/Alpha/CMakeLists.txt
+++ b/lib/Target/Alpha/CMakeLists.txt
@@ -1,11 +1,12 @@
set(LLVM_TARGET_DEFINITIONS Alpha.td)
-tablegen(AlphaGenRegisterInfo.inc -gen-register-info)
-tablegen(AlphaGenInstrInfo.inc -gen-instr-info)
-tablegen(AlphaGenAsmWriter.inc -gen-asm-writer)
-tablegen(AlphaGenDAGISel.inc -gen-dag-isel)
-tablegen(AlphaGenCallingConv.inc -gen-callingconv)
-tablegen(AlphaGenSubtargetInfo.inc -gen-subtarget)
+llvm_tablegen(AlphaGenRegisterInfo.inc -gen-register-info)
+llvm_tablegen(AlphaGenInstrInfo.inc -gen-instr-info)
+llvm_tablegen(AlphaGenAsmWriter.inc -gen-asm-writer)
+llvm_tablegen(AlphaGenDAGISel.inc -gen-dag-isel)
+llvm_tablegen(AlphaGenCallingConv.inc -gen-callingconv)
+llvm_tablegen(AlphaGenSubtargetInfo.inc -gen-subtarget)
+add_public_tablegen_target(AlphaCommonTableGen)
add_llvm_target(AlphaCodeGen
AlphaAsmPrinter.cpp
@@ -21,5 +22,17 @@ add_llvm_target(AlphaCodeGen
AlphaSelectionDAGInfo.cpp
)
+add_llvm_library_dependencies(LLVMAlphaCodeGen
+ LLVMAlphaDesc
+ LLVMAlphaInfo
+ LLVMAsmPrinter
+ LLVMCodeGen
+ LLVMCore
+ LLVMMC
+ LLVMSelectionDAG
+ LLVMSupport
+ LLVMTarget
+ )
+
add_subdirectory(TargetInfo)
add_subdirectory(MCTargetDesc)
diff --git a/lib/Target/Alpha/MCTargetDesc/AlphaMCTargetDesc.cpp b/lib/Target/Alpha/MCTargetDesc/AlphaMCTargetDesc.cpp
index 562052b..4ad021c 100644
--- a/lib/Target/Alpha/MCTargetDesc/AlphaMCTargetDesc.cpp
+++ b/lib/Target/Alpha/MCTargetDesc/AlphaMCTargetDesc.cpp
@@ -13,10 +13,11 @@
#include "AlphaMCTargetDesc.h"
#include "AlphaMCAsmInfo.h"
+#include "llvm/MC/MCCodeGenInfo.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSubtargetInfo.h"
-#include "llvm/Target/TargetRegistry.h"
+#include "llvm/Support/TargetRegistry.h"
#define GET_INSTRINFO_MC_DESC
#include "AlphaGenInstrInfo.inc"
@@ -36,8 +37,10 @@ static MCInstrInfo *createAlphaMCInstrInfo() {
return X;
}
-extern "C" void LLVMInitializeAlphaMCInstrInfo() {
- TargetRegistry::RegisterMCInstrInfo(TheAlphaTarget, createAlphaMCInstrInfo);
+static MCRegisterInfo *createAlphaMCRegisterInfo(StringRef TT) {
+ MCRegisterInfo *X = new MCRegisterInfo();
+ InitAlphaMCRegisterInfo(X, Alpha::R26);
+ return X;
}
static MCSubtargetInfo *createAlphaMCSubtargetInfo(StringRef TT, StringRef CPU,
@@ -47,11 +50,29 @@ static MCSubtargetInfo *createAlphaMCSubtargetInfo(StringRef TT, StringRef CPU,
return X;
}
-extern "C" void LLVMInitializeAlphaMCSubtargetInfo() {
- TargetRegistry::RegisterMCSubtargetInfo(TheAlphaTarget,
- createAlphaMCSubtargetInfo);
+static MCCodeGenInfo *createAlphaMCCodeGenInfo(StringRef TT, Reloc::Model RM,
+ CodeModel::Model CM) {
+ MCCodeGenInfo *X = new MCCodeGenInfo();
+ X->InitMCCodeGenInfo(Reloc::PIC_, CM);
+ return X;
}
-extern "C" void LLVMInitializeAlphaMCAsmInfo() {
+// Force static initialization.
+extern "C" void LLVMInitializeAlphaTargetMC() {
+ // Register the MC asm info.
RegisterMCAsmInfo<AlphaMCAsmInfo> X(TheAlphaTarget);
+
+ // Register the MC codegen info.
+ TargetRegistry::RegisterMCCodeGenInfo(TheAlphaTarget,
+ createAlphaMCCodeGenInfo);
+
+ // Register the MC instruction info.
+ TargetRegistry::RegisterMCInstrInfo(TheAlphaTarget, createAlphaMCInstrInfo);
+
+ // Register the MC register info.
+ TargetRegistry::RegisterMCRegInfo(TheAlphaTarget, createAlphaMCRegisterInfo);
+
+ // Register the MC subtarget info.
+ TargetRegistry::RegisterMCSubtargetInfo(TheAlphaTarget,
+ createAlphaMCSubtargetInfo);
}
diff --git a/lib/Target/Alpha/MCTargetDesc/CMakeLists.txt b/lib/Target/Alpha/MCTargetDesc/CMakeLists.txt
index ad0dd26..f745ecb 100644
--- a/lib/Target/Alpha/MCTargetDesc/CMakeLists.txt
+++ b/lib/Target/Alpha/MCTargetDesc/CMakeLists.txt
@@ -2,3 +2,10 @@ add_llvm_library(LLVMAlphaDesc
AlphaMCTargetDesc.cpp
AlphaMCAsmInfo.cpp
)
+
+add_llvm_library_dependencies(LLVMAlphaDesc
+ LLVMAlphaInfo
+ LLVMMC
+ )
+
+add_dependencies(LLVMAlphaDesc AlphaCommonTableGen)
diff --git a/lib/Target/Alpha/TargetInfo/AlphaTargetInfo.cpp b/lib/Target/Alpha/TargetInfo/AlphaTargetInfo.cpp
index f7099b9..bdc69e7 100644
--- a/lib/Target/Alpha/TargetInfo/AlphaTargetInfo.cpp
+++ b/lib/Target/Alpha/TargetInfo/AlphaTargetInfo.cpp
@@ -9,7 +9,7 @@
#include "Alpha.h"
#include "llvm/Module.h"
-#include "llvm/Target/TargetRegistry.h"
+#include "llvm/Support/TargetRegistry.h"
using namespace llvm;
llvm::Target llvm::TheAlphaTarget;
diff --git a/lib/Target/Alpha/TargetInfo/CMakeLists.txt b/lib/Target/Alpha/TargetInfo/CMakeLists.txt
index 2a7291b..cac3178 100644
--- a/lib/Target/Alpha/TargetInfo/CMakeLists.txt
+++ b/lib/Target/Alpha/TargetInfo/CMakeLists.txt
@@ -4,4 +4,10 @@ add_llvm_library(LLVMAlphaInfo
AlphaTargetInfo.cpp
)
-add_dependencies(LLVMAlphaInfo AlphaCodeGenTable_gen)
+add_llvm_library_dependencies(LLVMAlphaInfo
+ LLVMMC
+ LLVMSupport
+ LLVMTarget
+ )
+
+add_dependencies(LLVMAlphaInfo AlphaCommonTableGen)
diff --git a/lib/Target/Blackfin/BlackfinAsmPrinter.cpp b/lib/Target/Blackfin/BlackfinAsmPrinter.cpp
index 6ba258b..ed9844e 100644
--- a/lib/Target/Blackfin/BlackfinAsmPrinter.cpp
+++ b/lib/Target/Blackfin/BlackfinAsmPrinter.cpp
@@ -29,9 +29,9 @@
#include "llvm/Target/Mangler.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Target/TargetLoweringObjectFile.h"
-#include "llvm/Target/TargetRegistry.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
diff --git a/lib/Target/Blackfin/BlackfinFrameLowering.h b/lib/Target/Blackfin/BlackfinFrameLowering.h
index 726fa2c..169aa8e 100644
--- a/lib/Target/Blackfin/BlackfinFrameLowering.h
+++ b/lib/Target/Blackfin/BlackfinFrameLowering.h
@@ -11,8 +11,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef ALPHA_FRAMEINFO_H
-#define ALPHA_FRAMEINFO_H
+#ifndef BLACKFIN_FRAMEINFO_H
+#define BLACKFIN_FRAMEINFO_H
#include "Blackfin.h"
#include "BlackfinSubtarget.h"
diff --git a/lib/Target/Blackfin/BlackfinISelLowering.cpp b/lib/Target/Blackfin/BlackfinISelLowering.cpp
index d572832..7d4c45f 100644
--- a/lib/Target/Blackfin/BlackfinISelLowering.cpp
+++ b/lib/Target/Blackfin/BlackfinISelLowering.cpp
@@ -42,6 +42,7 @@ using namespace llvm;
BlackfinTargetLowering::BlackfinTargetLowering(TargetMachine &TM)
: TargetLowering(TM, new TargetLoweringObjectFileELF()) {
setBooleanContents(ZeroOrOneBooleanContent);
+ setBooleanVectorContents(ZeroOrOneBooleanContent); // FIXME: Is this correct?
setStackPointerRegisterToSaveRestore(BF::SP);
setIntDivIsCheap(false);
@@ -99,6 +100,7 @@ BlackfinTargetLowering::BlackfinTargetLowering(TargetMachine &TM)
// Blackfin has no intrinsics for these particular operations.
setOperationAction(ISD::MEMBARRIER, MVT::Other, Expand);
+ setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Expand);
setOperationAction(ISD::BSWAP, MVT::i32, Expand);
setOperationAction(ISD::SHL_PARTS, MVT::i32, Expand);
@@ -134,7 +136,7 @@ const char *BlackfinTargetLowering::getTargetNodeName(unsigned Opcode) const {
}
}
-MVT::SimpleValueType BlackfinTargetLowering::getSetCCResultType(EVT VT) const {
+EVT BlackfinTargetLowering::getSetCCResultType(EVT VT) const {
// SETCC always sets the CC register. Technically that is an i1 register, but
// that type is not legal, so we treat it as an i32 register.
return MVT::i32;
diff --git a/lib/Target/Blackfin/BlackfinISelLowering.h b/lib/Target/Blackfin/BlackfinISelLowering.h
index b65775b..90908ba 100644
--- a/lib/Target/Blackfin/BlackfinISelLowering.h
+++ b/lib/Target/Blackfin/BlackfinISelLowering.h
@@ -33,7 +33,7 @@ namespace llvm {
public:
BlackfinTargetLowering(TargetMachine &TM);
virtual MVT getShiftAmountTy(EVT LHSTy) const { return MVT::i16; }
- virtual MVT::SimpleValueType getSetCCResultType(EVT VT) const;
+ virtual EVT getSetCCResultType(EVT VT) const;
virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const;
virtual void ReplaceNodeResults(SDNode *N,
SmallVectorImpl<SDValue> &Results,
diff --git a/lib/Target/Blackfin/BlackfinInstrInfo.cpp b/lib/Target/Blackfin/BlackfinInstrInfo.cpp
index d190ae7..c06a919 100644
--- a/lib/Target/Blackfin/BlackfinInstrInfo.cpp
+++ b/lib/Target/Blackfin/BlackfinInstrInfo.cpp
@@ -16,10 +16,10 @@
#include "Blackfin.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/Target/TargetRegistry.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/TargetRegistry.h"
#define GET_INSTRINFO_CTOR
#include "BlackfinGenInstrInfo.inc"
diff --git a/lib/Target/Blackfin/BlackfinIntrinsicInfo.cpp b/lib/Target/Blackfin/BlackfinIntrinsicInfo.cpp
index ae8ee9e..7135676 100644
--- a/lib/Target/Blackfin/BlackfinIntrinsicInfo.cpp
+++ b/lib/Target/Blackfin/BlackfinIntrinsicInfo.cpp
@@ -34,7 +34,7 @@ namespace bfinIntrinsic {
}
-std::string BlackfinIntrinsicInfo::getName(unsigned IntrID, const Type **Tys,
+std::string BlackfinIntrinsicInfo::getName(unsigned IntrID, Type **Tys,
unsigned numTys) const {
static const char *const names[] = {
#define GET_INTRINSIC_NAME_TABLE
@@ -81,8 +81,8 @@ bool BlackfinIntrinsicInfo::isOverloaded(unsigned IntrID) const {
#include "BlackfinGenIntrinsics.inc"
#undef GET_INTRINSIC_ATTRIBUTES
-static const FunctionType *getType(LLVMContext &Context, unsigned id) {
- const Type *ResultTy = NULL;
+static FunctionType *getType(LLVMContext &Context, unsigned id) {
+ Type *ResultTy = NULL;
std::vector<Type*> ArgTys;
bool IsVarArg = false;
@@ -94,7 +94,7 @@ static const FunctionType *getType(LLVMContext &Context, unsigned id) {
}
Function *BlackfinIntrinsicInfo::getDeclaration(Module *M, unsigned IntrID,
- const Type **Tys,
+ Type **Tys,
unsigned numTy) const {
assert(!isOverloaded(IntrID) && "Blackfin intrinsics are not overloaded");
AttrListPtr AList = getAttributes((bfinIntrinsic::ID) IntrID);
diff --git a/lib/Target/Blackfin/BlackfinIntrinsicInfo.h b/lib/Target/Blackfin/BlackfinIntrinsicInfo.h
index 7c4b5a9..f05db5a 100644
--- a/lib/Target/Blackfin/BlackfinIntrinsicInfo.h
+++ b/lib/Target/Blackfin/BlackfinIntrinsicInfo.h
@@ -19,11 +19,11 @@ namespace llvm {
class BlackfinIntrinsicInfo : public TargetIntrinsicInfo {
public:
- std::string getName(unsigned IntrID, const Type **Tys = 0,
+ std::string getName(unsigned IntrID, Type **Tys = 0,
unsigned numTys = 0) const;
unsigned lookupName(const char *Name, unsigned Len) const;
bool isOverloaded(unsigned IID) const;
- Function *getDeclaration(Module *M, unsigned ID, const Type **Tys = 0,
+ Function *getDeclaration(Module *M, unsigned ID, Type **Tys = 0,
unsigned numTys = 0) const;
};
diff --git a/lib/Target/Blackfin/BlackfinRegisterInfo.cpp b/lib/Target/Blackfin/BlackfinRegisterInfo.cpp
index 3a7c104..0d415c5 100644
--- a/lib/Target/Blackfin/BlackfinRegisterInfo.cpp
+++ b/lib/Target/Blackfin/BlackfinRegisterInfo.cpp
@@ -20,7 +20,6 @@
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
-#include "llvm/CodeGen/MachineLocation.h"
#include "llvm/CodeGen/RegisterScavenging.h"
#include "llvm/Target/TargetFrameLowering.h"
#include "llvm/Target/TargetMachine.h"
@@ -37,7 +36,7 @@ using namespace llvm;
BlackfinRegisterInfo::BlackfinRegisterInfo(BlackfinSubtarget &st,
const TargetInstrInfo &tii)
- : BlackfinGenRegisterInfo(), Subtarget(st), TII(tii) {}
+ : BlackfinGenRegisterInfo(BF::RETS), Subtarget(st), TII(tii) {}
const unsigned*
BlackfinRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
@@ -327,10 +326,6 @@ BlackfinRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
}
}
-unsigned BlackfinRegisterInfo::getRARegister() const {
- return BF::RETS;
-}
-
unsigned
BlackfinRegisterInfo::getFrameRegister(const MachineFunction &MF) const {
const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
@@ -347,14 +342,3 @@ unsigned BlackfinRegisterInfo::getEHHandlerRegister() const {
llvm_unreachable("What is the exception handler register");
return 0;
}
-
-int BlackfinRegisterInfo::getDwarfRegNum(unsigned RegNum, bool isEH) const {
- llvm_unreachable("What is the dwarf register number");
- return -1;
-}
-
-int BlackfinRegisterInfo::getLLVMRegNum(unsigned DwarfRegNum,
- bool isEH) const {
- llvm_unreachable("What is the dwarf register number");
- return -1;
-}
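
These deletions pair with the BlackfinGenRegisterInfo(BF::RETS) constructor change earlier in the file: the return-address register and the DWARF register mappings now live in the TableGen'd MCRegisterInfo base class, so the hand-written overrides are redundant. A sketch of what callers get for free, assuming the generated base from this commit:

    BlackfinGenRegisterInfo RI(BF::RETS); // RA register recorded at construction
    unsigned RA = RI.getRARegister();     // answered by MCRegisterInfo now
    int Dwarf = RI.getDwarfRegNum(RA, /*isEH=*/false); // generated mapping table
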
diff --git a/lib/Target/Blackfin/BlackfinRegisterInfo.h b/lib/Target/Blackfin/BlackfinRegisterInfo.h
index 86f45c1..6ac22af 100644
--- a/lib/Target/Blackfin/BlackfinRegisterInfo.h
+++ b/lib/Target/Blackfin/BlackfinRegisterInfo.h
@@ -53,15 +53,11 @@ namespace llvm {
int SPAdj, RegScavenger *RS = NULL) const;
unsigned getFrameRegister(const MachineFunction &MF) const;
- unsigned getRARegister() const;
// Exception handling queries.
unsigned getEHExceptionRegister() const;
unsigned getEHHandlerRegister() const;
- int getDwarfRegNum(unsigned RegNum, bool isEH) const;
- int getLLVMRegNum(unsigned RegNum, bool isEH) const;
-
// Utility functions
void adjustRegister(MachineBasicBlock &MBB,
MachineBasicBlock::iterator I,
diff --git a/lib/Target/Blackfin/BlackfinSubtarget.cpp b/lib/Target/Blackfin/BlackfinSubtarget.cpp
index ec919cd..0bdce09 100644
--- a/lib/Target/Blackfin/BlackfinSubtarget.cpp
+++ b/lib/Target/Blackfin/BlackfinSubtarget.cpp
@@ -13,7 +13,7 @@
#include "BlackfinSubtarget.h"
#include "Blackfin.h"
-#include "llvm/Target/TargetRegistry.h"
+#include "llvm/Support/TargetRegistry.h"
#define GET_SUBTARGETINFO_TARGET_DESC
#define GET_SUBTARGETINFO_CTOR
diff --git a/lib/Target/Blackfin/BlackfinTargetMachine.cpp b/lib/Target/Blackfin/BlackfinTargetMachine.cpp
index a1c9f1c..a4ae46b 100644
--- a/lib/Target/Blackfin/BlackfinTargetMachine.cpp
+++ b/lib/Target/Blackfin/BlackfinTargetMachine.cpp
@@ -13,7 +13,7 @@
#include "BlackfinTargetMachine.h"
#include "Blackfin.h"
#include "llvm/PassManager.h"
-#include "llvm/Target/TargetRegistry.h"
+#include "llvm/Support/TargetRegistry.h"
using namespace llvm;
@@ -22,10 +22,12 @@ extern "C" void LLVMInitializeBlackfinTarget() {
}
BlackfinTargetMachine::BlackfinTargetMachine(const Target &T,
- const std::string &TT,
- const std::string &CPU,
- const std::string &FS)
- : LLVMTargetMachine(T, TT, CPU, FS),
+ StringRef TT,
+ StringRef CPU,
+ StringRef FS,
+ Reloc::Model RM,
+ CodeModel::Model CM)
+ : LLVMTargetMachine(T, TT, CPU, FS, RM, CM),
DataLayout("e-p:32:32-i64:32-f64:32-n32"),
Subtarget(TT, CPU, FS),
TLInfo(*this),
diff --git a/lib/Target/Blackfin/BlackfinTargetMachine.h b/lib/Target/Blackfin/BlackfinTargetMachine.h
index bd7dc84..c85337fe2 100644
--- a/lib/Target/Blackfin/BlackfinTargetMachine.h
+++ b/lib/Target/Blackfin/BlackfinTargetMachine.h
@@ -35,8 +35,9 @@ namespace llvm {
BlackfinFrameLowering FrameLowering;
BlackfinIntrinsicInfo IntrinsicInfo;
public:
- BlackfinTargetMachine(const Target &T, const std::string &TT,
- const std::string &CPU, const std::string &FS);
+ BlackfinTargetMachine(const Target &T, StringRef TT,
+ StringRef CPU, StringRef FS,
+ Reloc::Model RM, CodeModel::Model CM);
virtual const BlackfinInstrInfo *getInstrInfo() const { return &InstrInfo; }
virtual const TargetFrameLowering *getFrameLowering() const {
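
The constructor rewrite above is the 3.0 pattern applied to every target in this import: triples and feature strings travel as StringRef, and the relocation and code models are supplied at construction rather than through a later setRelocationModel() call. A hedged construction sketch; the triple string is illustrative:

    std::string Error;
    const Target *T = TargetRegistry::lookupTarget("bfin-unknown-elf", Error);
    if (T) {
      TargetMachine *TM =
          T->createTargetMachine("bfin-unknown-elf", /*CPU=*/"", /*Features=*/"",
                                 Reloc::Static, CodeModel::Small);
      // TM carries its reloc/code model from birth; nothing to set afterwards.
    }
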
diff --git a/lib/Target/Blackfin/CMakeLists.txt b/lib/Target/Blackfin/CMakeLists.txt
index d3f33a9..94d05fb 100644
--- a/lib/Target/Blackfin/CMakeLists.txt
+++ b/lib/Target/Blackfin/CMakeLists.txt
@@ -1,12 +1,13 @@
set(LLVM_TARGET_DEFINITIONS Blackfin.td)
-tablegen(BlackfinGenRegisterInfo.inc -gen-register-info)
-tablegen(BlackfinGenInstrInfo.inc -gen-instr-info)
-tablegen(BlackfinGenAsmWriter.inc -gen-asm-writer)
-tablegen(BlackfinGenDAGISel.inc -gen-dag-isel)
-tablegen(BlackfinGenSubtargetInfo.inc -gen-subtarget)
-tablegen(BlackfinGenCallingConv.inc -gen-callingconv)
-tablegen(BlackfinGenIntrinsics.inc -gen-tgt-intrinsic)
+llvm_tablegen(BlackfinGenRegisterInfo.inc -gen-register-info)
+llvm_tablegen(BlackfinGenInstrInfo.inc -gen-instr-info)
+llvm_tablegen(BlackfinGenAsmWriter.inc -gen-asm-writer)
+llvm_tablegen(BlackfinGenDAGISel.inc -gen-dag-isel)
+llvm_tablegen(BlackfinGenSubtargetInfo.inc -gen-subtarget)
+llvm_tablegen(BlackfinGenCallingConv.inc -gen-callingconv)
+llvm_tablegen(BlackfinGenIntrinsics.inc -gen-tgt-intrinsic)
+add_public_tablegen_target(BlackfinCommonTableGen)
add_llvm_target(BlackfinCodeGen
BlackfinAsmPrinter.cpp
@@ -21,5 +22,17 @@ add_llvm_target(BlackfinCodeGen
BlackfinSelectionDAGInfo.cpp
)
+add_llvm_library_dependencies(LLVMBlackfinCodeGen
+ LLVMAsmPrinter
+ LLVMBlackfinDesc
+ LLVMBlackfinInfo
+ LLVMCodeGen
+ LLVMCore
+ LLVMMC
+ LLVMSelectionDAG
+ LLVMSupport
+ LLVMTarget
+ )
+
add_subdirectory(TargetInfo)
add_subdirectory(MCTargetDesc)
diff --git a/lib/Target/Blackfin/MCTargetDesc/BlackfinMCTargetDesc.cpp b/lib/Target/Blackfin/MCTargetDesc/BlackfinMCTargetDesc.cpp
index 0fa1471..272e3c2 100644
--- a/lib/Target/Blackfin/MCTargetDesc/BlackfinMCTargetDesc.cpp
+++ b/lib/Target/Blackfin/MCTargetDesc/BlackfinMCTargetDesc.cpp
@@ -13,10 +13,11 @@
#include "BlackfinMCTargetDesc.h"
#include "BlackfinMCAsmInfo.h"
+#include "llvm/MC/MCCodeGenInfo.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSubtargetInfo.h"
-#include "llvm/Target/TargetRegistry.h"
+#include "llvm/Support/TargetRegistry.h"
#define GET_INSTRINFO_MC_DESC
#include "BlackfinGenInstrInfo.inc"
@@ -36,12 +37,12 @@ static MCInstrInfo *createBlackfinMCInstrInfo() {
return X;
}
-extern "C" void LLVMInitializeBlackfinMCInstrInfo() {
- TargetRegistry::RegisterMCInstrInfo(TheBlackfinTarget,
- createBlackfinMCInstrInfo);
+static MCRegisterInfo *createBlackfinMCRegisterInfo(StringRef TT) {
+ MCRegisterInfo *X = new MCRegisterInfo();
+ InitBlackfinMCRegisterInfo(X, BF::RETS);
+ return X;
}
-
static MCSubtargetInfo *createBlackfinMCSubtargetInfo(StringRef TT,
StringRef CPU,
StringRef FS) {
@@ -50,11 +51,31 @@ static MCSubtargetInfo *createBlackfinMCSubtargetInfo(StringRef TT,
return X;
}
-extern "C" void LLVMInitializeBlackfinMCSubtargetInfo() {
- TargetRegistry::RegisterMCSubtargetInfo(TheBlackfinTarget,
- createBlackfinMCSubtargetInfo);
+static MCCodeGenInfo *createBlackfinMCCodeGenInfo(StringRef TT, Reloc::Model RM,
+ CodeModel::Model CM) {
+ MCCodeGenInfo *X = new MCCodeGenInfo();
+ X->InitMCCodeGenInfo(RM, CM);
+ return X;
}
-extern "C" void LLVMInitializeBlackfinMCAsmInfo() {
+// Force static initialization.
+extern "C" void LLVMInitializeBlackfinTargetMC() {
+ // Register the MC asm info.
RegisterMCAsmInfo<BlackfinMCAsmInfo> X(TheBlackfinTarget);
+
+ // Register the MC codegen info.
+ TargetRegistry::RegisterMCCodeGenInfo(TheBlackfinTarget,
+ createBlackfinMCCodeGenInfo);
+
+ // Register the MC instruction info.
+ TargetRegistry::RegisterMCInstrInfo(TheBlackfinTarget,
+ createBlackfinMCInstrInfo);
+
+ // Register the MC register info.
+ TargetRegistry::RegisterMCRegInfo(TheBlackfinTarget,
+ createBlackfinMCRegisterInfo);
+
+ // Register the MC subtarget info.
+ TargetRegistry::RegisterMCSubtargetInfo(TheBlackfinTarget,
+ createBlackfinMCSubtargetInfo);
}
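
This hunk is the recurring MC-registration cleanup of the 3.0 cycle: the separate LLVMInitialize*MCInstrInfo, *MCSubtargetInfo, and *MCAsmInfo entry points collapse into a single LLVMInitialize<Target>TargetMC that registers every MC-level factory. Client code now makes one call per target:

    extern "C" void LLVMInitializeBlackfinTargetMC();

    void initBlackfinMC() {
      // One call registers asm info, codegen info, instruction info,
      // register info, and subtarget info (see the body above).
      LLVMInitializeBlackfinTargetMC();
    }
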
diff --git a/lib/Target/Blackfin/MCTargetDesc/CMakeLists.txt b/lib/Target/Blackfin/MCTargetDesc/CMakeLists.txt
index 8cd924f..73315d8 100644
--- a/lib/Target/Blackfin/MCTargetDesc/CMakeLists.txt
+++ b/lib/Target/Blackfin/MCTargetDesc/CMakeLists.txt
@@ -2,3 +2,10 @@ add_llvm_library(LLVMBlackfinDesc
BlackfinMCTargetDesc.cpp
BlackfinMCAsmInfo.cpp
)
+
+add_llvm_library_dependencies(LLVMBlackfinDesc
+ LLVMBlackfinInfo
+ LLVMMC
+ )
+
+add_dependencies(LLVMBlackfinDesc BlackfinCommonTableGen)
diff --git a/lib/Target/Blackfin/TargetInfo/BlackfinTargetInfo.cpp b/lib/Target/Blackfin/TargetInfo/BlackfinTargetInfo.cpp
index 402e0af..57f1d3e 100644
--- a/lib/Target/Blackfin/TargetInfo/BlackfinTargetInfo.cpp
+++ b/lib/Target/Blackfin/TargetInfo/BlackfinTargetInfo.cpp
@@ -9,7 +9,7 @@
#include "Blackfin.h"
#include "llvm/Module.h"
-#include "llvm/Target/TargetRegistry.h"
+#include "llvm/Support/TargetRegistry.h"
using namespace llvm;
diff --git a/lib/Target/Blackfin/TargetInfo/CMakeLists.txt b/lib/Target/Blackfin/TargetInfo/CMakeLists.txt
index 5ca8060..771f092 100644
--- a/lib/Target/Blackfin/TargetInfo/CMakeLists.txt
+++ b/lib/Target/Blackfin/TargetInfo/CMakeLists.txt
@@ -4,4 +4,10 @@ add_llvm_library(LLVMBlackfinInfo
BlackfinTargetInfo.cpp
)
-add_dependencies(LLVMBlackfinInfo BlackfinCodeGenTable_gen)
+add_llvm_library_dependencies(LLVMBlackfinInfo
+ LLVMMC
+ LLVMSupport
+ LLVMTarget
+ )
+
+add_dependencies(LLVMBlackfinInfo BlackfinCommonTableGen)
diff --git a/lib/Target/CBackend/CBackend.cpp b/lib/Target/CBackend/CBackend.cpp
index 415beb1..69d8c46 100644
--- a/lib/Target/CBackend/CBackend.cpp
+++ b/lib/Target/CBackend/CBackend.cpp
@@ -37,10 +37,11 @@
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/MC/MCObjectFileInfo.h"
+#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/Target/TargetData.h"
-#include "llvm/Target/TargetRegistry.h"
#include "llvm/Support/CallSite.h"
#include "llvm/Support/CFG.h"
#include "llvm/Support/ErrorHandling.h"
@@ -48,6 +49,7 @@
#include "llvm/Support/GetElementPtrTypeIterator.h"
#include "llvm/Support/InstVisitor.h"
#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/Host.h"
#include "llvm/Config/config.h"
#include <algorithm>
@@ -62,12 +64,6 @@ extern "C" void LLVMInitializeCBackendTarget() {
RegisterTargetMachine<CTargetMachine> X(TheCBackendTarget);
}
-extern "C" void LLVMInitializeCBackendMCAsmInfo() {}
-
-extern "C" void LLVMInitializeCBackendMCInstrInfo() {}
-
-extern "C" void LLVMInitializeCBackendMCSubtargetInfo() {}
-
namespace {
class CBEMCAsmInfo : public MCAsmInfo {
public:
@@ -86,6 +82,8 @@ namespace {
LoopInfo *LI;
const Module *TheModule;
const MCAsmInfo* TAsm;
+ const MCRegisterInfo *MRI;
+ const MCObjectFileInfo *MOFI;
MCContext *TCtx;
const TargetData* TD;
@@ -99,14 +97,14 @@ namespace {
/// UnnamedStructIDs - This contains a unique ID for each struct that is
/// either anonymous or has no name.
- DenseMap<const StructType*, unsigned> UnnamedStructIDs;
+ DenseMap<StructType*, unsigned> UnnamedStructIDs;
public:
static char ID;
explicit CWriter(formatted_raw_ostream &o)
: FunctionPass(ID), Out(o), IL(0), Mang(0), LI(0),
- TheModule(0), TAsm(0), TCtx(0), TD(0), OpaqueCounter(0),
- NextAnonValueNumber(0) {
+ TheModule(0), TAsm(0), MRI(0), MOFI(0), TCtx(0), TD(0),
+ OpaqueCounter(0), NextAnonValueNumber(0) {
initializeLoopInfoPass(*PassRegistry::getPassRegistry());
FPCounter = 0;
}
@@ -145,6 +143,8 @@ namespace {
delete Mang;
delete TCtx;
delete TAsm;
+ delete MRI;
+ delete MOFI;
FPConstantMap.clear();
ByValParams.clear();
intrinsicPrototypesAlreadyGenerated.clear();
@@ -152,20 +152,20 @@ namespace {
return false;
}
- raw_ostream &printType(raw_ostream &Out, const Type *Ty,
+ raw_ostream &printType(raw_ostream &Out, Type *Ty,
bool isSigned = false,
const std::string &VariableName = "",
bool IgnoreName = false,
const AttrListPtr &PAL = AttrListPtr());
- raw_ostream &printSimpleType(raw_ostream &Out, const Type *Ty,
+ raw_ostream &printSimpleType(raw_ostream &Out, Type *Ty,
bool isSigned,
const std::string &NameSoFar = "");
void printStructReturnPointerFunctionType(raw_ostream &Out,
const AttrListPtr &PAL,
- const PointerType *Ty);
+ PointerType *Ty);
- std::string getStructName(const StructType *ST);
+ std::string getStructName(StructType *ST);
/// writeOperandDeref - Print the result of dereferencing the specified
/// operand with '*'. This is equivalent to printing '*' then using
@@ -188,7 +188,7 @@ namespace {
void writeOperandWithCast(Value* Operand, const ICmpInst &I);
bool writeInstructionCast(const Instruction &I);
- void writeMemoryAccess(Value *Operand, const Type *OperandType,
+ void writeMemoryAccess(Value *Operand, Type *OperandType,
bool IsVolatile, unsigned Alignment);
private :
@@ -200,7 +200,7 @@ namespace {
void printIntrinsicDefinition(const Function &F, raw_ostream &Out);
void printModuleTypes();
- void printContainedStructs(const Type *Ty, SmallPtrSet<const Type *, 16> &);
+ void printContainedStructs(Type *Ty, SmallPtrSet<Type *, 16> &);
void printFloatingPointConstants(Function &F);
void printFloatingPointConstants(const Constant *C);
void printFunctionSignature(const Function *F, bool Prototype);
@@ -209,7 +209,7 @@ namespace {
void printBasicBlock(BasicBlock *BB);
void printLoop(Loop *L);
- void printCast(unsigned opcode, const Type *SrcTy, const Type *DstTy);
+ void printCast(unsigned opcode, Type *SrcTy, Type *DstTy);
void printConstant(Constant *CPV, bool Static);
void printConstantWithCast(Constant *CPV, unsigned Opcode);
bool printConstExprCast(const ConstantExpr *CE, bool Static);
@@ -288,10 +288,12 @@ namespace {
void visitInvokeInst(InvokeInst &I) {
llvm_unreachable("Lowerinvoke pass didn't work!");
}
-
void visitUnwindInst(UnwindInst &I) {
llvm_unreachable("Lowerinvoke pass didn't work!");
}
+ void visitResumeInst(ResumeInst &I) {
+ llvm_unreachable("DwarfEHPrepare pass didn't work!");
+ }
void visitUnreachableInst(UnreachableInst &I);
void visitPHINode(PHINode &I);
@@ -360,8 +362,8 @@ static std::string CBEMangle(const std::string &S) {
return Result;
}
-std::string CWriter::getStructName(const StructType *ST) {
- if (!ST->isAnonymous() && !ST->getName().empty())
+std::string CWriter::getStructName(StructType *ST) {
+ if (!ST->isLiteral() && !ST->getName().empty())
return CBEMangle("l_"+ST->getName().str());
return "l_unnamed_" + utostr(UnnamedStructIDs[ST]);
@@ -373,20 +375,20 @@ std::string CWriter::getStructName(const StructType *ST) {
/// print it as "Struct (*)(...)", for struct return functions.
void CWriter::printStructReturnPointerFunctionType(raw_ostream &Out,
const AttrListPtr &PAL,
- const PointerType *TheTy) {
- const FunctionType *FTy = cast<FunctionType>(TheTy->getElementType());
+ PointerType *TheTy) {
+ FunctionType *FTy = cast<FunctionType>(TheTy->getElementType());
std::string tstr;
raw_string_ostream FunctionInnards(tstr);
FunctionInnards << " (*) (";
bool PrintedType = false;
FunctionType::param_iterator I = FTy->param_begin(), E = FTy->param_end();
- const Type *RetTy = cast<PointerType>(*I)->getElementType();
+ Type *RetTy = cast<PointerType>(*I)->getElementType();
unsigned Idx = 1;
for (++I, ++Idx; I != E; ++I, ++Idx) {
if (PrintedType)
FunctionInnards << ", ";
- const Type *ArgTy = *I;
+ Type *ArgTy = *I;
if (PAL.paramHasAttr(Idx, Attribute::ByVal)) {
assert(ArgTy->isPointerTy());
ArgTy = cast<PointerType>(ArgTy)->getElementType();
@@ -408,7 +410,7 @@ void CWriter::printStructReturnPointerFunctionType(raw_ostream &Out,
}
raw_ostream &
-CWriter::printSimpleType(raw_ostream &Out, const Type *Ty, bool isSigned,
+CWriter::printSimpleType(raw_ostream &Out, Type *Ty, bool isSigned,
const std::string &NameSoFar) {
assert((Ty->isPrimitiveType() || Ty->isIntegerTy() || Ty->isVectorTy()) &&
"Invalid type for printSimpleType");
@@ -444,7 +446,7 @@ CWriter::printSimpleType(raw_ostream &Out, const Type *Ty, bool isSigned,
" __attribute__((vector_size(64))) " + NameSoFar);
case Type::VectorTyID: {
- const VectorType *VTy = cast<VectorType>(Ty);
+ VectorType *VTy = cast<VectorType>(Ty);
return printSimpleType(Out, VTy->getElementType(), isSigned,
" __attribute__((vector_size(" +
utostr(TD->getTypeAllocSize(VTy)) + " ))) " + NameSoFar);
@@ -461,7 +463,7 @@ CWriter::printSimpleType(raw_ostream &Out, const Type *Ty, bool isSigned,
// Pass the Type* and the variable name and this prints out the variable
// declaration.
//
-raw_ostream &CWriter::printType(raw_ostream &Out, const Type *Ty,
+raw_ostream &CWriter::printType(raw_ostream &Out, Type *Ty,
bool isSigned, const std::string &NameSoFar,
bool IgnoreName, const AttrListPtr &PAL) {
if (Ty->isPrimitiveType() || Ty->isIntegerTy() || Ty->isVectorTy()) {
@@ -471,14 +473,14 @@ raw_ostream &CWriter::printType(raw_ostream &Out, const Type *Ty,
switch (Ty->getTypeID()) {
case Type::FunctionTyID: {
- const FunctionType *FTy = cast<FunctionType>(Ty);
+ FunctionType *FTy = cast<FunctionType>(Ty);
std::string tstr;
raw_string_ostream FunctionInnards(tstr);
FunctionInnards << " (" << NameSoFar << ") (";
unsigned Idx = 1;
for (FunctionType::param_iterator I = FTy->param_begin(),
E = FTy->param_end(); I != E; ++I) {
- const Type *ArgTy = *I;
+ Type *ArgTy = *I;
if (PAL.paramHasAttr(Idx, Attribute::ByVal)) {
assert(ArgTy->isPointerTy());
ArgTy = cast<PointerType>(ArgTy)->getElementType();
@@ -502,7 +504,7 @@ raw_ostream &CWriter::printType(raw_ostream &Out, const Type *Ty,
return Out;
}
case Type::StructTyID: {
- const StructType *STy = cast<StructType>(Ty);
+ StructType *STy = cast<StructType>(Ty);
// Check to see if the type is named.
if (!IgnoreName)
@@ -523,7 +525,7 @@ raw_ostream &CWriter::printType(raw_ostream &Out, const Type *Ty,
}
case Type::PointerTyID: {
- const PointerType *PTy = cast<PointerType>(Ty);
+ PointerType *PTy = cast<PointerType>(Ty);
std::string ptrName = "*" + NameSoFar;
if (PTy->getElementType()->isArrayTy() ||
@@ -537,7 +539,7 @@ raw_ostream &CWriter::printType(raw_ostream &Out, const Type *Ty,
}
case Type::ArrayTyID: {
- const ArrayType *ATy = cast<ArrayType>(Ty);
+ ArrayType *ATy = cast<ArrayType>(Ty);
unsigned NumElements = ATy->getNumElements();
if (NumElements == 0) NumElements = 1;
// Arrays are wrapped in structs to allow them to have normal
@@ -560,7 +562,7 @@ void CWriter::printConstantArray(ConstantArray *CPA, bool Static) {
// As a special case, print the array as a string if it is an array of
// ubytes or an array of sbytes with positive values.
//
- const Type *ETy = CPA->getType()->getElementType();
+ Type *ETy = CPA->getType()->getElementType();
  bool isString = (ETy == Type::getInt8Ty(CPA->getContext()));
@@ -682,7 +684,7 @@ static bool isFPCSafeToPrint(const ConstantFP *CFP) {
/// Print out the casting for a cast operation. This does the double casting
/// necessary for conversion to the destination type, if necessary.
/// @brief Print a cast
-void CWriter::printCast(unsigned opc, const Type *SrcTy, const Type *DstTy) {
+void CWriter::printCast(unsigned opc, Type *SrcTy, Type *DstTy) {
// Print the destination type cast
switch (opc) {
case Instruction::UIToFP:
@@ -917,7 +919,7 @@ void CWriter::printConstant(Constant *CPV, bool Static) {
}
if (ConstantInt *CI = dyn_cast<ConstantInt>(CPV)) {
- const Type* Ty = CI->getType();
+ Type* Ty = CI->getType();
if (Ty == Type::getInt1Ty(CPV->getContext()))
Out << (CI->getZExtValue() ? '1' : '0');
else if (Ty == Type::getInt32Ty(CPV->getContext()))
@@ -1027,7 +1029,7 @@ void CWriter::printConstant(Constant *CPV, bool Static) {
printConstantArray(CA, Static);
} else {
assert(isa<ConstantAggregateZero>(CPV) || isa<UndefValue>(CPV));
- const ArrayType *AT = cast<ArrayType>(CPV->getType());
+ ArrayType *AT = cast<ArrayType>(CPV->getType());
Out << '{';
if (AT->getNumElements()) {
Out << ' ';
@@ -1054,7 +1056,7 @@ void CWriter::printConstant(Constant *CPV, bool Static) {
printConstantVector(CV, Static);
} else {
assert(isa<ConstantAggregateZero>(CPV) || isa<UndefValue>(CPV));
- const VectorType *VT = cast<VectorType>(CPV->getType());
+ VectorType *VT = cast<VectorType>(CPV->getType());
Out << "{ ";
Constant *CZ = Constant::getNullValue(VT->getElementType());
printConstant(CZ, Static);
@@ -1074,7 +1076,7 @@ void CWriter::printConstant(Constant *CPV, bool Static) {
Out << ")";
}
if (isa<ConstantAggregateZero>(CPV) || isa<UndefValue>(CPV)) {
- const StructType *ST = cast<StructType>(CPV->getType());
+ StructType *ST = cast<StructType>(CPV->getType());
Out << '{';
if (ST->getNumElements()) {
Out << ' ';
@@ -1123,7 +1125,7 @@ void CWriter::printConstant(Constant *CPV, bool Static) {
// care of detecting that case and printing the cast for the ConstantExpr.
bool CWriter::printConstExprCast(const ConstantExpr* CE, bool Static) {
bool NeedsExplicitCast = false;
- const Type *Ty = CE->getOperand(0)->getType();
+ Type *Ty = CE->getOperand(0)->getType();
bool TypeIsSigned = false;
switch (CE->getOpcode()) {
case Instruction::Add:
@@ -1175,7 +1177,7 @@ bool CWriter::printConstExprCast(const ConstantExpr* CE, bool Static) {
void CWriter::printConstantWithCast(Constant* CPV, unsigned Opcode) {
// Extract the operand's type, we'll need it.
- const Type* OpTy = CPV->getType();
+ Type* OpTy = CPV->getType();
// Indicate whether to do the cast or not.
bool shouldCast = false;
@@ -1267,7 +1269,7 @@ std::string CWriter::GetValueName(const Value *Operand) {
void CWriter::writeInstComputationInline(Instruction &I) {
// We can't currently support integer types other than 1, 8, 16, 32, 64.
// Validate this.
- const Type *Ty = I.getType();
+ Type *Ty = I.getType();
if (Ty->isIntegerTy() && (Ty!=Type::getInt1Ty(I.getContext()) &&
Ty!=Type::getInt8Ty(I.getContext()) &&
Ty!=Type::getInt16Ty(I.getContext()) &&
@@ -1330,7 +1332,7 @@ void CWriter::writeOperand(Value *Operand, bool Static) {
// This function takes care of detecting that case and printing the cast
// for the Instruction.
bool CWriter::writeInstructionCast(const Instruction &I) {
- const Type *Ty = I.getOperand(0)->getType();
+ Type *Ty = I.getOperand(0)->getType();
switch (I.getOpcode()) {
case Instruction::Add:
case Instruction::Sub:
@@ -1362,7 +1364,7 @@ bool CWriter::writeInstructionCast(const Instruction &I) {
void CWriter::writeOperandWithCast(Value* Operand, unsigned Opcode) {
// Extract the operand's type, we'll need it.
- const Type* OpTy = Operand->getType();
+ Type* OpTy = Operand->getType();
// Indicate whether to do the cast or not.
bool shouldCast = false;
@@ -1430,7 +1432,7 @@ void CWriter::writeOperandWithCast(Value* Operand, const ICmpInst &Cmp) {
bool castIsSigned = Cmp.isSigned();
// If the operand was a pointer, convert to a large integer type.
- const Type* OpTy = Operand->getType();
+ Type* OpTy = Operand->getType();
if (OpTy->isPointerTy())
OpTy = TD->getIntPtrType(Operand->getContext());
@@ -1665,7 +1667,8 @@ bool CWriter::doInitialization(Module &M) {
TAsm = Match->createMCAsmInfo(Triple);
#endif
TAsm = new CBEMCAsmInfo();
- TCtx = new MCContext(*TAsm, NULL);
+ MRI = new MCRegisterInfo();
+ TCtx = new MCContext(*TAsm, *MRI, NULL);
Mang = new Mangler(*TCtx, *TD);
// Keep track of which functions are static ctors/dtors so they can have
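
The extra constructor argument reflects MCContext growing a mandatory MCRegisterInfo parameter in this release (plus an optional MCObjectFileInfo), which is why CWriter now owns and deletes an MRI/MOFI pair. The construction order the hunk implies, in isolation:

    MCAsmInfo *TAsm = new CBEMCAsmInfo();       // defined earlier in this file
    MCRegisterInfo *MRI = new MCRegisterInfo(); // an empty table suffices here
    MCContext *TCtx = new MCContext(*TAsm, *MRI, /*MOFI=*/0);
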
@@ -2049,7 +2052,7 @@ void CWriter::printModuleTypes() {
for (unsigned i = 0, e = StructTypes.size(); i != e; ++i) {
StructType *ST = StructTypes[i];
- if (ST->isAnonymous() || ST->getName().empty())
+ if (ST->isLiteral() || ST->getName().empty())
UnnamedStructIDs[ST] = NextTypeID++;
std::string Name = getStructName(ST);
@@ -2060,7 +2063,7 @@ void CWriter::printModuleTypes() {
Out << '\n';
// Keep track of which structures have been printed so far.
- SmallPtrSet<const Type *, 16> StructPrinted;
+ SmallPtrSet<Type *, 16> StructPrinted;
// Loop over all structures then push them into the stack so they are
// printed in the correct order.
@@ -2077,8 +2080,8 @@ void CWriter::printModuleTypes() {
//
// TODO: Make this work properly with vector types
//
-void CWriter::printContainedStructs(const Type *Ty,
- SmallPtrSet<const Type *, 16> &StructPrinted) {
+void CWriter::printContainedStructs(Type *Ty,
+ SmallPtrSet<Type *, 16> &StructPrinted) {
// Don't walk through pointers.
if (Ty->isPointerTy() || Ty->isPrimitiveType() || Ty->isIntegerTy())
return;
@@ -2088,7 +2091,7 @@ void CWriter::printContainedStructs(const Type *Ty,
E = Ty->subtype_end(); I != E; ++I)
printContainedStructs(*I, StructPrinted);
- if (const StructType *ST = dyn_cast<StructType>(Ty)) {
+ if (StructType *ST = dyn_cast<StructType>(Ty)) {
// Check to see if we have already printed this struct.
if (!StructPrinted.insert(Ty)) return;
@@ -2120,7 +2123,7 @@ void CWriter::printFunctionSignature(const Function *F, bool Prototype) {
}
// Loop over the arguments, printing them...
- const FunctionType *FT = cast<FunctionType>(F->getFunctionType());
+ FunctionType *FT = cast<FunctionType>(F->getFunctionType());
const AttrListPtr &PAL = F->getAttributes();
std::string tstr;
@@ -2150,7 +2153,7 @@ void CWriter::printFunctionSignature(const Function *F, bool Prototype) {
ArgName = GetValueName(I);
else
ArgName = "";
- const Type *ArgTy = I->getType();
+ Type *ArgTy = I->getType();
if (PAL.paramHasAttr(Idx, Attribute::ByVal)) {
ArgTy = cast<PointerType>(ArgTy)->getElementType();
ByValParams.insert(I);
@@ -2177,7 +2180,7 @@ void CWriter::printFunctionSignature(const Function *F, bool Prototype) {
for (; I != E; ++I) {
if (PrintedArg) FunctionInnards << ", ";
- const Type *ArgTy = *I;
+ Type *ArgTy = *I;
if (PAL.paramHasAttr(Idx, Attribute::ByVal)) {
assert(ArgTy->isPointerTy());
ArgTy = cast<PointerType>(ArgTy)->getElementType();
@@ -2205,7 +2208,7 @@ void CWriter::printFunctionSignature(const Function *F, bool Prototype) {
FunctionInnards << ')';
  // Get the return type for the function.
- const Type *RetTy;
+ Type *RetTy;
if (!isStructReturn)
RetTy = F->getReturnType();
else {
@@ -2222,8 +2225,8 @@ void CWriter::printFunctionSignature(const Function *F, bool Prototype) {
static inline bool isFPIntBitCast(const Instruction &I) {
if (!isa<BitCastInst>(I))
return false;
- const Type *SrcTy = I.getOperand(0)->getType();
- const Type *DstTy = I.getType();
+ Type *SrcTy = I.getOperand(0)->getType();
+ Type *DstTy = I.getType();
return (SrcTy->isFloatingPointTy() && DstTy->isIntegerTy()) ||
(DstTy->isFloatingPointTy() && SrcTy->isIntegerTy());
}
@@ -2237,7 +2240,7 @@ void CWriter::printFunction(Function &F) {
// If this is a struct return function, handle the result with magic.
if (isStructReturn) {
- const Type *StructTy =
+ Type *StructTy =
cast<PointerType>(F.arg_begin()->getType())->getElementType();
Out << " ";
printType(Out, StructTy, false, "StructReturn");
@@ -2380,22 +2383,29 @@ void CWriter::visitReturnInst(ReturnInst &I) {
void CWriter::visitSwitchInst(SwitchInst &SI) {
+ Value* Cond = SI.getCondition();
+
Out << " switch (";
- writeOperand(SI.getOperand(0));
+ writeOperand(Cond);
Out << ") {\n default:\n";
printPHICopiesForSuccessor (SI.getParent(), SI.getDefaultDest(), 2);
printBranchToBlock(SI.getParent(), SI.getDefaultDest(), 2);
Out << ";\n";
- for (unsigned i = 2, e = SI.getNumOperands(); i != e; i += 2) {
+
+ unsigned NumCases = SI.getNumCases();
+ // Skip the first item since that's the default case.
+ for (unsigned i = 1; i < NumCases; ++i) {
+ ConstantInt* CaseVal = SI.getCaseValue(i);
+ BasicBlock* Succ = SI.getSuccessor(i);
Out << " case ";
- writeOperand(SI.getOperand(i));
+ writeOperand(CaseVal);
Out << ":\n";
- BasicBlock *Succ = cast<BasicBlock>(SI.getOperand(i+1));
printPHICopiesForSuccessor (SI.getParent(), Succ, 2);
printBranchToBlock(SI.getParent(), Succ, 2);
if (Function::iterator(Succ) == llvm::next(Function::iterator(SI.getParent())))
Out << " break;\n";
}
+
Out << " }\n";
}
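
The switch-lowering rewrite tracks a SwitchInst accessor change: cases are no longer addressed as raw operand pairs but through getNumCases(), getCaseValue(), and getSuccessor(), with slot 0 reserved for the default destination. The iteration idiom, extracted:

    // Slot 0 is the default, so explicit cases start at index 1.
    for (unsigned i = 1, e = SI.getNumCases(); i != e; ++i) {
      ConstantInt *CaseVal = SI.getCaseValue(i);
      BasicBlock *Succ = SI.getSuccessor(i);
      // ... emit "case <CaseVal>: goto <Succ>;" as visitSwitchInst does.
    }
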
@@ -2656,7 +2666,7 @@ void CWriter::visitFCmpInst(FCmpInst &I) {
Out << ")";
}
-static const char * getFloatBitCastField(const Type *Ty) {
+static const char * getFloatBitCastField(Type *Ty) {
switch (Ty->getTypeID()) {
default: llvm_unreachable("Invalid Type");
case Type::FloatTyID: return "Float";
@@ -2672,8 +2682,8 @@ static const char * getFloatBitCastField(const Type *Ty) {
}
void CWriter::visitCastInst(CastInst &I) {
- const Type *DstTy = I.getType();
- const Type *SrcTy = I.getOperand(0)->getType();
+ Type *DstTy = I.getType();
+ Type *SrcTy = I.getOperand(0)->getType();
if (isFPIntBitCast(I)) {
Out << '(';
// These int<->float and long<->double casts need to be handled specially
@@ -2719,7 +2729,7 @@ void CWriter::visitSelectInst(SelectInst &I) {
// Returns the macro name or value of the max or min of an integer type
// (as defined in limits.h).
-static void printLimitValue(const IntegerType &Ty, bool isSigned, bool isMax,
+static void printLimitValue(IntegerType &Ty, bool isSigned, bool isMax,
raw_ostream &Out) {
const char* type;
const char* sprefix = "";
@@ -2745,16 +2755,16 @@ static void printLimitValue(const IntegerType &Ty, bool isSigned, bool isMax,
}
#ifndef NDEBUG
-static bool isSupportedIntegerSize(const IntegerType &T) {
+static bool isSupportedIntegerSize(IntegerType &T) {
return T.getBitWidth() == 8 || T.getBitWidth() == 16 ||
T.getBitWidth() == 32 || T.getBitWidth() == 64;
}
#endif
void CWriter::printIntrinsicDefinition(const Function &F, raw_ostream &Out) {
- const FunctionType *funT = F.getFunctionType();
- const Type *retT = F.getReturnType();
- const IntegerType *elemT = cast<IntegerType>(funT->getParamType(1));
+ FunctionType *funT = F.getFunctionType();
+ Type *retT = F.getReturnType();
+ IntegerType *elemT = cast<IntegerType>(funT->getParamType(1));
assert(isSupportedIntegerSize(*elemT) &&
"CBackend does not support arbitrary size integers.");
@@ -2829,7 +2839,6 @@ void CWriter::lowerIntrinsics(Function &F) {
if (Function *F = CI->getCalledFunction())
switch (F->getIntrinsicID()) {
case Intrinsic::not_intrinsic:
- case Intrinsic::memory_barrier:
case Intrinsic::vastart:
case Intrinsic::vacopy:
case Intrinsic::vaend:
@@ -2908,8 +2917,8 @@ void CWriter::visitCallInst(CallInst &I) {
Value *Callee = I.getCalledValue();
- const PointerType *PTy = cast<PointerType>(Callee->getType());
- const FunctionType *FTy = cast<FunctionType>(PTy->getElementType());
+ PointerType *PTy = cast<PointerType>(Callee->getType());
+ FunctionType *FTy = cast<FunctionType>(PTy->getElementType());
// If this is a call to a struct-return function, assign to the first
// parameter instead of passing it to the call.
@@ -3020,9 +3029,6 @@ bool CWriter::visitBuiltinCall(CallInst &I, Intrinsic::ID ID,
WroteCallee = true;
return false;
}
- case Intrinsic::memory_barrier:
- Out << "__sync_synchronize()";
- return true;
case Intrinsic::vastart:
Out << "0; ";
@@ -3217,7 +3223,7 @@ void CWriter::visitInlineAsm(CallInst &CI) {
std::vector<std::pair<Value*, int> > ResultVals;
if (CI.getType() == Type::getVoidTy(CI.getContext()))
;
- else if (const StructType *ST = dyn_cast<StructType>(CI.getType())) {
+ else if (StructType *ST = dyn_cast<StructType>(CI.getType())) {
for (unsigned i = 0, e = ST->getNumElements(); i != e; ++i)
ResultVals.push_back(std::make_pair(&CI, (int)i));
} else {
@@ -3348,7 +3354,7 @@ void CWriter::printGEPExpression(Value *Ptr, gep_type_iterator I,
// Find out if the last index is into a vector. If so, we have to print this
// specially. Since vectors can't have elements of indexable type, only the
// last index could possibly be of a vector element.
- const VectorType *LastIndexIsVector = 0;
+ VectorType *LastIndexIsVector = 0;
{
for (gep_type_iterator TmpI = I; TmpI != E; ++TmpI)
LastIndexIsVector = dyn_cast<VectorType>(*TmpI);
@@ -3421,7 +3427,7 @@ void CWriter::printGEPExpression(Value *Ptr, gep_type_iterator I,
Out << ")";
}
-void CWriter::writeMemoryAccess(Value *Operand, const Type *OperandType,
+void CWriter::writeMemoryAccess(Value *Operand, Type *OperandType,
bool IsVolatile, unsigned Alignment) {
bool IsUnaligned = Alignment &&
@@ -3463,7 +3469,7 @@ void CWriter::visitStoreInst(StoreInst &I) {
Out << " = ";
Value *Operand = I.getOperand(0);
Constant *BitMask = 0;
- if (const IntegerType* ITy = dyn_cast<IntegerType>(Operand->getType()))
+ if (IntegerType* ITy = dyn_cast<IntegerType>(Operand->getType()))
if (!ITy->isPowerOf2ByteWidth())
// We have a bit width that doesn't match an even power-of-2 byte
// size. Consequently we must & the value with the type's bit mask
@@ -3492,7 +3498,7 @@ void CWriter::visitVAArgInst(VAArgInst &I) {
}
void CWriter::visitInsertElementInst(InsertElementInst &I) {
- const Type *EltTy = I.getType()->getElementType();
+ Type *EltTy = I.getType()->getElementType();
writeOperand(I.getOperand(0));
Out << ";\n ";
Out << "((";
@@ -3507,7 +3513,7 @@ void CWriter::visitInsertElementInst(InsertElementInst &I) {
void CWriter::visitExtractElementInst(ExtractElementInst &I) {
// We know that our operand is not inlined.
Out << "((";
- const Type *EltTy =
+ Type *EltTy =
cast<VectorType>(I.getOperand(0)->getType())->getElementType();
printType(Out, PointerType::getUnqual(EltTy));
Out << ")(&" << GetValueName(I.getOperand(0)) << "))[";
@@ -3519,9 +3525,9 @@ void CWriter::visitShuffleVectorInst(ShuffleVectorInst &SVI) {
Out << "(";
printType(Out, SVI.getType());
Out << "){ ";
- const VectorType *VT = SVI.getType();
+ VectorType *VT = SVI.getType();
unsigned NumElts = VT->getNumElements();
- const Type *EltTy = VT->getElementType();
+ Type *EltTy = VT->getElementType();
for (unsigned i = 0; i != NumElts; ++i) {
if (i) Out << ", ";
@@ -3557,9 +3563,9 @@ void CWriter::visitInsertValueInst(InsertValueInst &IVI) {
Out << GetValueName(&IVI);
for (const unsigned *b = IVI.idx_begin(), *i = b, *e = IVI.idx_end();
i != e; ++i) {
- const Type *IndexedTy =
+ Type *IndexedTy =
ExtractValueInst::getIndexedType(IVI.getOperand(0)->getType(),
- ArrayRef<unsigned>(b, i+1));
+ makeArrayRef(b, i+1));
if (IndexedTy->isArrayTy())
Out << ".array[" << *i << "]";
else
@@ -3579,9 +3585,9 @@ void CWriter::visitExtractValueInst(ExtractValueInst &EVI) {
Out << GetValueName(EVI.getOperand(0));
for (const unsigned *b = EVI.idx_begin(), *i = b, *e = EVI.idx_end();
i != e; ++i) {
- const Type *IndexedTy =
+ Type *IndexedTy =
ExtractValueInst::getIndexedType(EVI.getOperand(0)->getType(),
- ArrayRef<unsigned>(b, i+1));
+ makeArrayRef(b, i+1));
if (IndexedTy->isArrayTy())
Out << ".array[" << *i << "]";
else
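
makeArrayRef, used in the two hunks above, is a small helper from llvm/ADT/ArrayRef.h that deduces the element type from its arguments, replacing the explicit ArrayRef<unsigned>(b, i+1) spelling. For example:

    const unsigned Idx[] = { 0, 2, 1 };
    ArrayRef<unsigned> Prefix = makeArrayRef(Idx, Idx + 2); // first two entries
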
diff --git a/lib/Target/CBackend/CMakeLists.txt b/lib/Target/CBackend/CMakeLists.txt
index a23ff85..96ae49f 100644
--- a/lib/Target/CBackend/CMakeLists.txt
+++ b/lib/Target/CBackend/CMakeLists.txt
@@ -2,4 +2,16 @@ add_llvm_target(CBackend
CBackend.cpp
)
+add_llvm_library_dependencies(LLVMCBackend
+ LLVMAnalysis
+ LLVMCBackendInfo
+ LLVMCodeGen
+ LLVMCore
+ LLVMMC
+ LLVMScalarOpts
+ LLVMSupport
+ LLVMTarget
+ LLVMTransformUtils
+ )
+
add_subdirectory(TargetInfo)
diff --git a/lib/Target/CBackend/CTargetMachine.h b/lib/Target/CBackend/CTargetMachine.h
index e64216b..4f1ca97 100644
--- a/lib/Target/CBackend/CTargetMachine.h
+++ b/lib/Target/CBackend/CTargetMachine.h
@@ -20,8 +20,9 @@
namespace llvm {
struct CTargetMachine : public TargetMachine {
- CTargetMachine(const Target &T, const std::string &TT,
- const std::string &CPU, const std::string &FS)
+ CTargetMachine(const Target &T, StringRef TT,
+ StringRef CPU, StringRef FS,
+ Reloc::Model RM, CodeModel::Model CM)
: TargetMachine(T, TT, CPU, FS) {}
virtual bool addPassesToEmitFile(PassManagerBase &PM,
diff --git a/lib/Target/CBackend/TargetInfo/CBackendTargetInfo.cpp b/lib/Target/CBackend/TargetInfo/CBackendTargetInfo.cpp
index f7e8ff2..e8274ff 100644
--- a/lib/Target/CBackend/TargetInfo/CBackendTargetInfo.cpp
+++ b/lib/Target/CBackend/TargetInfo/CBackendTargetInfo.cpp
@@ -9,7 +9,7 @@
#include "CTargetMachine.h"
#include "llvm/Module.h"
-#include "llvm/Target/TargetRegistry.h"
+#include "llvm/Support/TargetRegistry.h"
using namespace llvm;
Target llvm::TheCBackendTarget;
@@ -17,3 +17,5 @@ Target llvm::TheCBackendTarget;
extern "C" void LLVMInitializeCBackendTargetInfo() {
RegisterTarget<> X(TheCBackendTarget, "c", "C backend");
}
+
+extern "C" void LLVMInitializeCBackendTargetMC() {}
diff --git a/lib/Target/CBackend/TargetInfo/CMakeLists.txt b/lib/Target/CBackend/TargetInfo/CMakeLists.txt
index 5b35fa7..8e616be 100644
--- a/lib/Target/CBackend/TargetInfo/CMakeLists.txt
+++ b/lib/Target/CBackend/TargetInfo/CMakeLists.txt
@@ -4,3 +4,8 @@ add_llvm_library(LLVMCBackendInfo
CBackendTargetInfo.cpp
)
+add_llvm_library_dependencies(LLVMCBackendInfo
+ LLVMMC
+ LLVMSupport
+ LLVMTarget
+ )
diff --git a/lib/Target/CMakeLists.txt b/lib/Target/CMakeLists.txt
index f982316..030f808 100644
--- a/lib/Target/CMakeLists.txt
+++ b/lib/Target/CMakeLists.txt
@@ -1,8 +1,6 @@
add_llvm_library(LLVMTarget
Mangler.cpp
Target.cpp
- TargetAsmInfo.cpp
- TargetAsmLexer.cpp
TargetData.cpp
TargetELFWriterInfo.cpp
TargetFrameLowering.cpp
@@ -15,6 +13,12 @@ add_llvm_library(LLVMTarget
TargetSubtargetInfo.cpp
)
+add_llvm_library_dependencies(LLVMTarget
+ LLVMCore
+ LLVMMC
+ LLVMSupport
+ )
+
set(LLVM_ENUM_ASM_PRINTERS "")
set(LLVM_ENUM_ASM_PARSERS "")
set(LLVM_ENUM_DISASSEMBLERS "")
diff --git a/lib/Target/CellSPU/CMakeLists.txt b/lib/Target/CellSPU/CMakeLists.txt
index 0b94e0c..158fb3e 100644
--- a/lib/Target/CellSPU/CMakeLists.txt
+++ b/lib/Target/CellSPU/CMakeLists.txt
@@ -1,12 +1,13 @@
set(LLVM_TARGET_DEFINITIONS SPU.td)
-tablegen(SPUGenAsmWriter.inc -gen-asm-writer)
-tablegen(SPUGenCodeEmitter.inc -gen-emitter)
-tablegen(SPUGenRegisterInfo.inc -gen-register-info)
-tablegen(SPUGenInstrInfo.inc -gen-instr-info)
-tablegen(SPUGenDAGISel.inc -gen-dag-isel)
-tablegen(SPUGenSubtargetInfo.inc -gen-subtarget)
-tablegen(SPUGenCallingConv.inc -gen-callingconv)
+llvm_tablegen(SPUGenAsmWriter.inc -gen-asm-writer)
+llvm_tablegen(SPUGenCodeEmitter.inc -gen-emitter)
+llvm_tablegen(SPUGenRegisterInfo.inc -gen-register-info)
+llvm_tablegen(SPUGenInstrInfo.inc -gen-instr-info)
+llvm_tablegen(SPUGenDAGISel.inc -gen-dag-isel)
+llvm_tablegen(SPUGenSubtargetInfo.inc -gen-subtarget)
+llvm_tablegen(SPUGenCallingConv.inc -gen-callingconv)
+add_public_tablegen_target(CellSPUCommonTableGen)
add_llvm_target(CellSPUCodeGen
SPUAsmPrinter.cpp
@@ -22,5 +23,17 @@ add_llvm_target(CellSPUCodeGen
SPUNopFiller.cpp
)
+add_llvm_library_dependencies(LLVMCellSPUCodeGen
+ LLVMAsmPrinter
+ LLVMCellSPUDesc
+ LLVMCellSPUInfo
+ LLVMCodeGen
+ LLVMCore
+ LLVMMC
+ LLVMSelectionDAG
+ LLVMSupport
+ LLVMTarget
+ )
+
add_subdirectory(TargetInfo)
add_subdirectory(MCTargetDesc)
diff --git a/lib/Target/CellSPU/MCTargetDesc/CMakeLists.txt b/lib/Target/CellSPU/MCTargetDesc/CMakeLists.txt
index 85fb258..d41fe93 100644
--- a/lib/Target/CellSPU/MCTargetDesc/CMakeLists.txt
+++ b/lib/Target/CellSPU/MCTargetDesc/CMakeLists.txt
@@ -2,3 +2,10 @@ add_llvm_library(LLVMCellSPUDesc
SPUMCTargetDesc.cpp
SPUMCAsmInfo.cpp
)
+
+add_llvm_library_dependencies(LLVMCellSPUDesc
+ LLVMCellSPUInfo
+ LLVMMC
+ )
+
+add_dependencies(LLVMCellSPUDesc CellSPUCommonTableGen)
diff --git a/lib/Target/CellSPU/MCTargetDesc/SPUMCTargetDesc.cpp b/lib/Target/CellSPU/MCTargetDesc/SPUMCTargetDesc.cpp
index 26c5a4b..d5af2a8 100644
--- a/lib/Target/CellSPU/MCTargetDesc/SPUMCTargetDesc.cpp
+++ b/lib/Target/CellSPU/MCTargetDesc/SPUMCTargetDesc.cpp
@@ -13,10 +13,12 @@
#include "SPUMCTargetDesc.h"
#include "SPUMCAsmInfo.h"
+#include "llvm/MC/MachineLocation.h"
+#include "llvm/MC/MCCodeGenInfo.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSubtargetInfo.h"
-#include "llvm/Target/TargetRegistry.h"
+#include "llvm/Support/TargetRegistry.h"
#define GET_INSTRINFO_MC_DESC
#include "SPUGenInstrInfo.inc"
@@ -35,8 +37,10 @@ static MCInstrInfo *createSPUMCInstrInfo() {
return X;
}
-extern "C" void LLVMInitializeCellSPUMCInstrInfo() {
- TargetRegistry::RegisterMCInstrInfo(TheCellSPUTarget, createSPUMCInstrInfo);
+static MCRegisterInfo *createCellSPUMCRegisterInfo(StringRef TT) {
+ MCRegisterInfo *X = new MCRegisterInfo();
+ InitSPUMCRegisterInfo(X, SPU::R0);
+ return X;
}
static MCSubtargetInfo *createSPUMCSubtargetInfo(StringRef TT, StringRef CPU,
@@ -46,11 +50,43 @@ static MCSubtargetInfo *createSPUMCSubtargetInfo(StringRef TT, StringRef CPU,
return X;
}
-extern "C" void LLVMInitializeCellSPUMCSubtargetInfo() {
- TargetRegistry::RegisterMCSubtargetInfo(TheCellSPUTarget,
- createSPUMCSubtargetInfo);
+static MCAsmInfo *createSPUMCAsmInfo(const Target &T, StringRef TT) {
+ MCAsmInfo *MAI = new SPULinuxMCAsmInfo(T, TT);
+
+ // Initial state of the frame pointer is R1.
+ MachineLocation Dst(MachineLocation::VirtualFP);
+ MachineLocation Src(SPU::R1, 0);
+ MAI->addInitialFrameState(0, Dst, Src);
+
+ return MAI;
+}
+
+static MCCodeGenInfo *createSPUMCCodeGenInfo(StringRef TT, Reloc::Model RM,
+ CodeModel::Model CM) {
+ MCCodeGenInfo *X = new MCCodeGenInfo();
+ // For the time being, use static relocations, since there's really no
+ // support for PIC yet.
+ X->InitMCCodeGenInfo(Reloc::Static, CM);
+ return X;
}
-extern "C" void LLVMInitializeCellSPUMCAsmInfo() {
- RegisterMCAsmInfo<SPULinuxMCAsmInfo> X(TheCellSPUTarget);
+// Force static initialization.
+extern "C" void LLVMInitializeCellSPUTargetMC() {
+ // Register the MC asm info.
+ RegisterMCAsmInfoFn X(TheCellSPUTarget, createSPUMCAsmInfo);
+
+ // Register the MC codegen info.
+ TargetRegistry::RegisterMCCodeGenInfo(TheCellSPUTarget,
+ createSPUMCCodeGenInfo);
+
+ // Register the MC instruction info.
+ TargetRegistry::RegisterMCInstrInfo(TheCellSPUTarget, createSPUMCInstrInfo);
+
+ // Register the MC register info.
+ TargetRegistry::RegisterMCRegInfo(TheCellSPUTarget,
+ createCellSPUMCRegisterInfo);
+
+ // Register the MC subtarget info.
+ TargetRegistry::RegisterMCSubtargetInfo(TheCellSPUTarget,
+ createSPUMCSubtargetInfo);
}
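
Besides the registration consolidation, this hunk moves CellSPU's initial frame state from a FrameLowering hook (deleted below in SPUFrameLowering.cpp) onto the MCAsmInfo, seeded by the factory registered through RegisterMCAsmInfoFn. The essential motion, isolated from the factory above:

    // The frame pointer starts out as R1; record that on the MCAsmInfo
    // instead of answering a getInitialFrameState() query later.
    MachineLocation Dst(MachineLocation::VirtualFP);
    MachineLocation Src(SPU::R1, 0);
    MAI->addInitialFrameState(0, Dst, Src); // label 0: function entry
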
diff --git a/lib/Target/CellSPU/MCTargetDesc/SPUMCTargetDesc.h b/lib/Target/CellSPU/MCTargetDesc/SPUMCTargetDesc.h
index c5c037d..a3717b0 100644
--- a/lib/Target/CellSPU/MCTargetDesc/SPUMCTargetDesc.h
+++ b/lib/Target/CellSPU/MCTargetDesc/SPUMCTargetDesc.h
@@ -1,4 +1,4 @@
-//===-- SPUMCTargetDesc.h - Alpha Target Descriptions ---------*- C++ -*-===//
+//===-- SPUMCTargetDesc.h - CellSPU Target Descriptions ---------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -7,7 +7,7 @@
//
//===----------------------------------------------------------------------===//
//
-// This file provides Alpha specific target descriptions.
+// This file provides CellSPU specific target descriptions.
//
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/CellSPU/SPUAsmPrinter.cpp b/lib/Target/CellSPU/SPUAsmPrinter.cpp
index fd96694..90b5270 100644
--- a/lib/Target/CellSPU/SPUAsmPrinter.cpp
+++ b/lib/Target/CellSPU/SPUAsmPrinter.cpp
@@ -29,10 +29,10 @@
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetOptions.h"
#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/Target/TargetRegistry.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
diff --git a/lib/Target/CellSPU/SPUFrameLowering.cpp b/lib/Target/CellSPU/SPUFrameLowering.cpp
index a3e7e73..093f99f 100644
--- a/lib/Target/CellSPU/SPUFrameLowering.cpp
+++ b/lib/Target/CellSPU/SPUFrameLowering.cpp
@@ -181,18 +181,6 @@ void SPUFrameLowering::emitPrologue(MachineFunction &MF) const {
MachineLocation FPSrc(MachineLocation::VirtualFP);
Moves.push_back(MachineMove(ReadyLabel, FPDst, FPSrc));
}
- } else {
- // This is a leaf function -- insert a branch hint iff there is a
- // sufficient number of instructions in the basic block. Note that
- // this is just a best guess based on the basic block's size.
- if (MBB.size() >= (unsigned) SPUFrameLowering::branchHintPenalty()) {
- MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
- dl = MBBI->getDebugLoc();
-
- // Insert terminator label
- BuildMI(MBB, MBBI, dl, TII.get(SPU::PROLOG_LABEL))
- .addSym(MMI.getContext().CreateTempSymbol());
- }
}
}
@@ -249,14 +237,6 @@ void SPUFrameLowering::emitEpilogue(MachineFunction &MF,
}
}
-void SPUFrameLowering::getInitialFrameState(std::vector<MachineMove> &Moves)
- const {
- // Initial state of the frame pointer is R1.
- MachineLocation Dst(MachineLocation::VirtualFP);
- MachineLocation Src(SPU::R1, 0);
- Moves.push_back(MachineMove(0, Dst, Src));
-}
-
void SPUFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
RegScavenger *RS) const{
// Mark LR and SP unused, since the prolog spills them to stack and
diff --git a/lib/Target/CellSPU/SPUFrameLowering.h b/lib/Target/CellSPU/SPUFrameLowering.h
index 4fee72d..b837f2c 100644
--- a/lib/Target/CellSPU/SPUFrameLowering.h
+++ b/lib/Target/CellSPU/SPUFrameLowering.h
@@ -43,9 +43,6 @@ namespace llvm {
void processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
RegScavenger *RS = NULL) const;
- //! Perform target-specific stack frame setup.
- void getInitialFrameState(std::vector<MachineMove> &Moves) const;
-
//! Return a function's saved spill slots
/*!
For CellSPU, a function's saved spill slots is just the link register.
@@ -77,17 +74,6 @@ namespace llvm {
static int FItoStackOffset(int frame_index) {
return frame_index * stackSlotSize();
}
- //! Number of instructions required to overcome hint-for-branch latency
- /*!
- HBR (hint-for-branch) instructions can be inserted when, for example,
- we know that a given function is going to be called, such as printf(),
- in the control flow graph. HBRs are only inserted if a sufficient number
- of instructions occurs between the HBR and the target. Currently, HBRs
- take 6 cycles, ergo, the magic number 6.
- */
- static int branchHintPenalty() {
- return 6;
- }
};
}
diff --git a/lib/Target/CellSPU/SPUISelLowering.cpp b/lib/Target/CellSPU/SPUISelLowering.cpp
index f0ceee2..ac33111 100644
--- a/lib/Target/CellSPU/SPUISelLowering.cpp
+++ b/lib/Target/CellSPU/SPUISelLowering.cpp
@@ -69,7 +69,7 @@ namespace {
TargetLowering::ArgListEntry Entry;
for (unsigned i = 0, e = Op.getNumOperands(); i != e; ++i) {
EVT ArgVT = Op.getOperand(i).getValueType();
- const Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());
+ Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());
Entry.Node = Op.getOperand(i);
Entry.Ty = ArgTy;
Entry.isSExt = isSigned;
@@ -80,7 +80,7 @@ namespace {
TLI.getPointerTy());
// Splice the libcall in wherever FindInputOutputChains tells us to.
- const Type *RetTy =
+ Type *RetTy =
Op.getNode()->getValueType(0).getTypeForEVT(*DAG.getContext());
std::pair<SDValue, SDValue> CallInfo =
TLI.LowerCallTo(InChain, RetTy, isSigned, !isSigned, false, false,
@@ -174,6 +174,7 @@ SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM)
// SPU has no intrinsics for these particular operations:
setOperationAction(ISD::MEMBARRIER, MVT::Other, Expand);
+ setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Expand);
// SPU has no division/remainder instructions
setOperationAction(ISD::SREM, MVT::i8, Expand);
@@ -401,6 +402,9 @@ SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM)
i <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++i) {
MVT::SimpleValueType VT = (MVT::SimpleValueType)i;
+ // Set operation actions to legal types only.
+ if (!isTypeLegal(VT)) continue;
+
// add/sub are legal for all supported vector VT's.
setOperationAction(ISD::ADD, VT, Legal);
setOperationAction(ISD::SUB, VT, Legal);
@@ -438,6 +442,7 @@ SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM)
setOperationAction(ISD::FDIV, MVT::v4f32, Legal);
setBooleanContents(ZeroOrNegativeOneBooleanContent);
+ setBooleanVectorContents(ZeroOrNegativeOneBooleanContent); // FIXME: Is this correct?
setStackPointerRegisterToSaveRestore(SPU::R1);
@@ -497,7 +502,7 @@ SPUTargetLowering::getTargetNodeName(unsigned Opcode) const
// Return the Cell SPU's SETCC result type
//===----------------------------------------------------------------------===//
-MVT::SimpleValueType SPUTargetLowering::getSetCCResultType(EVT VT) const {
+EVT SPUTargetLowering::getSetCCResultType(EVT VT) const {
// i8, i16 and i32 are valid SETCC result types
MVT::SimpleValueType retval;
@@ -2727,6 +2732,7 @@ static SDValue LowerSIGN_EXTEND(SDValue Op, SelectionDAG &DAG)
// the type to extend from needs to be i64 or i32.
assert((OpVT == MVT::i128 && (Op0VT == MVT::i64 || Op0VT == MVT::i32)) &&
"LowerSIGN_EXTEND: input and/or output operand have wrong size");
+ (void)OpVT;
// Create shuffle mask
unsigned mask1 = 0x10101010; // byte 0 - 3 and 4 - 7
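
The added (void)OpVT is the standard fix for a variable whose only use sits inside an assert: with NDEBUG defined the assert compiles away and the compiler would warn about the unused variable. A self-contained, generic illustration:

    #include <cassert>

    void check(unsigned size) {
      const unsigned limit = 128;
      assert(size <= limit && "size out of range");
      (void)limit; // keeps -Wunused-variable quiet in NDEBUG builds
    }
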
@@ -3216,7 +3222,7 @@ SPUTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
/// isLegalAddressImmediate - Return true if the integer value can be used
/// as the offset of the target addressing mode.
bool SPUTargetLowering::isLegalAddressImmediate(int64_t V,
- const Type *Ty) const {
+ Type *Ty) const {
// SPU's addresses are 256K:
return (V > -(1 << 18) && V < (1 << 18) - 1);
}
@@ -3239,7 +3245,7 @@ bool SPUTargetLowering::isLegalICmpImmediate(int64_t Imm) const {
bool
SPUTargetLowering::isLegalAddressingMode(const AddrMode &AM,
- const Type * ) const{
+ Type * ) const{
// A-form: 18bit absolute address.
if (AM.BaseGV && !AM.HasBaseReg && AM.Scale == 0 && AM.BaseOffs == 0)
diff --git a/lib/Target/CellSPU/SPUISelLowering.h b/lib/Target/CellSPU/SPUISelLowering.h
index d23f6cc..aa4a168 100644
--- a/lib/Target/CellSPU/SPUISelLowering.h
+++ b/lib/Target/CellSPU/SPUISelLowering.h
@@ -107,7 +107,7 @@ namespace llvm {
virtual const char *getTargetNodeName(unsigned Opcode) const;
/// getSetCCResultType - Return the ValueType for ISD::SETCC
- virtual MVT::SimpleValueType getSetCCResultType(EVT VT) const;
+ virtual EVT getSetCCResultType(EVT VT) const;
virtual MVT getShiftAmountTy(EVT LHSTy) const { return MVT::i32; }
@@ -147,7 +147,7 @@ namespace llvm {
/// isLegalAddressImmediate - Return true if the integer value can be used
/// as the offset of the target addressing mode.
- virtual bool isLegalAddressImmediate(int64_t V, const Type *Ty) const;
+ virtual bool isLegalAddressImmediate(int64_t V, Type *Ty) const;
virtual bool isLegalAddressImmediate(GlobalValue *) const;
virtual bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const;
@@ -179,7 +179,7 @@ namespace llvm {
virtual bool isLegalICmpImmediate(int64_t Imm) const;
virtual bool isLegalAddressingMode(const AddrMode &AM,
- const Type *Ty) const;
+ Type *Ty) const;
};
}
diff --git a/lib/Target/CellSPU/SPUInstrInfo.cpp b/lib/Target/CellSPU/SPUInstrInfo.cpp
index e67b10c..007bc0e 100644
--- a/lib/Target/CellSPU/SPUInstrInfo.cpp
+++ b/lib/Target/CellSPU/SPUInstrInfo.cpp
@@ -17,9 +17,9 @@
#include "SPUHazardRecognizers.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/MC/MCContext.h"
-#include "llvm/Target/TargetRegistry.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/raw_ostream.h"
#define GET_INSTRINFO_CTOR
@@ -290,6 +290,8 @@ static void removeHBR( MachineBasicBlock &MBB) {
if (I->getOpcode() == SPU::HBRA ||
I->getOpcode() == SPU::HBR_LABEL){
I=MBB.erase(I);
+ if (I == MBB.end())
+ break;
}
}
}
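
The two added lines stop removeHBR from advancing an end() iterator: MachineBasicBlock::erase returns the iterator after the erased instruction, and the enclosing for loop increments it again. A self-contained sketch of the fully idiomatic form of this pattern (a generic illustration on std::list, not the LLVM code):

    #include <list>

    void removeEven(std::list<int> &L) {
      for (std::list<int>::iterator I = L.begin(); I != L.end(); /* in body */) {
        if (*I % 2 == 0)
          I = L.erase(I); // erase() already positions I on the next element
        else
          ++I;            // advance only when nothing was erased
      }
    }
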
diff --git a/lib/Target/CellSPU/SPUInstrInfo.td b/lib/Target/CellSPU/SPUInstrInfo.td
index e103c9b..f76ebd7 100644
--- a/lib/Target/CellSPU/SPUInstrInfo.td
+++ b/lib/Target/CellSPU/SPUInstrInfo.td
@@ -1594,8 +1594,8 @@ multiclass BitwiseOrImm
{
def v4i32: ORIVecInst<v4i32, v4i32Uns10Imm>;
- def r32: ORIInst<(outs R32C:$rT), (ins R32C:$rA, u10imm_i32:$val),
- [(set R32C:$rT, (or R32C:$rA, i32ImmUns10:$val))]>;
+ def r32: ORIInst<(outs R32C:$rT), (ins R32C:$rA, s10imm_i32:$val),
+ [(set R32C:$rT, (or R32C:$rA, i32ImmSExt10:$val))]>;
// i16i32: hacked version of the ori instruction to extend 16-bit quantities
// to 32-bit quantities. used exclusively to match "anyext" conversions (vide
@@ -3467,8 +3467,10 @@ let isBranch = 1, isTerminator = 1, hasCtrlDep = 1 in {
[/* no pattern */]>;
// Indirect branch
- def BI:
- BIForm<0b00010101100, "bi\t$func", [(brind R32C:$func)]>;
+ let isIndirectBranch = 1 in {
+ def BI:
+ BIForm<0b00010101100, "bi\t$func", [(brind R32C:$func)]>;
+ }
}
// Conditional branches:
diff --git a/lib/Target/CellSPU/SPURegisterInfo.cpp b/lib/Target/CellSPU/SPURegisterInfo.cpp
index 19896c0..bbac6fd 100644
--- a/lib/Target/CellSPU/SPURegisterInfo.cpp
+++ b/lib/Target/CellSPU/SPURegisterInfo.cpp
@@ -25,7 +25,6 @@
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
-#include "llvm/CodeGen/MachineLocation.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RegisterScavenging.h"
#include "llvm/CodeGen/ValueTypes.h"
@@ -187,7 +186,7 @@ unsigned SPURegisterInfo::getRegisterNumbering(unsigned RegEnum) {
SPURegisterInfo::SPURegisterInfo(const SPUSubtarget &subtarget,
const TargetInstrInfo &tii) :
- SPUGenRegisterInfo(), Subtarget(subtarget), TII(tii)
+ SPUGenRegisterInfo(SPU::R0), Subtarget(subtarget), TII(tii)
{
}
@@ -311,28 +310,12 @@ SPURegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj,
}
unsigned
-SPURegisterInfo::getRARegister() const
-{
- return SPU::R0;
-}
-
-unsigned
SPURegisterInfo::getFrameRegister(const MachineFunction &MF) const
{
return SPU::R1;
}
int
-SPURegisterInfo::getDwarfRegNum(unsigned RegNum, bool isEH) const {
- // FIXME: Most probably dwarf numbers differs for Linux and Darwin
- return SPUGenRegisterInfo::getDwarfRegNumFull(RegNum, 0);
-}
-
-int SPURegisterInfo::getLLVMRegNum(unsigned RegNum, bool isEH) const {
- return SPUGenRegisterInfo::getLLVMRegNumFull(RegNum, 0);
-}
-
-int
SPURegisterInfo::convertDFormToXForm(int dFormOpcode) const
{
switch(dFormOpcode)
diff --git a/lib/Target/CellSPU/SPURegisterInfo.h b/lib/Target/CellSPU/SPURegisterInfo.h
index 5e014f8..b7818a4 100644
--- a/lib/Target/CellSPU/SPURegisterInfo.h
+++ b/lib/Target/CellSPU/SPURegisterInfo.h
@@ -74,8 +74,6 @@ namespace llvm {
void eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj,
RegScavenger *RS = NULL) const;
- //! Get return address register (LR, aka R0)
- unsigned getRARegister() const;
//! Get the stack frame register (SP, aka R1)
unsigned getFrameRegister(const MachineFunction &MF) const;
@@ -83,10 +81,6 @@ namespace llvm {
// New methods added:
//------------------------------------------------------------------------
- //! Get DWARF debugging register number
- int getDwarfRegNum(unsigned RegNum, bool isEH) const;
- int getLLVMRegNum(unsigned RegNum, bool isEH) const;
-
//! Convert D-form load/store to X-form load/store
/*!
Converts a register displacement load/store into a register-indexed
diff --git a/lib/Target/CellSPU/SPUSubtarget.cpp b/lib/Target/CellSPU/SPUSubtarget.cpp
index 856dc82..43335ab 100644
--- a/lib/Target/CellSPU/SPUSubtarget.cpp
+++ b/lib/Target/CellSPU/SPUSubtarget.cpp
@@ -14,7 +14,7 @@
#include "SPUSubtarget.h"
#include "SPU.h"
#include "SPURegisterInfo.h"
-#include "llvm/Target/TargetRegistry.h"
+#include "llvm/Support/TargetRegistry.h"
#include "llvm/ADT/SmallVector.h"
#define GET_SUBTARGETINFO_TARGET_DESC
diff --git a/lib/Target/CellSPU/SPUTargetMachine.cpp b/lib/Target/CellSPU/SPUTargetMachine.cpp
index 3542a2b..93a7f6e 100644
--- a/lib/Target/CellSPU/SPUTargetMachine.cpp
+++ b/lib/Target/CellSPU/SPUTargetMachine.cpp
@@ -16,7 +16,8 @@
#include "llvm/PassManager.h"
#include "llvm/CodeGen/RegAllocRegistry.h"
#include "llvm/CodeGen/SchedulerRegistry.h"
-#include "llvm/Target/TargetRegistry.h"
+#include "llvm/Support/DynamicLibrary.h"
+#include "llvm/Support/TargetRegistry.h"
using namespace llvm;
@@ -31,9 +32,10 @@ SPUFrameLowering::getCalleeSaveSpillSlots(unsigned &NumEntries) const {
return &LR[0];
}
-SPUTargetMachine::SPUTargetMachine(const Target &T, const std::string &TT,
- const std::string &CPU,const std::string &FS)
- : LLVMTargetMachine(T, TT, CPU, FS),
+SPUTargetMachine::SPUTargetMachine(const Target &T, StringRef TT,
+ StringRef CPU, StringRef FS,
+ Reloc::Model RM, CodeModel::Model CM)
+ : LLVMTargetMachine(T, TT, CPU, FS, RM, CM),
Subtarget(TT, CPU, FS),
DataLayout(Subtarget.getTargetDataString()),
InstrInfo(*this),
@@ -41,9 +43,6 @@ SPUTargetMachine::SPUTargetMachine(const Target &T, const std::string &TT,
TLInfo(*this),
TSInfo(*this),
InstrItins(Subtarget.getInstrItineraryData()) {
- // For the time being, use static relocations, since there's really no
- // support for PIC yet.
- setRelocationModel(Reloc::Static);
}
//===----------------------------------------------------------------------===//
@@ -59,8 +58,16 @@ bool SPUTargetMachine::addInstSelector(PassManagerBase &PM,
// passes to run just before printing the assembly
bool SPUTargetMachine::
-addPreEmitPass(PassManagerBase &PM, CodeGenOpt::Level OptLevel)
-{
+addPreEmitPass(PassManagerBase &PM, CodeGenOpt::Level OptLevel) {
+  // Load the TCE instruction scheduler, if one is available via
+  // loaded plugins.
+ typedef llvm::FunctionPass* (*BuilderFunc)(const char*);
+ BuilderFunc schedulerCreator =
+ (BuilderFunc)(intptr_t)sys::DynamicLibrary::SearchForAddressOfSymbol(
+ "createTCESchedulerPass");
+ if (schedulerCreator != NULL)
+ PM.add(schedulerCreator("cellspu"));
+
  // Align instructions with nops/lnops for dual issue
PM.add(createSPUNopFillerPass(*this));
return true;
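
The hunk above resolves an optional scheduler pass from an already-loaded
plugin at run time. A minimal standalone sketch of that lookup, assuming only
that some plugin exports a C symbol named createTCESchedulerPass with this
builder signature (lookupPluginPass itself is an illustrative helper, not part
of the patch):

#include "llvm/Pass.h"
#include "llvm/Support/DataTypes.h"
#include "llvm/Support/DynamicLibrary.h"

// Hedged sketch: look up an optional pass factory exported by a plugin that
// was loaded earlier (e.g. with -load). Returns NULL when no loaded plugin
// provides the symbol, so the caller can simply skip PM.add() in that case.
static llvm::FunctionPass *lookupPluginPass(const char *Symbol,
                                            const char *TargetName) {
  typedef llvm::FunctionPass *(*BuilderFunc)(const char *);
  BuilderFunc Create = (BuilderFunc)(intptr_t)
      llvm::sys::DynamicLibrary::SearchForAddressOfSymbol(Symbol);
  return Create ? Create(TargetName) : 0;
}

addPreEmitPass then amounts to: if lookupPluginPass("createTCESchedulerPass",
"cellspu") returns a pass, add it ahead of the NOP filler.
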
diff --git a/lib/Target/CellSPU/SPUTargetMachine.h b/lib/Target/CellSPU/SPUTargetMachine.h
index d96f86d..fffe77c 100644
--- a/lib/Target/CellSPU/SPUTargetMachine.h
+++ b/lib/Target/CellSPU/SPUTargetMachine.h
@@ -38,8 +38,9 @@ class SPUTargetMachine : public LLVMTargetMachine {
SPUSelectionDAGInfo TSInfo;
InstrItineraryData InstrItins;
public:
- SPUTargetMachine(const Target &T, const std::string &TT,
- const std::string &CPU, const std::string &FS);
+ SPUTargetMachine(const Target &T, StringRef TT,
+ StringRef CPU, StringRef FS,
+ Reloc::Model RM, CodeModel::Model CM);
/// Return the subtarget implementation object
virtual const SPUSubtarget *getSubtargetImpl() const {
diff --git a/lib/Target/CellSPU/TargetInfo/CMakeLists.txt b/lib/Target/CellSPU/TargetInfo/CMakeLists.txt
index 928d0fe..3f2d6b0 100644
--- a/lib/Target/CellSPU/TargetInfo/CMakeLists.txt
+++ b/lib/Target/CellSPU/TargetInfo/CMakeLists.txt
@@ -4,4 +4,10 @@ add_llvm_library(LLVMCellSPUInfo
CellSPUTargetInfo.cpp
)
-add_dependencies(LLVMCellSPUInfo CellSPUCodeGenTable_gen)
+add_llvm_library_dependencies(LLVMCellSPUInfo
+ LLVMMC
+ LLVMSupport
+ LLVMTarget
+ )
+
+add_dependencies(LLVMCellSPUInfo CellSPUCommonTableGen)
diff --git a/lib/Target/CellSPU/TargetInfo/CellSPUTargetInfo.cpp b/lib/Target/CellSPU/TargetInfo/CellSPUTargetInfo.cpp
index 049ea23..84aadfa 100644
--- a/lib/Target/CellSPU/TargetInfo/CellSPUTargetInfo.cpp
+++ b/lib/Target/CellSPU/TargetInfo/CellSPUTargetInfo.cpp
@@ -9,7 +9,7 @@
#include "SPU.h"
#include "llvm/Module.h"
-#include "llvm/Target/TargetRegistry.h"
+#include "llvm/Support/TargetRegistry.h"
using namespace llvm;
Target llvm::TheCellSPUTarget;
diff --git a/lib/Target/CppBackend/CMakeLists.txt b/lib/Target/CppBackend/CMakeLists.txt
index e937559..95b6058 100644
--- a/lib/Target/CppBackend/CMakeLists.txt
+++ b/lib/Target/CppBackend/CMakeLists.txt
@@ -2,4 +2,11 @@ add_llvm_target(CppBackend
CPPBackend.cpp
)
+add_llvm_library_dependencies(LLVMCppBackend
+ LLVMCore
+ LLVMCppBackendInfo
+ LLVMSupport
+ LLVMTarget
+ )
+
add_subdirectory(TargetInfo)
diff --git a/lib/Target/CppBackend/CPPBackend.cpp b/lib/Target/CppBackend/CPPBackend.cpp
index 10d18f6..ae0e3c4 100644
--- a/lib/Target/CppBackend/CPPBackend.cpp
+++ b/lib/Target/CppBackend/CPPBackend.cpp
@@ -29,7 +29,7 @@
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/FormattedStream.h"
-#include "llvm/Target/TargetRegistry.h"
+#include "llvm/Support/TargetRegistry.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/Config/config.h"
#include <algorithm>
@@ -77,22 +77,12 @@ extern "C" void LLVMInitializeCppBackendTarget() {
RegisterTargetMachine<CPPTargetMachine> X(TheCppBackendTarget);
}
-extern "C" void LLVMInitializeCppBackendMCAsmInfo() {}
-
-extern "C" void LLVMInitializeCppBackendMCInstrInfo() {
- RegisterMCInstrInfo<MCInstrInfo> X(TheCppBackendTarget);
-}
-
-extern "C" void LLVMInitializeCppBackendMCSubtargetInfo() {
- RegisterMCSubtargetInfo<MCSubtargetInfo> X(TheCppBackendTarget);
-}
-
namespace {
- typedef std::vector<const Type*> TypeList;
- typedef std::map<const Type*,std::string> TypeMap;
+ typedef std::vector<Type*> TypeList;
+ typedef std::map<Type*,std::string> TypeMap;
typedef std::map<const Value*,std::string> ValueMap;
typedef std::set<std::string> NameSet;
- typedef std::set<const Type*> TypeSet;
+ typedef std::set<Type*> TypeSet;
typedef std::set<const Value*> ValueSet;
typedef std::map<const Value*,std::string> ForwardRefMap;
@@ -143,14 +133,14 @@ namespace {
void printEscapedString(const std::string& str);
void printCFP(const ConstantFP* CFP);
- std::string getCppName(const Type* val);
- inline void printCppName(const Type* val);
+ std::string getCppName(Type* val);
+ inline void printCppName(Type* val);
std::string getCppName(const Value* val);
inline void printCppName(const Value* val);
void printAttributes(const AttrListPtr &PAL, const std::string &name);
- void printType(const Type* Ty);
+ void printType(Type* Ty);
void printTypes(const Module* M);
void printConstant(const Constant *CPV);
@@ -164,7 +154,7 @@ namespace {
void printFunctionHead(const Function *F);
void printFunctionBody(const Function *F);
void printInstruction(const Instruction *I, const std::string& bbname);
- std::string getOpName(Value*);
+ std::string getOpName(const Value*);
void printModuleBody();
};
@@ -184,7 +174,7 @@ static inline void sanitize(std::string &str) {
str[i] = '_';
}
-static std::string getTypePrefix(const Type *Ty) {
+static std::string getTypePrefix(Type *Ty) {
switch (Ty->getTypeID()) {
case Type::VoidTyID: return "void_";
case Type::IntegerTyID:
@@ -339,7 +329,7 @@ void CppWriter::printEscapedString(const std::string &Str) {
}
}
-std::string CppWriter::getCppName(const Type* Ty) {
+std::string CppWriter::getCppName(Type* Ty) {
  // First, handle the primitive types ... easy
if (Ty->isPrimitiveType() || Ty->isIntegerTy()) {
switch (Ty->getTypeID()) {
@@ -379,7 +369,7 @@ std::string CppWriter::getCppName(const Type* Ty) {
// See if the type has a name in the symboltable and build accordingly
std::string name;
- if (const StructType *STy = dyn_cast<StructType>(Ty))
+ if (StructType *STy = dyn_cast<StructType>(Ty))
if (STy->hasName())
name = STy->getName();
@@ -393,7 +383,7 @@ std::string CppWriter::getCppName(const Type* Ty) {
return TypeNames[Ty] = name;
}
-void CppWriter::printCppName(const Type* Ty) {
+void CppWriter::printCppName(Type* Ty) {
printEscapedString(getCppName(Ty));
}
@@ -480,6 +470,9 @@ void CppWriter::printAttributes(const AttrListPtr &PAL,
HANDLE_ATTR(NoImplicitFloat);
HANDLE_ATTR(Naked);
HANDLE_ATTR(InlineHint);
+ HANDLE_ATTR(ReturnsTwice);
+ HANDLE_ATTR(UWTable);
+ HANDLE_ATTR(NonLazyBind);
#undef HANDLE_ATTR
if (attrs & Attribute::StackAlignment)
Out << " | Attribute::constructStackAlignmentFromInt("
@@ -499,7 +492,7 @@ void CppWriter::printAttributes(const AttrListPtr &PAL,
}
}
-void CppWriter::printType(const Type* Ty) {
+void CppWriter::printType(Type* Ty) {
// We don't print definitions for primitive types
if (Ty->isPrimitiveType() || Ty->isIntegerTy())
return;
@@ -514,13 +507,13 @@ void CppWriter::printType(const Type* Ty) {
// Print the type definition
switch (Ty->getTypeID()) {
case Type::FunctionTyID: {
- const FunctionType* FT = cast<FunctionType>(Ty);
+ FunctionType* FT = cast<FunctionType>(Ty);
Out << "std::vector<Type*>" << typeName << "_args;";
nl(Out);
FunctionType::param_iterator PI = FT->param_begin();
FunctionType::param_iterator PE = FT->param_end();
for (; PI != PE; ++PI) {
- const Type* argTy = static_cast<const Type*>(*PI);
+ Type* argTy = static_cast<Type*>(*PI);
printType(argTy);
std::string argName(getCppName(argTy));
Out << typeName << "_args.push_back(" << argName;
@@ -539,13 +532,21 @@ void CppWriter::printType(const Type* Ty) {
break;
}
case Type::StructTyID: {
- const StructType* ST = cast<StructType>(Ty);
- if (!ST->isAnonymous()) {
- Out << "StructType *" << typeName << " = ";
- Out << "StructType::createNamed(mod->getContext(), \"";
+ StructType* ST = cast<StructType>(Ty);
+ if (!ST->isLiteral()) {
+ Out << "StructType *" << typeName << " = mod->getTypeByName(\"";
+ printEscapedString(ST->getName());
+ Out << "\");";
+ nl(Out);
+ Out << "if (!" << typeName << ") {";
+ nl(Out);
+ Out << typeName << " = ";
+ Out << "StructType::create(mod->getContext(), \"";
printEscapedString(ST->getName());
Out << "\");";
nl(Out);
+ Out << "}";
+ nl(Out);
// Indicate that this type is now defined.
DefinedTypes.insert(Ty);
}
@@ -555,7 +556,7 @@ void CppWriter::printType(const Type* Ty) {
StructType::element_iterator EI = ST->element_begin();
StructType::element_iterator EE = ST->element_end();
for (; EI != EE; ++EI) {
- const Type* fieldTy = static_cast<const Type*>(*EI);
+ Type* fieldTy = static_cast<Type*>(*EI);
printType(fieldTy);
std::string fieldName(getCppName(fieldTy));
Out << typeName << "_fields.push_back(" << fieldName;
@@ -563,21 +564,27 @@ void CppWriter::printType(const Type* Ty) {
nl(Out);
}
- if (ST->isAnonymous()) {
+ if (ST->isLiteral()) {
Out << "StructType *" << typeName << " = ";
Out << "StructType::get(" << "mod->getContext(), ";
} else {
+ Out << "if (" << typeName << "->isOpaque()) {";
+ nl(Out);
Out << typeName << "->setBody(";
}
Out << typeName << "_fields, /*isPacked=*/"
<< (ST->isPacked() ? "true" : "false") << ");";
nl(Out);
+ if (!ST->isLiteral()) {
+ Out << "}";
+ nl(Out);
+ }
break;
}
case Type::ArrayTyID: {
- const ArrayType* AT = cast<ArrayType>(Ty);
- const Type* ET = AT->getElementType();
+ ArrayType* AT = cast<ArrayType>(Ty);
+ Type* ET = AT->getElementType();
printType(ET);
if (DefinedTypes.find(Ty) == DefinedTypes.end()) {
std::string elemName(getCppName(ET));
@@ -589,8 +596,8 @@ void CppWriter::printType(const Type* Ty) {
break;
}
case Type::PointerTyID: {
- const PointerType* PT = cast<PointerType>(Ty);
- const Type* ET = PT->getElementType();
+ PointerType* PT = cast<PointerType>(Ty);
+ Type* ET = PT->getElementType();
printType(ET);
if (DefinedTypes.find(Ty) == DefinedTypes.end()) {
std::string elemName(getCppName(ET));
@@ -602,8 +609,8 @@ void CppWriter::printType(const Type* Ty) {
break;
}
case Type::VectorTyID: {
- const VectorType* PT = cast<VectorType>(Ty);
- const Type* ET = PT->getElementType();
+ VectorType* PT = cast<VectorType>(Ty);
+ Type* ET = PT->getElementType();
printType(ET);
if (DefinedTypes.find(Ty) == DefinedTypes.end()) {
std::string elemName(getCppName(ET));
@@ -766,9 +773,7 @@ void CppWriter::printConstant(const Constant *CV) {
Out << "Constant* " << constName
<< " = ConstantExpr::getGetElementPtr("
<< getCppName(CE->getOperand(0)) << ", "
- << "&" << constName << "_indices[0], "
- << constName << "_indices.size()"
- << ");";
+ << constName << "_indices);";
} else if (CE->isCast()) {
printConstant(CE->getOperand(0));
Out << "Constant* " << constName << " = ConstantExpr::getCast(";
@@ -988,7 +993,7 @@ void CppWriter::printVariableBody(const GlobalVariable *GV) {
}
}
-std::string CppWriter::getOpName(Value* V) {
+std::string CppWriter::getOpName(const Value* V) {
if (!isa<Instruction>(V) || DefinedValues.find(V) != DefinedValues.end())
return getCppName(V);
@@ -1053,14 +1058,17 @@ void CppWriter::printInstruction(const Instruction *I,
case Instruction::Switch: {
const SwitchInst *SI = cast<SwitchInst>(I);
Out << "SwitchInst* " << iName << " = SwitchInst::Create("
- << opNames[0] << ", "
- << opNames[1] << ", "
+ << getOpName(SI->getCondition()) << ", "
+ << getOpName(SI->getDefaultDest()) << ", "
<< SI->getNumCases() << ", " << bbname << ");";
nl(Out);
- for (unsigned i = 2; i != SI->getNumOperands(); i += 2) {
+ unsigned NumCases = SI->getNumCases();
+ for (unsigned i = 1; i < NumCases; ++i) {
+ const ConstantInt* CaseVal = SI->getCaseValue(i);
+ const BasicBlock* BB = SI->getSuccessor(i);
Out << iName << "->addCase("
- << opNames[i] << ", "
- << opNames[i+1] << ");";
+ << getOpName(CaseVal) << ", "
+ << getOpName(BB) << ");";
nl(Out);
}
break;
@@ -1076,6 +1084,11 @@ void CppWriter::printInstruction(const Instruction *I,
}
break;
}
+ case Instruction::Resume: {
+ Out << "ResumeInst::Create(mod->getContext(), " << opNames[0]
+ << ", " << bbname << ");";
+ break;
+ }
case Instruction::Invoke: {
const InvokeInst* inv = cast<InvokeInst>(I);
Out << "std::vector<Value*> " << iName << "_params;";
@@ -1090,8 +1103,7 @@ void CppWriter::printInstruction(const Instruction *I,
<< getOpName(inv->getCalledFunction()) << ", "
<< getOpName(inv->getNormalDest()) << ", "
<< getOpName(inv->getUnwindDest()) << ", "
- << iName << "_params.begin(), "
- << iName << "_params.end(), \"";
+ << iName << "_params, \"";
printEscapedString(inv->getName());
Out << "\", " << bbname << ");";
nl(Out) << iName << "->setCallingConv(";
@@ -1252,8 +1264,7 @@ void CppWriter::printInstruction(const Instruction *I,
nl(Out);
}
Out << "Instruction* " << iName << " = GetElementPtrInst::Create("
- << opNames[0] << ", " << iName << "_indices.begin(), "
- << iName << "_indices.end()";
+ << opNames[0] << ", " << iName << "_indices";
}
Out << ", \"";
printEscapedString(gep->getName());
@@ -1304,7 +1315,7 @@ void CppWriter::printInstruction(const Instruction *I,
case Instruction::PtrToInt: Out << "PtrToIntInst"; break;
case Instruction::IntToPtr: Out << "IntToPtrInst"; break;
case Instruction::BitCast: Out << "BitCastInst"; break;
- default: assert(!"Unreachable"); break;
+ default: assert(0 && "Unreachable"); break;
}
Out << "(" << opNames[0] << ", "
<< getCppName(cst->getType()) << ", \"";
@@ -1331,8 +1342,7 @@ void CppWriter::printInstruction(const Instruction *I,
}
Out << "CallInst* " << iName << " = CallInst::Create("
<< opNames[call->getNumArgOperands()] << ", "
- << iName << "_params.begin(), "
- << iName << "_params.end(), \"";
+ << iName << "_params, \"";
} else if (call->getNumArgOperands() == 1) {
Out << "CallInst* " << iName << " = CallInst::Create("
<< opNames[call->getNumArgOperands()] << ", " << opNames[0] << ", \"";
@@ -1415,7 +1425,7 @@ void CppWriter::printInstruction(const Instruction *I,
Out << "ExtractValueInst* " << getCppName(evi)
<< " = ExtractValueInst::Create(" << opNames[0]
<< ", "
- << iName << "_indices.begin(), " << iName << "_indices.end(), \"";
+ << iName << "_indices, \"";
printEscapedString(evi->getName());
Out << "\", " << bbname << ");";
break;
@@ -1432,7 +1442,7 @@ void CppWriter::printInstruction(const Instruction *I,
Out << "InsertValueInst* " << getCppName(ivi)
<< " = InsertValueInst::Create(" << opNames[0]
<< ", " << opNames[1] << ", "
- << iName << "_indices.begin(), " << iName << "_indices.end(), \"";
+ << iName << "_indices, \"";
printEscapedString(ivi->getName());
Out << "\", " << bbname << ");";
break;
@@ -1542,13 +1552,12 @@ void CppWriter::printFunctionUses(const Function* F) {
void CppWriter::printFunctionHead(const Function* F) {
nl(Out) << "Function* " << getCppName(F);
- if (is_inline) {
- Out << " = mod->getFunction(\"";
- printEscapedString(F->getName());
- Out << "\", " << getCppName(F->getFunctionType()) << ");";
- nl(Out) << "if (!" << getCppName(F) << ") {";
- nl(Out) << getCppName(F);
- }
+ Out << " = mod->getFunction(\"";
+ printEscapedString(F->getName());
+ Out << "\");";
+ nl(Out) << "if (!" << getCppName(F) << ") {";
+ nl(Out) << getCppName(F);
+
Out<< " = Function::Create(";
nl(Out,1) << "/*Type=*/" << getCppName(F->getFunctionType()) << ",";
nl(Out) << "/*Linkage=*/";
@@ -1585,10 +1594,8 @@ void CppWriter::printFunctionHead(const Function* F) {
Out << "->setGC(\"" << F->getGC() << "\");";
nl(Out);
}
- if (is_inline) {
- Out << "}";
- nl(Out);
- }
+ Out << "}";
+ nl(Out);
printAttributes(F->getAttributes(), getCppName(F));
printCppName(F);
Out << "->setAttributes(" << getCppName(F) << "_PAL);";
@@ -1873,7 +1880,7 @@ void CppWriter::printVariable(const std::string& fname,
void CppWriter::printType(const std::string &fname,
const std::string &typeName) {
- const Type* Ty = TheModule->getTypeByName(typeName);
+ Type* Ty = TheModule->getTypeByName(typeName);
if (!Ty) {
error(std::string("Type '") + typeName + "' not found in input module");
return;
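
The churn in printType tracks the LLVM 3.0 type-system rewrite: Type is no
longer handled as const, named structs are fetched with Module::getTypeByName,
created opaque with StructType::create, and given a body exactly once via
setBody while still opaque. A condensed sketch of the get-or-create idiom the
backend now emits (the struct name pair_t and the i32 fields are illustrative):

#include "llvm/DerivedTypes.h"
#include "llvm/Module.h"
#include <vector>
using namespace llvm;

// Sketch mirroring the code CppWriter::printType prints for non-literal
// (named) structs under the 3.0 type system.
static StructType *getOrCreatePair(Module *mod) {
  StructType *STy = mod->getTypeByName("pair_t");
  if (!STy)
    STy = StructType::create(mod->getContext(), "pair_t");
  if (STy->isOpaque()) {
    std::vector<Type*> fields(2, Type::getInt32Ty(mod->getContext()));
    STy->setBody(fields, /*isPacked=*/false);
  }
  return STy;
}
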
diff --git a/lib/Target/CppBackend/CPPTargetMachine.h b/lib/Target/CppBackend/CPPTargetMachine.h
index 7322e3e..287e537 100644
--- a/lib/Target/CppBackend/CPPTargetMachine.h
+++ b/lib/Target/CppBackend/CPPTargetMachine.h
@@ -22,8 +22,9 @@ namespace llvm {
class formatted_raw_ostream;
struct CPPTargetMachine : public TargetMachine {
- CPPTargetMachine(const Target &T, const std::string &TT,
- const std::string &CPU, const std::string &FS)
+ CPPTargetMachine(const Target &T, StringRef TT,
+ StringRef CPU, StringRef FS,
+ Reloc::Model RM, CodeModel::Model CM)
: TargetMachine(T, TT, CPU, FS) {}
virtual bool addPassesToEmitFile(PassManagerBase &PM,
diff --git a/lib/Target/CppBackend/TargetInfo/CMakeLists.txt b/lib/Target/CppBackend/TargetInfo/CMakeLists.txt
index edaf5d3..7165d8f 100644
--- a/lib/Target/CppBackend/TargetInfo/CMakeLists.txt
+++ b/lib/Target/CppBackend/TargetInfo/CMakeLists.txt
@@ -4,3 +4,7 @@ add_llvm_library(LLVMCppBackendInfo
CppBackendTargetInfo.cpp
)
+add_llvm_library_dependencies(LLVMCppBackendInfo
+ LLVMMC
+ LLVMTarget
+ )
diff --git a/lib/Target/CppBackend/TargetInfo/CppBackendTargetInfo.cpp b/lib/Target/CppBackend/TargetInfo/CppBackendTargetInfo.cpp
index d0aeb12..a8ac0a2 100644
--- a/lib/Target/CppBackend/TargetInfo/CppBackendTargetInfo.cpp
+++ b/lib/Target/CppBackend/TargetInfo/CppBackendTargetInfo.cpp
@@ -9,7 +9,7 @@
#include "CPPTargetMachine.h"
#include "llvm/Module.h"
-#include "llvm/Target/TargetRegistry.h"
+#include "llvm/Support/TargetRegistry.h"
using namespace llvm;
Target llvm::TheCppBackendTarget;
@@ -24,3 +24,5 @@ extern "C" void LLVMInitializeCppBackendTargetInfo() {
"C++ backend",
&CppBackend_TripleMatchQuality);
}
+
+extern "C" void LLVMInitializeCppBackendTargetMC() {}
diff --git a/lib/Target/MBlaze/AsmParser/CMakeLists.txt b/lib/Target/MBlaze/AsmParser/CMakeLists.txt
index 87e7cb5..ec8f52a 100644
--- a/lib/Target/MBlaze/AsmParser/CMakeLists.txt
+++ b/lib/Target/MBlaze/AsmParser/CMakeLists.txt
@@ -6,3 +6,11 @@ add_llvm_library(LLVMMBlazeAsmParser
MBlazeAsmParser.cpp
)
+add_llvm_library_dependencies(LLVMMBlazeAsmParser
+ LLVMMBlazeInfo
+ LLVMMC
+ LLVMMCParser
+ LLVMSupport
+ )
+
+add_dependencies(LLVMMBlazeAsmParser MBlazeCommonTableGen)
diff --git a/lib/Target/MBlaze/AsmParser/MBlazeAsmLexer.cpp b/lib/Target/MBlaze/AsmParser/MBlazeAsmLexer.cpp
index 1596596..2d357bb 100644
--- a/lib/Target/MBlaze/AsmParser/MBlazeAsmLexer.cpp
+++ b/lib/Target/MBlaze/AsmParser/MBlazeAsmLexer.cpp
@@ -7,8 +7,7 @@
//
//===----------------------------------------------------------------------===//
-#include "MBlaze.h"
-#include "MBlazeTargetMachine.h"
+#include "MCTargetDesc/MBlazeBaseInfo.h"
#include "llvm/ADT/OwningPtr.h"
#include "llvm/ADT/SmallVector.h"
@@ -17,10 +16,10 @@
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCParser/MCAsmLexer.h"
#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/MC/MCTargetAsmLexer.h"
-#include "llvm/Target/TargetAsmLexer.h"
-#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetRegistry.h"
+#include "llvm/Support/TargetRegistry.h"
#include <string>
#include <map>
@@ -29,7 +28,7 @@ using namespace llvm;
namespace {
- class MBlazeBaseAsmLexer : public TargetAsmLexer {
+ class MBlazeBaseAsmLexer : public MCTargetAsmLexer {
const MCAsmInfo &AsmInfo;
const AsmToken &lexDefinite() {
@@ -42,7 +41,7 @@ namespace {
rmap_ty RegisterMap;
- void InitRegisterMap(const TargetRegisterInfo *info) {
+ void InitRegisterMap(const MCRegisterInfo *info) {
unsigned numRegs = info->getNumRegs();
for (unsigned i = 0; i < numRegs; ++i) {
@@ -76,20 +75,16 @@ namespace {
}
public:
MBlazeBaseAsmLexer(const Target &T, const MCAsmInfo &MAI)
- : TargetAsmLexer(T), AsmInfo(MAI) {
+ : MCTargetAsmLexer(T), AsmInfo(MAI) {
}
};
class MBlazeAsmLexer : public MBlazeBaseAsmLexer {
public:
- MBlazeAsmLexer(const Target &T, const MCAsmInfo &MAI)
+ MBlazeAsmLexer(const Target &T, const MCRegisterInfo &MRI,
+ const MCAsmInfo &MAI)
: MBlazeBaseAsmLexer(T, MAI) {
- std::string tripleString("mblaze-unknown-unknown");
- std::string featureString;
- std::string CPU;
- OwningPtr<const TargetMachine>
- targetMachine(T.createTargetMachine(tripleString, CPU, featureString));
- InitRegisterMap(targetMachine->getRegisterInfo());
+ InitRegisterMap(&MRI);
}
};
}
@@ -123,6 +118,6 @@ AsmToken MBlazeBaseAsmLexer::LexTokenUAL() {
}
extern "C" void LLVMInitializeMBlazeAsmLexer() {
- RegisterAsmLexer<MBlazeAsmLexer> X(TheMBlazeTarget);
+ RegisterMCAsmLexer<MBlazeAsmLexer> X(TheMBlazeTarget);
}
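
The constructor change is the point of this hunk: the lexer now takes an
MCRegisterInfo directly rather than building a whole TargetMachine just to
reach the register names. A hedged sketch of the map setup this enables (the
std::map type here is an assumption; the real rmap_ty may differ):

#include "llvm/MC/MCRegisterInfo.h"
#include <map>
#include <string>

// Sketch: build a register-name -> register-number map straight from the MC
// register tables; no TargetMachine or TargetRegisterInfo is needed anymore.
static void initRegisterMap(const llvm::MCRegisterInfo &MRI,
                            std::map<std::string, unsigned> &RegisterMap) {
  for (unsigned i = 0, e = MRI.getNumRegs(); i != e; ++i) {
    const char *Name = MRI.getName(i);
    if (Name && Name[0])
      RegisterMap[Name] = i;
  }
}
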
diff --git a/lib/Target/MBlaze/AsmParser/MBlazeAsmParser.cpp b/lib/Target/MBlaze/AsmParser/MBlazeAsmParser.cpp
index eebd9d8..97d311c 100644
--- a/lib/Target/MBlaze/AsmParser/MBlazeAsmParser.cpp
+++ b/lib/Target/MBlaze/AsmParser/MBlazeAsmParser.cpp
@@ -7,19 +7,16 @@
//
//===----------------------------------------------------------------------===//
-#include "MBlaze.h"
-#include "MBlazeSubtarget.h"
-#include "MBlazeRegisterInfo.h"
-#include "MBlazeISelLowering.h"
+#include "MCTargetDesc/MBlazeBaseInfo.h"
#include "llvm/MC/MCParser/MCAsmLexer.h"
#include "llvm/MC/MCParser/MCAsmParser.h"
#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
-#include "llvm/Target/TargetRegistry.h"
-#include "llvm/Target/TargetAsmParser.h"
+#include "llvm/MC/MCTargetAsmParser.h"
#include "llvm/Support/SourceMgr.h"
+#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/ADT/OwningPtr.h"
#include "llvm/ADT/SmallVector.h"
@@ -30,7 +27,7 @@ using namespace llvm;
namespace {
struct MBlazeOperand;
-class MBlazeAsmParser : public TargetAsmParser {
+class MBlazeAsmParser : public MCTargetAsmParser {
MCAsmParser &Parser;
MCAsmParser &getParser() const { return Parser; }
@@ -64,7 +61,7 @@ class MBlazeAsmParser : public TargetAsmParser {
public:
MBlazeAsmParser(MCSubtargetInfo &_STI, MCAsmParser &_Parser)
- : TargetAsmParser(), Parser(_Parser) {}
+ : MCTargetAsmParser(), Parser(_Parser) {}
virtual bool ParseInstruction(StringRef Name, SMLoc NameLoc,
SmallVectorImpl<MCParsedAsmOperand*> &Operands);
@@ -286,19 +283,19 @@ void MBlazeOperand::print(raw_ostream &OS) const {
break;
case Register:
OS << "<register R";
- OS << MBlazeRegisterInfo::getRegisterNumbering(getReg()) << ">";
+ OS << getMBlazeRegisterNumbering(getReg()) << ">";
break;
case Token:
OS << "'" << getToken() << "'";
break;
case Memory: {
OS << "<memory R";
- OS << MBlazeRegisterInfo::getRegisterNumbering(getMemBase());
+ OS << getMBlazeRegisterNumbering(getMemBase());
OS << ", ";
unsigned RegOff = getMemOffReg();
if (RegOff)
- OS << "R" << MBlazeRegisterInfo::getRegisterNumbering(RegOff);
+ OS << "R" << getMBlazeRegisterNumbering(RegOff);
else
OS << getMemOff();
OS << ">";
@@ -326,6 +323,7 @@ MatchAndEmitInstruction(SMLoc IDLoc,
unsigned ErrorInfo;
switch (MatchInstructionImpl(Operands, Inst, ErrorInfo)) {
+ default: break;
case Match_Success:
Out.EmitInstruction(Inst);
return false;
@@ -521,7 +519,7 @@ ParseInstruction(StringRef Name, SMLoc NameLoc,
return false;
}
-/// ParseDirective parses the arm specific directives
+/// ParseDirective parses the MBlaze specific directives
bool MBlazeAsmParser::ParseDirective(AsmToken DirectiveID) {
StringRef IDVal = DirectiveID.getIdentifier();
if (IDVal == ".word")
@@ -558,7 +556,7 @@ extern "C" void LLVMInitializeMBlazeAsmLexer();
/// Force static initialization.
extern "C" void LLVMInitializeMBlazeAsmParser() {
- RegisterAsmParser<MBlazeAsmParser> X(TheMBlazeTarget);
+ RegisterMCAsmParser<MBlazeAsmParser> X(TheMBlazeTarget);
LLVMInitializeMBlazeAsmLexer();
}
diff --git a/lib/Target/MBlaze/AsmParser/Makefile b/lib/Target/MBlaze/AsmParser/Makefile
index 611a0f4..1e68766 100644
--- a/lib/Target/MBlaze/AsmParser/Makefile
+++ b/lib/Target/MBlaze/AsmParser/Makefile
@@ -1,4 +1,4 @@
-##===- lib/Target/ARM/AsmParser/Makefile -------------------*- Makefile -*-===##
+##===- lib/Target/MBlaze/AsmParser/Makefile ----------------*- Makefile -*-===##
#
# The LLVM Compiler Infrastructure
#
diff --git a/lib/Target/MBlaze/CMakeLists.txt b/lib/Target/MBlaze/CMakeLists.txt
index 0bc5b78..47b0db2 100644
--- a/lib/Target/MBlaze/CMakeLists.txt
+++ b/lib/Target/MBlaze/CMakeLists.txt
@@ -1,15 +1,16 @@
set(LLVM_TARGET_DEFINITIONS MBlaze.td)
-tablegen(MBlazeGenRegisterInfo.inc -gen-register-info)
-tablegen(MBlazeGenInstrInfo.inc -gen-instr-info)
-tablegen(MBlazeGenCodeEmitter.inc -gen-emitter)
-tablegen(MBlazeGenAsmWriter.inc -gen-asm-writer)
-tablegen(MBlazeGenAsmMatcher.inc -gen-asm-matcher)
-tablegen(MBlazeGenDAGISel.inc -gen-dag-isel)
-tablegen(MBlazeGenCallingConv.inc -gen-callingconv)
-tablegen(MBlazeGenSubtargetInfo.inc -gen-subtarget)
-tablegen(MBlazeGenIntrinsics.inc -gen-tgt-intrinsic)
-tablegen(MBlazeGenEDInfo.inc -gen-enhanced-disassembly-info)
+llvm_tablegen(MBlazeGenRegisterInfo.inc -gen-register-info)
+llvm_tablegen(MBlazeGenInstrInfo.inc -gen-instr-info)
+llvm_tablegen(MBlazeGenCodeEmitter.inc -gen-emitter)
+llvm_tablegen(MBlazeGenAsmWriter.inc -gen-asm-writer)
+llvm_tablegen(MBlazeGenAsmMatcher.inc -gen-asm-matcher)
+llvm_tablegen(MBlazeGenDAGISel.inc -gen-dag-isel)
+llvm_tablegen(MBlazeGenCallingConv.inc -gen-callingconv)
+llvm_tablegen(MBlazeGenSubtargetInfo.inc -gen-subtarget)
+llvm_tablegen(MBlazeGenIntrinsics.inc -gen-tgt-intrinsic)
+llvm_tablegen(MBlazeGenEDInfo.inc -gen-enhanced-disassembly-info)
+add_public_tablegen_target(MBlazeCommonTableGen)
add_llvm_target(MBlazeCodeGen
MBlazeDelaySlotFiller.cpp
@@ -24,10 +25,21 @@ add_llvm_target(MBlazeCodeGen
MBlazeIntrinsicInfo.cpp
MBlazeSelectionDAGInfo.cpp
MBlazeAsmPrinter.cpp
- MBlazeAsmBackend.cpp
MBlazeMCInstLower.cpp
MBlazeELFWriterInfo.cpp
- MBlazeMCCodeEmitter.cpp
+ )
+
+add_llvm_library_dependencies(LLVMMBlazeCodeGen
+ LLVMAsmPrinter
+ LLVMCodeGen
+ LLVMCore
+ LLVMMBlazeAsmPrinter
+ LLVMMBlazeDesc
+ LLVMMBlazeInfo
+ LLVMMC
+ LLVMSelectionDAG
+ LLVMSupport
+ LLVMTarget
)
add_subdirectory(AsmParser)
diff --git a/lib/Target/MBlaze/Disassembler/CMakeLists.txt b/lib/Target/MBlaze/Disassembler/CMakeLists.txt
index 9376e68..112c64c 100644
--- a/lib/Target/MBlaze/Disassembler/CMakeLists.txt
+++ b/lib/Target/MBlaze/Disassembler/CMakeLists.txt
@@ -13,4 +13,12 @@ set_property(
)
endif()
-add_dependencies(LLVMMBlazeDisassembler MBlazeCodeGenTable_gen)
+add_llvm_library_dependencies(LLVMMBlazeDisassembler
+ LLVMMBlazeCodeGen
+ LLVMMBlazeDesc
+ LLVMMBlazeInfo
+ LLVMMC
+ LLVMSupport
+ )
+
+add_dependencies(LLVMMBlazeDisassembler MBlazeCommonTableGen)
diff --git a/lib/Target/MBlaze/Disassembler/MBlazeDisassembler.cpp b/lib/Target/MBlaze/Disassembler/MBlazeDisassembler.cpp
index 88d80a1..fd761f1 100644
--- a/lib/Target/MBlaze/Disassembler/MBlazeDisassembler.cpp
+++ b/lib/Target/MBlaze/Disassembler/MBlazeDisassembler.cpp
@@ -20,9 +20,9 @@
#include "llvm/MC/MCDisassembler.h"
#include "llvm/MC/MCDisassembler.h"
#include "llvm/MC/MCInst.h"
-#include "llvm/Target/TargetRegistry.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/MemoryObject.h"
+#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/raw_ostream.h"
// #include "MBlazeGenDecoderTables.inc"
@@ -60,27 +60,27 @@ static unsigned mblazeBinary2Opcode[] = {
};
static unsigned getRD(uint32_t insn) {
- if (!MBlazeRegisterInfo::isRegister((insn>>21)&0x1F))
+ if (!isMBlazeRegister((insn>>21)&0x1F))
return UNSUPPORTED;
- return MBlazeRegisterInfo::getRegisterFromNumbering((insn>>21)&0x1F);
+ return getMBlazeRegisterFromNumbering((insn>>21)&0x1F);
}
static unsigned getRA(uint32_t insn) {
- if (!MBlazeRegisterInfo::getRegisterFromNumbering((insn>>16)&0x1F))
+ if (!getMBlazeRegisterFromNumbering((insn>>16)&0x1F))
return UNSUPPORTED;
- return MBlazeRegisterInfo::getRegisterFromNumbering((insn>>16)&0x1F);
+ return getMBlazeRegisterFromNumbering((insn>>16)&0x1F);
}
static unsigned getRB(uint32_t insn) {
- if (!MBlazeRegisterInfo::getRegisterFromNumbering((insn>>11)&0x1F))
+ if (!getMBlazeRegisterFromNumbering((insn>>11)&0x1F))
return UNSUPPORTED;
- return MBlazeRegisterInfo::getRegisterFromNumbering((insn>>11)&0x1F);
+ return getMBlazeRegisterFromNumbering((insn>>11)&0x1F);
}
static int64_t getRS(uint32_t insn) {
- if (!MBlazeRegisterInfo::isSpecialRegister(insn&0x3FFF))
+ if (!isSpecialMBlazeRegister(insn&0x3FFF))
return UNSUPPORTED;
- return MBlazeRegisterInfo::getSpecialRegisterFromNumbering(insn&0x3FFF);
+ return getSpecialMBlazeRegisterFromNumbering(insn&0x3FFF);
}
static int64_t getIMM(uint32_t insn) {
@@ -493,11 +493,12 @@ EDInstInfo *MBlazeDisassembler::getEDInfo() const {
// Public interface for the disassembler
//
-bool MBlazeDisassembler::getInstruction(MCInst &instr,
+MCDisassembler::DecodeStatus MBlazeDisassembler::getInstruction(MCInst &instr,
uint64_t &size,
const MemoryObject &region,
uint64_t address,
- raw_ostream &vStream) const {
+ raw_ostream &vStream,
+ raw_ostream &cStream) const {
// The machine instruction.
uint32_t insn;
uint64_t read;
@@ -508,7 +509,7 @@ bool MBlazeDisassembler::getInstruction(MCInst &instr,
// We want to read exactly 4 bytes of data.
if (region.readBytes(address, 4, (uint8_t*)bytes, &read) == -1 || read < 4)
- return false;
+ return Fail;
// Encoded as a big-endian 32-bit word in the stream.
insn = (bytes[0]<<24) | (bytes[1]<<16) | (bytes[2]<< 8) | (bytes[3]<<0);
@@ -517,7 +518,7 @@ bool MBlazeDisassembler::getInstruction(MCInst &instr,
// that it is a valid instruction.
unsigned opcode = getOPCODE(insn);
if (opcode == UNSUPPORTED)
- return false;
+ return Fail;
instr.setOpcode(opcode);
@@ -529,11 +530,11 @@ bool MBlazeDisassembler::getInstruction(MCInst &instr,
uint64_t tsFlags = MBlazeInsts[opcode].TSFlags;
switch ((tsFlags & MBlazeII::FormMask)) {
default:
- return false;
+ return Fail;
case MBlazeII::FRRRR:
if (RD == UNSUPPORTED || RA == UNSUPPORTED || RB == UNSUPPORTED)
- return false;
+ return Fail;
instr.addOperand(MCOperand::CreateReg(RD));
instr.addOperand(MCOperand::CreateReg(RB));
instr.addOperand(MCOperand::CreateReg(RA));
@@ -541,7 +542,7 @@ bool MBlazeDisassembler::getInstruction(MCInst &instr,
case MBlazeII::FRRR:
if (RD == UNSUPPORTED || RA == UNSUPPORTED || RB == UNSUPPORTED)
- return false;
+ return Fail;
instr.addOperand(MCOperand::CreateReg(RD));
instr.addOperand(MCOperand::CreateReg(RA));
instr.addOperand(MCOperand::CreateReg(RB));
@@ -550,23 +551,23 @@ bool MBlazeDisassembler::getInstruction(MCInst &instr,
case MBlazeII::FRI:
switch (opcode) {
default:
- return false;
+ return Fail;
case MBlaze::MFS:
if (RD == UNSUPPORTED)
- return false;
+ return Fail;
instr.addOperand(MCOperand::CreateReg(RD));
instr.addOperand(MCOperand::CreateImm(insn&0x3FFF));
break;
case MBlaze::MTS:
if (RA == UNSUPPORTED)
- return false;
+ return Fail;
instr.addOperand(MCOperand::CreateImm(insn&0x3FFF));
instr.addOperand(MCOperand::CreateReg(RA));
break;
case MBlaze::MSRSET:
case MBlaze::MSRCLR:
if (RD == UNSUPPORTED)
- return false;
+ return Fail;
instr.addOperand(MCOperand::CreateReg(RD));
instr.addOperand(MCOperand::CreateImm(insn&0x7FFF));
break;
@@ -575,7 +576,7 @@ bool MBlazeDisassembler::getInstruction(MCInst &instr,
case MBlazeII::FRRI:
if (RD == UNSUPPORTED || RA == UNSUPPORTED)
- return false;
+ return Fail;
instr.addOperand(MCOperand::CreateReg(RD));
instr.addOperand(MCOperand::CreateReg(RA));
switch (opcode) {
@@ -592,35 +593,35 @@ bool MBlazeDisassembler::getInstruction(MCInst &instr,
case MBlazeII::FCRR:
if (RA == UNSUPPORTED || RB == UNSUPPORTED)
- return false;
+ return Fail;
instr.addOperand(MCOperand::CreateReg(RA));
instr.addOperand(MCOperand::CreateReg(RB));
break;
case MBlazeII::FCRI:
if (RA == UNSUPPORTED)
- return false;
+ return Fail;
instr.addOperand(MCOperand::CreateReg(RA));
instr.addOperand(MCOperand::CreateImm(getIMM(insn)));
break;
case MBlazeII::FRCR:
if (RD == UNSUPPORTED || RB == UNSUPPORTED)
- return false;
+ return Fail;
instr.addOperand(MCOperand::CreateReg(RD));
instr.addOperand(MCOperand::CreateReg(RB));
break;
case MBlazeII::FRCI:
if (RD == UNSUPPORTED)
- return false;
+ return Fail;
instr.addOperand(MCOperand::CreateReg(RD));
instr.addOperand(MCOperand::CreateImm(getIMM(insn)));
break;
case MBlazeII::FCCR:
if (RB == UNSUPPORTED)
- return false;
+ return Fail;
instr.addOperand(MCOperand::CreateReg(RB));
break;
@@ -630,7 +631,7 @@ bool MBlazeDisassembler::getInstruction(MCInst &instr,
case MBlazeII::FRRCI:
if (RD == UNSUPPORTED || RA == UNSUPPORTED)
- return false;
+ return Fail;
instr.addOperand(MCOperand::CreateReg(RD));
instr.addOperand(MCOperand::CreateReg(RA));
instr.addOperand(MCOperand::CreateImm(getSHT(insn)));
@@ -638,35 +639,35 @@ bool MBlazeDisassembler::getInstruction(MCInst &instr,
case MBlazeII::FRRC:
if (RD == UNSUPPORTED || RA == UNSUPPORTED)
- return false;
+ return Fail;
instr.addOperand(MCOperand::CreateReg(RD));
instr.addOperand(MCOperand::CreateReg(RA));
break;
case MBlazeII::FRCX:
if (RD == UNSUPPORTED)
- return false;
+ return Fail;
instr.addOperand(MCOperand::CreateReg(RD));
instr.addOperand(MCOperand::CreateImm(getFSL(insn)));
break;
case MBlazeII::FRCS:
if (RD == UNSUPPORTED || RS == UNSUPPORTED)
- return false;
+ return Fail;
instr.addOperand(MCOperand::CreateReg(RD));
instr.addOperand(MCOperand::CreateReg(RS));
break;
case MBlazeII::FCRCS:
if (RS == UNSUPPORTED || RA == UNSUPPORTED)
- return false;
+ return Fail;
instr.addOperand(MCOperand::CreateReg(RS));
instr.addOperand(MCOperand::CreateReg(RA));
break;
case MBlazeII::FCRCX:
if (RA == UNSUPPORTED)
- return false;
+ return Fail;
instr.addOperand(MCOperand::CreateReg(RA));
instr.addOperand(MCOperand::CreateImm(getFSL(insn)));
break;
@@ -677,13 +678,13 @@ bool MBlazeDisassembler::getInstruction(MCInst &instr,
case MBlazeII::FCR:
if (RB == UNSUPPORTED)
- return false;
+ return Fail;
instr.addOperand(MCOperand::CreateReg(RB));
break;
case MBlazeII::FRIR:
if (RD == UNSUPPORTED || RA == UNSUPPORTED)
- return false;
+ return Fail;
instr.addOperand(MCOperand::CreateReg(RD));
instr.addOperand(MCOperand::CreateImm(getIMM(insn)));
instr.addOperand(MCOperand::CreateReg(RA));
@@ -693,11 +694,12 @@ bool MBlazeDisassembler::getInstruction(MCInst &instr,
// We always consume 4 bytes of data on success
size = 4;
- return true;
+ return Success;
}
-static MCDisassembler *createMBlazeDisassembler(const Target &T) {
- return new MBlazeDisassembler;
+static MCDisassembler *createMBlazeDisassembler(const Target &T,
+ const MCSubtargetInfo &STI) {
+ return new MBlazeDisassembler(STI);
}
extern "C" void LLVMInitializeMBlazeDisassembler() {
diff --git a/lib/Target/MBlaze/Disassembler/MBlazeDisassembler.h b/lib/Target/MBlaze/Disassembler/MBlazeDisassembler.h
index d05eced..0ac0d89 100644
--- a/lib/Target/MBlaze/Disassembler/MBlazeDisassembler.h
+++ b/lib/Target/MBlaze/Disassembler/MBlazeDisassembler.h
@@ -32,19 +32,20 @@ class MBlazeDisassembler : public MCDisassembler {
public:
/// Constructor - Initializes the disassembler.
///
- MBlazeDisassembler() :
- MCDisassembler() {
+ MBlazeDisassembler(const MCSubtargetInfo &STI) :
+ MCDisassembler(STI) {
}
~MBlazeDisassembler() {
}
/// getInstruction - See MCDisassembler.
- bool getInstruction(MCInst &instr,
+ MCDisassembler::DecodeStatus getInstruction(MCInst &instr,
uint64_t &size,
const MemoryObject &region,
uint64_t address,
- raw_ostream &vStream) const;
+ raw_ostream &vStream,
+ raw_ostream &cStream) const;
/// getEDInfo - See MCDisassembler.
EDInstInfo *getEDInfo() const;
diff --git a/lib/Target/MBlaze/InstPrinter/CMakeLists.txt b/lib/Target/MBlaze/InstPrinter/CMakeLists.txt
index 242a573..aff0b3d 100644
--- a/lib/Target/MBlaze/InstPrinter/CMakeLists.txt
+++ b/lib/Target/MBlaze/InstPrinter/CMakeLists.txt
@@ -2,7 +2,12 @@ include_directories( ${CMAKE_CURRENT_BINARY_DIR}/..
${CMAKE_CURRENT_SOURCE_DIR}/.. )
add_llvm_library(LLVMMBlazeAsmPrinter
- MBlazeInstPrinter.cpp
+ MBlazeInstPrinter.cpp
)
-add_dependencies(LLVMMBlazeAsmPrinter MBlazeCodeGenTable_gen)
+add_llvm_library_dependencies(LLVMMBlazeAsmPrinter
+ LLVMMC
+ LLVMSupport
+ )
+
+add_dependencies(LLVMMBlazeAsmPrinter MBlazeCommonTableGen)
diff --git a/lib/Target/MBlaze/InstPrinter/MBlazeInstPrinter.cpp b/lib/Target/MBlaze/InstPrinter/MBlazeInstPrinter.cpp
index a7fd287..a1f1dbc 100644
--- a/lib/Target/MBlaze/InstPrinter/MBlazeInstPrinter.cpp
+++ b/lib/Target/MBlaze/InstPrinter/MBlazeInstPrinter.cpp
@@ -25,8 +25,10 @@ using namespace llvm;
// Include the auto-generated portion of the assembly writer.
#include "MBlazeGenAsmWriter.inc"
-void MBlazeInstPrinter::printInst(const MCInst *MI, raw_ostream &O) {
+void MBlazeInstPrinter::printInst(const MCInst *MI, raw_ostream &O,
+ StringRef Annot) {
printInstruction(MI, O);
+ printAnnotation(O, Annot);
}
void MBlazeInstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
diff --git a/lib/Target/MBlaze/InstPrinter/MBlazeInstPrinter.h b/lib/Target/MBlaze/InstPrinter/MBlazeInstPrinter.h
index eacca41..570ab08 100644
--- a/lib/Target/MBlaze/InstPrinter/MBlazeInstPrinter.h
+++ b/lib/Target/MBlaze/InstPrinter/MBlazeInstPrinter.h
@@ -24,7 +24,7 @@ namespace llvm {
MBlazeInstPrinter(const MCAsmInfo &MAI)
: MCInstPrinter(MAI) {}
- virtual void printInst(const MCInst *MI, raw_ostream &O);
+ virtual void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot);
// Autogenerated by tblgen.
void printInstruction(const MCInst *MI, raw_ostream &O);
diff --git a/lib/Target/MBlaze/MBlaze.h b/lib/Target/MBlaze/MBlaze.h
index 3390794..1399b85 100644
--- a/lib/Target/MBlaze/MBlaze.h
+++ b/lib/Target/MBlaze/MBlaze.h
@@ -15,6 +15,7 @@
#ifndef TARGET_MBLAZE_H
#define TARGET_MBLAZE_H
+#include "MCTargetDesc/MBlazeBaseInfo.h"
#include "MCTargetDesc/MBlazeMCTargetDesc.h"
#include "llvm/Target/TargetMachine.h"
@@ -22,17 +23,6 @@ namespace llvm {
class MBlazeTargetMachine;
class FunctionPass;
class MachineCodeEmitter;
- class MCCodeEmitter;
- class MCInstrInfo;
- class MCSubtargetInfo;
- class TargetAsmBackend;
- class formatted_raw_ostream;
-
- MCCodeEmitter *createMBlazeMCCodeEmitter(const MCInstrInfo &MCII,
- const MCSubtargetInfo &STI,
- MCContext &Ctx);
-
- TargetAsmBackend *createMBlazeAsmBackend(const Target &, const std::string &);
FunctionPass *createMBlazeISelDag(MBlazeTargetMachine &TM);
FunctionPass *createMBlazeDelaySlotFillerPass(MBlazeTargetMachine &TM);
diff --git a/lib/Target/MBlaze/MBlazeAsmPrinter.cpp b/lib/Target/MBlaze/MBlazeAsmPrinter.cpp
index 0016df5..97bd083 100644
--- a/lib/Target/MBlaze/MBlazeAsmPrinter.cpp
+++ b/lib/Target/MBlaze/MBlazeAsmPrinter.cpp
@@ -38,10 +38,10 @@
#include "llvm/Target/TargetLoweringObjectFile.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
-#include "llvm/Target/TargetRegistry.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/raw_ostream.h"
#include <cctype>
@@ -136,19 +136,17 @@ void MBlazeAsmPrinter::printSavedRegsBitmask() {
const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo();
for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
unsigned Reg = CSI[i].getReg();
- unsigned RegNum = MBlazeRegisterInfo::getRegisterNumbering(Reg);
+ unsigned RegNum = getMBlazeRegisterNumbering(Reg);
if (MBlaze::GPRRegisterClass->contains(Reg))
CPUBitmask |= (1 << RegNum);
}
// Return Address and Frame registers must also be set in CPUBitmask.
if (TFI->hasFP(*MF))
- CPUBitmask |= (1 << MBlazeRegisterInfo::
- getRegisterNumbering(RI.getFrameRegister(*MF)));
+ CPUBitmask |= (1 << getMBlazeRegisterNumbering(RI.getFrameRegister(*MF)));
if (MFI->adjustsStack())
- CPUBitmask |= (1 << MBlazeRegisterInfo::
- getRegisterNumbering(RI.getRARegister()));
+ CPUBitmask |= (1 << getMBlazeRegisterNumbering(RI.getRARegister()));
// Print CPUBitmask
OutStreamer.EmitRawText("\t.mask\t0x" + Twine::utohexstr(CPUBitmask));
@@ -318,18 +316,7 @@ isBlockOnlyReachableByFallthrough(const MachineBasicBlock *MBB) const {
return I == Pred->end() || !I->getDesc().isBarrier();
}
-static MCInstPrinter *createMBlazeMCInstPrinter(const Target &T,
- unsigned SyntaxVariant,
- const MCAsmInfo &MAI) {
- if (SyntaxVariant == 0)
- return new MBlazeInstPrinter(MAI);
- return 0;
-}
-
// Force static initialization.
extern "C" void LLVMInitializeMBlazeAsmPrinter() {
RegisterAsmPrinter<MBlazeAsmPrinter> X(TheMBlazeTarget);
- TargetRegistry::RegisterMCInstPrinter(TheMBlazeTarget,
- createMBlazeMCInstPrinter);
-
}
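
The instruction-printer factory deleted here does not go away; with the MC
split it is registered from the target's MC layer instead (for MBlaze,
presumably MBlazeMCTargetDesc.cpp). A sketch of the shape, reusing the removed
factory verbatim:

#include "InstPrinter/MBlazeInstPrinter.h" // assumed include path
#include "llvm/Support/TargetRegistry.h"
using namespace llvm;

// Same factory as the one removed above, now owned by the MC layer.
static MCInstPrinter *createMBlazeMCInstPrinter(const Target &T,
                                                unsigned SyntaxVariant,
                                                const MCAsmInfo &MAI) {
  if (SyntaxVariant == 0)
    return new MBlazeInstPrinter(MAI);
  return 0;
}

// Registered once from the MC initializer rather than the AsmPrinter:
//   TargetRegistry::RegisterMCInstPrinter(TheMBlazeTarget,
//                                         createMBlazeMCInstPrinter);
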
diff --git a/lib/Target/MBlaze/MBlazeFrameLowering.cpp b/lib/Target/MBlaze/MBlazeFrameLowering.cpp
index e763902..f28d5a7 100644
--- a/lib/Target/MBlaze/MBlazeFrameLowering.cpp
+++ b/lib/Target/MBlaze/MBlazeFrameLowering.cpp
@@ -1,4 +1,4 @@
-//=======- MBlazeFrameLowering.cpp - MBlaze Frame Information ------*- C++ -*-====//
+//===- MBlazeFrameLowering.cpp - MBlaze Frame Information ------*- C++ -*-====//
//
// The LLVM Compiler Infrastructure
//
diff --git a/lib/Target/MBlaze/MBlazeISelLowering.cpp b/lib/Target/MBlaze/MBlazeISelLowering.cpp
index 62dfdcc..8ec548f 100644
--- a/lib/Target/MBlaze/MBlazeISelLowering.cpp
+++ b/lib/Target/MBlaze/MBlazeISelLowering.cpp
@@ -59,6 +59,7 @@ MBlazeTargetLowering::MBlazeTargetLowering(MBlazeTargetMachine &TM)
// MBlaze does not have i1 type, so use i32 for
// setcc operations results (slt, sgt, ...).
setBooleanContents(ZeroOrOneBooleanContent);
+ setBooleanVectorContents(ZeroOrOneBooleanContent); // FIXME: Is this correct?
// Set up the register classes
addRegisterClass(MVT::i32, MBlaze::GPRRegisterClass);
@@ -187,7 +188,7 @@ MBlazeTargetLowering::MBlazeTargetLowering(MBlazeTargetMachine &TM)
computeRegisterProperties();
}
-MVT::SimpleValueType MBlazeTargetLowering::getSetCCResultType(EVT VT) const {
+EVT MBlazeTargetLowering::getSetCCResultType(EVT VT) const {
return MVT::i32;
}
@@ -964,13 +965,13 @@ LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
// The last register argument that must be saved is MBlaze::R10
TargetRegisterClass *RC = MBlaze::GPRRegisterClass;
- unsigned Begin = MBlazeRegisterInfo::getRegisterNumbering(MBlaze::R5);
- unsigned Start = MBlazeRegisterInfo::getRegisterNumbering(ArgRegEnd+1);
- unsigned End = MBlazeRegisterInfo::getRegisterNumbering(MBlaze::R10);
+ unsigned Begin = getMBlazeRegisterNumbering(MBlaze::R5);
+ unsigned Start = getMBlazeRegisterNumbering(ArgRegEnd+1);
+ unsigned End = getMBlazeRegisterNumbering(MBlaze::R10);
unsigned StackLoc = Start - Begin + 1;
for (; Start <= End; ++Start, ++StackLoc) {
- unsigned Reg = MBlazeRegisterInfo::getRegisterFromNumbering(Start);
+ unsigned Reg = getMBlazeRegisterFromNumbering(Start);
unsigned LiveReg = MF.addLiveIn(Reg, RC);
SDValue ArgValue = DAG.getCopyFromReg(Chain, dl, LiveReg, MVT::i32);
@@ -1096,7 +1097,7 @@ MBlazeTargetLowering::getSingleConstraintMatchWeight(
// but allow it at the lowest weight.
if (CallOperandVal == NULL)
return CW_Default;
- const Type *type = CallOperandVal->getType();
+ Type *type = CallOperandVal->getType();
// Look at the constraint type.
switch (*constraint) {
default:
diff --git a/lib/Target/MBlaze/MBlazeISelLowering.h b/lib/Target/MBlaze/MBlazeISelLowering.h
index bb128da..8b49bc3 100644
--- a/lib/Target/MBlaze/MBlazeISelLowering.h
+++ b/lib/Target/MBlaze/MBlazeISelLowering.h
@@ -102,7 +102,7 @@ namespace llvm {
virtual const char *getTargetNodeName(unsigned Opcode) const;
/// getSetCCResultType - get the ISD::SETCC result ValueType
- MVT::SimpleValueType getSetCCResultType(EVT VT) const;
+ EVT getSetCCResultType(EVT VT) const;
private:
// Subtarget Info
diff --git a/lib/Target/MBlaze/MBlazeInstrInfo.cpp b/lib/Target/MBlaze/MBlazeInstrInfo.cpp
index 188f10a..7ae05b3 100644
--- a/lib/Target/MBlaze/MBlazeInstrInfo.cpp
+++ b/lib/Target/MBlaze/MBlazeInstrInfo.cpp
@@ -17,9 +17,9 @@
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/ScoreboardHazardRecognizer.h"
-#include "llvm/Target/TargetRegistry.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/TargetRegistry.h"
#include "llvm/ADT/STLExtras.h"
#define GET_INSTRINFO_CTOR
@@ -239,7 +239,8 @@ unsigned MBlazeInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
return 2;
}
-bool MBlazeInstrInfo::ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const {
+bool MBlazeInstrInfo::ReverseBranchCondition(SmallVectorImpl<MachineOperand>
+ &Cond) const {
assert(Cond.size() == 2 && "Invalid MBlaze branch opcode!");
switch (Cond[0].getImm()) {
default: return true;
diff --git a/lib/Target/MBlaze/MBlazeInstrInfo.h b/lib/Target/MBlaze/MBlazeInstrInfo.h
index 79f962b..7174405 100644
--- a/lib/Target/MBlaze/MBlazeInstrInfo.h
+++ b/lib/Target/MBlaze/MBlazeInstrInfo.h
@@ -166,62 +166,6 @@ namespace MBlaze {
}
}
-/// MBlazeII - This namespace holds all of the target specific flags that
-/// instruction info tracks.
-///
-namespace MBlazeII {
- enum {
- // PseudoFrm - This represents an instruction that is a pseudo instruction
- // or one that has not been implemented yet. It is illegal to code generate
- // it, but tolerated for intermediate implementation stages.
- FPseudo = 0,
- FRRR,
- FRRI,
- FCRR,
- FCRI,
- FRCR,
- FRCI,
- FCCR,
- FCCI,
- FRRCI,
- FRRC,
- FRCX,
- FRCS,
- FCRCS,
- FCRCX,
- FCX,
- FCR,
- FRIR,
- FRRRR,
- FRI,
- FC,
- FormMask = 63
-
- //===------------------------------------------------------------------===//
- // MBlaze Specific MachineOperand flags.
- // MO_NO_FLAG,
-
- /// MO_GOT - Represents the offset into the global offset table at which
- /// the address the relocation entry symbol resides during execution.
- // MO_GOT,
-
- /// MO_GOT_CALL - Represents the offset into the global offset table at
- /// which the address of a call site relocation entry symbol resides
- /// during execution. This is different from the above since this flag
- /// can only be present in call instructions.
- // MO_GOT_CALL,
-
- /// MO_GPREL - Represents the offset from the current gp value to be used
- /// for the relocatable object file being produced.
- // MO_GPREL,
-
- /// MO_ABS_HILO - Represents the hi or low part of an absolute symbol
- /// address.
- // MO_ABS_HILO
-
- };
-}
-
class MBlazeInstrInfo : public MBlazeGenInstrInfo {
MBlazeTargetMachine &TM;
const MBlazeRegisterInfo RI;
diff --git a/lib/Target/MBlaze/MBlazeInstrInfo.td b/lib/Target/MBlaze/MBlazeInstrInfo.td
index 950f2d7..1d8c987 100644
--- a/lib/Target/MBlaze/MBlazeInstrInfo.td
+++ b/lib/Target/MBlaze/MBlazeInstrInfo.td
@@ -442,17 +442,19 @@ let Predicates=[HasMul] in {
//===----------------------------------------------------------------------===//
let canFoldAsLoad = 1, isReMaterializable = 1 in {
- def LBU : LoadM<0x30, 0x000, "lbu ">;
- def LBUR : LoadM<0x30, 0x200, "lbur ">;
+ let neverHasSideEffects = 1 in {
+ def LBU : LoadM<0x30, 0x000, "lbu ">;
+ def LBUR : LoadM<0x30, 0x200, "lbur ">;
- def LHU : LoadM<0x31, 0x000, "lhu ">;
- def LHUR : LoadM<0x31, 0x200, "lhur ">;
+ def LHU : LoadM<0x31, 0x000, "lhu ">;
+ def LHUR : LoadM<0x31, 0x200, "lhur ">;
- def LW : LoadM<0x32, 0x000, "lw ">;
- def LWR : LoadM<0x32, 0x200, "lwr ">;
+ def LW : LoadM<0x32, 0x000, "lw ">;
+ def LWR : LoadM<0x32, 0x200, "lwr ">;
- let Defs = [CARRY] in {
- def LWX : LoadM<0x32, 0x400, "lwx ">;
+ let Defs = [CARRY] in {
+ def LWX : LoadM<0x32, 0x400, "lwx ">;
+ }
}
def LBUI : LoadMI<0x38, "lbui ", zextloadi8>;
@@ -877,6 +879,9 @@ def : Pat<(zextloadi8 xaddr:$addr), (i32 (LBU xaddr:$addr))>;
// Peepholes
def : Pat<(store (i32 0), iaddr:$dst), (SWI (i32 R0), iaddr:$dst)>;
+// Atomic fence
+def : Pat<(atomic_fence (imm), (imm)), (MEMBARRIER)>;
+
//===----------------------------------------------------------------------===//
// Floating Point Support
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/MBlaze/MBlazeIntrinsicInfo.cpp b/lib/Target/MBlaze/MBlazeIntrinsicInfo.cpp
index 32d67b2..ea81dd6 100644
--- a/lib/Target/MBlaze/MBlazeIntrinsicInfo.cpp
+++ b/lib/Target/MBlaze/MBlazeIntrinsicInfo.cpp
@@ -37,7 +37,7 @@ namespace mblazeIntrinsic {
#undef GET_LLVM_INTRINSIC_FOR_GCC_BUILTIN
}
-std::string MBlazeIntrinsicInfo::getName(unsigned IntrID, const Type **Tys,
+std::string MBlazeIntrinsicInfo::getName(unsigned IntrID, Type **Tys,
unsigned numTys) const {
static const char *const names[] = {
#define GET_INTRINSIC_NAME_TABLE
@@ -90,8 +90,8 @@ bool MBlazeIntrinsicInfo::isOverloaded(unsigned IntrID) const {
#include "MBlazeGenIntrinsics.inc"
#undef GET_INTRINSIC_ATTRIBUTES
-static const FunctionType *getType(LLVMContext &Context, unsigned id) {
- const Type *ResultTy = NULL;
+static FunctionType *getType(LLVMContext &Context, unsigned id) {
+ Type *ResultTy = NULL;
std::vector<Type*> ArgTys;
bool IsVarArg = false;
@@ -103,7 +103,7 @@ static const FunctionType *getType(LLVMContext &Context, unsigned id) {
}
Function *MBlazeIntrinsicInfo::getDeclaration(Module *M, unsigned IntrID,
- const Type **Tys,
+ Type **Tys,
unsigned numTy) const {
assert(!isOverloaded(IntrID) && "MBlaze intrinsics are not overloaded");
AttrListPtr AList = getAttributes((mblazeIntrinsic::ID) IntrID);
diff --git a/lib/Target/MBlaze/MBlazeIntrinsicInfo.h b/lib/Target/MBlaze/MBlazeIntrinsicInfo.h
index 9804c77..80760d8 100644
--- a/lib/Target/MBlaze/MBlazeIntrinsicInfo.h
+++ b/lib/Target/MBlaze/MBlazeIntrinsicInfo.h
@@ -19,12 +19,12 @@ namespace llvm {
class MBlazeIntrinsicInfo : public TargetIntrinsicInfo {
public:
- std::string getName(unsigned IntrID, const Type **Tys = 0,
+ std::string getName(unsigned IntrID, Type **Tys = 0,
unsigned numTys = 0) const;
unsigned lookupName(const char *Name, unsigned Len) const;
unsigned lookupGCCName(const char *Name) const;
bool isOverloaded(unsigned IID) const;
- Function *getDeclaration(Module *M, unsigned ID, const Type **Tys = 0,
+ Function *getDeclaration(Module *M, unsigned ID, Type **Tys = 0,
unsigned numTys = 0) const;
};
diff --git a/lib/Target/MBlaze/MBlazeRegisterInfo.cpp b/lib/Target/MBlaze/MBlazeRegisterInfo.cpp
index f0b201a..9788ba9 100644
--- a/lib/Target/MBlaze/MBlazeRegisterInfo.cpp
+++ b/lib/Target/MBlaze/MBlazeRegisterInfo.cpp
@@ -25,7 +25,6 @@
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
-#include "llvm/CodeGen/MachineLocation.h"
#include "llvm/Target/TargetFrameLowering.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
@@ -44,164 +43,7 @@ using namespace llvm;
MBlazeRegisterInfo::
MBlazeRegisterInfo(const MBlazeSubtarget &ST, const TargetInstrInfo &tii)
- : MBlazeGenRegisterInfo(), Subtarget(ST), TII(tii) {}
-
-/// getRegisterNumbering - Given the enum value for some register, e.g.
-/// MBlaze::R0, return the number that it corresponds to (e.g. 0).
-unsigned MBlazeRegisterInfo::getRegisterNumbering(unsigned RegEnum) {
- switch (RegEnum) {
- case MBlaze::R0 : return 0;
- case MBlaze::R1 : return 1;
- case MBlaze::R2 : return 2;
- case MBlaze::R3 : return 3;
- case MBlaze::R4 : return 4;
- case MBlaze::R5 : return 5;
- case MBlaze::R6 : return 6;
- case MBlaze::R7 : return 7;
- case MBlaze::R8 : return 8;
- case MBlaze::R9 : return 9;
- case MBlaze::R10 : return 10;
- case MBlaze::R11 : return 11;
- case MBlaze::R12 : return 12;
- case MBlaze::R13 : return 13;
- case MBlaze::R14 : return 14;
- case MBlaze::R15 : return 15;
- case MBlaze::R16 : return 16;
- case MBlaze::R17 : return 17;
- case MBlaze::R18 : return 18;
- case MBlaze::R19 : return 19;
- case MBlaze::R20 : return 20;
- case MBlaze::R21 : return 21;
- case MBlaze::R22 : return 22;
- case MBlaze::R23 : return 23;
- case MBlaze::R24 : return 24;
- case MBlaze::R25 : return 25;
- case MBlaze::R26 : return 26;
- case MBlaze::R27 : return 27;
- case MBlaze::R28 : return 28;
- case MBlaze::R29 : return 29;
- case MBlaze::R30 : return 30;
- case MBlaze::R31 : return 31;
- case MBlaze::RPC : return 0x0000;
- case MBlaze::RMSR : return 0x0001;
- case MBlaze::REAR : return 0x0003;
- case MBlaze::RESR : return 0x0005;
- case MBlaze::RFSR : return 0x0007;
- case MBlaze::RBTR : return 0x000B;
- case MBlaze::REDR : return 0x000D;
- case MBlaze::RPID : return 0x1000;
- case MBlaze::RZPR : return 0x1001;
- case MBlaze::RTLBX : return 0x1002;
- case MBlaze::RTLBLO : return 0x1003;
- case MBlaze::RTLBHI : return 0x1004;
- case MBlaze::RPVR0 : return 0x2000;
- case MBlaze::RPVR1 : return 0x2001;
- case MBlaze::RPVR2 : return 0x2002;
- case MBlaze::RPVR3 : return 0x2003;
- case MBlaze::RPVR4 : return 0x2004;
- case MBlaze::RPVR5 : return 0x2005;
- case MBlaze::RPVR6 : return 0x2006;
- case MBlaze::RPVR7 : return 0x2007;
- case MBlaze::RPVR8 : return 0x2008;
- case MBlaze::RPVR9 : return 0x2009;
- case MBlaze::RPVR10 : return 0x200A;
- case MBlaze::RPVR11 : return 0x200B;
- default: llvm_unreachable("Unknown register number!");
- }
- return 0; // Not reached
-}
-
-/// getRegisterFromNumbering - Given the enum value for some register, e.g.
-/// MBlaze::R0, return the number that it corresponds to (e.g. 0).
-unsigned MBlazeRegisterInfo::getRegisterFromNumbering(unsigned Reg) {
- switch (Reg) {
- case 0 : return MBlaze::R0;
- case 1 : return MBlaze::R1;
- case 2 : return MBlaze::R2;
- case 3 : return MBlaze::R3;
- case 4 : return MBlaze::R4;
- case 5 : return MBlaze::R5;
- case 6 : return MBlaze::R6;
- case 7 : return MBlaze::R7;
- case 8 : return MBlaze::R8;
- case 9 : return MBlaze::R9;
- case 10 : return MBlaze::R10;
- case 11 : return MBlaze::R11;
- case 12 : return MBlaze::R12;
- case 13 : return MBlaze::R13;
- case 14 : return MBlaze::R14;
- case 15 : return MBlaze::R15;
- case 16 : return MBlaze::R16;
- case 17 : return MBlaze::R17;
- case 18 : return MBlaze::R18;
- case 19 : return MBlaze::R19;
- case 20 : return MBlaze::R20;
- case 21 : return MBlaze::R21;
- case 22 : return MBlaze::R22;
- case 23 : return MBlaze::R23;
- case 24 : return MBlaze::R24;
- case 25 : return MBlaze::R25;
- case 26 : return MBlaze::R26;
- case 27 : return MBlaze::R27;
- case 28 : return MBlaze::R28;
- case 29 : return MBlaze::R29;
- case 30 : return MBlaze::R30;
- case 31 : return MBlaze::R31;
- default: llvm_unreachable("Unknown register number!");
- }
- return 0; // Not reached
-}
-
-unsigned MBlazeRegisterInfo::getSpecialRegisterFromNumbering(unsigned Reg) {
- switch (Reg) {
- case 0x0000 : return MBlaze::RPC;
- case 0x0001 : return MBlaze::RMSR;
- case 0x0003 : return MBlaze::REAR;
- case 0x0005 : return MBlaze::RESR;
- case 0x0007 : return MBlaze::RFSR;
- case 0x000B : return MBlaze::RBTR;
- case 0x000D : return MBlaze::REDR;
- case 0x1000 : return MBlaze::RPID;
- case 0x1001 : return MBlaze::RZPR;
- case 0x1002 : return MBlaze::RTLBX;
- case 0x1003 : return MBlaze::RTLBLO;
- case 0x1004 : return MBlaze::RTLBHI;
- case 0x2000 : return MBlaze::RPVR0;
- case 0x2001 : return MBlaze::RPVR1;
- case 0x2002 : return MBlaze::RPVR2;
- case 0x2003 : return MBlaze::RPVR3;
- case 0x2004 : return MBlaze::RPVR4;
- case 0x2005 : return MBlaze::RPVR5;
- case 0x2006 : return MBlaze::RPVR6;
- case 0x2007 : return MBlaze::RPVR7;
- case 0x2008 : return MBlaze::RPVR8;
- case 0x2009 : return MBlaze::RPVR9;
- case 0x200A : return MBlaze::RPVR10;
- case 0x200B : return MBlaze::RPVR11;
- default: llvm_unreachable("Unknown register number!");
- }
- return 0; // Not reached
-}
-
-bool MBlazeRegisterInfo::isRegister(unsigned Reg) {
- return Reg <= 31;
-}
-
-bool MBlazeRegisterInfo::isSpecialRegister(unsigned Reg) {
- switch (Reg) {
- case 0x0000 : case 0x0001 : case 0x0003 : case 0x0005 :
- case 0x0007 : case 0x000B : case 0x000D : case 0x1000 :
- case 0x1001 : case 0x1002 : case 0x1003 : case 0x1004 :
- case 0x2000 : case 0x2001 : case 0x2002 : case 0x2003 :
- case 0x2004 : case 0x2005 : case 0x2006 : case 0x2007 :
- case 0x2008 : case 0x2009 : case 0x200A : case 0x200B :
- return true;
-
- default:
- return false;
- }
- return false; // Not reached
-}
+ : MBlazeGenRegisterInfo(MBlaze::R15), Subtarget(ST), TII(tii) {}
unsigned MBlazeRegisterInfo::getPICCallReg() {
return MBlaze::R20;
@@ -334,10 +176,6 @@ processFunctionBeforeFrameFinalized(MachineFunction &MF) const {
MFI->setObjectOffset(MBlazeFI->getGPFI(), MBlazeFI->getGPStackOffset());
}
-unsigned MBlazeRegisterInfo::getRARegister() const {
- return MBlaze::R15;
-}
-
unsigned MBlazeRegisterInfo::getFrameRegister(const MachineFunction &MF) const {
const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
@@ -353,11 +191,3 @@ unsigned MBlazeRegisterInfo::getEHHandlerRegister() const {
llvm_unreachable("What is the exception handler register");
return 0;
}
-
-int MBlazeRegisterInfo::getDwarfRegNum(unsigned RegNo, bool isEH) const {
- return MBlazeGenRegisterInfo::getDwarfRegNumFull(RegNo,0);
-}
-
-int MBlazeRegisterInfo::getLLVMRegNum(unsigned DwarfRegNo, bool isEH) const {
- return MBlazeGenRegisterInfo::getLLVMRegNumFull(DwarfRegNo,0);
-}
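
Net effect of this file's diff: the hand-written enum/number switches move to
MCTargetDesc/MBlazeBaseInfo.h as free functions, and the return-address
register is handed to the TableGen'd base (MBlazeGenRegisterInfo(MBlaze::R15)),
which then answers getRARegister and the DWARF mappings itself. A sketch of the
two simplest surviving predicates, reconstructed from the removed bodies:

// Free-function form of the old MBlazeRegisterInfo::isRegister: general
// registers are exactly the hardware numbers 0..31.
static inline bool isMBlazeRegister(unsigned Reg) {
  return Reg <= 31;
}

// Free-function form of the old isSpecialRegister: a whitelist of the
// special-purpose register numbers (RPC, RMSR, ..., RPVR11).
static inline bool isSpecialMBlazeRegister(unsigned Reg) {
  switch (Reg) {
  case 0x0000: case 0x0001: case 0x0003: case 0x0005:
  case 0x0007: case 0x000B: case 0x000D: case 0x1000:
  case 0x1001: case 0x1002: case 0x1003: case 0x1004:
  case 0x2000: case 0x2001: case 0x2002: case 0x2003:
  case 0x2004: case 0x2005: case 0x2006: case 0x2007:
  case 0x2008: case 0x2009: case 0x200A: case 0x200B:
    return true;
  default:
    return false;
  }
}
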
diff --git a/lib/Target/MBlaze/MBlazeRegisterInfo.h b/lib/Target/MBlaze/MBlazeRegisterInfo.h
index 7ebce21..7e4b269 100644
--- a/lib/Target/MBlaze/MBlazeRegisterInfo.h
+++ b/lib/Target/MBlaze/MBlazeRegisterInfo.h
@@ -42,14 +42,6 @@ struct MBlazeRegisterInfo : public MBlazeGenRegisterInfo {
MBlazeRegisterInfo(const MBlazeSubtarget &Subtarget,
const TargetInstrInfo &tii);
- /// getRegisterNumbering - Given the enum value for some register, e.g.
- /// MBlaze::RA, return the number that it corresponds to (e.g. 31).
- static unsigned getRegisterNumbering(unsigned RegEnum);
- static unsigned getRegisterFromNumbering(unsigned RegEnum);
- static unsigned getSpecialRegisterFromNumbering(unsigned RegEnum);
- static bool isRegister(unsigned RegEnum);
- static bool isSpecialRegister(unsigned RegEnum);
-
/// Get PIC indirect call register
static unsigned getPICCallReg();
@@ -69,15 +61,11 @@ struct MBlazeRegisterInfo : public MBlazeGenRegisterInfo {
void processFunctionBeforeFrameFinalized(MachineFunction &MF) const;
/// Debug information queries.
- unsigned getRARegister() const;
unsigned getFrameRegister(const MachineFunction &MF) const;
/// Exception handling queries.
unsigned getEHExceptionRegister() const;
unsigned getEHHandlerRegister() const;
-
- int getDwarfRegNum(unsigned RegNum, bool isEH) const;
- int getLLVMRegNum(unsigned RegNum, bool isEH) const;
};
} // end namespace llvm
diff --git a/lib/Target/MBlaze/MBlazeSubtarget.cpp b/lib/Target/MBlaze/MBlazeSubtarget.cpp
index eda141d..7e5667f 100644
--- a/lib/Target/MBlaze/MBlazeSubtarget.cpp
+++ b/lib/Target/MBlaze/MBlazeSubtarget.cpp
@@ -15,7 +15,7 @@
#include "MBlaze.h"
#include "MBlazeRegisterInfo.h"
#include "llvm/Support/CommandLine.h"
-#include "llvm/Target/TargetRegistry.h"
+#include "llvm/Support/TargetRegistry.h"
#define GET_SUBTARGETINFO_TARGET_DESC
#define GET_SUBTARGETINFO_CTOR
diff --git a/lib/Target/MBlaze/MBlazeTargetMachine.cpp b/lib/Target/MBlaze/MBlazeTargetMachine.cpp
index 7208874..7bff53e 100644
--- a/lib/Target/MBlaze/MBlazeTargetMachine.cpp
+++ b/lib/Target/MBlaze/MBlazeTargetMachine.cpp
@@ -16,48 +16,13 @@
#include "llvm/PassManager.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/Support/FormattedStream.h"
+#include "llvm/Support/TargetRegistry.h"
#include "llvm/Target/TargetOptions.h"
-#include "llvm/Target/TargetRegistry.h"
using namespace llvm;
-static MCStreamer *createMCStreamer(const Target &T, const std::string &TT,
- MCContext &Ctx, TargetAsmBackend &TAB,
- raw_ostream &_OS,
- MCCodeEmitter *_Emitter,
- bool RelaxAll,
- bool NoExecStack) {
- Triple TheTriple(TT);
-
- if (TheTriple.isOSDarwin()) {
- llvm_unreachable("MBlaze does not support Darwin MACH-O format");
- return NULL;
- }
-
- if (TheTriple.isOSWindows()) {
- llvm_unreachable("MBlaze does not support Windows COFF format");
- return NULL;
- }
-
- return createELFStreamer(Ctx, TAB, _OS, _Emitter, RelaxAll, NoExecStack);
-}
-
-
extern "C" void LLVMInitializeMBlazeTarget() {
// Register the target.
RegisterTargetMachine<MBlazeTargetMachine> X(TheMBlazeTarget);
-
- // Register the MC code emitter
- TargetRegistry::RegisterCodeEmitter(TheMBlazeTarget,
- llvm::createMBlazeMCCodeEmitter);
-
- // Register the asm backend
- TargetRegistry::RegisterAsmBackend(TheMBlazeTarget,
- createMBlazeAsmBackend);
-
- // Register the object streamer
- TargetRegistry::RegisterObjectStreamer(TheMBlazeTarget,
- createMCStreamer);
-
}
// DataLayout --> Big-endian, 32-bit pointer/ABI/alignment
@@ -67,21 +32,16 @@ extern "C" void LLVMInitializeMBlazeTarget() {
// offset from the stack/frame pointer, using StackGrowsUp enables
// an easier handling.
MBlazeTargetMachine::
-MBlazeTargetMachine(const Target &T, const std::string &TT,
- const std::string &CPU, const std::string &FS):
- LLVMTargetMachine(T, TT, CPU, FS),
+MBlazeTargetMachine(const Target &T, StringRef TT,
+ StringRef CPU, StringRef FS,
+ Reloc::Model RM, CodeModel::Model CM):
+ LLVMTargetMachine(T, TT, CPU, FS, RM, CM),
Subtarget(TT, CPU, FS),
DataLayout("E-p:32:32:32-i8:8:8-i16:16:16"),
InstrInfo(*this),
FrameLowering(Subtarget),
TLInfo(*this), TSInfo(*this), ELFWriterInfo(*this),
InstrItins(Subtarget.getInstrItineraryData()) {
- if (getRelocationModel() == Reloc::Default) {
- setRelocationModel(Reloc::Static);
- }
-
- if (getCodeModel() == CodeModel::Default)
- setCodeModel(CodeModel::Small);
}
// Install an instruction selector pass using
diff --git a/lib/Target/MBlaze/MBlazeTargetMachine.h b/lib/Target/MBlaze/MBlazeTargetMachine.h
index cd6caaf..c1bc08a 100644
--- a/lib/Target/MBlaze/MBlazeTargetMachine.h
+++ b/lib/Target/MBlaze/MBlazeTargetMachine.h
@@ -41,8 +41,9 @@ namespace llvm {
InstrItineraryData InstrItins;
public:
- MBlazeTargetMachine(const Target &T, const std::string &TT,
- const std::string &CPU, const std::string &FS);
+ MBlazeTargetMachine(const Target &T, StringRef TT,
+ StringRef CPU, StringRef FS,
+ Reloc::Model RM, CodeModel::Model CM);
virtual const MBlazeInstrInfo *getInstrInfo() const
{ return &InstrInfo; }
diff --git a/lib/Target/MBlaze/MBlazeTargetObjectFile.cpp b/lib/Target/MBlaze/MBlazeTargetObjectFile.cpp
index abd1b0b..f66ea30 100644
--- a/lib/Target/MBlaze/MBlazeTargetObjectFile.cpp
+++ b/lib/Target/MBlaze/MBlazeTargetObjectFile.cpp
@@ -69,7 +69,7 @@ IsGlobalInSmallSection(const GlobalValue *GV, const TargetMachine &TM,
if (Kind.isMergeable1ByteCString())
return false;
- const Type *Ty = GV->getType()->getElementType();
+ Type *Ty = GV->getType()->getElementType();
return IsInSmallSection(TM.getTargetData()->getTypeAllocSize(Ty));
}
diff --git a/lib/Target/MBlaze/MCTargetDesc/CMakeLists.txt b/lib/Target/MBlaze/MCTargetDesc/CMakeLists.txt
index 3d15708..37871b6 100644
--- a/lib/Target/MBlaze/MCTargetDesc/CMakeLists.txt
+++ b/lib/Target/MBlaze/MCTargetDesc/CMakeLists.txt
@@ -1,4 +1,15 @@
add_llvm_library(LLVMMBlazeDesc
- MBlazeMCTargetDesc.cpp
+ MBlazeAsmBackend.cpp
MBlazeMCAsmInfo.cpp
+ MBlazeMCCodeEmitter.cpp
+ MBlazeMCTargetDesc.cpp
+ )
+
+add_llvm_library_dependencies(LLVMMBlazeDesc
+ LLVMMBlazeAsmPrinter
+ LLVMMBlazeInfo
+ LLVMMC
+ LLVMSupport
)
+
+add_dependencies(LLVMMBlazeDesc MBlazeCommonTableGen)
diff --git a/lib/Target/MBlaze/MBlazeAsmBackend.cpp b/lib/Target/MBlaze/MCTargetDesc/MBlazeAsmBackend.cpp
index 08f14c3..08f7d46a 100644
--- a/lib/Target/MBlaze/MBlazeAsmBackend.cpp
+++ b/lib/Target/MBlaze/MCTargetDesc/MBlazeAsmBackend.cpp
@@ -7,10 +7,8 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/Target/TargetAsmBackend.h"
-#include "MBlaze.h"
-#include "MBlazeELFWriterInfo.h"
-#include "llvm/ADT/Twine.h"
+#include "MCTargetDesc/MBlazeMCTargetDesc.h"
+#include "llvm/MC/MCAsmBackend.h"
#include "llvm/MC/MCAssembler.h"
#include "llvm/MC/MCAsmLayout.h"
#include "llvm/MC/MCELFObjectWriter.h"
@@ -20,11 +18,11 @@
#include "llvm/MC/MCSectionELF.h"
#include "llvm/MC/MCSectionMachO.h"
#include "llvm/MC/MCValue.h"
+#include "llvm/ADT/Twine.h"
#include "llvm/Support/ELF.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetRegistry.h"
-#include "llvm/Target/TargetAsmBackend.h"
using namespace llvm;
static unsigned getFixupKindSize(unsigned Kind) {
@@ -48,10 +46,10 @@ public:
/*HasRelocationAddend*/ true) {}
};
-class MBlazeAsmBackend : public TargetAsmBackend {
+class MBlazeAsmBackend : public MCAsmBackend {
public:
MBlazeAsmBackend(const Target &T)
- : TargetAsmBackend() {
+ : MCAsmBackend() {
}
unsigned getNumFixupKinds() const {
@@ -148,8 +146,7 @@ void ELFMBlazeAsmBackend::ApplyFixup(const MCFixup &Fixup, char *Data,
}
} // end anonymous namespace
-TargetAsmBackend *llvm::createMBlazeAsmBackend(const Target &T,
- const std::string &TT) {
+MCAsmBackend *llvm::createMBlazeAsmBackend(const Target &T, StringRef TT) {
Triple TheTriple(TT);
if (TheTriple.isOSDarwin())
diff --git a/lib/Target/MBlaze/MCTargetDesc/MBlazeBaseInfo.h b/lib/Target/MBlaze/MCTargetDesc/MBlazeBaseInfo.h
new file mode 100644
index 0000000..776dbc4
--- /dev/null
+++ b/lib/Target/MBlaze/MCTargetDesc/MBlazeBaseInfo.h
@@ -0,0 +1,240 @@
+//===-- MBlazeBaseInfo.h - Top level definitions for MBlaze ----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains small standalone helper functions and enum definitions for
+// the MBlaze target useful for the compiler back-end and the MC libraries.
+// As such, it deliberately does not include references to LLVM core
+// code gen types, passes, etc.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MBlazeBASEINFO_H
+#define MBlazeBASEINFO_H
+
+#include "MBlazeMCTargetDesc.h"
+#include "llvm/Support/ErrorHandling.h"
+
+namespace llvm {
+
+/// MBlazeII - This namespace holds all of the target specific flags that
+/// instruction info tracks.
+///
+namespace MBlazeII {
+ enum {
+ // FPseudo - This represents an instruction that is a pseudo instruction
+ // or one that has not been implemented yet. It is illegal to code generate
+ // it, but it is tolerated for intermediate implementation stages.
+ FPseudo = 0,
+ FRRR,
+ FRRI,
+ FCRR,
+ FCRI,
+ FRCR,
+ FRCI,
+ FCCR,
+ FCCI,
+ FRRCI,
+ FRRC,
+ FRCX,
+ FRCS,
+ FCRCS,
+ FCRCX,
+ FCX,
+ FCR,
+ FRIR,
+ FRRRR,
+ FRI,
+ FC,
+ FormMask = 63
+
+ //===------------------------------------------------------------------===//
+ // MBlaze Specific MachineOperand flags.
+ // MO_NO_FLAG,
+
+ /// MO_GOT - Represents the offset into the global offset table at which
+ /// the address of the relocation entry symbol resides during execution.
+ // MO_GOT,
+
+ /// MO_GOT_CALL - Represents the offset into the global offset table at
+ /// which the address of a call site relocation entry symbol resides
+ /// during execution. This is different from the above since this flag
+ /// can only be present in call instructions.
+ // MO_GOT_CALL,
+
+ /// MO_GPREL - Represents the offset from the current gp value to be used
+ /// for the relocatable object file being produced.
+ // MO_GPREL,
+
+ /// MO_ABS_HILO - Represents the hi or low part of an absolute symbol
+ /// address.
+ // MO_ABS_HILO
+
+ };
+}
+
+static inline bool isMBlazeRegister(unsigned Reg) {
+ return Reg <= 31;
+}
+
+static inline bool isSpecialMBlazeRegister(unsigned Reg) {
+ switch (Reg) {
+ case 0x0000 : case 0x0001 : case 0x0003 : case 0x0005 :
+ case 0x0007 : case 0x000B : case 0x000D : case 0x1000 :
+ case 0x1001 : case 0x1002 : case 0x1003 : case 0x1004 :
+ case 0x2000 : case 0x2001 : case 0x2002 : case 0x2003 :
+ case 0x2004 : case 0x2005 : case 0x2006 : case 0x2007 :
+ case 0x2008 : case 0x2009 : case 0x200A : case 0x200B :
+ return true;
+
+ default:
+ return false;
+ }
+ return false; // Not reached
+}
+
+/// getMBlazeRegisterNumbering - Given the enum value for some register, e.g.
+/// MBlaze::R0, return the number that it corresponds to (e.g. 0).
+static inline unsigned getMBlazeRegisterNumbering(unsigned RegEnum) {
+ switch (RegEnum) {
+ case MBlaze::R0 : return 0;
+ case MBlaze::R1 : return 1;
+ case MBlaze::R2 : return 2;
+ case MBlaze::R3 : return 3;
+ case MBlaze::R4 : return 4;
+ case MBlaze::R5 : return 5;
+ case MBlaze::R6 : return 6;
+ case MBlaze::R7 : return 7;
+ case MBlaze::R8 : return 8;
+ case MBlaze::R9 : return 9;
+ case MBlaze::R10 : return 10;
+ case MBlaze::R11 : return 11;
+ case MBlaze::R12 : return 12;
+ case MBlaze::R13 : return 13;
+ case MBlaze::R14 : return 14;
+ case MBlaze::R15 : return 15;
+ case MBlaze::R16 : return 16;
+ case MBlaze::R17 : return 17;
+ case MBlaze::R18 : return 18;
+ case MBlaze::R19 : return 19;
+ case MBlaze::R20 : return 20;
+ case MBlaze::R21 : return 21;
+ case MBlaze::R22 : return 22;
+ case MBlaze::R23 : return 23;
+ case MBlaze::R24 : return 24;
+ case MBlaze::R25 : return 25;
+ case MBlaze::R26 : return 26;
+ case MBlaze::R27 : return 27;
+ case MBlaze::R28 : return 28;
+ case MBlaze::R29 : return 29;
+ case MBlaze::R30 : return 30;
+ case MBlaze::R31 : return 31;
+ case MBlaze::RPC : return 0x0000;
+ case MBlaze::RMSR : return 0x0001;
+ case MBlaze::REAR : return 0x0003;
+ case MBlaze::RESR : return 0x0005;
+ case MBlaze::RFSR : return 0x0007;
+ case MBlaze::RBTR : return 0x000B;
+ case MBlaze::REDR : return 0x000D;
+ case MBlaze::RPID : return 0x1000;
+ case MBlaze::RZPR : return 0x1001;
+ case MBlaze::RTLBX : return 0x1002;
+ case MBlaze::RTLBLO : return 0x1003;
+ case MBlaze::RTLBHI : return 0x1004;
+ case MBlaze::RPVR0 : return 0x2000;
+ case MBlaze::RPVR1 : return 0x2001;
+ case MBlaze::RPVR2 : return 0x2002;
+ case MBlaze::RPVR3 : return 0x2003;
+ case MBlaze::RPVR4 : return 0x2004;
+ case MBlaze::RPVR5 : return 0x2005;
+ case MBlaze::RPVR6 : return 0x2006;
+ case MBlaze::RPVR7 : return 0x2007;
+ case MBlaze::RPVR8 : return 0x2008;
+ case MBlaze::RPVR9 : return 0x2009;
+ case MBlaze::RPVR10 : return 0x200A;
+ case MBlaze::RPVR11 : return 0x200B;
+ default: llvm_unreachable("Unknown register number!");
+ }
+ return 0; // Not reached
+}
+
+/// getMBlazeRegisterFromNumbering - Given a register number, e.g. 0, return
+/// the register enum value that it corresponds to (e.g. MBlaze::R0).
+static inline unsigned getMBlazeRegisterFromNumbering(unsigned Reg) {
+ switch (Reg) {
+ case 0 : return MBlaze::R0;
+ case 1 : return MBlaze::R1;
+ case 2 : return MBlaze::R2;
+ case 3 : return MBlaze::R3;
+ case 4 : return MBlaze::R4;
+ case 5 : return MBlaze::R5;
+ case 6 : return MBlaze::R6;
+ case 7 : return MBlaze::R7;
+ case 8 : return MBlaze::R8;
+ case 9 : return MBlaze::R9;
+ case 10 : return MBlaze::R10;
+ case 11 : return MBlaze::R11;
+ case 12 : return MBlaze::R12;
+ case 13 : return MBlaze::R13;
+ case 14 : return MBlaze::R14;
+ case 15 : return MBlaze::R15;
+ case 16 : return MBlaze::R16;
+ case 17 : return MBlaze::R17;
+ case 18 : return MBlaze::R18;
+ case 19 : return MBlaze::R19;
+ case 20 : return MBlaze::R20;
+ case 21 : return MBlaze::R21;
+ case 22 : return MBlaze::R22;
+ case 23 : return MBlaze::R23;
+ case 24 : return MBlaze::R24;
+ case 25 : return MBlaze::R25;
+ case 26 : return MBlaze::R26;
+ case 27 : return MBlaze::R27;
+ case 28 : return MBlaze::R28;
+ case 29 : return MBlaze::R29;
+ case 30 : return MBlaze::R30;
+ case 31 : return MBlaze::R31;
+ default: llvm_unreachable("Unknown register number!");
+ }
+ return 0; // Not reached
+}
+
+static inline unsigned getSpecialMBlazeRegisterFromNumbering(unsigned Reg) {
+ switch (Reg) {
+ case 0x0000 : return MBlaze::RPC;
+ case 0x0001 : return MBlaze::RMSR;
+ case 0x0003 : return MBlaze::REAR;
+ case 0x0005 : return MBlaze::RESR;
+ case 0x0007 : return MBlaze::RFSR;
+ case 0x000B : return MBlaze::RBTR;
+ case 0x000D : return MBlaze::REDR;
+ case 0x1000 : return MBlaze::RPID;
+ case 0x1001 : return MBlaze::RZPR;
+ case 0x1002 : return MBlaze::RTLBX;
+ case 0x1003 : return MBlaze::RTLBLO;
+ case 0x1004 : return MBlaze::RTLBHI;
+ case 0x2000 : return MBlaze::RPVR0;
+ case 0x2001 : return MBlaze::RPVR1;
+ case 0x2002 : return MBlaze::RPVR2;
+ case 0x2003 : return MBlaze::RPVR3;
+ case 0x2004 : return MBlaze::RPVR4;
+ case 0x2005 : return MBlaze::RPVR5;
+ case 0x2006 : return MBlaze::RPVR6;
+ case 0x2007 : return MBlaze::RPVR7;
+ case 0x2008 : return MBlaze::RPVR8;
+ case 0x2009 : return MBlaze::RPVR9;
+ case 0x200A : return MBlaze::RPVR10;
+ case 0x200B : return MBlaze::RPVR11;
+ default: llvm_unreachable("Unknown register number!");
+ }
+ return 0; // Not reached
+}
+
+} // end namespace llvm
+
+#endif
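The helpers above are pure value maps, and for general-purpose registers the
two directions invert each other; note, however, that the special-register
encodings overlap the GPR range (0x0000 is both R0's number and RPC's), so a
caller must know which map applies. A minimal round-trip sketch, assuming
MBlazeBaseInfo.h is on the include path (the function name is illustrative):

    #include "MCTargetDesc/MBlazeBaseInfo.h"
    #include <cassert>
    using namespace llvm;

    // Encode a GPR operand into its 5-bit field and check the round trip.
    static unsigned encodeGPR(unsigned RegEnum) {
      unsigned N = getMBlazeRegisterNumbering(RegEnum); // MBlaze::R20 -> 20
      assert(isMBlazeRegister(N) && "GPR numbers fit in 5 bits");
      assert(getMBlazeRegisterFromNumbering(N) == RegEnum && "round trip");
      return N;
    }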
diff --git a/lib/Target/MBlaze/MBlazeMCCodeEmitter.cpp b/lib/Target/MBlaze/MCTargetDesc/MBlazeMCCodeEmitter.cpp
index ddc636d..1514557 100644
--- a/lib/Target/MBlaze/MBlazeMCCodeEmitter.cpp
+++ b/lib/Target/MBlaze/MCTargetDesc/MBlazeMCCodeEmitter.cpp
@@ -12,11 +12,13 @@
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "mccodeemitter"
-#include "MBlaze.h"
-#include "MBlazeInstrInfo.h"
+#include "MCTargetDesc/MBlazeBaseInfo.h"
+#include "MCTargetDesc/MBlazeMCTargetDesc.h"
#include "llvm/MC/MCCodeEmitter.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/MC/MCFixup.h"
#include "llvm/ADT/Statistic.h"
@@ -106,7 +108,7 @@ MCCodeEmitter *llvm::createMBlazeMCCodeEmitter(const MCInstrInfo &MCII,
unsigned MBlazeMCCodeEmitter::getMachineOpValue(const MCInst &MI,
const MCOperand &MO) const {
if (MO.isReg())
- return MBlazeRegisterInfo::getRegisterNumbering(MO.getReg());
+ return getMBlazeRegisterNumbering(MO.getReg());
else if (MO.isImm())
return static_cast<unsigned>(MO.getImm());
else if (MO.isExpr())
diff --git a/lib/Target/MBlaze/MCTargetDesc/MBlazeMCTargetDesc.cpp b/lib/Target/MBlaze/MCTargetDesc/MBlazeMCTargetDesc.cpp
index 20d6c0b..43ae281 100644
--- a/lib/Target/MBlaze/MCTargetDesc/MBlazeMCTargetDesc.cpp
+++ b/lib/Target/MBlaze/MCTargetDesc/MBlazeMCTargetDesc.cpp
@@ -13,10 +13,14 @@
#include "MBlazeMCTargetDesc.h"
#include "MBlazeMCAsmInfo.h"
+#include "InstPrinter/MBlazeInstPrinter.h"
+#include "llvm/MC/MCCodeGenInfo.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSubtargetInfo.h"
-#include "llvm/Target/TargetRegistry.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/TargetRegistry.h"
#define GET_INSTRINFO_MC_DESC
#include "MBlazeGenInstrInfo.inc"
@@ -36,8 +40,10 @@ static MCInstrInfo *createMBlazeMCInstrInfo() {
return X;
}
-extern "C" void LLVMInitializeMBlazeMCInstrInfo() {
- TargetRegistry::RegisterMCInstrInfo(TheMBlazeTarget, createMBlazeMCInstrInfo);
+static MCRegisterInfo *createMBlazeMCRegisterInfo(StringRef TT) {
+ MCRegisterInfo *X = new MCRegisterInfo();
+ InitMBlazeMCRegisterInfo(X, MBlaze::R15);
+ return X;
}
static MCSubtargetInfo *createMBlazeMCSubtargetInfo(StringRef TT, StringRef CPU,
@@ -47,11 +53,6 @@ static MCSubtargetInfo *createMBlazeMCSubtargetInfo(StringRef TT, StringRef CPU,
return X;
}
-extern "C" void LLVMInitializeMBlazeMCSubtargetInfo() {
- TargetRegistry::RegisterMCSubtargetInfo(TheMBlazeTarget,
- createMBlazeMCSubtargetInfo);
-}
-
static MCAsmInfo *createMCAsmInfo(const Target &T, StringRef TT) {
Triple TheTriple(TT);
switch (TheTriple.getOS()) {
@@ -60,6 +61,80 @@ static MCAsmInfo *createMCAsmInfo(const Target &T, StringRef TT) {
}
}
-extern "C" void LLVMInitializeMBlazeMCAsmInfo() {
+static MCCodeGenInfo *createMBlazeMCCodeGenInfo(StringRef TT, Reloc::Model RM,
+ CodeModel::Model CM) {
+ MCCodeGenInfo *X = new MCCodeGenInfo();
+ if (RM == Reloc::Default)
+ RM = Reloc::Static;
+ if (CM == CodeModel::Default)
+ CM = CodeModel::Small;
+ X->InitMCCodeGenInfo(RM, CM);
+ return X;
+}
+
+static MCStreamer *createMCStreamer(const Target &T, StringRef TT,
+ MCContext &Ctx, MCAsmBackend &MAB,
+ raw_ostream &_OS,
+ MCCodeEmitter *_Emitter,
+ bool RelaxAll,
+ bool NoExecStack) {
+ Triple TheTriple(TT);
+
+ if (TheTriple.isOSDarwin()) {
+ llvm_unreachable("MBlaze does not support Darwin MACH-O format");
+ return NULL;
+ }
+
+ if (TheTriple.isOSWindows()) {
+ llvm_unreachable("MBlaze does not support Windows COFF format");
+ return NULL;
+ }
+
+ return createELFStreamer(Ctx, MAB, _OS, _Emitter, RelaxAll, NoExecStack);
+}
+
+static MCInstPrinter *createMBlazeMCInstPrinter(const Target &T,
+ unsigned SyntaxVariant,
+ const MCAsmInfo &MAI,
+ const MCSubtargetInfo &STI) {
+ if (SyntaxVariant == 0)
+ return new MBlazeInstPrinter(MAI);
+ return 0;
+}
+
+// Force static initialization.
+extern "C" void LLVMInitializeMBlazeTargetMC() {
+ // Register the MC asm info.
RegisterMCAsmInfoFn X(TheMBlazeTarget, createMCAsmInfo);
+
+ // Register the MC codegen info.
+ TargetRegistry::RegisterMCCodeGenInfo(TheMBlazeTarget,
+ createMBlazeMCCodeGenInfo);
+
+ // Register the MC instruction info.
+ TargetRegistry::RegisterMCInstrInfo(TheMBlazeTarget, createMBlazeMCInstrInfo);
+
+ // Register the MC register info.
+ TargetRegistry::RegisterMCRegInfo(TheMBlazeTarget,
+ createMBlazeMCRegisterInfo);
+
+ // Register the MC subtarget info.
+ TargetRegistry::RegisterMCSubtargetInfo(TheMBlazeTarget,
+ createMBlazeMCSubtargetInfo);
+
+ // Register the MC code emitter
+ TargetRegistry::RegisterMCCodeEmitter(TheMBlazeTarget,
+ llvm::createMBlazeMCCodeEmitter);
+
+ // Register the asm backend
+ TargetRegistry::RegisterMCAsmBackend(TheMBlazeTarget,
+ createMBlazeAsmBackend);
+
+ // Register the object streamer
+ TargetRegistry::RegisterMCObjectStreamer(TheMBlazeTarget,
+ createMCStreamer);
+
+ // Register the MCInstPrinter.
+ TargetRegistry::RegisterMCInstPrinter(TheMBlazeTarget,
+ createMBlazeMCInstPrinter);
}
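With every MC component registered by the single LLVMInitializeMBlazeTargetMC()
entry point, clients no longer call per-component initializers. A minimal
sketch of reaching the registered components, assuming the LLVM 3.0-era
TargetRegistry API (the triple string and cleanup are illustrative):

    #include "llvm/Support/TargetRegistry.h"
    #include "llvm/MC/MCInstrInfo.h"
    #include "llvm/MC/MCRegisterInfo.h"
    #include <string>
    extern "C" void LLVMInitializeMBlazeTargetMC();

    void useMBlazeMC() {
      LLVMInitializeMBlazeTargetMC();
      std::string Err;
      const llvm::Target *T = llvm::TargetRegistry::lookupTarget("mblaze", Err);
      if (!T) return; // target not linked in
      llvm::MCInstrInfo *MII = T->createMCInstrInfo();
      llvm::MCRegisterInfo *MRI = T->createMCRegInfo("mblaze");
      // The asm backend, code emitter, and streamer are reached the same way.
      delete MRI;
      delete MII;
    }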
diff --git a/lib/Target/MBlaze/MCTargetDesc/MBlazeMCTargetDesc.h b/lib/Target/MBlaze/MCTargetDesc/MBlazeMCTargetDesc.h
index b14772e..deff5cb 100644
--- a/lib/Target/MBlaze/MCTargetDesc/MBlazeMCTargetDesc.h
+++ b/lib/Target/MBlaze/MCTargetDesc/MBlazeMCTargetDesc.h
@@ -15,12 +15,23 @@
#define MBLAZEMCTARGETDESC_H
namespace llvm {
+class MCAsmBackend;
+class MCContext;
+class MCCodeEmitter;
+class MCInstrInfo;
class MCSubtargetInfo;
class Target;
class StringRef;
+class formatted_raw_ostream;
extern Target TheMBlazeTarget;
+MCCodeEmitter *createMBlazeMCCodeEmitter(const MCInstrInfo &MCII,
+ const MCSubtargetInfo &STI,
+ MCContext &Ctx);
+
+MCAsmBackend *createMBlazeAsmBackend(const Target &T, StringRef TT);
+
} // End llvm namespace
// Defines symbolic names for MBlaze registers. This defines a mapping from
diff --git a/lib/Target/MBlaze/TargetInfo/CMakeLists.txt b/lib/Target/MBlaze/TargetInfo/CMakeLists.txt
index 40696f6..93fce58 100644
--- a/lib/Target/MBlaze/TargetInfo/CMakeLists.txt
+++ b/lib/Target/MBlaze/TargetInfo/CMakeLists.txt
@@ -5,4 +5,10 @@ add_llvm_library(LLVMMBlazeInfo
MBlazeTargetInfo.cpp
)
-add_dependencies(LLVMMBlazeInfo MBlazeCodeGenTable_gen)
+add_llvm_library_dependencies(LLVMMBlazeInfo
+ LLVMMC
+ LLVMSupport
+ LLVMTarget
+ )
+
+add_dependencies(LLVMMBlazeInfo MBlazeCommonTableGen)
diff --git a/lib/Target/MBlaze/TargetInfo/MBlazeTargetInfo.cpp b/lib/Target/MBlaze/TargetInfo/MBlazeTargetInfo.cpp
index 16e01db..71210d8 100644
--- a/lib/Target/MBlaze/TargetInfo/MBlazeTargetInfo.cpp
+++ b/lib/Target/MBlaze/TargetInfo/MBlazeTargetInfo.cpp
@@ -9,7 +9,7 @@
#include "MBlaze.h"
#include "llvm/Module.h"
-#include "llvm/Target/TargetRegistry.h"
+#include "llvm/Support/TargetRegistry.h"
using namespace llvm;
Target llvm::TheMBlazeTarget;
diff --git a/lib/Target/MSP430/CMakeLists.txt b/lib/Target/MSP430/CMakeLists.txt
index 33f3d44..0952b76 100644
--- a/lib/Target/MSP430/CMakeLists.txt
+++ b/lib/Target/MSP430/CMakeLists.txt
@@ -1,11 +1,12 @@
set(LLVM_TARGET_DEFINITIONS MSP430.td)
-tablegen(MSP430GenRegisterInfo.inc -gen-register-info)
-tablegen(MSP430GenInstrInfo.inc -gen-instr-info)
-tablegen(MSP430GenAsmWriter.inc -gen-asm-writer)
-tablegen(MSP430GenDAGISel.inc -gen-dag-isel)
-tablegen(MSP430GenCallingConv.inc -gen-callingconv)
-tablegen(MSP430GenSubtargetInfo.inc -gen-subtarget)
+llvm_tablegen(MSP430GenRegisterInfo.inc -gen-register-info)
+llvm_tablegen(MSP430GenInstrInfo.inc -gen-instr-info)
+llvm_tablegen(MSP430GenAsmWriter.inc -gen-asm-writer)
+llvm_tablegen(MSP430GenDAGISel.inc -gen-dag-isel)
+llvm_tablegen(MSP430GenCallingConv.inc -gen-callingconv)
+llvm_tablegen(MSP430GenSubtargetInfo.inc -gen-subtarget)
+add_public_tablegen_target(MSP430CommonTableGen)
add_llvm_target(MSP430CodeGen
MSP430BranchSelector.cpp
@@ -21,6 +22,19 @@ add_llvm_target(MSP430CodeGen
MSP430MCInstLower.cpp
)
+add_llvm_library_dependencies(LLVMMSP430CodeGen
+ LLVMAsmPrinter
+ LLVMCodeGen
+ LLVMCore
+ LLVMMC
+ LLVMMSP430AsmPrinter
+ LLVMMSP430Desc
+ LLVMMSP430Info
+ LLVMSelectionDAG
+ LLVMSupport
+ LLVMTarget
+ )
+
add_subdirectory(InstPrinter)
add_subdirectory(TargetInfo)
add_subdirectory(MCTargetDesc)
diff --git a/lib/Target/MSP430/InstPrinter/CMakeLists.txt b/lib/Target/MSP430/InstPrinter/CMakeLists.txt
index f5458d5..ce39d95 100644
--- a/lib/Target/MSP430/InstPrinter/CMakeLists.txt
+++ b/lib/Target/MSP430/InstPrinter/CMakeLists.txt
@@ -3,4 +3,10 @@ include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/
add_llvm_library(LLVMMSP430AsmPrinter
MSP430InstPrinter.cpp
)
-add_dependencies(LLVMMSP430AsmPrinter MSP430CodeGenTable_gen)
+
+add_llvm_library_dependencies(LLVMMSP430AsmPrinter
+ LLVMMC
+ LLVMSupport
+ )
+
+add_dependencies(LLVMMSP430AsmPrinter MSP430CommonTableGen)
diff --git a/lib/Target/MSP430/InstPrinter/MSP430InstPrinter.cpp b/lib/Target/MSP430/InstPrinter/MSP430InstPrinter.cpp
index e10d4fe..5d6c6ad 100644
--- a/lib/Target/MSP430/InstPrinter/MSP430InstPrinter.cpp
+++ b/lib/Target/MSP430/InstPrinter/MSP430InstPrinter.cpp
@@ -25,8 +25,10 @@ using namespace llvm;
// Include the auto-generated portion of the assembly writer.
#include "MSP430GenAsmWriter.inc"
-void MSP430InstPrinter::printInst(const MCInst *MI, raw_ostream &O) {
+void MSP430InstPrinter::printInst(const MCInst *MI, raw_ostream &O,
+ StringRef Annot) {
printInstruction(MI, O);
+ printAnnotation(O, Annot);
}
void MSP430InstPrinter::printPCRelImmOperand(const MCInst *MI, unsigned OpNo,
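The extra StringRef parameter threads a streamer-supplied annotation through
printInst(), and printAnnotation() appends it as an end-of-line asm comment.
A minimal usage sketch (the annotation text is illustrative; the real caller
is the MCStreamer asm-printing path):

    #include "MSP430InstPrinter.h"
    #include "llvm/MC/MCInst.h"
    #include "llvm/Support/raw_ostream.h"

    void demo(llvm::MSP430InstPrinter &P, const llvm::MCInst &MI) {
      // Prints the instruction, then the annotation as a trailing comment.
      P.printInst(&MI, llvm::outs(), "imm folded");
    }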
diff --git a/lib/Target/MSP430/InstPrinter/MSP430InstPrinter.h b/lib/Target/MSP430/InstPrinter/MSP430InstPrinter.h
index 50d98b7..a1984a8 100644
--- a/lib/Target/MSP430/InstPrinter/MSP430InstPrinter.h
+++ b/lib/Target/MSP430/InstPrinter/MSP430InstPrinter.h
@@ -22,9 +22,9 @@ namespace llvm {
class MSP430InstPrinter : public MCInstPrinter {
public:
MSP430InstPrinter(const MCAsmInfo &MAI)
- : MCInstPrinter(MAI) {}
+ : MCInstPrinter(MAI) {}
- virtual void printInst(const MCInst *MI, raw_ostream &O);
+ virtual void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot);
// Autogenerated by tblgen.
void printInstruction(const MCInst *MI, raw_ostream &O);
diff --git a/lib/Target/MSP430/MCTargetDesc/CMakeLists.txt b/lib/Target/MSP430/MCTargetDesc/CMakeLists.txt
index 0f3ebd3..04bd03e 100644
--- a/lib/Target/MSP430/MCTargetDesc/CMakeLists.txt
+++ b/lib/Target/MSP430/MCTargetDesc/CMakeLists.txt
@@ -2,3 +2,11 @@ add_llvm_library(LLVMMSP430Desc
MSP430MCTargetDesc.cpp
MSP430MCAsmInfo.cpp
)
+
+add_llvm_library_dependencies(LLVMMSP430Desc
+ LLVMMC
+ LLVMMSP430AsmPrinter
+ LLVMMSP430Info
+ )
+
+add_dependencies(LLVMMSP430Desc MSP430CommonTableGen)
diff --git a/lib/Target/MSP430/MCTargetDesc/MSP430MCTargetDesc.cpp b/lib/Target/MSP430/MCTargetDesc/MSP430MCTargetDesc.cpp
index 43a704d..fda70b8 100644
--- a/lib/Target/MSP430/MCTargetDesc/MSP430MCTargetDesc.cpp
+++ b/lib/Target/MSP430/MCTargetDesc/MSP430MCTargetDesc.cpp
@@ -13,10 +13,12 @@
#include "MSP430MCTargetDesc.h"
#include "MSP430MCAsmInfo.h"
+#include "InstPrinter/MSP430InstPrinter.h"
+#include "llvm/MC/MCCodeGenInfo.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSubtargetInfo.h"
-#include "llvm/Target/TargetRegistry.h"
+#include "llvm/Support/TargetRegistry.h"
#define GET_INSTRINFO_MC_DESC
#include "MSP430GenInstrInfo.inc"
@@ -29,18 +31,18 @@
using namespace llvm;
-
static MCInstrInfo *createMSP430MCInstrInfo() {
MCInstrInfo *X = new MCInstrInfo();
InitMSP430MCInstrInfo(X);
return X;
}
-extern "C" void LLVMInitializeMSP430MCInstrInfo() {
- TargetRegistry::RegisterMCInstrInfo(TheMSP430Target, createMSP430MCInstrInfo);
+static MCRegisterInfo *createMSP430MCRegisterInfo(StringRef TT) {
+ MCRegisterInfo *X = new MCRegisterInfo();
+ InitMSP430MCRegisterInfo(X, MSP430::PCW);
+ return X;
}
-
static MCSubtargetInfo *createMSP430MCSubtargetInfo(StringRef TT, StringRef CPU,
StringRef FS) {
MCSubtargetInfo *X = new MCSubtargetInfo();
@@ -48,11 +50,42 @@ static MCSubtargetInfo *createMSP430MCSubtargetInfo(StringRef TT, StringRef CPU,
return X;
}
-extern "C" void LLVMInitializeMSP430MCSubtargetInfo() {
- TargetRegistry::RegisterMCSubtargetInfo(TheMSP430Target,
- createMSP430MCSubtargetInfo);
+static MCCodeGenInfo *createMSP430MCCodeGenInfo(StringRef TT, Reloc::Model RM,
+ CodeModel::Model CM) {
+ MCCodeGenInfo *X = new MCCodeGenInfo();
+ X->InitMCCodeGenInfo(RM, CM);
+ return X;
+}
+
+static MCInstPrinter *createMSP430MCInstPrinter(const Target &T,
+ unsigned SyntaxVariant,
+ const MCAsmInfo &MAI,
+ const MCSubtargetInfo &STI) {
+ if (SyntaxVariant == 0)
+ return new MSP430InstPrinter(MAI);
+ return 0;
}
-extern "C" void LLVMInitializeMSP430MCAsmInfo() {
+extern "C" void LLVMInitializeMSP430TargetMC() {
+ // Register the MC asm info.
RegisterMCAsmInfo<MSP430MCAsmInfo> X(TheMSP430Target);
+
+ // Register the MC codegen info.
+ TargetRegistry::RegisterMCCodeGenInfo(TheMSP430Target,
+ createMSP430MCCodeGenInfo);
+
+ // Register the MC instruction info.
+ TargetRegistry::RegisterMCInstrInfo(TheMSP430Target, createMSP430MCInstrInfo);
+
+ // Register the MC register info.
+ TargetRegistry::RegisterMCRegInfo(TheMSP430Target,
+ createMSP430MCRegisterInfo);
+
+ // Register the MC subtarget info.
+ TargetRegistry::RegisterMCSubtargetInfo(TheMSP430Target,
+ createMSP430MCSubtargetInfo);
+
+ // Register the MCInstPrinter.
+ TargetRegistry::RegisterMCInstPrinter(TheMSP430Target,
+ createMSP430MCInstPrinter);
}
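Unlike MBlaze above, MSP430 forwards RM and CM to InitMCCodeGenInfo unchanged:
the defaulting that used to sit in each TargetMachine constructor now belongs
in this per-target MCCodeGenInfo hook, and MSP430 simply has no defaults to
impose. A minimal sketch of reaching the hook through the registry, assuming
the LLVM 3.0-era Target API (the function name is illustrative):

    #include "llvm/MC/MCCodeGenInfo.h"
    #include "llvm/Support/TargetRegistry.h"
    extern "C" void LLVMInitializeMSP430TargetMC();

    llvm::MCCodeGenInfo *makeMSP430CGI(const llvm::Target &T) {
      LLVMInitializeMSP430TargetMC();
      // The models passed here are exactly what the MC layer reports back.
      return T.createMCCodeGenInfo("msp430", llvm::Reloc::Static,
                                   llvm::CodeModel::Small);
    }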
diff --git a/lib/Target/MSP430/MCTargetDesc/MSP430MCTargetDesc.h b/lib/Target/MSP430/MCTargetDesc/MSP430MCTargetDesc.h
index 0d8a6bd..35f2590 100644
--- a/lib/Target/MSP430/MCTargetDesc/MSP430MCTargetDesc.h
+++ b/lib/Target/MSP430/MCTargetDesc/MSP430MCTargetDesc.h
@@ -11,8 +11,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef ALPHAMCTARGETDESC_H
-#define ALPHAMCTARGETDESC_H
+#ifndef MSP430MCTARGETDESC_H
+#define MSP430MCTARGETDESC_H
namespace llvm {
class MCSubtargetInfo;
diff --git a/lib/Target/MSP430/MSP430AsmPrinter.cpp b/lib/Target/MSP430/MSP430AsmPrinter.cpp
index 2042056..8836549 100644
--- a/lib/Target/MSP430/MSP430AsmPrinter.cpp
+++ b/lib/Target/MSP430/MSP430AsmPrinter.cpp
@@ -32,9 +32,7 @@
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/Target/Mangler.h"
-#include "llvm/Target/TargetData.h"
-#include "llvm/Target/TargetLoweringObjectFile.h"
-#include "llvm/Target/TargetRegistry.h"
+#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
@@ -163,17 +161,7 @@ void MSP430AsmPrinter::EmitInstruction(const MachineInstr *MI) {
OutStreamer.EmitInstruction(TmpInst);
}
-static MCInstPrinter *createMSP430MCInstPrinter(const Target &T,
- unsigned SyntaxVariant,
- const MCAsmInfo &MAI) {
- if (SyntaxVariant == 0)
- return new MSP430InstPrinter(MAI);
- return 0;
-}
-
// Force static initialization.
extern "C" void LLVMInitializeMSP430AsmPrinter() {
RegisterAsmPrinter<MSP430AsmPrinter> X(TheMSP430Target);
- TargetRegistry::RegisterMCInstPrinter(TheMSP430Target,
- createMSP430MCInstPrinter);
}
diff --git a/lib/Target/MSP430/MSP430ISelLowering.cpp b/lib/Target/MSP430/MSP430ISelLowering.cpp
index 0a3eab1..dc37431 100644
--- a/lib/Target/MSP430/MSP430ISelLowering.cpp
+++ b/lib/Target/MSP430/MSP430ISelLowering.cpp
@@ -79,6 +79,7 @@ MSP430TargetLowering::MSP430TargetLowering(MSP430TargetMachine &tm) :
setStackPointerRegisterToSaveRestore(MSP430::SPW);
setBooleanContents(ZeroOrOneBooleanContent);
+ setBooleanVectorContents(ZeroOrOneBooleanContent); // FIXME: Is this correct?
setSchedulingPreference(Sched::Latency);
// We have post-incremented loads / stores.
@@ -987,8 +988,8 @@ const char *MSP430TargetLowering::getTargetNodeName(unsigned Opcode) const {
}
}
-bool MSP430TargetLowering::isTruncateFree(const Type *Ty1,
- const Type *Ty2) const {
+bool MSP430TargetLowering::isTruncateFree(Type *Ty1,
+ Type *Ty2) const {
if (!Ty1->isIntegerTy() || !Ty2->isIntegerTy())
return false;
@@ -1002,7 +1003,7 @@ bool MSP430TargetLowering::isTruncateFree(EVT VT1, EVT VT2) const {
return (VT1.getSizeInBits() > VT2.getSizeInBits());
}
-bool MSP430TargetLowering::isZExtFree(const Type *Ty1, const Type *Ty2) const {
+bool MSP430TargetLowering::isZExtFree(Type *Ty1, Type *Ty2) const {
// MSP430 implicitly zero-extends 8-bit results in 16-bit registers.
return 0 && Ty1->isIntegerTy(8) && Ty2->isIntegerTy(16);
}
diff --git a/lib/Target/MSP430/MSP430ISelLowering.h b/lib/Target/MSP430/MSP430ISelLowering.h
index bd660a0..237f604 100644
--- a/lib/Target/MSP430/MSP430ISelLowering.h
+++ b/lib/Target/MSP430/MSP430ISelLowering.h
@@ -102,7 +102,7 @@ namespace llvm {
/// isTruncateFree - Return true if it's free to truncate a value of type
/// Ty1 to type Ty2. e.g. On msp430 it's free to truncate a i16 value in
/// register R15W to i8 by referencing its sub-register R15B.
- virtual bool isTruncateFree(const Type *Ty1, const Type *Ty2) const;
+ virtual bool isTruncateFree(Type *Ty1, Type *Ty2) const;
virtual bool isTruncateFree(EVT VT1, EVT VT2) const;
/// isZExtFree - Return true if any actual instruction that defines a value
@@ -113,7 +113,7 @@ namespace llvm {
/// necessarily apply to truncate instructions. e.g. on msp430, all
/// instructions that define 8-bit values implicit zero-extend the result
/// out to 16 bits.
- virtual bool isZExtFree(const Type *Ty1, const Type *Ty2) const;
+ virtual bool isZExtFree(Type *Ty1, Type *Ty2) const;
virtual bool isZExtFree(EVT VT1, EVT VT2) const;
MachineBasicBlock* EmitInstrWithCustomInserter(MachineInstr *MI,
diff --git a/lib/Target/MSP430/MSP430InstrInfo.cpp b/lib/Target/MSP430/MSP430InstrInfo.cpp
index 846d093..ffd4318 100644
--- a/lib/Target/MSP430/MSP430InstrInfo.cpp
+++ b/lib/Target/MSP430/MSP430InstrInfo.cpp
@@ -20,8 +20,8 @@
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/PseudoSourceValue.h"
-#include "llvm/Target/TargetRegistry.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/TargetRegistry.h"
#define GET_INSTRINFO_CTOR
#include "MSP430GenInstrInfo.inc"
diff --git a/lib/Target/MSP430/MSP430RegisterInfo.cpp b/lib/Target/MSP430/MSP430RegisterInfo.cpp
index 1cc60bb..9049c4b 100644
--- a/lib/Target/MSP430/MSP430RegisterInfo.cpp
+++ b/lib/Target/MSP430/MSP430RegisterInfo.cpp
@@ -34,7 +34,7 @@ using namespace llvm;
// FIXME: Provide proper call frame setup / destroy opcodes.
MSP430RegisterInfo::MSP430RegisterInfo(MSP430TargetMachine &tm,
const TargetInstrInfo &tii)
- : MSP430GenRegisterInfo(), TM(tm), TII(tii) {
+ : MSP430GenRegisterInfo(MSP430::PCW), TM(tm), TII(tii) {
StackAlign = TM.getFrameLowering()->getStackAlignment();
}
@@ -233,22 +233,8 @@ MSP430RegisterInfo::processFunctionBeforeFrameFinalized(MachineFunction &MF)
}
}
-unsigned MSP430RegisterInfo::getRARegister() const {
- return MSP430::PCW;
-}
-
unsigned MSP430RegisterInfo::getFrameRegister(const MachineFunction &MF) const {
const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
return TFI->hasFP(MF) ? MSP430::FPW : MSP430::SPW;
}
-
-int MSP430RegisterInfo::getDwarfRegNum(unsigned RegNum, bool isEH) const {
- llvm_unreachable("Not implemented yet!");
- return 0;
-}
-
-int MSP430RegisterInfo::getLLVMRegNum(unsigned RegNum, bool isEH) const {
- llvm_unreachable("Not implemented yet!");
- return 0;
-}
diff --git a/lib/Target/MSP430/MSP430RegisterInfo.h b/lib/Target/MSP430/MSP430RegisterInfo.h
index fb70594..10a3d53 100644
--- a/lib/Target/MSP430/MSP430RegisterInfo.h
+++ b/lib/Target/MSP430/MSP430RegisterInfo.h
@@ -58,12 +58,7 @@ public:
void processFunctionBeforeFrameFinalized(MachineFunction &MF) const;
// Debug information queries.
- unsigned getRARegister() const;
unsigned getFrameRegister(const MachineFunction &MF) const;
-
- //! Get DWARF debugging register number
- int getDwarfRegNum(unsigned RegNum, bool isEH) const;
- int getLLVMRegNum(unsigned RegNum, bool isEH) const;
};
} // end namespace llvm
diff --git a/lib/Target/MSP430/MSP430Subtarget.cpp b/lib/Target/MSP430/MSP430Subtarget.cpp
index b58c50a..3ee14d9 100644
--- a/lib/Target/MSP430/MSP430Subtarget.cpp
+++ b/lib/Target/MSP430/MSP430Subtarget.cpp
@@ -13,7 +13,7 @@
#include "MSP430Subtarget.h"
#include "MSP430.h"
-#include "llvm/Target/TargetRegistry.h"
+#include "llvm/Support/TargetRegistry.h"
#define GET_SUBTARGETINFO_TARGET_DESC
#define GET_SUBTARGETINFO_CTOR
diff --git a/lib/Target/MSP430/MSP430TargetMachine.cpp b/lib/Target/MSP430/MSP430TargetMachine.cpp
index 971f512..4dd8933 100644
--- a/lib/Target/MSP430/MSP430TargetMachine.cpp
+++ b/lib/Target/MSP430/MSP430TargetMachine.cpp
@@ -16,7 +16,7 @@
#include "llvm/PassManager.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/MC/MCAsmInfo.h"
-#include "llvm/Target/TargetRegistry.h"
+#include "llvm/Support/TargetRegistry.h"
using namespace llvm;
extern "C" void LLVMInitializeMSP430Target() {
@@ -25,10 +25,11 @@ extern "C" void LLVMInitializeMSP430Target() {
}
MSP430TargetMachine::MSP430TargetMachine(const Target &T,
- const std::string &TT,
- const std::string &CPU,
- const std::string &FS)
- : LLVMTargetMachine(T, TT, CPU, FS),
+ StringRef TT,
+ StringRef CPU,
+ StringRef FS,
+ Reloc::Model RM, CodeModel::Model CM)
+ : LLVMTargetMachine(T, TT, CPU, FS, RM, CM),
Subtarget(TT, CPU, FS),
// FIXME: Check TargetData string.
DataLayout("e-p:16:16:16-i8:8:8-i16:16:16-i32:16:32-n8:16"),
diff --git a/lib/Target/MSP430/MSP430TargetMachine.h b/lib/Target/MSP430/MSP430TargetMachine.h
index 2a9eea0..eb483dc 100644
--- a/lib/Target/MSP430/MSP430TargetMachine.h
+++ b/lib/Target/MSP430/MSP430TargetMachine.h
@@ -38,8 +38,9 @@ class MSP430TargetMachine : public LLVMTargetMachine {
MSP430FrameLowering FrameLowering;
public:
- MSP430TargetMachine(const Target &T, const std::string &TT,
- const std::string &CPU, const std::string &FS);
+ MSP430TargetMachine(const Target &T, StringRef TT,
+ StringRef CPU, StringRef FS,
+ Reloc::Model RM, CodeModel::Model CM);
virtual const TargetFrameLowering *getFrameLowering() const {
return &FrameLowering;
diff --git a/lib/Target/MSP430/TargetInfo/CMakeLists.txt b/lib/Target/MSP430/TargetInfo/CMakeLists.txt
index 2d1aa9d..1526946 100644
--- a/lib/Target/MSP430/TargetInfo/CMakeLists.txt
+++ b/lib/Target/MSP430/TargetInfo/CMakeLists.txt
@@ -4,4 +4,10 @@ add_llvm_library(LLVMMSP430Info
MSP430TargetInfo.cpp
)
-add_dependencies(LLVMMSP430Info MSP430CodeGenTable_gen)
+add_llvm_library_dependencies(LLVMMSP430Info
+ LLVMMC
+ LLVMSupport
+ LLVMTarget
+ )
+
+add_dependencies(LLVMMSP430Info MSP430CommonTableGen)
diff --git a/lib/Target/MSP430/TargetInfo/MSP430TargetInfo.cpp b/lib/Target/MSP430/TargetInfo/MSP430TargetInfo.cpp
index f9ca5c4..8b3e01e 100644
--- a/lib/Target/MSP430/TargetInfo/MSP430TargetInfo.cpp
+++ b/lib/Target/MSP430/TargetInfo/MSP430TargetInfo.cpp
@@ -9,7 +9,7 @@
#include "MSP430.h"
#include "llvm/Module.h"
-#include "llvm/Target/TargetRegistry.h"
+#include "llvm/Support/TargetRegistry.h"
using namespace llvm;
Target llvm::TheMSP430Target;
diff --git a/lib/Target/Mangler.cpp b/lib/Target/Mangler.cpp
index 46c687b..53ad155f 100644
--- a/lib/Target/Mangler.cpp
+++ b/lib/Target/Mangler.cpp
@@ -159,7 +159,7 @@ static void AddFastCallStdCallSuffix(SmallVectorImpl<char> &OutName,
unsigned ArgWords = 0;
for (Function::const_arg_iterator AI = F->arg_begin(), AE = F->arg_end();
AI != AE; ++AI) {
- const Type *Ty = AI->getType();
+ Type *Ty = AI->getType();
// 'Dereference' type in case of byval parameter attribute
if (AI->hasByValAttr())
Ty = cast<PointerType>(Ty)->getElementType();
@@ -214,7 +214,7 @@ void Mangler::getNameWithPrefix(SmallVectorImpl<char> &OutName,
// fastcall and stdcall functions usually need @42 at the end to specify
// the argument info.
- const FunctionType *FT = F->getFunctionType();
+ FunctionType *FT = F->getFunctionType();
if ((CC == CallingConv::X86_FastCall || CC == CallingConv::X86_StdCall) &&
// "Pure" variadic functions do not receive @0 suffix.
(!FT->isVarArg() || FT->getNumParams() == 0 ||
diff --git a/lib/Target/Mips/CMakeLists.txt b/lib/Target/Mips/CMakeLists.txt
index 36ab1a9..1b4329b 100644
--- a/lib/Target/Mips/CMakeLists.txt
+++ b/lib/Target/Mips/CMakeLists.txt
@@ -1,17 +1,20 @@
set(LLVM_TARGET_DEFINITIONS Mips.td)
-tablegen(MipsGenRegisterInfo.inc -gen-register-info)
-tablegen(MipsGenInstrInfo.inc -gen-instr-info)
-tablegen(MipsGenAsmWriter.inc -gen-asm-writer)
-tablegen(MipsGenDAGISel.inc -gen-dag-isel)
-tablegen(MipsGenCallingConv.inc -gen-callingconv)
-tablegen(MipsGenSubtargetInfo.inc -gen-subtarget)
+llvm_tablegen(MipsGenRegisterInfo.inc -gen-register-info)
+llvm_tablegen(MipsGenInstrInfo.inc -gen-instr-info)
+llvm_tablegen(MipsGenAsmWriter.inc -gen-asm-writer)
+llvm_tablegen(MipsGenDAGISel.inc -gen-dag-isel)
+llvm_tablegen(MipsGenCallingConv.inc -gen-callingconv)
+llvm_tablegen(MipsGenSubtargetInfo.inc -gen-subtarget)
+add_public_tablegen_target(MipsCommonTableGen)
add_llvm_target(MipsCodeGen
MipsAsmPrinter.cpp
+ MipsCodeEmitter.cpp
MipsDelaySlotFiller.cpp
MipsEmitGPRestore.cpp
MipsExpandPseudo.cpp
+ MipsJITInfo.cpp
MipsInstrInfo.cpp
MipsISelDAGToDAG.cpp
MipsISelLowering.cpp
@@ -25,6 +28,19 @@ add_llvm_target(MipsCodeGen
MipsSelectionDAGInfo.cpp
)
+add_llvm_library_dependencies(LLVMMipsCodeGen
+ LLVMAsmPrinter
+ LLVMCodeGen
+ LLVMCore
+ LLVMMC
+ LLVMMipsAsmPrinter
+ LLVMMipsDesc
+ LLVMMipsInfo
+ LLVMSelectionDAG
+ LLVMSupport
+ LLVMTarget
+ )
+
add_subdirectory(InstPrinter)
add_subdirectory(TargetInfo)
add_subdirectory(MCTargetDesc)
diff --git a/lib/Target/Mips/InstPrinter/CMakeLists.txt b/lib/Target/Mips/InstPrinter/CMakeLists.txt
index 8852fd4..c45b35d 100644
--- a/lib/Target/Mips/InstPrinter/CMakeLists.txt
+++ b/lib/Target/Mips/InstPrinter/CMakeLists.txt
@@ -3,4 +3,10 @@ include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/
add_llvm_library(LLVMMipsAsmPrinter
MipsInstPrinter.cpp
)
-add_dependencies(LLVMMipsAsmPrinter MipsCodeGenTable_gen)
+
+add_llvm_library_dependencies(LLVMMipsAsmPrinter
+ LLVMMC
+ LLVMSupport
+ )
+
+add_dependencies(LLVMMipsAsmPrinter MipsCommonTableGen)
diff --git a/lib/Target/Mips/InstPrinter/Makefile b/lib/Target/Mips/InstPrinter/Makefile
index 63e38ef..74872a4 100644
--- a/lib/Target/Mips/InstPrinter/Makefile
+++ b/lib/Target/Mips/InstPrinter/Makefile
@@ -10,7 +10,7 @@
LEVEL = ../../../..
LIBRARYNAME = LLVMMipsAsmPrinter
-# Hack: we need to include 'main' arm target directory to grab private headers
+# Hack: we need to include 'main' mips target directory to grab private headers
CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
include $(LEVEL)/Makefile.common
diff --git a/lib/Target/Mips/InstPrinter/MipsInstPrinter.cpp b/lib/Target/Mips/InstPrinter/MipsInstPrinter.cpp
index 41c1dd3..3dafc61 100644
--- a/lib/Target/Mips/InstPrinter/MipsInstPrinter.cpp
+++ b/lib/Target/Mips/InstPrinter/MipsInstPrinter.cpp
@@ -1,4 +1,4 @@
-//===-- MipsInstPrinter.cpp - Convert Mips MCInst to assembly syntax --------===//
+//===-- MipsInstPrinter.cpp - Convert Mips MCInst to assembly syntax ------===//
//
// The LLVM Compiler Infrastructure
//
@@ -69,8 +69,10 @@ void MipsInstPrinter::printRegName(raw_ostream &OS, unsigned RegNo) const {
OS << '$' << LowercaseString(getRegisterName(RegNo));
}
-void MipsInstPrinter::printInst(const MCInst *MI, raw_ostream &O) {
+void MipsInstPrinter::printInst(const MCInst *MI, raw_ostream &O,
+ StringRef Annot) {
printInstruction(MI, O);
+ printAnnotation(O, Annot);
}
void MipsInstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
diff --git a/lib/Target/Mips/InstPrinter/MipsInstPrinter.h b/lib/Target/Mips/InstPrinter/MipsInstPrinter.h
index 680208e..5c11165 100644
--- a/lib/Target/Mips/InstPrinter/MipsInstPrinter.h
+++ b/lib/Target/Mips/InstPrinter/MipsInstPrinter.h
@@ -1,4 +1,4 @@
-//===-- MipsInstPrinter.h - Convert Mips MCInst to assembly syntax ----------===//
+//===-- MipsInstPrinter.h - Convert Mips MCInst to assembly syntax --------===//
//
// The LLVM Compiler Infrastructure
//
@@ -86,7 +86,7 @@ public:
virtual StringRef getOpcodeName(unsigned Opcode) const;
virtual void printRegName(raw_ostream &OS, unsigned RegNo) const;
- virtual void printInst(const MCInst *MI, raw_ostream &O);
+ virtual void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot);
private:
void printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
diff --git a/lib/Target/Mips/MCTargetDesc/CMakeLists.txt b/lib/Target/Mips/MCTargetDesc/CMakeLists.txt
index 97de75d..2ceb5c9 100644
--- a/lib/Target/Mips/MCTargetDesc/CMakeLists.txt
+++ b/lib/Target/Mips/MCTargetDesc/CMakeLists.txt
@@ -1,4 +1,15 @@
add_llvm_library(LLVMMipsDesc
- MipsMCTargetDesc.cpp
+ MipsAsmBackend.cpp
MipsMCAsmInfo.cpp
+ MipsMCCodeEmitter.cpp
+ MipsMCTargetDesc.cpp
+ )
+
+add_llvm_library_dependencies(LLVMMipsDesc
+ LLVMMC
+ LLVMMipsAsmPrinter
+ LLVMMipsInfo
+ LLVMSupport
)
+
+add_dependencies(LLVMMipsDesc MipsCommonTableGen)
diff --git a/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp b/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp
new file mode 100644
index 0000000..f190ec4
--- /dev/null
+++ b/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp
@@ -0,0 +1,117 @@
+#include "MCTargetDesc/MipsMCTargetDesc.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/MC/MCAssembler.h"
+#include "llvm/MC/MCDirectives.h"
+#include "llvm/MC/MCELFObjectWriter.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCMachObjectWriter.h"
+#include "llvm/MC/MCObjectWriter.h"
+#include "llvm/MC/MCSectionELF.h"
+#include "llvm/MC/MCSectionMachO.h"
+#include "llvm/MC/MCAsmBackend.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/Object/MachOFormat.h"
+#include "llvm/Support/ELF.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+namespace {
+class MipsELFObjectWriter : public MCELFObjectTargetWriter {
+public:
+ MipsELFObjectWriter(bool is64Bit, Triple::OSType OSType, uint16_t EMachine,
+ bool HasRelocationAddend)
+ : MCELFObjectTargetWriter(is64Bit, OSType, EMachine,
+ HasRelocationAddend) {}
+};
+
+class MipsAsmBackend : public MCAsmBackend {
+public:
+ MipsAsmBackend(const Target &T)
+ : MCAsmBackend() {}
+
+ unsigned getNumFixupKinds() const {
+ return 1; // TODO: report the real number of Mips fixup kinds.
+ }
+
+ /// ApplyFixup - Apply the \arg Value for the given \arg Fixup into the
+ /// provided data fragment, at the offset specified by the fixup and
+ /// following the fixup kind as appropriate.
+ void ApplyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize,
+ uint64_t Value) const {
+ }
+
+ /// @name Target Relaxation Interfaces
+ /// @{
+
+ /// MayNeedRelaxation - Check whether the given instruction may need
+ /// relaxation.
+ ///
+ /// \param Inst - The instruction to test.
+ bool MayNeedRelaxation(const MCInst &Inst) const {
+ return false;
+ }
+
+ /// RelaxInstruction - Relax the instruction in the given fragment to the next
+ /// wider instruction.
+ ///
+ /// \param Inst - The instruction to relax, which may be the same as the
+ /// output.
+ /// \param Res [output] - On return, the relaxed instruction.
+ void RelaxInstruction(const MCInst &Inst, MCInst &Res) const {
+ }
+
+ /// @}
+
+ /// WriteNopData - Write an (optimal) nop sequence of Count bytes to the given
+ /// output. If the target cannot generate such a sequence, it should return an
+ /// error.
+ ///
+ /// \return - True on success.
+ bool WriteNopData(uint64_t Count, MCObjectWriter *OW) const {
+ return false; // TODO: emit MIPS nops; see the sketch after this file.
+ }
+};
+
+class MipsEB_AsmBackend : public MipsAsmBackend {
+public:
+ Triple::OSType OSType;
+
+ MipsEB_AsmBackend(const Target &T, Triple::OSType _OSType)
+ : MipsAsmBackend(T), OSType(_OSType) {}
+
+ MCObjectWriter *createObjectWriter(raw_ostream &OS) const {
+ return createELFObjectWriter(createELFObjectTargetWriter(),
+ OS, /*IsLittleEndian*/ false);
+ }
+
+ MCELFObjectTargetWriter *createELFObjectTargetWriter() const {
+ return new MipsELFObjectWriter(false, OSType, ELF::EM_MIPS, false);
+ }
+};
+
+class MipsEL_AsmBackend : public MipsAsmBackend {
+public:
+ Triple::OSType OSType;
+
+ MipsEL_AsmBackend(const Target &T, Triple::OSType _OSType)
+ : MipsAsmBackend(T), OSType(_OSType) {}
+
+ MCObjectWriter *createObjectWriter(raw_ostream &OS) const {
+ return createELFObjectWriter(createELFObjectTargetWriter(),
+ OS, /*IsLittleEndian*/ true);
+ }
+
+ MCELFObjectTargetWriter *createELFObjectTargetWriter() const {
+ return new MipsELFObjectWriter(false, OSType, ELF::EM_MIPS, false);
+ }
+};
+}
+
+MCAsmBackend *llvm::createMipsAsmBackend(const Target &T, StringRef TT) {
+ Triple TheTriple(TT);
+
+ // FIXME: select MipsEB_AsmBackend for big-endian triples; assume little
+ // endian for now.
+ return new MipsEL_AsmBackend(T, TheTriple.getOS());
+}
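The WriteNopData stub above returns false unconditionally, so any padding
request currently fails. A minimal sketch of one way to satisfy the contract,
assuming the LLVM 3.0-era MCObjectWriter API (not part of this patch): a MIPS
nop is the all-zero word, so any multiple of four bytes can be zero-filled.

    // In MipsAsmBackend (sketch):
    bool WriteNopData(uint64_t Count, llvm::MCObjectWriter *OW) const {
      if (Count % 4 != 0)
        return false;        // can only pad on instruction boundaries
      for (uint64_t i = 0; i < Count; i += 4)
        OW->Write32(0);      // nop == 0x00000000
      return true;
    }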
diff --git a/lib/Target/Mips/MCTargetDesc/MipsBaseInfo.h b/lib/Target/Mips/MCTargetDesc/MipsBaseInfo.h
new file mode 100644
index 0000000..f7a6fa9
--- /dev/null
+++ b/lib/Target/Mips/MCTargetDesc/MipsBaseInfo.h
@@ -0,0 +1,113 @@
+//===-- MipsBaseInfo.h - Top level definitions for Mips --------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains small standalone helper functions and enum definitions for
+// the Mips target useful for the compiler back-end and the MC libraries.
+//
+//===----------------------------------------------------------------------===//
+#ifndef MIPSBASEINFO_H
+#define MIPSBASEINFO_H
+
+#include "MipsMCTargetDesc.h"
+#include "llvm/Support/DataTypes.h"
+#include "llvm/Support/ErrorHandling.h"
+
+namespace llvm {
+/// getMipsRegisterNumbering - Given the enum value for some register,
+/// return the number that it corresponds to.
+inline static unsigned getMipsRegisterNumbering(unsigned RegEnum)
+{
+ switch (RegEnum) {
+ case Mips::ZERO: case Mips::ZERO_64: case Mips::F0: case Mips::D0_64:
+ case Mips::D0:
+ return 0;
+ case Mips::AT: case Mips::AT_64: case Mips::F1: case Mips::D1_64:
+ return 1;
+ case Mips::V0: case Mips::V0_64: case Mips::F2: case Mips::D2_64:
+ case Mips::D1:
+ return 2;
+ case Mips::V1: case Mips::V1_64: case Mips::F3: case Mips::D3_64:
+ return 3;
+ case Mips::A0: case Mips::A0_64: case Mips::F4: case Mips::D4_64:
+ case Mips::D2:
+ return 4;
+ case Mips::A1: case Mips::A1_64: case Mips::F5: case Mips::D5_64:
+ return 5;
+ case Mips::A2: case Mips::A2_64: case Mips::F6: case Mips::D6_64:
+ case Mips::D3:
+ return 6;
+ case Mips::A3: case Mips::A3_64: case Mips::F7: case Mips::D7_64:
+ return 7;
+ case Mips::T0: case Mips::T0_64: case Mips::F8: case Mips::D8_64:
+ case Mips::D4:
+ return 8;
+ case Mips::T1: case Mips::T1_64: case Mips::F9: case Mips::D9_64:
+ return 9;
+ case Mips::T2: case Mips::T2_64: case Mips::F10: case Mips::D10_64:
+ case Mips::D5:
+ return 10;
+ case Mips::T3: case Mips::T3_64: case Mips::F11: case Mips::D11_64:
+ return 11;
+ case Mips::T4: case Mips::T4_64: case Mips::F12: case Mips::D12_64:
+ case Mips::D6:
+ return 12;
+ case Mips::T5: case Mips::T5_64: case Mips::F13: case Mips::D13_64:
+ return 13;
+ case Mips::T6: case Mips::T6_64: case Mips::F14: case Mips::D14_64:
+ case Mips::D7:
+ return 14;
+ case Mips::T7: case Mips::T7_64: case Mips::F15: case Mips::D15_64:
+ return 15;
+ case Mips::S0: case Mips::S0_64: case Mips::F16: case Mips::D16_64:
+ case Mips::D8:
+ return 16;
+ case Mips::S1: case Mips::S1_64: case Mips::F17: case Mips::D17_64:
+ return 17;
+ case Mips::S2: case Mips::S2_64: case Mips::F18: case Mips::D18_64:
+ case Mips::D9:
+ return 18;
+ case Mips::S3: case Mips::S3_64: case Mips::F19: case Mips::D19_64:
+ return 19;
+ case Mips::S4: case Mips::S4_64: case Mips::F20: case Mips::D20_64:
+ case Mips::D10:
+ return 20;
+ case Mips::S5: case Mips::S5_64: case Mips::F21: case Mips::D21_64:
+ return 21;
+ case Mips::S6: case Mips::S6_64: case Mips::F22: case Mips::D22_64:
+ case Mips::D11:
+ return 22;
+ case Mips::S7: case Mips::S7_64: case Mips::F23: case Mips::D23_64:
+ return 23;
+ case Mips::T8: case Mips::T8_64: case Mips::F24: case Mips::D24_64:
+ case Mips::D12:
+ return 24;
+ case Mips::T9: case Mips::T9_64: case Mips::F25: case Mips::D25_64:
+ return 25;
+ case Mips::K0: case Mips::K0_64: case Mips::F26: case Mips::D26_64:
+ case Mips::D13:
+ return 26;
+ case Mips::K1: case Mips::K1_64: case Mips::F27: case Mips::D27_64:
+ return 27;
+ case Mips::GP: case Mips::GP_64: case Mips::F28: case Mips::D28_64:
+ case Mips::D14:
+ return 28;
+ case Mips::SP: case Mips::SP_64: case Mips::F29: case Mips::D29_64:
+ return 29;
+ case Mips::FP: case Mips::FP_64: case Mips::F30: case Mips::D30_64:
+ case Mips::D15:
+ return 30;
+ case Mips::RA: case Mips::RA_64: case Mips::F31: case Mips::D31_64:
+ return 31;
+ default: llvm_unreachable("Unknown register number!");
+ }
+ return 0; // Not reached
+}
+} // end namespace llvm
+
+#endif
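Unlike the MBlaze table, this mapping folds the 32-bit GPR, 64-bit GPR, FPU
single, and FPU double register files onto one 5-bit slot, so every alias of
a hardware register encodes identically. A small sketch, assuming
MipsBaseInfo.h is on the include path:

    #include "MCTargetDesc/MipsBaseInfo.h"
    #include <cassert>
    using namespace llvm;

    static void checkAliases() {
      assert(getMipsRegisterNumbering(Mips::SP) == 29);
      assert(getMipsRegisterNumbering(Mips::SP_64) == 29); // same slot as SP
      assert(getMipsRegisterNumbering(Mips::F29) == 29);   // FPU file reuses 0-31
    }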
diff --git a/lib/Target/Mips/MCTargetDesc/MipsFixupKinds.h b/lib/Target/Mips/MCTargetDesc/MipsFixupKinds.h
new file mode 100644
index 0000000..8b099ea
--- /dev/null
+++ b/lib/Target/Mips/MCTargetDesc/MipsFixupKinds.h
@@ -0,0 +1,90 @@
+//===-- Mips/MipsFixupKinds.h - Mips Specific Fixup Entries ----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_Mips_MipsFIXUPKINDS_H
+#define LLVM_Mips_MipsFIXUPKINDS_H
+
+#include "llvm/MC/MCFixup.h"
+
+namespace llvm {
+namespace Mips {
+ enum Fixups {
+ // fixup_Mips_NONE - R_MIPS_NONE.
+ fixup_Mips_NONE = FirstTargetFixupKind,
+
+ // fixup_Mips_16 - R_MIPS_16.
+ fixup_Mips_16,
+
+ // fixup_Mips_32 - R_MIPS_32.
+ fixup_Mips_32,
+
+ // fixup_Mips_REL32 - R_MIPS_REL32.
+ fixup_Mips_REL32,
+
+ // fixup_Mips_26 - R_MIPS_26.
+ fixup_Mips_26,
+
+ // fixup_Mips_HI16 - R_MIPS_HI16.
+ fixup_Mips_HI16,
+
+ // fixup_Mips_LO16 - R_MIPS_LO16.
+ fixup_Mips_LO16,
+
+ // fixup_Mips_GPREL16 - R_MIPS_GPREL16.
+ fixup_Mips_GPREL16,
+
+ // fixup_Mips_LITERAL - R_MIPS_LITERAL.
+ fixup_Mips_LITERAL,
+
+ // fixup_Mips_GOT16 - R_MIPS_GOT16.
+ fixup_Mips_GOT16,
+
+ // fixup_Mips_PC16 - R_MIPS_PC16.
+ fixup_Mips_PC16,
+
+ // fixup_Mips_CALL16 - R_MIPS_CALL16.
+ fixup_Mips_CALL16,
+
+ // fixup_Mips_GPREL32 - R_MIPS_GPREL32.
+ fixup_Mips_GPREL32,
+
+ // fixup_Mips_SHIFT5 - R_MIPS_SHIFT5.
+ fixup_Mips_SHIFT5,
+
+ // fixup_Mips_SHIFT6 - R_MIPS_SHIFT6.
+ fixup_Mips_SHIFT6,
+
+ // fixup_Mips_64 - R_MIPS_64.
+ fixup_Mips_64,
+
+ // fixup_Mips_TLSGD - R_MIPS_TLS_GD.
+ fixup_Mips_TLSGD,
+
+ // fixup_Mips_GOTTPREL - R_MIPS_TLS_GOTTPREL.
+ fixup_Mips_GOTTPREL,
+
+ // fixup_Mips_TPREL_HI - R_MIPS_TLS_TPREL_HI16.
+ fixup_Mips_TPREL_HI,
+
+ // fixup_Mips_TPREL_LO - R_MIPS_TLS_TPREL_LO16.
+ fixup_Mips_TPREL_LO,
+
+ // fixup_Mips_Branch_PCRel - PC-relative branch fixup; should eventually
+ // become R_MIPS_PC16.
+ fixup_Mips_Branch_PCRel,
+
+ // Marker
+ LastTargetFixupKind,
+ NumTargetFixupKinds = LastTargetFixupKind - FirstTargetFixupKind
+ };
+} // namespace Mips
+} // namespace llvm
+
+
+#endif /* LLVM_Mips_MipsFIXUPKINDS_H */
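These fixup kinds are not yet consumed anywhere in this patch; the
MipsAsmBackend above still answers getNumFixupKinds() with a placeholder. A
minimal sketch of the intended hookup (illustrative, not in the patch):

    #include "MCTargetDesc/MipsFixupKinds.h"

    // What MipsAsmBackend::getNumFixupKinds() should eventually report
    // instead of the "return 1" stub.
    static unsigned numMipsFixupKinds() {
      return llvm::Mips::NumTargetFixupKinds;
    }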
diff --git a/lib/Target/Mips/MCTargetDesc/MipsMCAsmInfo.cpp b/lib/Target/Mips/MCTargetDesc/MipsMCAsmInfo.cpp
index 5d92425..71ae804 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsMCAsmInfo.cpp
+++ b/lib/Target/Mips/MCTargetDesc/MipsMCAsmInfo.cpp
@@ -18,7 +18,8 @@ using namespace llvm;
MipsMCAsmInfo::MipsMCAsmInfo(const Target &T, StringRef TT) {
Triple TheTriple(TT);
- if (TheTriple.getArch() == Triple::mips)
+ if ((TheTriple.getArch() == Triple::mips) ||
+ (TheTriple.getArch() == Triple::mips64))
IsLittleEndian = false;
AlignmentIsInBytes = false;
diff --git a/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp b/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp
new file mode 100644
index 0000000..d66de23
--- /dev/null
+++ b/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp
@@ -0,0 +1,52 @@
+//===-- MipsMCCodeEmitter.cpp - Convert Mips code to machine code ---------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the MipsMCCodeEmitter class.
+//
+//===----------------------------------------------------------------------===//
+//
+#define DEBUG_TYPE "mccodeemitter"
+#include "llvm/MC/MCCodeEmitter.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/ADT/APFloat.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Support/raw_ostream.h"
+#include "MCTargetDesc/MipsMCTargetDesc.h"
+
+using namespace llvm;
+
+namespace {
+class MipsMCCodeEmitter : public MCCodeEmitter {
+ MipsMCCodeEmitter(const MipsMCCodeEmitter &); // DO NOT IMPLEMENT
+ void operator=(const MipsMCCodeEmitter &); // DO NOT IMPLEMENT
+ const MCInstrInfo &MCII;
+ const MCSubtargetInfo &STI;
+
+public:
+ MipsMCCodeEmitter(const MCInstrInfo &mcii, const MCSubtargetInfo &sti,
+ MCContext &ctx)
+ : MCII(mcii), STI(sti) {}
+
+ ~MipsMCCodeEmitter() {}
+
+ void EncodeInstruction(const MCInst &MI, raw_ostream &OS,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ }
+}; // class MipsMCCodeEmitter
+} // namespace
+
+MCCodeEmitter *llvm::createMipsMCCodeEmitter(const MCInstrInfo &MCII,
+ const MCSubtargetInfo &STI,
+ MCContext &Ctx) {
+ return new MipsMCCodeEmitter(MCII, STI, Ctx);
+}
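EncodeInstruction above is an empty stub, so the emitter produces no bytes
yet. A sketch of the eventual shape, assuming a TableGen'd
getBinaryCodeForInstr() that this patch does not generate; the big-endian
byte order matches the mips/mips64 triples handled in MipsMCAsmInfo above,
and little-endian triples would reverse it:

    // In MipsMCCodeEmitter (sketch):
    void EncodeInstruction(const MCInst &MI, raw_ostream &OS,
                           SmallVectorImpl<MCFixup> &Fixups) const {
      uint32_t Bits = 0; // = getBinaryCodeForInstr(MI, Fixups), once generated
      for (unsigned i = 0; i != 4; ++i)
        OS << char((Bits >> (8 * (3 - i))) & 0xff); // MSB first (big-endian)
    }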
diff --git a/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.cpp b/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.cpp
index 06f0d0b..1f9e3dd 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.cpp
+++ b/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.cpp
@@ -13,10 +13,14 @@
#include "MipsMCTargetDesc.h"
#include "MipsMCAsmInfo.h"
+#include "InstPrinter/MipsInstPrinter.h"
+#include "llvm/MC/MachineLocation.h"
+#include "llvm/MC/MCCodeGenInfo.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSubtargetInfo.h"
-#include "llvm/Target/TargetRegistry.h"
+#include "llvm/Support/TargetRegistry.h"
#define GET_INSTRINFO_MC_DESC
#include "MipsGenInstrInfo.inc"
@@ -35,11 +39,12 @@ static MCInstrInfo *createMipsMCInstrInfo() {
return X;
}
-extern "C" void LLVMInitializeMipsMCInstrInfo() {
- TargetRegistry::RegisterMCInstrInfo(TheMipsTarget, createMipsMCInstrInfo);
+static MCRegisterInfo *createMipsMCRegisterInfo(StringRef TT) {
+ MCRegisterInfo *X = new MCRegisterInfo();
+ InitMipsMCRegisterInfo(X, Mips::RA);
+ return X;
}
-
static MCSubtargetInfo *createMipsMCSubtargetInfo(StringRef TT, StringRef CPU,
StringRef FS) {
MCSubtargetInfo *X = new MCSubtargetInfo();
@@ -47,12 +52,111 @@ static MCSubtargetInfo *createMipsMCSubtargetInfo(StringRef TT, StringRef CPU,
return X;
}
-extern "C" void LLVMInitializeMipsMCSubtargetInfo() {
+static MCAsmInfo *createMipsMCAsmInfo(const Target &T, StringRef TT) {
+ MCAsmInfo *MAI = new MipsMCAsmInfo(T, TT);
+
+ MachineLocation Dst(MachineLocation::VirtualFP);
+ MachineLocation Src(Mips::SP, 0);
+ MAI->addInitialFrameState(0, Dst, Src);
+
+ return MAI;
+}
+
+static MCCodeGenInfo *createMipsMCCodeGenInfo(StringRef TT, Reloc::Model RM,
+ CodeModel::Model CM) {
+ MCCodeGenInfo *X = new MCCodeGenInfo();
+ if (RM == Reloc::Default)
+ RM = Reloc::PIC_;
+ X->InitMCCodeGenInfo(RM, CM);
+ return X;
+}
+
+static MCInstPrinter *createMipsMCInstPrinter(const Target &T,
+ unsigned SyntaxVariant,
+ const MCAsmInfo &MAI,
+ const MCSubtargetInfo &STI) {
+ return new MipsInstPrinter(MAI);
+}
+
+static MCStreamer *createMCStreamer(const Target &T, StringRef TT,
+ MCContext &Ctx, MCAsmBackend &MAB,
+ raw_ostream &_OS,
+ MCCodeEmitter *_Emitter,
+ bool RelaxAll,
+ bool NoExecStack) {
+ Triple TheTriple(TT);
+
+ return createELFStreamer(Ctx, MAB, _OS, _Emitter, RelaxAll, NoExecStack);
+}
+
+extern "C" void LLVMInitializeMipsTargetMC() {
+ // Register the MC asm info.
+ RegisterMCAsmInfoFn X(TheMipsTarget, createMipsMCAsmInfo);
+ RegisterMCAsmInfoFn Y(TheMipselTarget, createMipsMCAsmInfo);
+ RegisterMCAsmInfoFn A(TheMips64Target, createMipsMCAsmInfo);
+ RegisterMCAsmInfoFn B(TheMips64elTarget, createMipsMCAsmInfo);
+
+ // Register the MC codegen info.
+ TargetRegistry::RegisterMCCodeGenInfo(TheMipsTarget,
+ createMipsMCCodeGenInfo);
+ TargetRegistry::RegisterMCCodeGenInfo(TheMipselTarget,
+ createMipsMCCodeGenInfo);
+ TargetRegistry::RegisterMCCodeGenInfo(TheMips64Target,
+ createMipsMCCodeGenInfo);
+ TargetRegistry::RegisterMCCodeGenInfo(TheMips64elTarget,
+ createMipsMCCodeGenInfo);
+
+ // Register the MC instruction info.
+ TargetRegistry::RegisterMCInstrInfo(TheMipsTarget, createMipsMCInstrInfo);
+ TargetRegistry::RegisterMCInstrInfo(TheMipselTarget, createMipsMCInstrInfo);
+ TargetRegistry::RegisterMCInstrInfo(TheMips64Target, createMipsMCInstrInfo);
+ TargetRegistry::RegisterMCInstrInfo(TheMips64elTarget, createMipsMCInstrInfo);
+
+ // Register the MC register info.
+ TargetRegistry::RegisterMCRegInfo(TheMipsTarget, createMipsMCRegisterInfo);
+ TargetRegistry::RegisterMCRegInfo(TheMipselTarget, createMipsMCRegisterInfo);
+ TargetRegistry::RegisterMCRegInfo(TheMips64Target, createMipsMCRegisterInfo);
+ TargetRegistry::RegisterMCRegInfo(TheMips64elTarget,
+ createMipsMCRegisterInfo);
+
+ // Register the MC Code Emitter
+ TargetRegistry::RegisterMCCodeEmitter(TheMipsTarget, createMipsMCCodeEmitter);
+ TargetRegistry::RegisterMCCodeEmitter(TheMipselTarget,
+ createMipsMCCodeEmitter);
+ TargetRegistry::RegisterMCCodeEmitter(TheMips64Target,
+ createMipsMCCodeEmitter);
+ TargetRegistry::RegisterMCCodeEmitter(TheMips64elTarget,
+ createMipsMCCodeEmitter);
+
+ // Register the object streamer.
+ TargetRegistry::RegisterMCObjectStreamer(TheMipsTarget, createMCStreamer);
+ TargetRegistry::RegisterMCObjectStreamer(TheMipselTarget, createMCStreamer);
+ TargetRegistry::RegisterMCObjectStreamer(TheMips64Target, createMCStreamer);
+ TargetRegistry::RegisterMCObjectStreamer(TheMips64elTarget, createMCStreamer);
+
+ // Register the asm backend.
+ TargetRegistry::RegisterMCAsmBackend(TheMipsTarget, createMipsAsmBackend);
+ TargetRegistry::RegisterMCAsmBackend(TheMipselTarget, createMipsAsmBackend);
+ TargetRegistry::RegisterMCAsmBackend(TheMips64Target, createMipsAsmBackend);
+ TargetRegistry::RegisterMCAsmBackend(TheMips64elTarget, createMipsAsmBackend);
+
+ // Register the MC subtarget info.
TargetRegistry::RegisterMCSubtargetInfo(TheMipsTarget,
createMipsMCSubtargetInfo);
-}
+ TargetRegistry::RegisterMCSubtargetInfo(TheMipselTarget,
+ createMipsMCSubtargetInfo);
+ TargetRegistry::RegisterMCSubtargetInfo(TheMips64Target,
+ createMipsMCSubtargetInfo);
+ TargetRegistry::RegisterMCSubtargetInfo(TheMips64elTarget,
+ createMipsMCSubtargetInfo);
-extern "C" void LLVMInitializeMipsMCAsmInfo() {
- RegisterMCAsmInfo<MipsMCAsmInfo> X(TheMipsTarget);
- RegisterMCAsmInfo<MipsMCAsmInfo> Y(TheMipselTarget);
+ // Register the MCInstPrinter.
+ TargetRegistry::RegisterMCInstPrinter(TheMipsTarget,
+ createMipsMCInstPrinter);
+ TargetRegistry::RegisterMCInstPrinter(TheMipselTarget,
+ createMipsMCInstPrinter);
+ TargetRegistry::RegisterMCInstPrinter(TheMips64Target,
+ createMipsMCInstPrinter);
+ TargetRegistry::RegisterMCInstPrinter(TheMips64elTarget,
+ createMipsMCInstPrinter);
}
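Note the switch from the RegisterMCAsmInfo<MipsMCAsmInfo> template (removed above) to the function-based RegisterMCAsmInfoFn: the template can only construct the MCAsmInfo, while createMipsMCAsmInfo also attaches the initial SP-based frame state that previously lived in MipsFrameLowering::getInitialFrameState (removed further down in this patch).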
diff --git a/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.h b/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.h
index 3d18f11..7a0042a 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.h
+++ b/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.h
@@ -1,4 +1,4 @@
-//===-- AlphaMCTargetDesc.h - Alpha Target Descriptions ---------*- C++ -*-===//
+//===-- MipsMCTargetDesc.h - Mips Target Descriptions -----------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -7,21 +7,32 @@
//
//===----------------------------------------------------------------------===//
//
-// This file provides Alpha specific target descriptions.
+// This file provides Mips specific target descriptions.
//
//===----------------------------------------------------------------------===//
-#ifndef ALPHAMCTARGETDESC_H
-#define ALPHAMCTARGETDESC_H
+#ifndef MIPSMCTARGETDESC_H
+#define MIPSMCTARGETDESC_H
namespace llvm {
+class MCAsmBackend;
+class MCInstrInfo;
+class MCCodeEmitter;
+class MCContext;
class MCSubtargetInfo;
-class Target;
class StringRef;
+class Target;
extern Target TheMipsTarget;
extern Target TheMipselTarget;
+extern Target TheMips64Target;
+extern Target TheMips64elTarget;
+
+MCCodeEmitter *createMipsMCCodeEmitter(const MCInstrInfo &MCII,
+ const MCSubtargetInfo &STI,
+ MCContext &Ctx);
+MCAsmBackend *createMipsAsmBackend(const Target &T, StringRef TT);
} // End llvm namespace
// Defines symbolic names for Mips registers. This defines a mapping from
diff --git a/lib/Target/Mips/Mips.h b/lib/Target/Mips/Mips.h
index 984b5ad..bacecf2 100644
--- a/lib/Target/Mips/Mips.h
+++ b/lib/Target/Mips/Mips.h
@@ -29,6 +29,9 @@ namespace llvm {
FunctionPass *createMipsExpandPseudoPass(MipsTargetMachine &TM);
FunctionPass *createMipsEmitGPRestorePass(MipsTargetMachine &TM);
+ FunctionPass *createMipsJITCodeEmitterPass(MipsTargetMachine &TM,
+ JITCodeEmitter &JCE);
+
} // end namespace llvm;
#endif
diff --git a/lib/Target/Mips/Mips.td b/lib/Target/Mips/Mips.td
index 433cd57..39c2c16 100644
--- a/lib/Target/Mips/Mips.td
+++ b/lib/Target/Mips/Mips.td
@@ -38,6 +38,10 @@ def FeatureSingleFloat : SubtargetFeature<"single-float", "IsSingleFloat",
"true", "Only supports single precision float">;
def FeatureO32 : SubtargetFeature<"o32", "MipsABI", "O32",
"Enable o32 ABI">;
+def FeatureN32 : SubtargetFeature<"n32", "MipsABI", "N32",
+ "Enable n32 ABI">;
+def FeatureN64 : SubtargetFeature<"n64", "MipsABI", "N64",
+ "Enable n64 ABI">;
def FeatureEABI : SubtargetFeature<"eabi", "MipsABI", "EABI",
"Enable eabi ABI">;
def FeatureVFPU : SubtargetFeature<"vfpu", "HasVFPU",
@@ -54,16 +58,19 @@ def FeatureSwap : SubtargetFeature<"swap", "HasSwap", "true",
"Enable 'byte/half swap' instructions.">;
def FeatureBitCount : SubtargetFeature<"bitcount", "HasBitCount", "true",
"Enable 'count leading bits' instructions.">;
-def FeatureMips1 : SubtargetFeature<"mips1", "MipsArchVersion", "Mips1",
- "Mips1 ISA Support">;
-def FeatureMips2 : SubtargetFeature<"mips2", "MipsArchVersion", "Mips2",
- "Mips2 ISA Support">;
def FeatureMips32 : SubtargetFeature<"mips32", "MipsArchVersion", "Mips32",
"Mips32 ISA Support",
[FeatureCondMov, FeatureBitCount]>;
def FeatureMips32r2 : SubtargetFeature<"mips32r2", "MipsArchVersion",
"Mips32r2", "Mips32r2 ISA Support",
[FeatureMips32, FeatureSEInReg]>;
+def FeatureMips64 : SubtargetFeature<"mips64", "MipsArchVersion",
+ "Mips64", "Mips64 ISA Support",
+ [FeatureGP64Bit, FeatureFP64Bit,
+ FeatureMips32]>;
+def FeatureMips64r2 : SubtargetFeature<"mips64r2", "MipsArchVersion",
+ "Mips64r2", "Mips64r2 ISA Support",
+ [FeatureMips64, FeatureMips32r2]>;
//===----------------------------------------------------------------------===//
// Mips processors supported.
@@ -72,21 +79,10 @@ def FeatureMips32r2 : SubtargetFeature<"mips32r2", "MipsArchVersion",
class Proc<string Name, list<SubtargetFeature> Features>
: Processor<Name, MipsGenericItineraries, Features>;
-def : Proc<"mips1", [FeatureMips1]>;
-def : Proc<"r2000", [FeatureMips1]>;
-def : Proc<"r3000", [FeatureMips1]>;
-
-def : Proc<"mips2", [FeatureMips2]>;
-def : Proc<"r6000", [FeatureMips2]>;
-
+def : Proc<"mips32r1", [FeatureMips32]>;
def : Proc<"4ke", [FeatureMips32r2]>;
-
-// Allegrex is a 32bit subset of r4000, both for integer and fp registers,
-// but much more similar to Mips2 than Mips3. It also contains some of
-// Mips32/Mips32r2 instructions and a custom vector fpu processor.
-def : Proc<"allegrex", [FeatureMips2, FeatureSingleFloat, FeatureEABI,
- FeatureVFPU, FeatureSEInReg, FeatureCondMov, FeatureMulDivAdd,
- FeatureMinMax, FeatureSwap, FeatureBitCount]>;
+def : Proc<"mips64r1", [FeatureMips64]>;
+def : Proc<"mips64r2", [FeatureMips64r2]>;
def MipsAsmWriter : AsmWriter {
string AsmWriterClassName = "InstPrinter";
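The processor and feature names defined above become -mcpu/-mattr spellings on the command line; an illustrative invocation exercising the new 64-bit definitions (file names hypothetical):

    llc -march=mips64 -mcpu=mips64r2 -mattr=+n64 -o out.s in.ll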
diff --git a/lib/Target/Mips/Mips64InstrInfo.td b/lib/Target/Mips/Mips64InstrInfo.td
new file mode 100644
index 0000000..49b0223
--- /dev/null
+++ b/lib/Target/Mips/Mips64InstrInfo.td
@@ -0,0 +1,214 @@
+//===- Mips64InstrInfo.td - Mips64 Instruction Information -*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes Mips64 instructions.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Mips Operand, Complex Patterns and Transformations Definitions.
+//===----------------------------------------------------------------------===//
+
+// Instruction operand types
+def shamt_64 : Operand<i64>;
+
+// Unsigned Operand
+def uimm16_64 : Operand<i64> {
+ let PrintMethod = "printUnsignedImm";
+}
+
+// Transformation Function - get Imm - 32.
+def Subtract32 : SDNodeXForm<imm, [{
+ return getI32Imm((unsigned)N->getZExtValue() - 32);
+}]>;
+
+// imm32_63 predicate - True if imm is in range [32, 63].
+def imm32_63 : ImmLeaf<i64,
+ [{return (int32_t)Imm >= 32 && (int32_t)Imm < 64;}],
+ Subtract32>;
+
+//===----------------------------------------------------------------------===//
+// Instructions specific format
+//===----------------------------------------------------------------------===//
+// Shifts
+class LogicR_shift_rotate_imm64<bits<6> func, bits<5> _rs, string instr_asm,
+ SDNode OpNode, PatFrag PF>:
+ FR<0x00, func, (outs CPU64Regs:$dst), (ins CPU64Regs:$b, shamt_64:$c),
+ !strconcat(instr_asm, "\t$dst, $b, $c"),
+ [(set CPU64Regs:$dst, (OpNode CPU64Regs:$b, (i64 PF:$c)))],
+ IIAlu> {
+ let rs = _rs;
+}
+
+class LogicR_shift_rotate_reg64<bits<6> func, bits<5> _shamt, string instr_asm,
+ SDNode OpNode>:
+ FR<0x00, func, (outs CPU64Regs:$dst), (ins CPU64Regs:$c, CPU64Regs:$b),
+ !strconcat(instr_asm, "\t$dst, $b, $c"),
+ [(set CPU64Regs:$dst, (OpNode CPU64Regs:$b, CPU64Regs:$c))], IIAlu> {
+ let shamt = _shamt;
+}
+
+// Mul, Div
+let Defs = [HI64, LO64] in {
+ let isCommutable = 1 in
+ class Mul64<bits<6> func, string instr_asm, InstrItinClass itin>:
+ FR<0x00, func, (outs), (ins CPU64Regs:$a, CPU64Regs:$b),
+ !strconcat(instr_asm, "\t$a, $b"), [], itin>;
+
+ class Div64<SDNode op, bits<6> func, string instr_asm, InstrItinClass itin>:
+ FR<0x00, func, (outs), (ins CPU64Regs:$a, CPU64Regs:$b),
+ !strconcat(instr_asm, "\t$$zero, $a, $b"),
+ [(op CPU64Regs:$a, CPU64Regs:$b)], itin>;
+}
+
+// Move from Hi/Lo
+let shamt = 0 in {
+let rs = 0, rt = 0 in
+class MoveFromLOHI64<bits<6> func, string instr_asm>:
+ FR<0x00, func, (outs CPU64Regs:$dst), (ins),
+ !strconcat(instr_asm, "\t$dst"), [], IIHiLo>;
+
+let rt = 0, rd = 0 in
+class MoveToLOHI64<bits<6> func, string instr_asm>:
+ FR<0x00, func, (outs), (ins CPU64Regs:$src),
+ !strconcat(instr_asm, "\t$src"), [], IIHiLo>;
+}
+
+// Count Leading Ones/Zeros in Word
+class CountLeading64<bits<6> func, string instr_asm, list<dag> pattern>:
+ FR<0x1c, func, (outs CPU64Regs:$dst), (ins CPU64Regs:$src),
+ !strconcat(instr_asm, "\t$dst, $src"), pattern, IIAlu>,
+ Requires<[HasBitCount]> {
+ let shamt = 0;
+ let rt = rd;
+}
+
+//===----------------------------------------------------------------------===//
+// Instruction definition
+//===----------------------------------------------------------------------===//
+
+/// Arithmetic Instructions (ALU Immediate)
+def DADDiu : ArithLogicI<0x19, "daddiu", add, simm16_64, immSExt16,
+ CPU64Regs>;
+def DANDi : ArithLogicI<0x0c, "andi", and, uimm16_64, immZExt16, CPU64Regs>;
+def SLTi64 : SetCC_I<0x0a, "slti", setlt, simm16_64, immSExt16, CPU64Regs>;
+def SLTiu64 : SetCC_I<0x0b, "sltiu", setult, simm16_64, immSExt16, CPU64Regs>;
+def ORi64 : ArithLogicI<0x0d, "ori", or, uimm16_64, immZExt16, CPU64Regs>;
+def XORi64 : ArithLogicI<0x0e, "xori", xor, uimm16_64, immZExt16, CPU64Regs>;
+
+/// Arithmetic Instructions (3-Operand, R-Type)
+def DADDu : ArithLogicR<0x00, 0x2d, "daddu", add, IIAlu, CPU64Regs, 1>;
+def DSUBu : ArithLogicR<0x00, 0x2f, "dsubu", sub, IIAlu, CPU64Regs>;
+def SLT64 : SetCC_R<0x00, 0x2a, "slt", setlt, CPU64Regs>;
+def SLTu64 : SetCC_R<0x00, 0x2b, "sltu", setult, CPU64Regs>;
+def AND64 : ArithLogicR<0x00, 0x24, "and", and, IIAlu, CPU64Regs, 1>;
+def OR64 : ArithLogicR<0x00, 0x25, "or", or, IIAlu, CPU64Regs, 1>;
+def XOR64 : ArithLogicR<0x00, 0x26, "xor", xor, IIAlu, CPU64Regs, 1>;
+def NOR64 : LogicNOR<0x00, 0x27, "nor", CPU64Regs>;
+
+/// Shift Instructions
+def DSLL : LogicR_shift_rotate_imm64<0x38, 0x00, "dsll", shl, immZExt5>;
+def DSRL : LogicR_shift_rotate_imm64<0x3a, 0x00, "dsrl", srl, immZExt5>;
+def DSRA : LogicR_shift_rotate_imm64<0x3b, 0x00, "dsra", sra, immZExt5>;
+def DSLL32 : LogicR_shift_rotate_imm64<0x3c, 0x00, "dsll32", shl, imm32_63>;
+def DSRL32 : LogicR_shift_rotate_imm64<0x3e, 0x00, "dsrl32", srl, imm32_63>;
+def DSRA32 : LogicR_shift_rotate_imm64<0x3f, 0x00, "dsra32", sra, imm32_63>;
+def DSLLV : LogicR_shift_rotate_reg64<0x24, 0x00, "dsllv", shl>;
+def DSRLV : LogicR_shift_rotate_reg64<0x26, 0x00, "dsrlv", srl>;
+def DSRAV : LogicR_shift_rotate_reg64<0x27, 0x00, "dsrav", sra>;
+
+// Rotate Instructions
+let Predicates = [HasMips64r2] in {
+ def DROTR : LogicR_shift_rotate_imm64<0x3a, 0x01, "drotr", rotr, immZExt5>;
+ def DROTR32 : LogicR_shift_rotate_imm64<0x3e, 0x01, "drotr32", rotr,
+ imm32_63>;
+ def DROTRV : LogicR_shift_rotate_reg64<0x16, 0x01, "drotrv", rotr>;
+}
+
+/// Load and Store Instructions
+/// aligned
+defm LB64 : LoadM64<0x20, "lb", sextloadi8>;
+defm LBu64 : LoadM64<0x24, "lbu", zextloadi8>;
+defm LH64 : LoadM64<0x21, "lh", sextloadi16_a>;
+defm LHu64 : LoadM64<0x25, "lhu", zextloadi16_a>;
+defm LW64 : LoadM64<0x23, "lw", sextloadi32_a>;
+defm LWu64 : LoadM64<0x27, "lwu", zextloadi32_a>;
+defm SB64 : StoreM64<0x28, "sb", truncstorei8>;
+defm SH64 : StoreM64<0x29, "sh", truncstorei16_a>;
+defm SW64 : StoreM64<0x2b, "sw", truncstorei32_a>;
+defm LD : LoadM64<0x37, "ld", load_a>;
+defm SD : StoreM64<0x3f, "sd", store_a>;
+
+/// unaligned
+defm ULH64 : LoadM64<0x21, "ulh", sextloadi16_u, 1>;
+defm ULHu64 : LoadM64<0x25, "ulhu", zextloadi16_u, 1>;
+defm ULW64 : LoadM64<0x23, "ulw", sextloadi32_u, 1>;
+defm USH64 : StoreM64<0x29, "ush", truncstorei16_u, 1>;
+defm USW64 : StoreM64<0x2b, "usw", truncstorei32_u, 1>;
+defm ULD : LoadM64<0x37, "uld", load_u, 1>;
+defm USD : StoreM64<0x3f, "usd", store_u, 1>;
+
+/// Jump and Branch Instructions
+def BEQ64 : CBranch<0x04, "beq", seteq, CPU64Regs>;
+def BNE64 : CBranch<0x05, "bne", setne, CPU64Regs>;
+def BGEZ64 : CBranchZero<0x01, 1, "bgez", setge, CPU64Regs>;
+def BGTZ64 : CBranchZero<0x07, 0, "bgtz", setgt, CPU64Regs>;
+def BLEZ64 : CBranchZero<0x07, 0, "blez", setle, CPU64Regs>;
+def BLTZ64 : CBranchZero<0x01, 0, "bltz", setlt, CPU64Regs>;
+
+/// Multiply and Divide Instructions.
+def DMULT : Mul64<0x1c, "dmult", IIImul>;
+def DMULTu : Mul64<0x1d, "dmultu", IIImul>;
+def DSDIV : Div64<MipsDivRem, 0x1e, "ddiv", IIIdiv>;
+def DUDIV : Div64<MipsDivRemU, 0x1f, "ddivu", IIIdiv>;
+
+let Defs = [HI64] in
+ def MTHI64 : MoveToLOHI64<0x11, "mthi">;
+let Defs = [LO64] in
+ def MTLO64 : MoveToLOHI64<0x13, "mtlo">;
+
+let Uses = [HI64] in
+ def MFHI64 : MoveFromLOHI64<0x10, "mfhi">;
+let Uses = [LO64] in
+ def MFLO64 : MoveFromLOHI64<0x12, "mflo">;
+
+/// Count Leading
+def DCLZ : CountLeading64<0x24, "dclz",
+ [(set CPU64Regs:$dst, (ctlz CPU64Regs:$src))]>;
+def DCLO : CountLeading64<0x25, "dclo",
+ [(set CPU64Regs:$dst, (ctlz (not CPU64Regs:$src)))]>;
+
+//===----------------------------------------------------------------------===//
+// Arbitrary patterns that map to one or more instructions
+//===----------------------------------------------------------------------===//
+
+// Small immediates
+def : Pat<(i64 immSExt16:$in),
+ (DADDiu ZERO_64, imm:$in)>;
+def : Pat<(i64 immZExt16:$in),
+ (ORi64 ZERO_64, imm:$in)>;
+
+// zextloadi32_u
+def : Pat<(zextloadi32_u addr:$a), (DSRL (DSLL (ULW64_P8 addr:$a), 32), 32)>,
+ Requires<[IsN64]>;
+def : Pat<(zextloadi32_u addr:$a), (DSRL (DSLL (ULW64 addr:$a), 32), 32)>,
+ Requires<[NotN64]>;
+
+// hi/lo relocs
+def : Pat<(i64 (MipsLo tglobaladdr:$in)), (DADDiu ZERO_64, tglobaladdr:$in)>;
+
+defm : BrcondPats<CPU64Regs, BEQ64, BNE64, SLT64, SLTu64, SLTi64, SLTiu64,
+ ZERO_64>;
+
+// setcc patterns
+defm : SeteqPats<CPU64Regs, SLTiu64, XOR64, SLTu64, ZERO_64>;
+defm : SetlePats<CPU64Regs, SLT64, SLTu64>;
+defm : SetgtPats<CPU64Regs, SLT64, SLTu64>;
+defm : SetgePats<CPU64Regs, SLT64, SLTu64>;
+defm : SetgeImmPats<CPU64Regs, SLTi64, SLTiu64>;
diff --git a/lib/Target/Mips/MipsAsmPrinter.cpp b/lib/Target/Mips/MipsAsmPrinter.cpp
index 69e03bd..0e82681 100644
--- a/lib/Target/Mips/MipsAsmPrinter.cpp
+++ b/lib/Target/Mips/MipsAsmPrinter.cpp
@@ -18,6 +18,7 @@
#include "MipsInstrInfo.h"
#include "MipsMachineFunction.h"
#include "MipsMCInstLower.h"
+#include "MipsMCSymbolRefExpr.h"
#include "InstPrinter/MipsInstPrinter.h"
#include "llvm/BasicBlock.h"
#include "llvm/Instructions.h"
@@ -25,6 +26,7 @@
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCInst.h"
@@ -33,15 +35,22 @@
#include "llvm/Target/TargetData.h"
#include "llvm/Target/TargetLoweringObjectFile.h"
#include "llvm/Target/TargetOptions.h"
-#include "llvm/Target/TargetRegistry.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/Twine.h"
+#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Analysis/DebugInfo.h"
using namespace llvm;
+static bool isUnalignedLoadStore(unsigned Opc) {
+ return Opc == Mips::ULW || Opc == Mips::ULH || Opc == Mips::ULHu ||
+ Opc == Mips::USW || Opc == Mips::USH ||
+ Opc == Mips::ULW_P8 || Opc == Mips::ULH_P8 || Opc == Mips::ULHu_P8 ||
+ Opc == Mips::USW_P8 || Opc == Mips::USH_P8;
+}
+
void MipsAsmPrinter::EmitInstruction(const MachineInstr *MI) {
SmallString<128> Str;
raw_svector_ostream OS(Str);
@@ -52,8 +61,21 @@ void MipsAsmPrinter::EmitInstruction(const MachineInstr *MI) {
}
MipsMCInstLower MCInstLowering(Mang, *MF, *this);
+ unsigned Opc = MI->getOpcode();
MCInst TmpInst0;
MCInstLowering.Lower(MI, TmpInst0);
+
+ // Enclose unaligned load or store with .macro & .nomacro directives.
+ if (isUnalignedLoadStore(Opc)) {
+ MCInst Directive;
+ Directive.setOpcode(Mips::MACRO);
+ OutStreamer.EmitInstruction(Directive);
+ OutStreamer.EmitInstruction(TmpInst0);
+ Directive.setOpcode(Mips::NOMACRO);
+ OutStreamer.EmitInstruction(Directive);
+ return;
+ }
+
OutStreamer.EmitInstruction(TmpInst0);
}
@@ -180,7 +202,6 @@ void MipsAsmPrinter::emitFrameDirective() {
const char *MipsAsmPrinter::getCurrentABIString() const {
switch (Subtarget->getTargetABI()) {
case MipsSubtarget::O32: return "abi32";
- case MipsSubtarget::O64: return "abiO64";
case MipsSubtarget::N32: return "abiN32";
case MipsSubtarget::N64: return "abi64";
case MipsSubtarget::EABI: return "eabi32"; // TODO: handle eabi64
@@ -304,6 +325,11 @@ void MipsAsmPrinter::printOperand(const MachineInstr *MI, int opNum,
case MipsII::MO_GOTTPREL: O << "%gottprel("; break;
case MipsII::MO_TPREL_HI: O << "%tprel_hi("; break;
case MipsII::MO_TPREL_LO: O << "%tprel_lo("; break;
+ case MipsII::MO_GPOFF_HI: O << "%hi(%neg(%gp_rel("; break;
+ case MipsII::MO_GPOFF_LO: O << "%lo(%neg(%gp_rel("; break;
+ case MipsII::MO_GOT_DISP: O << "%got_disp("; break;
+ case MipsII::MO_GOT_PAGE: O << "%got_page("; break;
+ case MipsII::MO_GOT_OFST: O << "%got_ofst("; break;
}
switch (MO.getType()) {
@@ -424,17 +450,9 @@ void MipsAsmPrinter::PrintDebugValueComment(const MachineInstr *MI,
}
// Force static initialization.
-static MCInstPrinter *createMipsMCInstPrinter(const Target &T,
- unsigned SyntaxVariant,
- const MCAsmInfo &MAI) {
- return new MipsInstPrinter(MAI);
-}
-
extern "C" void LLVMInitializeMipsAsmPrinter() {
RegisterAsmPrinter<MipsAsmPrinter> X(TheMipsTarget);
RegisterAsmPrinter<MipsAsmPrinter> Y(TheMipselTarget);
-
- TargetRegistry::RegisterMCInstPrinter(TheMipsTarget, createMipsMCInstPrinter);
- TargetRegistry::RegisterMCInstPrinter(TheMipselTarget,
- createMipsMCInstPrinter);
+ RegisterAsmPrinter<MipsAsmPrinter> A(TheMips64Target);
+ RegisterAsmPrinter<MipsAsmPrinter> B(TheMips64elTarget);
}
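The MACRO/NOMACRO pseudo-instructions bracket the unaligned access so the assembler is permitted to expand it; assuming they print as the usual .set directives, the emitted assembly looks roughly like:

    .set    macro
    ulw     $2, 0($4)       # the assembler may expand this into several instructions
    .set    nomacro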
diff --git a/lib/Target/Mips/MipsCallingConv.td b/lib/Target/Mips/MipsCallingConv.td
index 876f0fc..0ae4ef6 100644
--- a/lib/Target/Mips/MipsCallingConv.td
+++ b/lib/Target/Mips/MipsCallingConv.td
@@ -31,6 +31,55 @@ def RetCC_MipsO32 : CallingConv<[
]>;
//===----------------------------------------------------------------------===//
+// Mips N32/64 Calling Convention
+//===----------------------------------------------------------------------===//
+
+def CC_MipsN : CallingConv<[
+ // FIXME: Handle byval, complex and float double parameters.
+
+ // Promote i8/i16/i32 arguments to i64.
+ CCIfType<[i8, i16, i32], CCPromoteToType<i64>>,
+
+ // Integer arguments are passed in integer registers.
+ CCIfType<[i64], CCAssignToRegWithShadow<[A0_64, A1_64, A2_64, A3_64,
+ T0_64, T1_64, T2_64, T3_64],
+ [D12_64, D13_64, D14_64, D15_64,
+ D16_64, D17_64, D18_64, D19_64]>>,
+
+ // f32 arguments are passed in single precision FP registers.
+ CCIfType<[f32], CCAssignToRegWithShadow<[F12, F13, F14, F15,
+ F16, F17, F18, F19],
+ [A0_64, A1_64, A2_64, A3_64,
+ T0_64, T1_64, T2_64, T3_64]>>,
+
+ // f64 arguments are passed in double precision FP registers.
+ CCIfType<[f64], CCAssignToRegWithShadow<[D12_64, D13_64, D14_64, D15_64,
+ D16_64, D17_64, D18_64, D19_64],
+ [A0_64, A1_64, A2_64, A3_64,
+ T0_64, T1_64, T2_64, T3_64]>>,
+
+ // All stack parameter slots become 64-bit doublewords and are 8-byte aligned.
+ CCIfType<[i64, f64], CCAssignToStack<8, 8>>,
+ CCIfType<[f32], CCAssignToStack<4, 8>>
+]>;
+
+def RetCC_MipsN : CallingConv<[
+ // FIXME: Handle complex and float double return values.
+
+ // i32 are returned in registers V0, V1
+ CCIfType<[i32], CCAssignToReg<[V0, V1]>>,
+
+ // i64 are returned in registers V0_64, V1_64
+ CCIfType<[i64], CCAssignToReg<[V0_64, V1_64]>>,
+
+ // f32 are returned in registers F0, F2
+ CCIfType<[f32], CCAssignToReg<[F0, F2]>>,
+
+ // f64 are returned in registers D0, D2
+ CCIfType<[f64], CCAssignToReg<[D0_64, D2_64]>>
+]>;
+
+//===----------------------------------------------------------------------===//
// Mips EABI Calling Convention
//===----------------------------------------------------------------------===//
@@ -77,10 +126,14 @@ def RetCC_MipsEABI : CallingConv<[
//===----------------------------------------------------------------------===//
def CC_Mips : CallingConv<[
- CCIfSubtarget<"isABI_EABI()", CCDelegateTo<CC_MipsEABI>>
+ CCIfSubtarget<"isABI_EABI()", CCDelegateTo<CC_MipsEABI>>,
+ CCIfSubtarget<"isABI_N32()", CCDelegateTo<CC_MipsN>>,
+ CCIfSubtarget<"isABI_N64()", CCDelegateTo<CC_MipsN>>
]>;
def RetCC_Mips : CallingConv<[
CCIfSubtarget<"isABI_EABI()", CCDelegateTo<RetCC_MipsEABI>>,
+ CCIfSubtarget<"isABI_N32()", CCDelegateTo<RetCC_MipsN>>,
+ CCIfSubtarget<"isABI_N64()", CCDelegateTo<RetCC_MipsN>>,
CCDelegateTo<RetCC_MipsO32>
]>;
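A hand-walked example of CC_MipsN as written (not generated output): for f(i32 %a, i64 %b, double %c), %a is promoted to i64 and takes A0_64 (shadowing D12_64), %b takes A1_64 (shadowing D13_64), and %c takes D14_64, the first double register whose slot is not yet shadowed; byval and split-f64 arguments remain under the FIXME above.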
diff --git a/lib/Target/Mips/MipsCodeEmitter.cpp b/lib/Target/Mips/MipsCodeEmitter.cpp
new file mode 100644
index 0000000..9220d9c
--- /dev/null
+++ b/lib/Target/Mips/MipsCodeEmitter.cpp
@@ -0,0 +1,245 @@
+//===-- Mips/MipsCodeEmitter.cpp - Convert Mips code to machine code -----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===---------------------------------------------------------------------===//
+//
+// This file contains the pass that transforms the Mips machine instructions
+// into relocatable machine code.
+//
+//===---------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "jit"
+#include "Mips.h"
+#include "MipsInstrInfo.h"
+#include "MipsRelocations.h"
+#include "MipsSubtarget.h"
+#include "MipsTargetMachine.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Function.h"
+#include "llvm/PassManager.h"
+#include "llvm/CodeGen/JITCodeEmitter.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineJumpTableInfo.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#ifndef NDEBUG
+#include <iomanip>
+#endif
+
+#include "llvm/CodeGen/MachineOperand.h"
+
+using namespace llvm;
+
+STATISTIC(NumEmitted, "Number of machine instructions emitted");
+
+namespace {
+
+class MipsCodeEmitter : public MachineFunctionPass {
+ MipsJITInfo *JTI;
+ const MipsInstrInfo *II;
+ const TargetData *TD;
+ const MipsSubtarget *Subtarget;
+ TargetMachine &TM;
+ JITCodeEmitter &MCE;
+ const std::vector<MachineConstantPoolEntry> *MCPEs;
+ const std::vector<MachineJumpTableEntry> *MJTEs;
+ bool IsPIC;
+
+ void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<MachineModuleInfo> ();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
+ static char ID;
+
+ public:
+ MipsCodeEmitter(TargetMachine &tm, JITCodeEmitter &mce) :
+ MachineFunctionPass(ID), JTI(0),
+ II((const MipsInstrInfo *) tm.getInstrInfo()),
+ TD(tm.getTargetData()), TM(tm), MCE(mce), MCPEs(0), MJTEs(0),
+ IsPIC(TM.getRelocationModel() == Reloc::PIC_) {
+ }
+
+ bool runOnMachineFunction(MachineFunction &MF);
+
+ virtual const char *getPassName() const {
+ return "Mips Machine Code Emitter";
+ }
+
+ /// getBinaryCodeForInstr - This function, generated by the
+ /// CodeEmitterGenerator using TableGen, produces the binary encoding for
+ /// machine instructions.
+ unsigned getBinaryCodeForInstr(const MachineInstr &MI) const;
+
+ void emitInstruction(const MachineInstr &MI);
+
+ private:
+
+ void emitWordLE(unsigned Word);
+
+ /// Routines that handle operands which add machine relocations which are
+ /// fixed up by the relocation stage.
+ void emitGlobalAddress(const GlobalValue *GV, unsigned Reloc,
+ bool MayNeedFarStub) const;
+ void emitExternalSymbolAddress(const char *ES, unsigned Reloc) const;
+ void emitConstPoolAddress(unsigned CPI, unsigned Reloc) const;
+ void emitJumpTableAddress(unsigned JTIndex, unsigned Reloc) const;
+ void emitMachineBasicBlock(MachineBasicBlock *BB, unsigned Reloc) const;
+
+ /// getMachineOpValue - Return binary encoding of operand. If the machine
+ /// operand requires relocation, record the relocation and return zero.
+ unsigned getMachineOpValue(const MachineInstr &MI,
+ const MachineOperand &MO) const;
+
+ unsigned getRelocation(const MachineInstr &MI,
+ const MachineOperand &MO) const;
+
+ };
+}
+
+char MipsCodeEmitter::ID = 0;
+
+bool MipsCodeEmitter::runOnMachineFunction(MachineFunction &MF) {
+ JTI = ((MipsTargetMachine&) MF.getTarget()).getJITInfo();
+ II = ((const MipsTargetMachine&) MF.getTarget()).getInstrInfo();
+ TD = ((const MipsTargetMachine&) MF.getTarget()).getTargetData();
+ Subtarget = &TM.getSubtarget<MipsSubtarget> ();
+ MCPEs = &MF.getConstantPool()->getConstants();
+ MJTEs = 0;
+ if (MF.getJumpTableInfo()) MJTEs = &MF.getJumpTableInfo()->getJumpTables();
+ JTI->Initialize(MF, IsPIC);
+ MCE.setModuleInfo(&getAnalysis<MachineModuleInfo> ());
+
+ do {
+ DEBUG(errs() << "JITTing function '"
+ << MF.getFunction()->getName() << "'\n");
+ MCE.startFunction(MF);
+
+ for (MachineFunction::iterator MBB = MF.begin(), E = MF.end();
+ MBB != E; ++MBB){
+ MCE.StartMachineBasicBlock(MBB);
+ for (MachineBasicBlock::const_iterator I = MBB->begin(), E = MBB->end();
+ I != E; ++I)
+ emitInstruction(*I);
+ }
+ } while (MCE.finishFunction(MF));
+
+ return false;
+}
+
+unsigned MipsCodeEmitter::getRelocation(const MachineInstr &MI,
+ const MachineOperand &MO) const {
+  // NOTE: These relocations are for the static relocation model.
+ uint64_t TSFlags = MI.getDesc().TSFlags;
+ uint64_t Form = TSFlags & MipsII::FormMask;
+ if (Form == MipsII::FrmJ)
+ return Mips::reloc_mips_26;
+ if ((Form == MipsII::FrmI || Form == MipsII::FrmFI)
+ && MI.getDesc().isBranch())
+ return Mips::reloc_mips_branch;
+ if (Form == MipsII::FrmI && MI.getOpcode() == Mips::LUi)
+ return Mips::reloc_mips_hi;
+ return Mips::reloc_mips_lo;
+}
+
+/// getMachineOpValue - Return binary encoding of operand. If the machine
+/// operand requires relocation, record the relocation and return zero.
+unsigned MipsCodeEmitter::getMachineOpValue(const MachineInstr &MI,
+ const MachineOperand &MO) const {
+ if (MO.isReg())
+ return MipsRegisterInfo::getRegisterNumbering(MO.getReg());
+ else if (MO.isImm())
+ return static_cast<unsigned>(MO.getImm());
+ else if (MO.isGlobal())
+ emitGlobalAddress(MO.getGlobal(), getRelocation(MI, MO), true);
+ else if (MO.isSymbol())
+ emitExternalSymbolAddress(MO.getSymbolName(), getRelocation(MI, MO));
+ else if (MO.isCPI())
+ emitConstPoolAddress(MO.getIndex(), getRelocation(MI, MO));
+ else if (MO.isJTI())
+ emitJumpTableAddress(MO.getIndex(), getRelocation(MI, MO));
+ else if (MO.isMBB())
+ emitMachineBasicBlock(MO.getMBB(), getRelocation(MI, MO));
+ else
+ llvm_unreachable("Unable to encode MachineOperand!");
+ return 0;
+}
+
+void MipsCodeEmitter::emitGlobalAddress(const GlobalValue *GV, unsigned Reloc,
+ bool MayNeedFarStub) const {
+ MCE.addRelocation(MachineRelocation::getGV(MCE.getCurrentPCOffset(), Reloc,
+ const_cast<GlobalValue *>(GV), 0, MayNeedFarStub));
+}
+
+void MipsCodeEmitter::
+emitExternalSymbolAddress(const char *ES, unsigned Reloc) const {
+ MCE.addRelocation(MachineRelocation::getExtSym(MCE.getCurrentPCOffset(),
+ Reloc, ES, 0, 0, false));
+}
+
+void MipsCodeEmitter::emitConstPoolAddress(unsigned CPI, unsigned Reloc) const {
+ MCE.addRelocation(MachineRelocation::getConstPool(MCE.getCurrentPCOffset(),
+ Reloc, CPI, 0, false));
+}
+
+void MipsCodeEmitter::
+emitJumpTableAddress(unsigned JTIndex, unsigned Reloc) const {
+ MCE.addRelocation(MachineRelocation::getJumpTable(MCE.getCurrentPCOffset(),
+ Reloc, JTIndex, 0, false));
+}
+
+void MipsCodeEmitter::emitMachineBasicBlock(MachineBasicBlock *BB,
+ unsigned Reloc) const {
+ MCE.addRelocation(MachineRelocation::getBB(MCE.getCurrentPCOffset(),
+ Reloc, BB));
+}
+
+void MipsCodeEmitter::emitInstruction(const MachineInstr &MI) {
+ DEBUG(errs() << "JIT: " << (void*)MCE.getCurrentPCValue() << ":\t" << MI);
+
+ MCE.processDebugLoc(MI.getDebugLoc(), true);
+
+ // Skip pseudo instructions.
+ if ((MI.getDesc().TSFlags & MipsII::FormMask) == MipsII::Pseudo)
+ return;
+
+ ++NumEmitted; // Keep track of the # of mi's emitted
+
+ switch (MI.getOpcode()) {
+ default:
+ emitWordLE(getBinaryCodeForInstr(MI));
+ break;
+ }
+
+ MCE.processDebugLoc(MI.getDebugLoc(), false);
+}
+
+void MipsCodeEmitter::emitWordLE(unsigned Word) {
+ DEBUG(errs() << " 0x";
+ errs().write_hex(Word) << "\n");
+ MCE.emitWordLE(Word);
+}
+
+/// createMipsJITCodeEmitterPass - Return a pass that emits the collected Mips
+/// code to the specified MCE object.
+FunctionPass *llvm::createMipsJITCodeEmitterPass(MipsTargetMachine &TM,
+ JITCodeEmitter &JCE) {
+ return new MipsCodeEmitter(TM, JCE);
+}
+
+unsigned MipsCodeEmitter::getBinaryCodeForInstr(const MachineInstr &MI) const {
+  // This function will be automatically generated by the CodeEmitterGenerator
+  // using TableGen.
+ return 0;
+}
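createMipsJITCodeEmitterPass is meant to be installed from the target machine; a hedged sketch of that hook, with the signature assumed from the LLVM 3.0 LLVMTargetMachine interface (the actual override is outside this hunk):

    bool MipsTargetMachine::addCodeEmitter(PassManagerBase &PM,
                                           CodeGenOpt::Level OptLevel,
                                           JITCodeEmitter &JCE) {
      PM.add(createMipsJITCodeEmitterPass(*this, JCE));
      return false; // success
    }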
diff --git a/lib/Target/Mips/MipsDelaySlotFiller.cpp b/lib/Target/Mips/MipsDelaySlotFiller.cpp
index c3a6211..be3b7a0 100644
--- a/lib/Target/Mips/MipsDelaySlotFiller.cpp
+++ b/lib/Target/Mips/MipsDelaySlotFiller.cpp
@@ -7,7 +7,7 @@
//
//===----------------------------------------------------------------------===//
//
-// Simple pass to fills delay slots with NOPs.
+// Simple pass to fill delay slots with useful instructions.
//
//===----------------------------------------------------------------------===//
@@ -17,18 +17,31 @@
#include "MipsTargetMachine.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/Statistic.h"
using namespace llvm;
STATISTIC(FilledSlots, "Number of delay slots filled");
+STATISTIC(UsefulSlots, "Number of delay slots filled with instructions that"
+          " are not NOPs.");
+
+static cl::opt<bool> EnableDelaySlotFiller(
+ "enable-mips-delay-filler",
+ cl::init(false),
+ cl::desc("Fill the Mips delay slots useful instructions."),
+ cl::Hidden);
namespace {
struct Filler : public MachineFunctionPass {
TargetMachine &TM;
const TargetInstrInfo *TII;
+ MachineBasicBlock::iterator LastFiller;
static char ID;
Filler(TargetMachine &tm)
@@ -47,31 +60,61 @@ namespace {
return Changed;
}
+ bool isDelayFiller(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator candidate);
+
+ void insertCallUses(MachineBasicBlock::iterator MI,
+ SmallSet<unsigned, 32>& RegDefs,
+ SmallSet<unsigned, 32>& RegUses);
+
+ void insertDefsUses(MachineBasicBlock::iterator MI,
+ SmallSet<unsigned, 32>& RegDefs,
+ SmallSet<unsigned, 32>& RegUses);
+
+ bool IsRegInSet(SmallSet<unsigned, 32>& RegSet,
+ unsigned Reg);
+
+ bool delayHasHazard(MachineBasicBlock::iterator candidate,
+ bool &sawLoad, bool &sawStore,
+ SmallSet<unsigned, 32> &RegDefs,
+ SmallSet<unsigned, 32> &RegUses);
+
+ bool
+ findDelayInstr(MachineBasicBlock &MBB, MachineBasicBlock::iterator slot,
+ MachineBasicBlock::iterator &Filler);
+
+
};
char Filler::ID = 0;
} // end of anonymous namespace
/// runOnMachineBasicBlock - Fill in delay slots for the given basic block.
-/// Currently, we fill delay slots with NOPs. We assume there is only one
-/// delay slot per delayed instruction.
+/// We assume there is only one delay slot per delayed instruction.
bool Filler::
-runOnMachineBasicBlock(MachineBasicBlock &MBB)
-{
+runOnMachineBasicBlock(MachineBasicBlock &MBB) {
bool Changed = false;
- for (MachineBasicBlock::iterator I = MBB.begin(); I != MBB.end(); ++I) {
- const MCInstrDesc& MCid = I->getDesc();
- if (MCid.hasDelaySlot() &&
- (TM.getSubtarget<MipsSubtarget>().isMips1() ||
- MCid.isCall() || MCid.isBranch() || MCid.isReturn())) {
- MachineBasicBlock::iterator J = I;
- ++J;
- BuildMI(MBB, J, I->getDebugLoc(), TII->get(Mips::NOP));
+ LastFiller = MBB.end();
+
+ for (MachineBasicBlock::iterator I = MBB.begin(); I != MBB.end(); ++I)
+ if (I->getDesc().hasDelaySlot()) {
++FilledSlots;
Changed = true;
- }
- }
+ MachineBasicBlock::iterator D;
+
+ if (EnableDelaySlotFiller && findDelayInstr(MBB, I, D)) {
+ MBB.splice(llvm::next(I), &MBB, D);
+ ++UsefulSlots;
+ }
+ else
+ BuildMI(MBB, llvm::next(I), I->getDebugLoc(), TII->get(Mips::NOP));
+
+ // Record the filler instruction that filled the delay slot.
+ // The instruction after it will be visited in the next iteration.
+ LastFiller = ++I;
+ }
return Changed;
+
}
/// createMipsDelaySlotFillerPass - Returns a pass that fills in delay
@@ -80,3 +123,134 @@ FunctionPass *llvm::createMipsDelaySlotFillerPass(MipsTargetMachine &tm) {
return new Filler(tm);
}
+bool Filler::findDelayInstr(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator slot,
+ MachineBasicBlock::iterator &Filler) {
+ SmallSet<unsigned, 32> RegDefs;
+ SmallSet<unsigned, 32> RegUses;
+
+ insertDefsUses(slot, RegDefs, RegUses);
+
+ bool sawLoad = false;
+ bool sawStore = false;
+
+ for (MachineBasicBlock::reverse_iterator I(slot); I != MBB.rend(); ++I) {
+    // Skip debug values.
+ if (I->isDebugValue())
+ continue;
+
+ // Convert to forward iterator.
+ MachineBasicBlock::iterator FI(llvm::next(I).base());
+
+ if (I->hasUnmodeledSideEffects()
+ || I->isInlineAsm()
+ || I->isLabel()
+ || FI == LastFiller
+ || I->getDesc().isPseudo()
+ //
+        // TODO: also disallow ERET, DERET, WAIT and PAUSE once they are
+        // added to the instruction list.
+        )
+ break;
+
+ if (delayHasHazard(FI, sawLoad, sawStore, RegDefs, RegUses)) {
+ insertDefsUses(FI, RegDefs, RegUses);
+ continue;
+ }
+
+ Filler = FI;
+ return true;
+ }
+
+ return false;
+}
+
+bool Filler::delayHasHazard(MachineBasicBlock::iterator candidate,
+ bool &sawLoad,
+ bool &sawStore,
+ SmallSet<unsigned, 32> &RegDefs,
+ SmallSet<unsigned, 32> &RegUses) {
+ if (candidate->isImplicitDef() || candidate->isKill())
+ return true;
+
+ MCInstrDesc MCID = candidate->getDesc();
+ // Loads or stores cannot be moved past a store to the delay slot
+ // and stores cannot be moved past a load.
+ if (MCID.mayLoad()) {
+ if (sawStore)
+ return true;
+ sawLoad = true;
+ }
+
+ if (MCID.mayStore()) {
+ if (sawStore)
+ return true;
+ sawStore = true;
+ if (sawLoad)
+ return true;
+ }
+
+ assert((!MCID.isCall() && !MCID.isReturn()) &&
+ "Cannot put calls or returns in delay slot.");
+
+ for (unsigned i = 0, e = candidate->getNumOperands(); i!= e; ++i) {
+ const MachineOperand &MO = candidate->getOperand(i);
+ unsigned Reg;
+
+ if (!MO.isReg() || !(Reg = MO.getReg()))
+ continue; // skip
+
+ if (MO.isDef()) {
+ // check whether Reg is defined or used before delay slot.
+ if (IsRegInSet(RegDefs, Reg) || IsRegInSet(RegUses, Reg))
+ return true;
+ }
+ if (MO.isUse()) {
+ // check whether Reg is defined before delay slot.
+ if (IsRegInSet(RegDefs, Reg))
+ return true;
+ }
+ }
+ return false;
+}
+
+// Insert Defs and Uses of MI into the sets RegDefs and RegUses.
+void Filler::insertDefsUses(MachineBasicBlock::iterator MI,
+ SmallSet<unsigned, 32>& RegDefs,
+ SmallSet<unsigned, 32>& RegUses) {
+ // If MI is a call or return, just examine the explicit non-variadic operands.
+ MCInstrDesc MCID = MI->getDesc();
+ unsigned e = MCID.isCall() || MCID.isReturn() ? MCID.getNumOperands() :
+ MI->getNumOperands();
+
+ // Add RA to RegDefs to prevent users of RA from going into delay slot.
+ if (MCID.isCall())
+ RegDefs.insert(Mips::RA);
+
+ for (unsigned i = 0; i != e; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+ unsigned Reg;
+
+ if (!MO.isReg() || !(Reg = MO.getReg()))
+ continue;
+
+ if (MO.isDef())
+ RegDefs.insert(Reg);
+ else if (MO.isUse())
+ RegUses.insert(Reg);
+ }
+}
+
+// Returns true if Reg or one of its aliases is in RegSet.
+bool Filler::IsRegInSet(SmallSet<unsigned, 32>& RegSet, unsigned Reg) {
+ if (RegSet.count(Reg))
+ return true;
+  // Check aliased registers.
+ for (const unsigned *Alias = TM.getRegisterInfo()->getAliasSet(Reg);
+ *Alias; ++Alias)
+ if (RegSet.count(*Alias))
+ return true;
+
+ return false;
+}
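Because EnableDelaySlotFiller defaults to false, every delay slot is still filled with a NOP unless the hidden flag is passed. An illustrative invocation that also prints the two STATISTIC counters defined above (file names hypothetical):

    llc -march=mips -enable-mips-delay-filler -stats -o out.s in.ll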
diff --git a/lib/Target/Mips/MipsFrameLowering.cpp b/lib/Target/Mips/MipsFrameLowering.cpp
index a0f90a0..22d1e47 100644
--- a/lib/Target/Mips/MipsFrameLowering.cpp
+++ b/lib/Target/Mips/MipsFrameLowering.cpp
@@ -254,9 +254,15 @@ void MipsFrameLowering::emitPrologue(MachineFunction &MF) const {
}
// Restore GP from the saved stack location
- if (MipsFI->needGPSaveRestore())
- BuildMI(MBB, MBBI, dl, TII.get(Mips::CPRESTORE))
- .addImm(MFI->getObjectOffset(MipsFI->getGPFI()));
+ if (MipsFI->needGPSaveRestore()) {
+ unsigned Offset = MFI->getObjectOffset(MipsFI->getGPFI());
+ BuildMI(MBB, MBBI, dl, TII.get(Mips::CPRESTORE)).addImm(Offset);
+
+ if (Offset >= 0x8000) {
+ BuildMI(MBB, llvm::prior(MBBI), dl, TII.get(Mips::MACRO));
+ BuildMI(MBB, MBBI, dl, TII.get(Mips::NOMACRO));
+ }
+ }
}
void MipsFrameLowering::emitEpilogue(MachineFunction &MF,
@@ -300,13 +306,6 @@ void MipsFrameLowering::emitEpilogue(MachineFunction &MF,
}
}
-void
-MipsFrameLowering::getInitialFrameState(std::vector<MachineMove> &Moves) const {
- MachineLocation Dst(MachineLocation::VirtualFP);
- MachineLocation Src(Mips::SP, 0);
- Moves.push_back(MachineMove(0, Dst, Src));
-}
-
void MipsFrameLowering::
processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
RegScavenger *RS) const {
diff --git a/lib/Target/Mips/MipsFrameLowering.h b/lib/Target/Mips/MipsFrameLowering.h
index 78c78ee..c249756 100644
--- a/lib/Target/Mips/MipsFrameLowering.h
+++ b/lib/Target/Mips/MipsFrameLowering.h
@@ -27,7 +27,8 @@ protected:
public:
explicit MipsFrameLowering(const MipsSubtarget &sti)
- : TargetFrameLowering(StackGrowsDown, 8, 0), STI(sti) {
+ : TargetFrameLowering(StackGrowsDown, sti.hasMips64() ? 16 : 8, 0),
+ STI(sti) {
}
bool targetHandlesStackFrameRounding() const;
@@ -39,8 +40,6 @@ public:
bool hasFP(const MachineFunction &MF) const;
- void getInitialFrameState(std::vector<MachineMove> &Moves) const;
-
void processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
RegScavenger *RS) const;
};
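For reference, the TargetFrameLowering constructor arguments are (stack direction, stack alignment, local area offset), so the change above gives 64-bit subtargets a 16-byte-aligned stack:

    // Illustrative expansion of the initializer:
    //   hasMips64(): TargetFrameLowering(StackGrowsDown, 16, 0)
    //   otherwise:   TargetFrameLowering(StackGrowsDown,  8, 0)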
diff --git a/lib/Target/Mips/MipsISelDAGToDAG.cpp b/lib/Target/Mips/MipsISelDAGToDAG.cpp
index 90aaeb6..9c831ed 100644
--- a/lib/Target/Mips/MipsISelDAGToDAG.cpp
+++ b/lib/Target/Mips/MipsISelDAGToDAG.cpp
@@ -86,9 +86,6 @@ private:
// Complex Pattern.
bool SelectAddr(SDValue N, SDValue &Base, SDValue &Offset);
- SDNode *SelectLoadFp64(SDNode *N);
- SDNode *SelectStoreFp64(SDNode *N);
-
// getI32Imm - Return a target constant with the specified
// value, of type i32.
inline SDValue getI32Imm(unsigned Imm) {
@@ -114,17 +111,20 @@ SDNode *MipsDAGToDAGISel::getGlobalBaseReg() {
/// Used on Mips Load/Store instructions
bool MipsDAGToDAGISel::
SelectAddr(SDValue Addr, SDValue &Base, SDValue &Offset) {
+ EVT ValTy = Addr.getValueType();
+ unsigned GPReg = ValTy == MVT::i32 ? Mips::GP : Mips::GP_64;
+
// if Address is FI, get the TargetFrameIndex.
if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
- Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i32);
- Offset = CurDAG->getTargetConstant(0, MVT::i32);
+ Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), ValTy);
+ Offset = CurDAG->getTargetConstant(0, ValTy);
return true;
}
// on PIC code Load GA
if (TM.getRelocationModel() == Reloc::PIC_) {
if (Addr.getOpcode() == MipsISD::WrapperPIC) {
- Base = CurDAG->getRegister(Mips::GP, MVT::i32);
+ Base = CurDAG->getRegister(GPReg, ValTy);
Offset = Addr.getOperand(0);
return true;
}
@@ -133,7 +133,7 @@ SelectAddr(SDValue Addr, SDValue &Base, SDValue &Offset) {
Addr.getOpcode() == ISD::TargetGlobalAddress))
return false;
else if (Addr.getOpcode() == ISD::TargetGlobalTLSAddress) {
- Base = CurDAG->getRegister(Mips::GP, MVT::i32);
+ Base = CurDAG->getRegister(GPReg, ValTy);
Offset = Addr;
return true;
}
@@ -147,11 +147,11 @@ SelectAddr(SDValue Addr, SDValue &Base, SDValue &Offset) {
// If the first operand is a FI, get the TargetFI Node
if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>
(Addr.getOperand(0)))
- Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i32);
+ Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), ValTy);
else
Base = Addr.getOperand(0);
- Offset = CurDAG->getTargetConstant(CN->getZExtValue(), MVT::i32);
+ Offset = CurDAG->getTargetConstant(CN->getZExtValue(), ValTy);
return true;
}
}
@@ -180,134 +180,10 @@ SelectAddr(SDValue Addr, SDValue &Base, SDValue &Offset) {
}
Base = Addr;
- Offset = CurDAG->getTargetConstant(0, MVT::i32);
+ Offset = CurDAG->getTargetConstant(0, ValTy);
return true;
}
-SDNode *MipsDAGToDAGISel::SelectLoadFp64(SDNode *N) {
- MVT::SimpleValueType NVT =
- N->getValueType(0).getSimpleVT().SimpleTy;
-
- if (!Subtarget.isMips1() || NVT != MVT::f64)
- return NULL;
-
- LoadSDNode *LN = cast<LoadSDNode>(N);
- if (LN->getExtensionType() != ISD::NON_EXTLOAD ||
- LN->getAddressingMode() != ISD::UNINDEXED)
- return NULL;
-
- SDValue Chain = N->getOperand(0);
- SDValue N1 = N->getOperand(1);
- SDValue Offset0, Offset1, Base;
-
- if (!SelectAddr(N1, Base, Offset0) ||
- N1.getValueType() != MVT::i32)
- return NULL;
-
- MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
- MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
- DebugLoc dl = N->getDebugLoc();
-
- // The second load should start after for 4 bytes.
- if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Offset0))
- Offset1 = CurDAG->getTargetConstant(C->getSExtValue()+4, MVT::i32);
- else if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(Offset0))
- Offset1 = CurDAG->getTargetConstantPool(CP->getConstVal(),
- MVT::i32,
- CP->getAlignment(),
- CP->getOffset()+4,
- CP->getTargetFlags());
- else
- return NULL;
-
- // Choose the offsets depending on the endianess
- if (TM.getTargetData()->isBigEndian())
- std::swap(Offset0, Offset1);
-
- // Instead of:
- // ldc $f0, X($3)
- // Generate:
- // lwc $f0, X($3)
- // lwc $f1, X+4($3)
- SDNode *LD0 = CurDAG->getMachineNode(Mips::LWC1, dl, MVT::f32,
- MVT::Other, Base, Offset0, Chain);
- SDValue Undef = SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,
- dl, NVT), 0);
- SDValue I0 = CurDAG->getTargetInsertSubreg(Mips::sub_fpeven, dl,
- MVT::f64, Undef, SDValue(LD0, 0));
-
- SDNode *LD1 = CurDAG->getMachineNode(Mips::LWC1, dl, MVT::f32,
- MVT::Other, Base, Offset1, SDValue(LD0, 1));
- SDValue I1 = CurDAG->getTargetInsertSubreg(Mips::sub_fpodd, dl,
- MVT::f64, I0, SDValue(LD1, 0));
-
- ReplaceUses(SDValue(N, 0), I1);
- ReplaceUses(SDValue(N, 1), Chain);
- cast<MachineSDNode>(LD0)->setMemRefs(MemRefs0, MemRefs0 + 1);
- cast<MachineSDNode>(LD1)->setMemRefs(MemRefs0, MemRefs0 + 1);
- return I1.getNode();
-}
-
-SDNode *MipsDAGToDAGISel::SelectStoreFp64(SDNode *N) {
-
- if (!Subtarget.isMips1() ||
- N->getOperand(1).getValueType() != MVT::f64)
- return NULL;
-
- SDValue Chain = N->getOperand(0);
-
- StoreSDNode *SN = cast<StoreSDNode>(N);
- if (SN->isTruncatingStore() || SN->getAddressingMode() != ISD::UNINDEXED)
- return NULL;
-
- SDValue N1 = N->getOperand(1);
- SDValue N2 = N->getOperand(2);
- SDValue Offset0, Offset1, Base;
-
- if (!SelectAddr(N2, Base, Offset0) ||
- N1.getValueType() != MVT::f64 ||
- N2.getValueType() != MVT::i32)
- return NULL;
-
- MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
- MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
- DebugLoc dl = N->getDebugLoc();
-
- // Get the even and odd part from the f64 register
- SDValue FPOdd = CurDAG->getTargetExtractSubreg(Mips::sub_fpodd,
- dl, MVT::f32, N1);
- SDValue FPEven = CurDAG->getTargetExtractSubreg(Mips::sub_fpeven,
- dl, MVT::f32, N1);
-
- // The second store should start after for 4 bytes.
- if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Offset0))
- Offset1 = CurDAG->getTargetConstant(C->getSExtValue()+4, MVT::i32);
- else
- return NULL;
-
- // Choose the offsets depending on the endianess
- if (TM.getTargetData()->isBigEndian())
- std::swap(Offset0, Offset1);
-
- // Instead of:
- // sdc $f0, X($3)
- // Generate:
- // swc $f0, X($3)
- // swc $f1, X+4($3)
- SDValue Ops0[] = { FPEven, Base, Offset0, Chain };
- Chain = SDValue(CurDAG->getMachineNode(Mips::SWC1, dl,
- MVT::Other, Ops0, 4), 0);
- cast<MachineSDNode>(Chain.getNode())->setMemRefs(MemRefs0, MemRefs0 + 1);
-
- SDValue Ops1[] = { FPOdd, Base, Offset1, Chain };
- Chain = SDValue(CurDAG->getMachineNode(Mips::SWC1, dl,
- MVT::Other, Ops1, 4), 0);
- cast<MachineSDNode>(Chain.getNode())->setMemRefs(MemRefs0, MemRefs0 + 1);
-
- ReplaceUses(SDValue(N, 0), Chain);
- return Chain.getNode();
-}
-
/// Select instructions not customized! Used for
/// expanded, promoted and normal instructions
SDNode* MipsDAGToDAGISel::Select(SDNode *Node) {
@@ -364,6 +240,8 @@ SDNode* MipsDAGToDAGISel::Select(SDNode *Node) {
/// Mul with two results
case ISD::SMUL_LOHI:
case ISD::UMUL_LOHI: {
+ assert(Node->getValueType(0) != MVT::i64 &&
+ "64-bit multiplication with two results not handled.");
SDValue Op1 = Node->getOperand(0);
SDValue Op2 = Node->getOperand(1);
@@ -389,21 +267,29 @@ SDNode* MipsDAGToDAGISel::Select(SDNode *Node) {
/// Special Muls
case ISD::MUL:
- if (Subtarget.isMips32())
+    // Mips32 has a 32-bit three-operand mul instruction.
+ if (Subtarget.hasMips32() && Node->getValueType(0) == MVT::i32)
break;
case ISD::MULHS:
case ISD::MULHU: {
+ assert((Opcode == ISD::MUL || Node->getValueType(0) != MVT::i64) &&
+ "64-bit MULH* not handled.");
+ EVT Ty = Node->getValueType(0);
SDValue MulOp1 = Node->getOperand(0);
SDValue MulOp2 = Node->getOperand(1);
- unsigned MulOp = (Opcode == ISD::MULHU ? Mips::MULTu : Mips::MULT);
+ unsigned MulOp = (Opcode == ISD::MULHU ?
+ Mips::MULTu :
+ (Ty == MVT::i32 ? Mips::MULT : Mips::DMULT));
SDNode *MulNode = CurDAG->getMachineNode(MulOp, dl,
MVT::Glue, MulOp1, MulOp2);
SDValue InFlag = SDValue(MulNode, 0);
- if (Opcode == ISD::MUL)
- return CurDAG->getMachineNode(Mips::MFLO, dl, MVT::i32, InFlag);
+ if (Opcode == ISD::MUL) {
+ unsigned Opc = (Ty == MVT::i32 ? Mips::MFLO : Mips::MFLO64);
+ return CurDAG->getMachineNode(Opc, dl, Ty, InFlag);
+ }
else
return CurDAG->getMachineNode(Mips::MFHI, dl, MVT::i32, InFlag);
}
@@ -417,31 +303,12 @@ SDNode* MipsDAGToDAGISel::Select(SDNode *Node) {
if (Node->getValueType(0) == MVT::f64 && CN->isExactlyValue(+0.0)) {
SDValue Zero = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
Mips::ZERO, MVT::i32);
- SDValue Undef = SDValue(
- CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, MVT::f64), 0);
- SDNode *MTC = CurDAG->getMachineNode(Mips::MTC1, dl, MVT::f32, Zero);
- SDValue I0 = CurDAG->getTargetInsertSubreg(Mips::sub_fpeven, dl,
- MVT::f64, Undef, SDValue(MTC, 0));
- SDValue I1 = CurDAG->getTargetInsertSubreg(Mips::sub_fpodd, dl,
- MVT::f64, I0, SDValue(MTC, 0));
- ReplaceUses(SDValue(Node, 0), I1);
- return I1.getNode();
+ return CurDAG->getMachineNode(Mips::BuildPairF64, dl, MVT::f64, Zero,
+ Zero);
}
break;
}
- case ISD::LOAD:
- if (SDNode *ResNode = SelectLoadFp64(Node))
- return ResNode;
- // Other cases are autogenerated.
- break;
-
- case ISD::STORE:
- if (SDNode *ResNode = SelectStoreFp64(Node))
- return ResNode;
- // Other cases are autogenerated.
- break;
-
case MipsISD::ThreadPointer: {
unsigned SrcReg = Mips::HWR29;
unsigned DestReg = Mips::V1;
diff --git a/lib/Target/Mips/MipsISelLowering.cpp b/lib/Target/Mips/MipsISelLowering.cpp
index b4f4b1b..1932e74 100644
--- a/lib/Target/Mips/MipsISelLowering.cpp
+++ b/lib/Target/Mips/MipsISelLowering.cpp
@@ -35,6 +35,18 @@
#include "llvm/Support/ErrorHandling.h"
using namespace llvm;
+// If I is a shifted mask, set the size (Size) and the first bit of the
+// mask (Pos), and return true.
+// For example, if I is 0x003ff800, (Pos, Size) = (11, 11).
+static bool IsShiftedMask(uint64_t I, uint64_t &Pos, uint64_t &Size) {
+ if (!isUInt<32>(I) || !isShiftedMask_32(I))
+ return false;
+
+ Size = CountPopulation_32(I);
+ Pos = CountTrailingZeros_32(I);
+ return true;
+}
+
const char *MipsTargetLowering::getTargetNodeName(unsigned Opcode) const {
switch (Opcode) {
case MipsISD::JmpLink: return "MipsISD::JmpLink";
@@ -61,27 +73,38 @@ const char *MipsTargetLowering::getTargetNodeName(unsigned Opcode) const {
case MipsISD::ExtractElementF64: return "MipsISD::ExtractElementF64";
case MipsISD::WrapperPIC: return "MipsISD::WrapperPIC";
case MipsISD::DynAlloc: return "MipsISD::DynAlloc";
+ case MipsISD::Sync: return "MipsISD::Sync";
+ case MipsISD::Ext: return "MipsISD::Ext";
+ case MipsISD::Ins: return "MipsISD::Ins";
default: return NULL;
}
}
MipsTargetLowering::
MipsTargetLowering(MipsTargetMachine &TM)
- : TargetLowering(TM, new MipsTargetObjectFile()) {
- Subtarget = &TM.getSubtarget<MipsSubtarget>();
+ : TargetLowering(TM, new MipsTargetObjectFile()),
+ Subtarget(&TM.getSubtarget<MipsSubtarget>()),
+ HasMips64(Subtarget->hasMips64()), IsN64(Subtarget->isABI_N64()) {
// Mips does not have i1 type, so use i32 for
// setcc operations results (slt, sgt, ...).
setBooleanContents(ZeroOrOneBooleanContent);
+ setBooleanVectorContents(ZeroOrOneBooleanContent); // FIXME: Is this correct?
// Set up the register classes
addRegisterClass(MVT::i32, Mips::CPURegsRegisterClass);
addRegisterClass(MVT::f32, Mips::FGR32RegisterClass);
+ if (HasMips64)
+ addRegisterClass(MVT::i64, Mips::CPU64RegsRegisterClass);
+
// When dealing with single precision only, use libcalls
- if (!Subtarget->isSingleFloat())
- if (!Subtarget->isFP64bit())
+ if (!Subtarget->isSingleFloat()) {
+ if (HasMips64)
+ addRegisterClass(MVT::f64, Mips::FGR64RegisterClass);
+ else
addRegisterClass(MVT::f64, Mips::AFGR64RegisterClass);
+ }
// Load extented operations for i1 types must be promoted
setLoadExtAction(ISD::EXTLOAD, MVT::i1, Promote);
@@ -100,6 +123,7 @@ MipsTargetLowering(MipsTargetMachine &TM)
// Mips Custom Operations
setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
+ setOperationAction(ISD::GlobalAddress, MVT::i64, Custom);
setOperationAction(ISD::BlockAddress, MVT::i32, Custom);
setOperationAction(ISD::GlobalTLSAddress, MVT::i32, Custom);
setOperationAction(ISD::JumpTable, MVT::i32, Custom);
@@ -115,6 +139,10 @@ MipsTargetLowering(MipsTargetMachine &TM)
setOperationAction(ISD::SREM, MVT::i32, Expand);
setOperationAction(ISD::UDIV, MVT::i32, Expand);
setOperationAction(ISD::UREM, MVT::i32, Expand);
+ setOperationAction(ISD::SDIV, MVT::i64, Expand);
+ setOperationAction(ISD::SREM, MVT::i64, Expand);
+ setOperationAction(ISD::UDIV, MVT::i64, Expand);
+ setOperationAction(ISD::UREM, MVT::i64, Expand);
// Operations not directly supported by Mips.
setOperationAction(ISD::BR_JT, MVT::Other, Expand);
@@ -126,10 +154,14 @@ MipsTargetLowering(MipsTargetMachine &TM)
setOperationAction(ISD::CTPOP, MVT::i32, Expand);
setOperationAction(ISD::CTTZ, MVT::i32, Expand);
setOperationAction(ISD::ROTL, MVT::i32, Expand);
+ setOperationAction(ISD::ROTL, MVT::i64, Expand);
- if (!Subtarget->isMips32r2())
+ if (!Subtarget->hasMips32r2())
setOperationAction(ISD::ROTR, MVT::i32, Expand);
+ if (!Subtarget->hasMips64r2())
+ setOperationAction(ISD::ROTR, MVT::i64, Expand);
+
setOperationAction(ISD::SHL_PARTS, MVT::i32, Expand);
setOperationAction(ISD::SRA_PARTS, MVT::i32, Expand);
setOperationAction(ISD::SRL_PARTS, MVT::i32, Expand);
@@ -159,7 +191,14 @@ MipsTargetLowering(MipsTargetMachine &TM)
// Use the default for now
setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
- setOperationAction(ISD::MEMBARRIER, MVT::Other, Expand);
+
+ setOperationAction(ISD::MEMBARRIER, MVT::Other, Custom);
+ setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom);
+
+ setOperationAction(ISD::ATOMIC_LOAD, MVT::i32, Expand);
+ setOperationAction(ISD::ATOMIC_STORE, MVT::i32, Expand);
+
+ setInsertFencesForAtomic(true);
if (Subtarget->isSingleFloat())
setOperationAction(ISD::SELECT_CC, MVT::f64, Expand);
@@ -180,6 +219,8 @@ MipsTargetLowering(MipsTargetMachine &TM)
setTargetDAGCombine(ISD::SDIVREM);
setTargetDAGCombine(ISD::UDIVREM);
setTargetDAGCombine(ISD::SETCC);
+ setTargetDAGCombine(ISD::AND);
+ setTargetDAGCombine(ISD::OR);
setMinFunctionAlignment(2);
@@ -190,7 +231,12 @@ MipsTargetLowering(MipsTargetMachine &TM)
setExceptionSelectorRegister(Mips::A1);
}
-MVT::SimpleValueType MipsTargetLowering::getSetCCResultType(EVT VT) const {
+bool MipsTargetLowering::allowsUnalignedMemoryAccesses(EVT VT) const {
+ MVT::SimpleValueType SVT = VT.getSimpleVT().SimpleTy;
+ return SVT == MVT::i64 || SVT == MVT::i32 || SVT == MVT::i16;
+}
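+
+// Illustrative use (not part of the patch): given the hook above,
+//   allowsUnalignedMemoryAccesses(MVT::i32)  // true
+//   allowsUnalignedMemoryAccesses(MVT::i8)   // false
+// so SelectionDAG may emit unaligned i16/i32/i64 loads and stores directly,
+// assuming the backend handles the misalignment (e.g. lwl/lwr sequences).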
+
+EVT MipsTargetLowering::getSetCCResultType(EVT VT) const {
return MVT::i32;
}
@@ -348,7 +394,7 @@ static SDValue PerformADDECombine(SDNode *N, SelectionDAG& DAG,
if (DCI.isBeforeLegalize())
return SDValue();
- if (Subtarget->isMips32() && SelectMadd(N, &DAG))
+ if (Subtarget->hasMips32() && SelectMadd(N, &DAG))
return SDValue(N, 0);
return SDValue();
@@ -360,7 +406,7 @@ static SDValue PerformSUBECombine(SDNode *N, SelectionDAG& DAG,
if (DCI.isBeforeLegalize())
return SDValue();
- if (Subtarget->isMips32() && SelectMsub(N, &DAG))
+ if (Subtarget->hasMips32() && SelectMsub(N, &DAG))
return SDValue(N, 0);
return SDValue();
@@ -372,6 +418,9 @@ static SDValue PerformDivRemCombine(SDNode *N, SelectionDAG& DAG,
if (DCI.isBeforeLegalizeOps())
return SDValue();
+ EVT Ty = N->getValueType(0);
+ unsigned LO = (Ty == MVT::i32) ? Mips::LO : Mips::LO64;
+ unsigned HI = (Ty == MVT::i32) ? Mips::HI : Mips::HI64;
unsigned opc = N->getOpcode() == ISD::SDIVREM ? MipsISD::DivRem :
MipsISD::DivRemU;
DebugLoc dl = N->getDebugLoc();
@@ -383,7 +432,7 @@ static SDValue PerformDivRemCombine(SDNode *N, SelectionDAG& DAG,
// insert MFLO
if (N->hasAnyUseOfValue(0)) {
- SDValue CopyFromLo = DAG.getCopyFromReg(InChain, dl, Mips::LO, MVT::i32,
+ SDValue CopyFromLo = DAG.getCopyFromReg(InChain, dl, LO, Ty,
InGlue);
DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), CopyFromLo);
InChain = CopyFromLo.getValue(1);
@@ -393,7 +442,7 @@ static SDValue PerformDivRemCombine(SDNode *N, SelectionDAG& DAG,
// insert MFHI
if (N->hasAnyUseOfValue(1)) {
SDValue CopyFromHi = DAG.getCopyFromReg(InChain, dl,
- Mips::HI, MVT::i32, InGlue);
+ HI, Ty, InGlue);
DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), CopyFromHi);
}
@@ -490,6 +539,101 @@ static SDValue PerformSETCCCombine(SDNode *N, SelectionDAG& DAG,
return CreateCMovFP(DAG, Cond, True, False, N->getDebugLoc());
}
+static SDValue PerformANDCombine(SDNode *N, SelectionDAG& DAG,
+ TargetLowering::DAGCombinerInfo &DCI,
+ const MipsSubtarget* Subtarget) {
+ // Pattern match EXT.
+ // $dst = and ((sra or srl) $src, pos), (2**size - 1)
+ // => ext $dst, $src, size, pos
+ if (DCI.isBeforeLegalizeOps() || !Subtarget->hasMips32r2())
+ return SDValue();
+
+ SDValue ShiftRight = N->getOperand(0), Mask = N->getOperand(1);
+
+ // Op's first operand must be a shift right.
+ if (ShiftRight.getOpcode() != ISD::SRA && ShiftRight.getOpcode() != ISD::SRL)
+ return SDValue();
+
+ // The second operand of the shift must be an immediate.
+ uint64_t Pos;
+ ConstantSDNode *CN;
+ if (!(CN = dyn_cast<ConstantSDNode>(ShiftRight.getOperand(1))))
+ return SDValue();
+
+ Pos = CN->getZExtValue();
+
+ uint64_t SMPos, SMSize;
+ // Op's second operand must be a shifted mask.
+ if (!(CN = dyn_cast<ConstantSDNode>(Mask)) ||
+ !IsShiftedMask(CN->getZExtValue(), SMPos, SMSize))
+ return SDValue();
+
+ // Return if the shifted mask does not start at bit 0 or the sum of its size
+ // and Pos exceeds the word's size.
+ if (SMPos != 0 || Pos + SMSize > 32)
+ return SDValue();
+
+ return DAG.getNode(MipsISD::Ext, N->getDebugLoc(), MVT::i32,
+ ShiftRight.getOperand(0),
+ DAG.getConstant(Pos, MVT::i32),
+ DAG.getConstant(SMSize, MVT::i32));
+}
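+
+// Source-level illustration (not part of the patch): on a MIPS32r2 target
+// the DAG for the function below matches this combine with Pos = 3,
+// SMPos = 0, SMSize = 5 and collapses to a single MipsISD::Ext node:
+//   unsigned ExtractBits(unsigned X) { return (X >> 3) & 0x1F; }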
+
+static SDValue PerformORCombine(SDNode *N, SelectionDAG& DAG,
+ TargetLowering::DAGCombinerInfo &DCI,
+ const MipsSubtarget* Subtarget) {
+ // Pattern match INS.
+ // $dst = or (and $src1 , mask0), (and (shl $src, pos), mask1),
+ // where mask1 = (2**size - 1) << pos, mask0 = ~mask1
+ // => ins $dst, $src, size, pos, $src1
+ if (DCI.isBeforeLegalizeOps() || !Subtarget->hasMips32r2())
+ return SDValue();
+
+ SDValue And0 = N->getOperand(0), And1 = N->getOperand(1);
+ uint64_t SMPos0, SMSize0, SMPos1, SMSize1;
+ ConstantSDNode *CN;
+
+ // See if Op's first operand matches (and $src1, mask0).
+ if (And0.getOpcode() != ISD::AND)
+ return SDValue();
+
+ if (!(CN = dyn_cast<ConstantSDNode>(And0.getOperand(1))) ||
+ !IsShiftedMask(~CN->getSExtValue(), SMPos0, SMSize0))
+ return SDValue();
+
+ // See if Op's second operand matches (and (shl $src, pos), mask1).
+ if (And1.getOpcode() != ISD::AND)
+ return SDValue();
+
+ if (!(CN = dyn_cast<ConstantSDNode>(And1.getOperand(1))) ||
+ !IsShiftedMask(CN->getZExtValue(), SMPos1, SMSize1))
+ return SDValue();
+
+ // The shift masks must have the same position and size.
+ if (SMPos0 != SMPos1 || SMSize0 != SMSize1)
+ return SDValue();
+
+ SDValue Shl = And1.getOperand(0);
+ if (Shl.getOpcode() != ISD::SHL)
+ return SDValue();
+
+ if (!(CN = dyn_cast<ConstantSDNode>(Shl.getOperand(1))))
+ return SDValue();
+
+ unsigned Shamt = CN->getZExtValue();
+
+ // Return if the shift amount and the first bit position of mask are not the
+ // same.
+ if (Shamt != SMPos0)
+ return SDValue();
+
+ return DAG.getNode(MipsISD::Ins, N->getDebugLoc(), MVT::i32,
+ Shl.getOperand(0),
+ DAG.getConstant(SMPos0, MVT::i32),
+ DAG.getConstant(SMSize0, MVT::i32),
+ And0.getOperand(0));
+}
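+
+// Source-level illustration (not part of the patch): the function below
+// matches this combine with SMPos0 = SMPos1 = Shamt = 3, SMSize0 = 5 and
+// collapses to a single MipsISD::Ins node on MIPS32r2:
+//   unsigned InsertBits(unsigned D, unsigned S) {
+//     return (D & ~(0x1Fu << 3)) | ((S << 3) & (0x1Fu << 3));
+//   }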
+
SDValue MipsTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI)
const {
SelectionDAG &DAG = DCI.DAG;
@@ -506,6 +650,10 @@ SDValue MipsTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI)
return PerformDivRemCombine(N, DAG, DCI, Subtarget);
case ISD::SETCC:
return PerformSETCCCombine(N, DAG, DCI, Subtarget);
+ case ISD::AND:
+ return PerformANDCombine(N, DAG, DCI, Subtarget);
+ case ISD::OR:
+ return PerformORCombine(N, DAG, DCI, Subtarget);
}
return SDValue();
@@ -527,6 +675,8 @@ LowerOperation(SDValue Op, SelectionDAG &DAG) const
case ISD::VASTART: return LowerVASTART(Op, DAG);
case ISD::FCOPYSIGN: return LowerFCOPYSIGN(Op, DAG);
case ISD::FRAMEADDR: return LowerFRAMEADDR(Op, DAG);
+ case ISD::MEMBARRIER: return LowerMEMBARRIER(Op, DAG);
+ case ISD::ATOMIC_FENCE: return LowerATOMIC_FENCE(Op, DAG);
}
return SDValue();
}
@@ -733,13 +883,13 @@ MipsTargetLowering::EmitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB,
const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
DebugLoc dl = MI->getDebugLoc();
- unsigned Dest = MI->getOperand(0).getReg();
+ unsigned OldVal = MI->getOperand(0).getReg();
unsigned Ptr = MI->getOperand(1).getReg();
unsigned Incr = MI->getOperand(2).getReg();
- unsigned Oldval = RegInfo.createVirtualRegister(RC);
- unsigned Tmp1 = RegInfo.createVirtualRegister(RC);
- unsigned Tmp2 = RegInfo.createVirtualRegister(RC);
+ unsigned StoreVal = RegInfo.createVirtualRegister(RC);
+ unsigned AndRes = RegInfo.createVirtualRegister(RC);
+ unsigned Success = RegInfo.createVirtualRegister(RC);
// insert new blocks after the current block
const BasicBlock *LLVM_BB = BB->getBasicBlock();
@@ -758,61 +908,38 @@ MipsTargetLowering::EmitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB,
// thisMBB:
// ...
- // sw incr, fi(sp) // store incr to stack (when BinOpcode == 0)
// fallthrough --> loopMBB
-
- // Note: for atomic.swap (when BinOpcode == 0), storing incr to stack before
- // the loop and then loading it from stack in block loopMBB is necessary to
- // prevent MachineLICM pass to hoist "or" instruction out of the block
- // loopMBB.
-
- int fi = 0;
- if (BinOpcode == 0 && !Nand) {
- // Get or create a temporary stack location.
- MipsFunctionInfo *MipsFI = MF->getInfo<MipsFunctionInfo>();
- fi = MipsFI->getAtomicFrameIndex();
- if (fi == -1) {
- fi = MF->getFrameInfo()->CreateStackObject(Size, Size, false);
- MipsFI->setAtomicFrameIndex(fi);
- }
-
- BuildMI(BB, dl, TII->get(Mips::SW))
- .addReg(Incr).addFrameIndex(fi).addImm(0);
- }
BB->addSuccessor(loopMBB);
+ loopMBB->addSuccessor(loopMBB);
+ loopMBB->addSuccessor(exitMBB);
// loopMBB:
// ll oldval, 0(ptr)
- // or dest, $0, oldval
- // <binop> tmp1, oldval, incr
- // sc tmp1, 0(ptr)
- // beq tmp1, $0, loopMBB
+ // <binop> storeval, oldval, incr
+ // sc success, storeval, 0(ptr)
+ // beq success, $0, loopMBB
BB = loopMBB;
- BuildMI(BB, dl, TII->get(Mips::LL), Oldval).addReg(Ptr).addImm(0);
- BuildMI(BB, dl, TII->get(Mips::OR), Dest).addReg(Mips::ZERO).addReg(Oldval);
+ BuildMI(BB, dl, TII->get(Mips::LL), OldVal).addReg(Ptr).addImm(0);
if (Nand) {
- // and tmp2, oldval, incr
- // nor tmp1, $0, tmp2
- BuildMI(BB, dl, TII->get(Mips::AND), Tmp2).addReg(Oldval).addReg(Incr);
- BuildMI(BB, dl, TII->get(Mips::NOR), Tmp1).addReg(Mips::ZERO).addReg(Tmp2);
+ // and andres, oldval, incr
+ // nor storeval, $0, andres
+ BuildMI(BB, dl, TII->get(Mips::AND), AndRes).addReg(OldVal).addReg(Incr);
+ BuildMI(BB, dl, TII->get(Mips::NOR), StoreVal)
+ .addReg(Mips::ZERO).addReg(AndRes);
} else if (BinOpcode) {
- // <binop> tmp1, oldval, incr
- BuildMI(BB, dl, TII->get(BinOpcode), Tmp1).addReg(Oldval).addReg(Incr);
+ // <binop> storeval, oldval, incr
+ BuildMI(BB, dl, TII->get(BinOpcode), StoreVal).addReg(OldVal).addReg(Incr);
} else {
- // lw tmp2, fi(sp) // load incr from stack
- // or tmp1, $zero, tmp2
- BuildMI(BB, dl, TII->get(Mips::LW), Tmp2).addFrameIndex(fi).addImm(0);
- BuildMI(BB, dl, TII->get(Mips::OR), Tmp1).addReg(Mips::ZERO).addReg(Tmp2);
+ StoreVal = Incr;
}
- BuildMI(BB, dl, TII->get(Mips::SC), Tmp1).addReg(Tmp1).addReg(Ptr).addImm(0);
+ BuildMI(BB, dl, TII->get(Mips::SC), Success)
+ .addReg(StoreVal).addReg(Ptr).addImm(0);
BuildMI(BB, dl, TII->get(Mips::BEQ))
- .addReg(Tmp1).addReg(Mips::ZERO).addMBB(loopMBB);
- BB->addSuccessor(loopMBB);
- BB->addSuccessor(exitMBB);
+ .addReg(Success).addReg(Mips::ZERO).addMBB(loopMBB);
MI->eraseFromParent(); // The instruction is gone now.
- return BB;
+ return exitMBB;
}
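// Concrete instance (illustrative, not in the patch): for a 32-bit
// "atomicrmw add", BinOpcode is Mips::ADDu and the loop above becomes
//   $loop: ll    oldval, 0(ptr)
//          addu  storeval, oldval, incr
//          sc    success, storeval, 0(ptr)
//          beq   success, $0, $loop
// with oldval carrying the result of the atomicrmw out of the loop.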
MachineBasicBlock *
@@ -833,33 +960,34 @@ MipsTargetLowering::EmitAtomicBinaryPartword(MachineInstr *MI,
unsigned Ptr = MI->getOperand(1).getReg();
unsigned Incr = MI->getOperand(2).getReg();
- unsigned Addr = RegInfo.createVirtualRegister(RC);
- unsigned Shift = RegInfo.createVirtualRegister(RC);
+ unsigned AlignedAddr = RegInfo.createVirtualRegister(RC);
+ unsigned ShiftAmt = RegInfo.createVirtualRegister(RC);
unsigned Mask = RegInfo.createVirtualRegister(RC);
unsigned Mask2 = RegInfo.createVirtualRegister(RC);
- unsigned Newval = RegInfo.createVirtualRegister(RC);
- unsigned Oldval = RegInfo.createVirtualRegister(RC);
+ unsigned NewVal = RegInfo.createVirtualRegister(RC);
+ unsigned OldVal = RegInfo.createVirtualRegister(RC);
unsigned Incr2 = RegInfo.createVirtualRegister(RC);
- unsigned Tmp1 = RegInfo.createVirtualRegister(RC);
- unsigned Tmp2 = RegInfo.createVirtualRegister(RC);
- unsigned Tmp3 = RegInfo.createVirtualRegister(RC);
- unsigned Tmp4 = RegInfo.createVirtualRegister(RC);
- unsigned Tmp5 = RegInfo.createVirtualRegister(RC);
- unsigned Tmp6 = RegInfo.createVirtualRegister(RC);
- unsigned Tmp7 = RegInfo.createVirtualRegister(RC);
- unsigned Tmp8 = RegInfo.createVirtualRegister(RC);
- unsigned Tmp9 = RegInfo.createVirtualRegister(RC);
- unsigned Tmp10 = RegInfo.createVirtualRegister(RC);
- unsigned Tmp11 = RegInfo.createVirtualRegister(RC);
- unsigned Tmp12 = RegInfo.createVirtualRegister(RC);
+ unsigned MaskLSB2 = RegInfo.createVirtualRegister(RC);
+ unsigned PtrLSB2 = RegInfo.createVirtualRegister(RC);
+ unsigned MaskUpper = RegInfo.createVirtualRegister(RC);
+ unsigned AndRes = RegInfo.createVirtualRegister(RC);
+ unsigned BinOpRes = RegInfo.createVirtualRegister(RC);
+ unsigned MaskedOldVal0 = RegInfo.createVirtualRegister(RC);
+ unsigned StoreVal = RegInfo.createVirtualRegister(RC);
+ unsigned MaskedOldVal1 = RegInfo.createVirtualRegister(RC);
+ unsigned SrlRes = RegInfo.createVirtualRegister(RC);
+ unsigned SllRes = RegInfo.createVirtualRegister(RC);
+ unsigned Success = RegInfo.createVirtualRegister(RC);
// insert new blocks after the current block
const BasicBlock *LLVM_BB = BB->getBasicBlock();
MachineBasicBlock *loopMBB = MF->CreateMachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *sinkMBB = MF->CreateMachineBasicBlock(LLVM_BB);
MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB);
MachineFunction::iterator It = BB;
++It;
MF->insert(It, loopMBB);
+ MF->insert(It, sinkMBB);
MF->insert(It, exitMBB);
// Transfer the remainder of BB and its successor edges to exitMBB.
@@ -868,111 +996,104 @@ MipsTargetLowering::EmitAtomicBinaryPartword(MachineInstr *MI,
BB->end());
exitMBB->transferSuccessorsAndUpdatePHIs(BB);
+ BB->addSuccessor(loopMBB);
+ loopMBB->addSuccessor(loopMBB);
+ loopMBB->addSuccessor(sinkMBB);
+ sinkMBB->addSuccessor(exitMBB);
+
// thisMBB:
- // addiu tmp1,$0,-4 # 0xfffffffc
- // and addr,ptr,tmp1
- // andi tmp2,ptr,3
- // sll shift,tmp2,3
- // ori tmp3,$0,255 # 0xff
- // sll mask,tmp3,shift
+ // addiu masklsb2,$0,-4 # 0xfffffffc
+ // and alignedaddr,ptr,masklsb2
+ // andi ptrlsb2,ptr,3
+ // sll shiftamt,ptrlsb2,3
+ // ori maskupper,$0,255 # 0xff
+ // sll mask,maskupper,shiftamt
// nor mask2,$0,mask
- // andi tmp4,incr,255
- // sll incr2,tmp4,shift
- // sw incr2, fi(sp) // store incr2 to stack (when BinOpcode == 0)
-
- // Note: for atomic.swap (when BinOpcode == 0), storing incr2 to stack before
- // the loop and then loading it from stack in block loopMBB is necessary to
- // prevent MachineLICM pass to hoist "or" instruction out of the block
- // loopMBB.
+ // sll incr2,incr,shiftamt
int64_t MaskImm = (Size == 1) ? 255 : 65535;
- BuildMI(BB, dl, TII->get(Mips::ADDiu), Tmp1).addReg(Mips::ZERO).addImm(-4);
- BuildMI(BB, dl, TII->get(Mips::AND), Addr).addReg(Ptr).addReg(Tmp1);
- BuildMI(BB, dl, TII->get(Mips::ANDi), Tmp2).addReg(Ptr).addImm(3);
- BuildMI(BB, dl, TII->get(Mips::SLL), Shift).addReg(Tmp2).addImm(3);
- BuildMI(BB, dl, TII->get(Mips::ORi), Tmp3).addReg(Mips::ZERO).addImm(MaskImm);
- BuildMI(BB, dl, TII->get(Mips::SLL), Mask).addReg(Tmp3).addReg(Shift);
+ BuildMI(BB, dl, TII->get(Mips::ADDiu), MaskLSB2)
+ .addReg(Mips::ZERO).addImm(-4);
+ BuildMI(BB, dl, TII->get(Mips::AND), AlignedAddr)
+ .addReg(Ptr).addReg(MaskLSB2);
+ BuildMI(BB, dl, TII->get(Mips::ANDi), PtrLSB2).addReg(Ptr).addImm(3);
+ BuildMI(BB, dl, TII->get(Mips::SLL), ShiftAmt).addReg(PtrLSB2).addImm(3);
+ BuildMI(BB, dl, TII->get(Mips::ORi), MaskUpper)
+ .addReg(Mips::ZERO).addImm(MaskImm);
+ BuildMI(BB, dl, TII->get(Mips::SLLV), Mask)
+ .addReg(ShiftAmt).addReg(MaskUpper);
BuildMI(BB, dl, TII->get(Mips::NOR), Mask2).addReg(Mips::ZERO).addReg(Mask);
- if (BinOpcode != Mips::SUBu) {
- BuildMI(BB, dl, TII->get(Mips::ANDi), Tmp4).addReg(Incr).addImm(MaskImm);
- BuildMI(BB, dl, TII->get(Mips::SLL), Incr2).addReg(Tmp4).addReg(Shift);
- } else {
- BuildMI(BB, dl, TII->get(Mips::SUBu), Tmp4).addReg(Mips::ZERO).addReg(Incr);
- BuildMI(BB, dl, TII->get(Mips::ANDi), Tmp5).addReg(Tmp4).addImm(MaskImm);
- BuildMI(BB, dl, TII->get(Mips::SLL), Incr2).addReg(Tmp5).addReg(Shift);
- }
+ BuildMI(BB, dl, TII->get(Mips::SLLV), Incr2).addReg(ShiftAmt).addReg(Incr);
- int fi = 0;
- if (BinOpcode == 0 && !Nand) {
- // Get or create a temporary stack location.
- MipsFunctionInfo *MipsFI = MF->getInfo<MipsFunctionInfo>();
- fi = MipsFI->getAtomicFrameIndex();
- if (fi == -1) {
- fi = MF->getFrameInfo()->CreateStackObject(Size, Size, false);
- MipsFI->setAtomicFrameIndex(fi);
- }
-
- BuildMI(BB, dl, TII->get(Mips::SW))
- .addReg(Incr2).addFrameIndex(fi).addImm(0);
- }
- BB->addSuccessor(loopMBB);
+ // atomic.load.binop
+ // loopMBB:
+ // ll oldval,0(alignedaddr)
+ // binop binopres,oldval,incr2
+ // and newval,binopres,mask
+ // and maskedoldval0,oldval,mask2
+ // or storeval,maskedoldval0,newval
+ // sc success,storeval,0(alignedaddr)
+ // beq success,$0,loopMBB
+
+ // atomic.swap
// loopMBB:
- // ll oldval,0(addr)
- // binop tmp7,oldval,incr2
- // and newval,tmp7,mask
- // and tmp8,oldval,mask2
- // or tmp9,tmp8,newval
- // sc tmp9,0(addr)
- // beq tmp9,$0,loopMBB
+ // ll oldval,0(alignedaddr)
+ // and newval,incr2,mask
+ // and maskedoldval0,oldval,mask2
+ // or storeval,maskedoldval0,newval
+ // sc success,storeval,0(alignedaddr)
+ // beq success,$0,loopMBB
+
BB = loopMBB;
- BuildMI(BB, dl, TII->get(Mips::LL), Oldval).addReg(Addr).addImm(0);
+ BuildMI(BB, dl, TII->get(Mips::LL), OldVal).addReg(AlignedAddr).addImm(0);
if (Nand) {
- // and tmp6, oldval, incr2
- // nor tmp7, $0, tmp6
- BuildMI(BB, dl, TII->get(Mips::AND), Tmp6).addReg(Oldval).addReg(Incr2);
- BuildMI(BB, dl, TII->get(Mips::NOR), Tmp7).addReg(Mips::ZERO).addReg(Tmp6);
- } else if (BinOpcode == Mips::SUBu) {
- // addu tmp7, oldval, incr2
- BuildMI(BB, dl, TII->get(Mips::ADDu), Tmp7).addReg(Oldval).addReg(Incr2);
+ // and andres, oldval, incr2
+ // nor binopres, $0, andres
+ // and newval, binopres, mask
+ BuildMI(BB, dl, TII->get(Mips::AND), AndRes).addReg(OldVal).addReg(Incr2);
+ BuildMI(BB, dl, TII->get(Mips::NOR), BinOpRes)
+ .addReg(Mips::ZERO).addReg(AndRes);
+ BuildMI(BB, dl, TII->get(Mips::AND), NewVal).addReg(BinOpRes).addReg(Mask);
} else if (BinOpcode) {
- // <binop> tmp7, oldval, incr2
- BuildMI(BB, dl, TII->get(BinOpcode), Tmp7).addReg(Oldval).addReg(Incr2);
- } else {
- // lw tmp6, fi(sp) // load incr2 from stack
- // or tmp7, $zero, tmp6
- BuildMI(BB, dl, TII->get(Mips::LW), Tmp6).addFrameIndex(fi).addImm(0);
- BuildMI(BB, dl, TII->get(Mips::OR), Tmp7).addReg(Mips::ZERO).addReg(Tmp6);
+ // <binop> binopres, oldval, incr2
+ // and newval, binopres, mask
+ BuildMI(BB, dl, TII->get(BinOpcode), BinOpRes).addReg(OldVal).addReg(Incr2);
+ BuildMI(BB, dl, TII->get(Mips::AND), NewVal).addReg(BinOpRes).addReg(Mask);
+ } else {// atomic.swap
+ // and newval, incr2, mask
+ BuildMI(BB, dl, TII->get(Mips::AND), NewVal).addReg(Incr2).addReg(Mask);
}
- BuildMI(BB, dl, TII->get(Mips::AND), Newval).addReg(Tmp7).addReg(Mask);
- BuildMI(BB, dl, TII->get(Mips::AND), Tmp8).addReg(Oldval).addReg(Mask2);
- BuildMI(BB, dl, TII->get(Mips::OR), Tmp9).addReg(Tmp8).addReg(Newval);
- BuildMI(BB, dl, TII->get(Mips::SC), Tmp9).addReg(Tmp9).addReg(Addr).addImm(0);
+
+ BuildMI(BB, dl, TII->get(Mips::AND), MaskedOldVal0)
+ .addReg(OldVal).addReg(Mask2);
+ BuildMI(BB, dl, TII->get(Mips::OR), StoreVal)
+ .addReg(MaskedOldVal0).addReg(NewVal);
+ BuildMI(BB, dl, TII->get(Mips::SC), Success)
+ .addReg(StoreVal).addReg(AlignedAddr).addImm(0);
BuildMI(BB, dl, TII->get(Mips::BEQ))
- .addReg(Tmp9).addReg(Mips::ZERO).addMBB(loopMBB);
- BB->addSuccessor(loopMBB);
- BB->addSuccessor(exitMBB);
-
- // exitMBB:
- // and tmp10,oldval,mask
- // srl tmp11,tmp10,shift
- // sll tmp12,tmp11,24
- // sra dest,tmp12,24
- BB = exitMBB;
+ .addReg(Success).addReg(Mips::ZERO).addMBB(loopMBB);
+
+ // sinkMBB:
+ // and maskedoldval1,oldval,mask
+ // srl srlres,maskedoldval1,shiftamt
+ // sll sllres,srlres,24
+ // sra dest,sllres,24
+ BB = sinkMBB;
int64_t ShiftImm = (Size == 1) ? 24 : 16;
- // reverse order
- BuildMI(*BB, BB->begin(), dl, TII->get(Mips::SRA), Dest)
- .addReg(Tmp12).addImm(ShiftImm);
- BuildMI(*BB, BB->begin(), dl, TII->get(Mips::SLL), Tmp12)
- .addReg(Tmp11).addImm(ShiftImm);
- BuildMI(*BB, BB->begin(), dl, TII->get(Mips::SRL), Tmp11)
- .addReg(Tmp10).addReg(Shift);
- BuildMI(*BB, BB->begin(), dl, TII->get(Mips::AND), Tmp10)
- .addReg(Oldval).addReg(Mask);
+
+ BuildMI(BB, dl, TII->get(Mips::AND), MaskedOldVal1)
+ .addReg(OldVal).addReg(Mask);
+ BuildMI(BB, dl, TII->get(Mips::SRLV), SrlRes)
+ .addReg(ShiftAmt).addReg(MaskedOldVal1);
+ BuildMI(BB, dl, TII->get(Mips::SLL), SllRes)
+ .addReg(SrlRes).addImm(ShiftImm);
+ BuildMI(BB, dl, TII->get(Mips::SRA), Dest)
+ .addReg(SllRes).addImm(ShiftImm);
MI->eraseFromParent(); // The instruction is gone now.
- return BB;
+ return exitMBB;
}
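// Sanity sketch (illustrative, not in the patch): the trailing sll/sra pair
// sign-extends the recovered subword; for Size == 1 (ShiftImm == 24) it
// computes the classic byte sign-extension idiom
//   int32_t SignExtendByte(uint32_t W) {
//     return (int32_t)(W << 24) >> 24;   // e.g. 0x000000FF -> -1
//   }
// and ShiftImm == 16 does the same for halfwords.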
MachineBasicBlock *
@@ -989,11 +1110,10 @@ MipsTargetLowering::EmitAtomicCmpSwap(MachineInstr *MI,
unsigned Dest = MI->getOperand(0).getReg();
unsigned Ptr = MI->getOperand(1).getReg();
- unsigned Oldval = MI->getOperand(2).getReg();
- unsigned Newval = MI->getOperand(3).getReg();
+ unsigned OldVal = MI->getOperand(2).getReg();
+ unsigned NewVal = MI->getOperand(3).getReg();
- unsigned Tmp1 = RegInfo.createVirtualRegister(RC);
- unsigned Tmp2 = RegInfo.createVirtualRegister(RC);
+ unsigned Success = RegInfo.createVirtualRegister(RC);
// insert new blocks after the current block
const BasicBlock *LLVM_BB = BB->getBasicBlock();
@@ -1012,26 +1132,14 @@ MipsTargetLowering::EmitAtomicCmpSwap(MachineInstr *MI,
BB->end());
exitMBB->transferSuccessorsAndUpdatePHIs(BB);
- // Get or create a temporary stack location.
- MipsFunctionInfo *MipsFI = MF->getInfo<MipsFunctionInfo>();
- int fi = MipsFI->getAtomicFrameIndex();
- if (fi == -1) {
- fi = MF->getFrameInfo()->CreateStackObject(Size, Size, false);
- MipsFI->setAtomicFrameIndex(fi);
- }
-
// thisMBB:
// ...
- // sw newval, fi(sp) // store newval to stack
// fallthrough --> loop1MBB
-
- // Note: storing newval to stack before the loop and then loading it from
- // stack in block loop2MBB is necessary to prevent MachineLICM pass to
- // hoist "or" instruction out of the block loop2MBB.
-
- BuildMI(BB, dl, TII->get(Mips::SW))
- .addReg(Newval).addFrameIndex(fi).addImm(0);
BB->addSuccessor(loop1MBB);
+ loop1MBB->addSuccessor(exitMBB);
+ loop1MBB->addSuccessor(loop2MBB);
+ loop2MBB->addSuccessor(loop1MBB);
+ loop2MBB->addSuccessor(exitMBB);
// loop1MBB:
// ll dest, 0(ptr)
@@ -1039,27 +1147,20 @@ MipsTargetLowering::EmitAtomicCmpSwap(MachineInstr *MI,
BB = loop1MBB;
BuildMI(BB, dl, TII->get(Mips::LL), Dest).addReg(Ptr).addImm(0);
BuildMI(BB, dl, TII->get(Mips::BNE))
- .addReg(Dest).addReg(Oldval).addMBB(exitMBB);
- BB->addSuccessor(exitMBB);
- BB->addSuccessor(loop2MBB);
+ .addReg(Dest).addReg(OldVal).addMBB(exitMBB);
// loop2MBB:
- // lw tmp2, fi(sp) // load newval from stack
- // or tmp1, $0, tmp2
- // sc tmp1, 0(ptr)
- // beq tmp1, $0, loop1MBB
+ // sc success, newval, 0(ptr)
+ // beq success, $0, loop1MBB
BB = loop2MBB;
- BuildMI(BB, dl, TII->get(Mips::LW), Tmp2).addFrameIndex(fi).addImm(0);
- BuildMI(BB, dl, TII->get(Mips::OR), Tmp1).addReg(Mips::ZERO).addReg(Tmp2);
- BuildMI(BB, dl, TII->get(Mips::SC), Tmp1).addReg(Tmp1).addReg(Ptr).addImm(0);
+ BuildMI(BB, dl, TII->get(Mips::SC), Success)
+ .addReg(NewVal).addReg(Ptr).addImm(0);
BuildMI(BB, dl, TII->get(Mips::BEQ))
- .addReg(Tmp1).addReg(Mips::ZERO).addMBB(loop1MBB);
- BB->addSuccessor(loop1MBB);
- BB->addSuccessor(exitMBB);
+ .addReg(Success).addReg(Mips::ZERO).addMBB(loop1MBB);
MI->eraseFromParent(); // The instruction is gone now.
- return BB;
+ return exitMBB;
}
MachineBasicBlock *
@@ -1077,36 +1178,39 @@ MipsTargetLowering::EmitAtomicCmpSwapPartword(MachineInstr *MI,
unsigned Dest = MI->getOperand(0).getReg();
unsigned Ptr = MI->getOperand(1).getReg();
- unsigned Oldval = MI->getOperand(2).getReg();
- unsigned Newval = MI->getOperand(3).getReg();
+ unsigned CmpVal = MI->getOperand(2).getReg();
+ unsigned NewVal = MI->getOperand(3).getReg();
- unsigned Addr = RegInfo.createVirtualRegister(RC);
- unsigned Shift = RegInfo.createVirtualRegister(RC);
+ unsigned AlignedAddr = RegInfo.createVirtualRegister(RC);
+ unsigned ShiftAmt = RegInfo.createVirtualRegister(RC);
unsigned Mask = RegInfo.createVirtualRegister(RC);
unsigned Mask2 = RegInfo.createVirtualRegister(RC);
- unsigned Oldval2 = RegInfo.createVirtualRegister(RC);
- unsigned Oldval3 = RegInfo.createVirtualRegister(RC);
- unsigned Oldval4 = RegInfo.createVirtualRegister(RC);
- unsigned Newval2 = RegInfo.createVirtualRegister(RC);
- unsigned Tmp1 = RegInfo.createVirtualRegister(RC);
- unsigned Tmp2 = RegInfo.createVirtualRegister(RC);
- unsigned Tmp3 = RegInfo.createVirtualRegister(RC);
- unsigned Tmp4 = RegInfo.createVirtualRegister(RC);
- unsigned Tmp5 = RegInfo.createVirtualRegister(RC);
- unsigned Tmp6 = RegInfo.createVirtualRegister(RC);
- unsigned Tmp7 = RegInfo.createVirtualRegister(RC);
- unsigned Tmp8 = RegInfo.createVirtualRegister(RC);
- unsigned Tmp9 = RegInfo.createVirtualRegister(RC);
+ unsigned ShiftedCmpVal = RegInfo.createVirtualRegister(RC);
+ unsigned OldVal = RegInfo.createVirtualRegister(RC);
+ unsigned MaskedOldVal0 = RegInfo.createVirtualRegister(RC);
+ unsigned ShiftedNewVal = RegInfo.createVirtualRegister(RC);
+ unsigned MaskLSB2 = RegInfo.createVirtualRegister(RC);
+ unsigned PtrLSB2 = RegInfo.createVirtualRegister(RC);
+ unsigned MaskUpper = RegInfo.createVirtualRegister(RC);
+ unsigned MaskedCmpVal = RegInfo.createVirtualRegister(RC);
+ unsigned MaskedNewVal = RegInfo.createVirtualRegister(RC);
+ unsigned MaskedOldVal1 = RegInfo.createVirtualRegister(RC);
+ unsigned StoreVal = RegInfo.createVirtualRegister(RC);
+ unsigned SrlRes = RegInfo.createVirtualRegister(RC);
+ unsigned SllRes = RegInfo.createVirtualRegister(RC);
+ unsigned Success = RegInfo.createVirtualRegister(RC);
// insert new blocks after the current block
const BasicBlock *LLVM_BB = BB->getBasicBlock();
MachineBasicBlock *loop1MBB = MF->CreateMachineBasicBlock(LLVM_BB);
MachineBasicBlock *loop2MBB = MF->CreateMachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *sinkMBB = MF->CreateMachineBasicBlock(LLVM_BB);
MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB);
MachineFunction::iterator It = BB;
++It;
MF->insert(It, loop1MBB);
MF->insert(It, loop2MBB);
+ MF->insert(It, sinkMBB);
MF->insert(It, exitMBB);
// Transfer the remainder of BB and its successor edges to exitMBB.
@@ -1115,76 +1219,90 @@ MipsTargetLowering::EmitAtomicCmpSwapPartword(MachineInstr *MI,
BB->end());
exitMBB->transferSuccessorsAndUpdatePHIs(BB);
+ BB->addSuccessor(loop1MBB);
+ loop1MBB->addSuccessor(sinkMBB);
+ loop1MBB->addSuccessor(loop2MBB);
+ loop2MBB->addSuccessor(loop1MBB);
+ loop2MBB->addSuccessor(sinkMBB);
+ sinkMBB->addSuccessor(exitMBB);
+
+ // FIXME: computation of newval2 can be moved to loop2MBB.
// thisMBB:
- // addiu tmp1,$0,-4 # 0xfffffffc
- // and addr,ptr,tmp1
- // andi tmp2,ptr,3
- // sll shift,tmp2,3
- // ori tmp3,$0,255 # 0xff
- // sll mask,tmp3,shift
+ // addiu masklsb2,$0,-4 # 0xfffffffc
+ // and alignedaddr,ptr,masklsb2
+ // andi ptrlsb2,ptr,3
+ // sll shiftamt,ptrlsb2,3
+ // ori maskupper,$0,255 # 0xff
+ // sll mask,maskupper,shiftamt
// nor mask2,$0,mask
- // andi tmp4,oldval,255
- // sll oldval2,tmp4,shift
- // andi tmp5,newval,255
- // sll newval2,tmp5,shift
+ // andi maskedcmpval,cmpval,255
+ // sll shiftedcmpval,maskedcmpval,shiftamt
+ // andi maskednewval,newval,255
+ // sll shiftednewval,maskednewval,shiftamt
int64_t MaskImm = (Size == 1) ? 255 : 65535;
- BuildMI(BB, dl, TII->get(Mips::ADDiu), Tmp1).addReg(Mips::ZERO).addImm(-4);
- BuildMI(BB, dl, TII->get(Mips::AND), Addr).addReg(Ptr).addReg(Tmp1);
- BuildMI(BB, dl, TII->get(Mips::ANDi), Tmp2).addReg(Ptr).addImm(3);
- BuildMI(BB, dl, TII->get(Mips::SLL), Shift).addReg(Tmp2).addImm(3);
- BuildMI(BB, dl, TII->get(Mips::ORi), Tmp3).addReg(Mips::ZERO).addImm(MaskImm);
- BuildMI(BB, dl, TII->get(Mips::SLL), Mask).addReg(Tmp3).addReg(Shift);
+ BuildMI(BB, dl, TII->get(Mips::ADDiu), MaskLSB2)
+ .addReg(Mips::ZERO).addImm(-4);
+ BuildMI(BB, dl, TII->get(Mips::AND), AlignedAddr)
+ .addReg(Ptr).addReg(MaskLSB2);
+ BuildMI(BB, dl, TII->get(Mips::ANDi), PtrLSB2).addReg(Ptr).addImm(3);
+ BuildMI(BB, dl, TII->get(Mips::SLL), ShiftAmt).addReg(PtrLSB2).addImm(3);
+ BuildMI(BB, dl, TII->get(Mips::ORi), MaskUpper)
+ .addReg(Mips::ZERO).addImm(MaskImm);
+ BuildMI(BB, dl, TII->get(Mips::SLLV), Mask)
+ .addReg(ShiftAmt).addReg(MaskUpper);
BuildMI(BB, dl, TII->get(Mips::NOR), Mask2).addReg(Mips::ZERO).addReg(Mask);
- BuildMI(BB, dl, TII->get(Mips::ANDi), Tmp4).addReg(Oldval).addImm(MaskImm);
- BuildMI(BB, dl, TII->get(Mips::SLL), Oldval2).addReg(Tmp4).addReg(Shift);
- BuildMI(BB, dl, TII->get(Mips::ANDi), Tmp5).addReg(Newval).addImm(MaskImm);
- BuildMI(BB, dl, TII->get(Mips::SLL), Newval2).addReg(Tmp5).addReg(Shift);
- BB->addSuccessor(loop1MBB);
+ BuildMI(BB, dl, TII->get(Mips::ANDi), MaskedCmpVal)
+ .addReg(CmpVal).addImm(MaskImm);
+ BuildMI(BB, dl, TII->get(Mips::SLLV), ShiftedCmpVal)
+ .addReg(ShiftAmt).addReg(MaskedCmpVal);
+ BuildMI(BB, dl, TII->get(Mips::ANDi), MaskedNewVal)
+ .addReg(NewVal).addImm(MaskImm);
+ BuildMI(BB, dl, TII->get(Mips::SLLV), ShiftedNewVal)
+ .addReg(ShiftAmt).addReg(MaskedNewVal);
// loop1MBB:
- // ll oldval3,0(addr)
- // and oldval4,oldval3,mask
- // bne oldval4,oldval2,exitMBB
+ // ll oldval,0(alignedaddr)
+ // and maskedoldval0,oldval,mask
+ // bne maskedoldval0,shiftedcmpval,sinkMBB
BB = loop1MBB;
- BuildMI(BB, dl, TII->get(Mips::LL), Oldval3).addReg(Addr).addImm(0);
- BuildMI(BB, dl, TII->get(Mips::AND), Oldval4).addReg(Oldval3).addReg(Mask);
+ BuildMI(BB, dl, TII->get(Mips::LL), OldVal).addReg(AlignedAddr).addImm(0);
+ BuildMI(BB, dl, TII->get(Mips::AND), MaskedOldVal0)
+ .addReg(OldVal).addReg(Mask);
BuildMI(BB, dl, TII->get(Mips::BNE))
- .addReg(Oldval4).addReg(Oldval2).addMBB(exitMBB);
- BB->addSuccessor(exitMBB);
- BB->addSuccessor(loop2MBB);
+ .addReg(MaskedOldVal0).addReg(ShiftedCmpVal).addMBB(sinkMBB);
// loop2MBB:
- // and tmp6,oldval3,mask2
- // or tmp7,tmp6,newval2
- // sc tmp7,0(addr)
- // beq tmp7,$0,loop1MBB
+ // and maskedoldval1,oldval,mask2
+ // or storeval,maskedoldval1,shiftednewval
+ // sc success,storeval,0(alignedaddr)
+ // beq success,$0,loop1MBB
BB = loop2MBB;
- BuildMI(BB, dl, TII->get(Mips::AND), Tmp6).addReg(Oldval3).addReg(Mask2);
- BuildMI(BB, dl, TII->get(Mips::OR), Tmp7).addReg(Tmp6).addReg(Newval2);
- BuildMI(BB, dl, TII->get(Mips::SC), Tmp7)
- .addReg(Tmp7).addReg(Addr).addImm(0);
+ BuildMI(BB, dl, TII->get(Mips::AND), MaskedOldVal1)
+ .addReg(OldVal).addReg(Mask2);
+ BuildMI(BB, dl, TII->get(Mips::OR), StoreVal)
+ .addReg(MaskedOldVal1).addReg(ShiftedNewVal);
+ BuildMI(BB, dl, TII->get(Mips::SC), Success)
+ .addReg(StoreVal).addReg(AlignedAddr).addImm(0);
BuildMI(BB, dl, TII->get(Mips::BEQ))
- .addReg(Tmp7).addReg(Mips::ZERO).addMBB(loop1MBB);
- BB->addSuccessor(loop1MBB);
- BB->addSuccessor(exitMBB);
+ .addReg(Success).addReg(Mips::ZERO).addMBB(loop1MBB);
- // exitMBB:
- // srl tmp8,oldval4,shift
- // sll tmp9,tmp8,24
- // sra dest,tmp9,24
- BB = exitMBB;
+ // sinkMBB:
+ // srl srlres,maskedoldval0,shiftamt
+ // sll sllres,srlres,24
+ // sra dest,sllres,24
+ BB = sinkMBB;
int64_t ShiftImm = (Size == 1) ? 24 : 16;
- // reverse order
- BuildMI(*BB, BB->begin(), dl, TII->get(Mips::SRA), Dest)
- .addReg(Tmp9).addImm(ShiftImm);
- BuildMI(*BB, BB->begin(), dl, TII->get(Mips::SLL), Tmp9)
- .addReg(Tmp8).addImm(ShiftImm);
- BuildMI(*BB, BB->begin(), dl, TII->get(Mips::SRL), Tmp8)
- .addReg(Oldval4).addReg(Shift);
+
+ BuildMI(BB, dl, TII->get(Mips::SRLV), SrlRes)
+ .addReg(ShiftAmt).addReg(MaskedOldVal0);
+ BuildMI(BB, dl, TII->get(Mips::SLL), SllRes)
+ .addReg(SrlRes).addImm(ShiftImm);
+ BuildMI(BB, dl, TII->get(Mips::SRA), Dest)
+ .addReg(SllRes).addImm(ShiftImm);
MI->eraseFromParent(); // The instruction is gone now.
- return BB;
+ return exitMBB;
}
//===----------------------------------------------------------------------===//
@@ -1267,9 +1385,9 @@ SDValue MipsTargetLowering::LowerGlobalAddress(SDValue Op,
SelectionDAG &DAG) const {
// FIXME there isn't actually debug info here
DebugLoc dl = Op.getDebugLoc();
- const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
+ const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
- if (getTargetMachine().getRelocationModel() != Reloc::PIC_) {
+ if (getTargetMachine().getRelocationModel() != Reloc::PIC_ && !IsN64) {
SDVTList VTs = DAG.getVTList(MVT::i32);
MipsTargetObjectFile &TLOF = (MipsTargetObjectFile&)getObjFileLowering();
@@ -1292,21 +1410,26 @@ SDValue MipsTargetLowering::LowerGlobalAddress(SDValue Op,
return DAG.getNode(ISD::ADD, dl, MVT::i32, HiPart, Lo);
}
- SDValue GA = DAG.getTargetGlobalAddress(GV, dl, MVT::i32, 0,
- MipsII::MO_GOT);
- GA = DAG.getNode(MipsISD::WrapperPIC, dl, MVT::i32, GA);
- SDValue ResNode = DAG.getLoad(MVT::i32, dl,
+ EVT ValTy = Op.getValueType();
+ bool HasGotOfst = (GV->hasInternalLinkage() ||
+ (GV->hasLocalLinkage() && !isa<Function>(GV)));
+ unsigned GotFlag = IsN64 ?
+ (HasGotOfst ? MipsII::MO_GOT_PAGE : MipsII::MO_GOT_DISP) :
+ MipsII::MO_GOT;
+ SDValue GA = DAG.getTargetGlobalAddress(GV, dl, ValTy, 0, GotFlag);
+ GA = DAG.getNode(MipsISD::WrapperPIC, dl, ValTy, GA);
+ SDValue ResNode = DAG.getLoad(ValTy, dl,
DAG.getEntryNode(), GA, MachinePointerInfo(),
false, false, 0);
// For functions and global targets that are not internally linked, a
// single load from the GOT/GP is all that PIC requires.
- if (!GV->hasInternalLinkage() &&
- (!GV->hasLocalLinkage() || isa<Function>(GV)))
+ if (!HasGotOfst)
return ResNode;
- SDValue GALo = DAG.getTargetGlobalAddress(GV, dl, MVT::i32, 0,
- MipsII::MO_ABS_LO);
- SDValue Lo = DAG.getNode(MipsISD::Lo, dl, MVT::i32, GALo);
- return DAG.getNode(ISD::ADD, dl, MVT::i32, ResNode, Lo);
+ SDValue GALo = DAG.getTargetGlobalAddress(GV, dl, ValTy, 0,
+ IsN64 ? MipsII::MO_GOT_OFST :
+ MipsII::MO_ABS_LO);
+ SDValue Lo = DAG.getNode(MipsISD::Lo, dl, ValTy, GALo);
+ return DAG.getNode(ISD::ADD, dl, ValTy, ResNode, Lo);
}
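// Summary (illustrative): under N64 PIC, symbols with a GOT offset
// (internal linkage, or local non-function globals) are addressed as a
// got_page load plus a got_ofst add, while all other symbols take a single
// got_disp load; O32 PIC keeps the plain MO_GOT load, adding an MO_ABS_LO
// term for the local-symbol case.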
SDValue MipsTargetLowering::LowerBlockAddress(SDValue Op,
@@ -1361,11 +1484,11 @@ LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const
ArgListTy Args;
ArgListEntry Entry;
Entry.Node = Argument;
- Entry.Ty = (const Type *) Type::getInt32Ty(*DAG.getContext());
+ Entry.Ty = (Type *) Type::getInt32Ty(*DAG.getContext());
Args.push_back(Entry);
std::pair<SDValue, SDValue> CallResult =
LowerCallTo(DAG.getEntryNode(),
- (const Type *) Type::getInt32Ty(*DAG.getContext()),
+ (Type *) Type::getInt32Ty(*DAG.getContext()),
false, false, false, false, 0, CallingConv::C, false, true,
DAG.getExternalSymbol("__tls_get_addr", PtrVT), Args, DAG,
dl);
@@ -1557,6 +1680,25 @@ LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const {
return FrameAddr;
}
+// TODO: set SType according to the desired memory barrier behavior.
+SDValue MipsTargetLowering::LowerMEMBARRIER(SDValue Op,
+ SelectionDAG& DAG) const {
+ unsigned SType = 0;
+ DebugLoc dl = Op.getDebugLoc();
+ return DAG.getNode(MipsISD::Sync, dl, MVT::Other, Op.getOperand(0),
+ DAG.getConstant(SType, MVT::i32));
+}
+
+SDValue MipsTargetLowering::LowerATOMIC_FENCE(SDValue Op,
+ SelectionDAG& DAG) const {
+ // FIXME: Need pseudo-fence for 'singlethread' fences
+ // FIXME: Set SType for weaker fences where supported/appropriate.
+ unsigned SType = 0;
+ DebugLoc dl = Op.getDebugLoc();
+ return DAG.getNode(MipsISD::Sync, dl, MVT::Other, Op.getOperand(0),
+ DAG.getConstant(SType, MVT::i32));
+}
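+
+// Note (an assumption, not stated in the patch): SType 0 selects the
+// strongest variant of the MIPS "sync" instruction, so both hooks above
+// currently emit a full barrier; e.g. an IR "fence seq_cst" ends up as
+// "sync 0".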
+
//===----------------------------------------------------------------------===//
// Calling Convention Implementation
//===----------------------------------------------------------------------===//
@@ -1679,55 +1821,109 @@ static const unsigned O32IntRegs[] = {
Mips::A0, Mips::A1, Mips::A2, Mips::A3
};
+// Return next O32 integer argument register.
+static unsigned getNextIntArgReg(unsigned Reg) {
+ assert((Reg == Mips::A0) || (Reg == Mips::A2));
+ return (Reg == Mips::A0) ? Mips::A1 : Mips::A3;
+}
+
// Write ByVal Arg to arg registers and stack.
static void
-WriteByValArg(SDValue& Chain, DebugLoc dl,
+WriteByValArg(SDValue& ByValChain, SDValue Chain, DebugLoc dl,
SmallVector<std::pair<unsigned, SDValue>, 16>& RegsToPass,
SmallVector<SDValue, 8>& MemOpChains, int& LastFI,
MachineFrameInfo *MFI, SelectionDAG &DAG, SDValue Arg,
const CCValAssign &VA, const ISD::ArgFlagsTy& Flags,
- MVT PtrType) {
- unsigned FirstWord = VA.getLocMemOffset() / 4;
- unsigned NumWords = (Flags.getByValSize() + 3) / 4;
- unsigned LastWord = FirstWord + NumWords;
- unsigned CurWord;
-
- // copy the first 4 words of byval arg to registers A0 - A3
- for (CurWord = FirstWord; CurWord < std::min(LastWord, O32IntRegsSize);
- ++CurWord) {
+ MVT PtrType, bool isLittle) {
+ unsigned LocMemOffset = VA.getLocMemOffset();
+ unsigned Offset = 0;
+ uint32_t RemainingSize = Flags.getByValSize();
+ unsigned ByValAlign = Flags.getByValAlign();
+
+ // Copy the first 4 words of byval arg to registers A0 - A3.
+ // FIXME: Use a stricter alignment if it enables better optimization in passes
+ // run later.
+ for (; RemainingSize >= 4 && LocMemOffset < 4 * 4;
+ Offset += 4, RemainingSize -= 4, LocMemOffset += 4) {
SDValue LoadPtr = DAG.getNode(ISD::ADD, dl, MVT::i32, Arg,
- DAG.getConstant((CurWord - FirstWord) * 4,
- MVT::i32));
+ DAG.getConstant(Offset, MVT::i32));
SDValue LoadVal = DAG.getLoad(MVT::i32, dl, Chain, LoadPtr,
MachinePointerInfo(),
- false, false, 0);
+ false, false, std::min(ByValAlign,
+ (unsigned)4));
MemOpChains.push_back(LoadVal.getValue(1));
- unsigned DstReg = O32IntRegs[CurWord];
+ unsigned DstReg = O32IntRegs[LocMemOffset / 4];
RegsToPass.push_back(std::make_pair(DstReg, LoadVal));
}
- // copy remaining part of byval arg to stack.
- if (CurWord < LastWord) {
- unsigned SizeInBytes = (LastWord - CurWord) * 4;
- SDValue Src = DAG.getNode(ISD::ADD, dl, MVT::i32, Arg,
- DAG.getConstant((CurWord - FirstWord) * 4,
- MVT::i32));
- LastFI = MFI->CreateFixedObject(SizeInBytes, CurWord * 4, true);
- SDValue Dst = DAG.getFrameIndex(LastFI, PtrType);
- Chain = DAG.getMemcpy(Chain, dl, Dst, Src,
- DAG.getConstant(SizeInBytes, MVT::i32),
- /*Align*/4,
- /*isVolatile=*/false, /*AlwaysInline=*/false,
- MachinePointerInfo(0), MachinePointerInfo(0));
- MemOpChains.push_back(Chain);
+ if (RemainingSize == 0)
+ return;
+
+ // If there still is a register available for argument passing, write the
+ // remaining part of the structure to it using subword loads and shifts.
+ if (LocMemOffset < 4 * 4) {
+ assert(RemainingSize <= 3 && RemainingSize >= 1 &&
+ "There must be one to three bytes remaining.");
+ unsigned LoadSize = (RemainingSize == 3 ? 2 : RemainingSize);
+ SDValue LoadPtr = DAG.getNode(ISD::ADD, dl, MVT::i32, Arg,
+ DAG.getConstant(Offset, MVT::i32));
+ unsigned Alignment = std::min(ByValAlign, (unsigned)4);
+ SDValue LoadVal = DAG.getExtLoad(ISD::ZEXTLOAD, dl, MVT::i32, Chain,
+ LoadPtr, MachinePointerInfo(),
+ MVT::getIntegerVT(LoadSize * 8), false,
+ false, Alignment);
+ MemOpChains.push_back(LoadVal.getValue(1));
+
+ // If target is big endian, shift it to the most significant half-word or
+ // byte.
+ if (!isLittle)
+ LoadVal = DAG.getNode(ISD::SHL, dl, MVT::i32, LoadVal,
+ DAG.getConstant(32 - LoadSize * 8, MVT::i32));
+
+ Offset += LoadSize;
+ RemainingSize -= LoadSize;
+
+ // Read second subword if necessary.
+ if (RemainingSize != 0) {
+ assert(RemainingSize == 1 && "There must be one byte remaining.");
+ LoadPtr = DAG.getNode(ISD::ADD, dl, MVT::i32, Arg,
+ DAG.getConstant(Offset, MVT::i32));
+ unsigned Alignment = std::min(ByValAlign, (unsigned)2);
+ SDValue Subword = DAG.getExtLoad(ISD::ZEXTLOAD, dl, MVT::i32, Chain,
+ LoadPtr, MachinePointerInfo(),
+ MVT::i8, false, false, Alignment);
+ MemOpChains.push_back(Subword.getValue(1));
+ // Insert the loaded byte into LoadVal.
+ // FIXME: Use INS if supported by target.
+ unsigned ShiftAmt = isLittle ? 16 : 8;
+ SDValue Shift = DAG.getNode(ISD::SHL, dl, MVT::i32, Subword,
+ DAG.getConstant(ShiftAmt, MVT::i32));
+ LoadVal = DAG.getNode(ISD::OR, dl, MVT::i32, LoadVal, Shift);
+ }
+
+ unsigned DstReg = O32IntRegs[LocMemOffset / 4];
+ RegsToPass.push_back(std::make_pair(DstReg, LoadVal));
+ return;
}
+
+ // Create a fixed object on the stack at offset LocMemOffset and copy the
+ // remaining part of the byval arg to it using memcpy.
+ SDValue Src = DAG.getNode(ISD::ADD, dl, MVT::i32, Arg,
+ DAG.getConstant(Offset, MVT::i32));
+ LastFI = MFI->CreateFixedObject(RemainingSize, LocMemOffset, true);
+ SDValue Dst = DAG.getFrameIndex(LastFI, PtrType);
+ ByValChain = DAG.getMemcpy(ByValChain, dl, Dst, Src,
+ DAG.getConstant(RemainingSize, MVT::i32),
+ std::min(ByValAlign, (unsigned)4),
+ /*isVolatile=*/false, /*AlwaysInline=*/false,
+ MachinePointerInfo(0), MachinePointerInfo(0));
}
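// Worked example (illustrative): a 7-byte byval arg at LocMemOffset 8 on a
// little-endian target: bytes 0-3 are passed in A2 via a word load, leaving
// RemainingSize == 3; the subword path then loads a zero-extended halfword
// (LoadSize == 2) and a byte, shifts the byte left by 16, ORs it in, and
// passes the combined word in A3. Nothing is left over for the memcpy path.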
/// LowerCall - function arguments are copied from virtual registers to
/// (physical regs)/(stack frame); CALLSEQ_START and CALLSEQ_END are emitted.
/// TODO: isTailCall.
SDValue
-MipsTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
+MipsTargetLowering::LowerCall(SDValue InChain, SDValue Callee,
CallingConv::ID CallConv, bool isVarArg,
bool &isTailCall,
const SmallVectorImpl<ISD::OutputArg> &Outs,
@@ -1757,8 +1953,13 @@ MipsTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
// Get a count of how many bytes are to be pushed on the stack.
unsigned NextStackOffset = CCInfo.getNextStackOffset();
- Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NextStackOffset,
- true));
+ // Chain is the output chain of the last Load/Store or CopyToReg node.
+ // ByValChain is the output chain of the last Memcpy node created for copying
+ // byval arguments to the stack.
+ SDValue Chain, CallSeqStart, ByValChain;
+ SDValue NextStackOffsetVal = DAG.getIntPtrConstant(NextStackOffset, true);
+ Chain = CallSeqStart = DAG.getCALLSEQ_START(InChain, NextStackOffsetVal);
+ ByValChain = InChain;
// If this is the first call, create a stack frame object that points to
// a location to which .cprestore saves $gp.
@@ -1818,8 +2019,10 @@ MipsTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
Arg, DAG.getConstant(1, MVT::i32));
if (!Subtarget->isLittle())
std::swap(Lo, Hi);
- RegsToPass.push_back(std::make_pair(VA.getLocReg(), Lo));
- RegsToPass.push_back(std::make_pair(VA.getLocReg()+1, Hi));
+ unsigned LocRegLo = VA.getLocReg();
+ unsigned LocRegHigh = getNextIntArgReg(LocRegLo);
+ RegsToPass.push_back(std::make_pair(LocRegLo, Lo));
+ RegsToPass.push_back(std::make_pair(LocRegHigh, Hi));
continue;
}
}
@@ -1852,8 +2055,8 @@ MipsTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
"No support for ByVal args by ABIs other than O32 yet.");
assert(Flags.getByValSize() &&
"ByVal args of size 0 should have been ignored by front-end.");
- WriteByValArg(Chain, dl, RegsToPass, MemOpChains, LastFI, MFI, DAG, Arg,
- VA, Flags, getPointerTy());
+ WriteByValArg(ByValChain, Chain, dl, RegsToPass, MemOpChains, LastFI, MFI,
+ DAG, Arg, VA, Flags, getPointerTy(), Subtarget->isLittle());
continue;
}
@@ -1875,6 +2078,12 @@ MipsTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
if (LastFI)
MipsFI->extendOutArgFIRange(FirstFI, LastFI);
+ // If a memcpy has been created to copy a byval arg to the stack, replace the
+ // chain input of CallSeqStart with ByValChain.
+ if (InChain != ByValChain)
+ DAG.UpdateNodeOperands(CallSeqStart.getNode(), ByValChain,
+ NextStackOffsetVal);
+
// Transform all store nodes into one single node because all store
// nodes are independent of each other.
if (!MemOpChains.empty())
@@ -2071,12 +2280,13 @@ MipsTargetLowering::LowerFormalArguments(SDValue Chain,
if (RegVT == MVT::i32)
RC = Mips::CPURegsRegisterClass;
+ else if (RegVT == MVT::i64)
+ RC = Mips::CPU64RegsRegisterClass;
else if (RegVT == MVT::f32)
RC = Mips::FGR32RegisterClass;
- else if (RegVT == MVT::f64) {
- if (!Subtarget->isSingleFloat())
- RC = Mips::AFGR64RegisterClass;
- } else
+ else if (RegVT == MVT::f64)
+ RC = HasMips64 ? Mips::FGR64RegisterClass : Mips::AFGR64RegisterClass;
+ else
llvm_unreachable("RegVT not supported by FormalArguments Lowering");
// Transform the arguments stored on
@@ -2105,7 +2315,7 @@ MipsTargetLowering::LowerFormalArguments(SDValue Chain,
ArgValue = DAG.getNode(ISD::BITCAST, dl, MVT::f32, ArgValue);
if (RegVT == MVT::i32 && VA.getValVT() == MVT::f64) {
unsigned Reg2 = AddLiveIn(DAG.getMachineFunction(),
- VA.getLocReg()+1, RC);
+ getNextIntArgReg(ArgReg), RC);
SDValue ArgValue2 = DAG.getCopyFromReg(Chain, dl, Reg2, RegVT);
if (!Subtarget->isLittle())
std::swap(ArgValue, ArgValue2);
@@ -2313,7 +2523,7 @@ MipsTargetLowering::getSingleConstraintMatchWeight(
// but allow it at the lowest weight.
if (CallOperandVal == NULL)
return CW_Default;
- const Type *type = CallOperandVal->getType();
+ Type *type = CallOperandVal->getType();
// Look at the constraint type.
switch (*constraint) {
default:
diff --git a/lib/Target/Mips/MipsISelLowering.h b/lib/Target/Mips/MipsISelLowering.h
index bda26a2..4be3fed5 100644
--- a/lib/Target/Mips/MipsISelLowering.h
+++ b/lib/Target/Mips/MipsISelLowering.h
@@ -81,7 +81,12 @@ namespace llvm {
WrapperPIC,
- DynAlloc
+ DynAlloc,
+
+ Sync,
+
+ Ext,
+ Ins
};
}
@@ -93,6 +98,8 @@ namespace llvm {
public:
explicit MipsTargetLowering(MipsTargetMachine &TM);
+ virtual bool allowsUnalignedMemoryAccesses(EVT VT) const;
+
/// LowerOperation - Provide custom lowering hooks for some operations.
virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const;
@@ -101,13 +108,14 @@ namespace llvm {
virtual const char *getTargetNodeName(unsigned Opcode) const;
/// getSetCCResultType - get the ISD::SETCC result ValueType
- MVT::SimpleValueType getSetCCResultType(EVT VT) const;
+ EVT getSetCCResultType(EVT VT) const;
virtual SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const;
private:
// Subtarget Info
const MipsSubtarget *Subtarget;
-
+
+ bool HasMips64, IsN64;
// Lower Operand helpers
SDValue LowerCallResult(SDValue Chain, SDValue InFlag,
@@ -128,6 +136,8 @@ namespace llvm {
SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerMEMBARRIER(SDValue Op, SelectionDAG& DAG) const;
+ SDValue LowerATOMIC_FENCE(SDValue Op, SelectionDAG& DAG) const;
virtual SDValue
LowerFormalArguments(SDValue Chain,
diff --git a/lib/Target/Mips/MipsInstrFPU.td b/lib/Target/Mips/MipsInstrFPU.td
index 021c167..2fb9d18 100644
--- a/lib/Target/Mips/MipsInstrFPU.td
+++ b/lib/Target/Mips/MipsInstrFPU.td
@@ -27,7 +27,7 @@
def SDT_MipsFPBrcond : SDTypeProfile<0, 2, [SDTCisInt<0>,
SDTCisVT<1, OtherVT>]>;
def SDT_MipsFPCmp : SDTypeProfile<0, 3, [SDTCisSameAs<0, 1>, SDTCisFP<1>,
- SDTCisInt<2>]>;
+ SDTCisVT<2, i32>]>;
def SDT_MipsCMovFP : SDTypeProfile<1, 2, [SDTCisSameAs<0, 1>,
SDTCisSameAs<1, 2>]>;
def SDT_MipsBuildPairF64 : SDTypeProfile<1, 2, [SDTCisVT<0, f64>,
@@ -35,12 +35,11 @@ def SDT_MipsBuildPairF64 : SDTypeProfile<1, 2, [SDTCisVT<0, f64>,
SDTCisSameAs<1, 2>]>;
def SDT_MipsExtractElementF64 : SDTypeProfile<1, 2, [SDTCisVT<0, i32>,
SDTCisVT<1, f64>,
- SDTCisVT<0, i32>]>;
+ SDTCisVT<2, i32>]>;
def MipsFPCmp : SDNode<"MipsISD::FPCmp", SDT_MipsFPCmp, [SDNPOutGlue]>;
def MipsCMovFP_T : SDNode<"MipsISD::CMovFP_T", SDT_MipsCMovFP, [SDNPInGlue]>;
def MipsCMovFP_F : SDNode<"MipsISD::CMovFP_F", SDT_MipsCMovFP, [SDNPInGlue]>;
-def MipsFPRound : SDNode<"MipsISD::FPRound", SDTFPRoundOp, [SDNPOptInGlue]>;
def MipsFPBrcond : SDNode<"MipsISD::FPBrcond", SDT_MipsFPBrcond,
[SDNPHasChain, SDNPOptInGlue]>;
def MipsBuildPairF64 : SDNode<"MipsISD::BuildPairF64", SDT_MipsBuildPairF64>;
@@ -55,10 +54,10 @@ let PrintMethod = "printFCCOperand" in
// Feature predicates.
//===----------------------------------------------------------------------===//
-def In32BitMode : Predicate<"!Subtarget.isFP64bit()">;
+def IsFP64bit : Predicate<"Subtarget.isFP64bit()">;
+def NotFP64bit : Predicate<"!Subtarget.isFP64bit()">;
def IsSingleFloat : Predicate<"Subtarget.isSingleFloat()">;
def IsNotSingleFloat : Predicate<"!Subtarget.isSingleFloat()">;
-def IsNotMipsI : Predicate<"!Subtarget.isMips1()">;
//===----------------------------------------------------------------------===//
// Instruction Class Templates
@@ -74,97 +73,87 @@ def IsNotMipsI : Predicate<"!Subtarget.isMips1()">;
// Only S32 and D32 are supported right now.
//===----------------------------------------------------------------------===//
-multiclass FFR1_1<bits<6> funct, string asmstr>
-{
- def _S32 : FFR<0x11, funct, 0x0, (outs FGR32:$fd), (ins FGR32:$fs),
- !strconcat(asmstr, ".s\t$fd, $fs"), []>;
-
- def _D32 : FFR<0x11, funct, 0x1, (outs FGR32:$fd), (ins AFGR64:$fs),
- !strconcat(asmstr, ".d\t$fd, $fs"), []>, Requires<[In32BitMode]>;
+// FP load.
+class FPLoad<bits<6> op, string opstr, PatFrag FOp, RegisterClass RC,
+ Operand MemOpnd>:
+ FFI<op, (outs RC:$ft), (ins MemOpnd:$base),
+ !strconcat(opstr, "\t$ft, $base"), [(set RC:$ft, (FOp addr:$base))]>;
+
+// FP store.
+class FPStore<bits<6> op, string opstr, PatFrag FOp, RegisterClass RC,
+ Operand MemOpnd>:
+ FFI<op, (outs), (ins RC:$ft, MemOpnd:$base),
+ !strconcat(opstr, "\t$ft, $base"), [(store RC:$ft, addr:$base)]>;
+
+// Instructions that convert an FP value to 32-bit fixed point.
+multiclass FFR1_W_M<bits<6> funct, string opstr> {
+ def _S : FFR1<funct, 16, opstr, "w.s", FGR32, FGR32>;
+ def _D32 : FFR1<funct, 17, opstr, "w.d", FGR32, AFGR64>,
+ Requires<[NotFP64bit]>;
+ def _D64 : FFR1<funct, 17, opstr, "w.d", FGR32, FGR64>,
+ Requires<[IsFP64bit]>;
}
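// Expansion sketch (illustrative): "defm TRUNC_W : FFR1_W_M<0xd, "trunc">;"
// below yields TRUNC_W_S plus TRUNC_W_D32/TRUNC_W_D64, the 32- and 64-bit
// FPU encodings of trunc.w.d, selected by the NotFP64bit/IsFP64bit
// predicates.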
-multiclass FFR1_2<bits<6> funct, string asmstr, SDNode FOp>
-{
- def _S32 : FFR<0x11, funct, 0x0, (outs FGR32:$fd), (ins FGR32:$fs),
- !strconcat(asmstr, ".s\t$fd, $fs"),
- [(set FGR32:$fd, (FOp FGR32:$fs))]>;
-
- def _D32 : FFR<0x11, funct, 0x1, (outs AFGR64:$fd), (ins AFGR64:$fs),
- !strconcat(asmstr, ".d\t$fd, $fs"),
- [(set AFGR64:$fd, (FOp AFGR64:$fs))]>, Requires<[In32BitMode]>;
+// Instructions that convert an FP value to 64-bit fixed point.
+let Predicates = [IsFP64bit] in
+multiclass FFR1_L_M<bits<6> funct, string opstr> {
+ def _S : FFR1<funct, 16, opstr, "l.s", FGR64, FGR32>;
+ def _D64 : FFR1<funct, 17, opstr, "l.d", FGR64, FGR64>;
}
-class FFR1_3<bits<6> funct, bits<5> fmt, RegisterClass RcSrc,
- RegisterClass RcDst, string asmstr>:
- FFR<0x11, funct, fmt, (outs RcSrc:$fd), (ins RcDst:$fs),
- !strconcat(asmstr, "\t$fd, $fs"), []>;
-
+// FP-to-FP conversion instructions.
+multiclass FFR1P_M<bits<6> funct, string opstr, SDNode OpNode> {
+ def _S : FFR1P<funct, 16, opstr, "s", FGR32, FGR32, OpNode>;
+ def _D32 : FFR1P<funct, 17, opstr, "d", AFGR64, AFGR64, OpNode>,
+ Requires<[NotFP64bit]>;
+ def _D64 : FFR1P<funct, 17, opstr, "d", FGR64, FGR64, OpNode>,
+ Requires<[IsFP64bit]>;
+}
-multiclass FFR1_4<bits<6> funct, string asmstr, SDNode FOp, bit isComm = 0> {
+multiclass FFR2P_M<bits<6> funct, string opstr, SDNode OpNode, bit isComm = 0> {
let isCommutable = isComm in {
- def _S32 : FFR<0x11, funct, 0x0, (outs FGR32:$fd),
- (ins FGR32:$fs, FGR32:$ft),
- !strconcat(asmstr, ".s\t$fd, $fs, $ft"),
- [(set FGR32:$fd, (FOp FGR32:$fs, FGR32:$ft))]>;
-
- def _D32 : FFR<0x11, funct, 0x1, (outs AFGR64:$fd),
- (ins AFGR64:$fs, AFGR64:$ft),
- !strconcat(asmstr, ".d\t$fd, $fs, $ft"),
- [(set AFGR64:$fd, (FOp AFGR64:$fs, AFGR64:$ft))]>,
- Requires<[In32BitMode]>;
+ def _S : FFR2P<funct, 16, opstr, "s", FGR32, OpNode>;
+ def _D32 : FFR2P<funct, 17, opstr, "d", AFGR64, OpNode>,
+ Requires<[NotFP64bit]>;
+ def _D64 : FFR2P<funct, 17, opstr, "d", FGR64, OpNode>,
+ Requires<[IsFP64bit]>;
}
}
//===----------------------------------------------------------------------===//
// Floating Point Instructions
//===----------------------------------------------------------------------===//
+defm ROUND_W : FFR1_W_M<0xc, "round">;
+defm ROUND_L : FFR1_L_M<0x8, "round">;
+defm TRUNC_W : FFR1_W_M<0xd, "trunc">;
+defm TRUNC_L : FFR1_L_M<0x9, "trunc">;
+defm CEIL_W : FFR1_W_M<0xe, "ceil">;
+defm CEIL_L : FFR1_L_M<0xa, "ceil">;
+defm FLOOR_W : FFR1_W_M<0xf, "floor">;
+defm FLOOR_L : FFR1_L_M<0xb, "floor">;
+defm CVT_W : FFR1_W_M<0x24, "cvt">;
+defm CVT_L : FFR1_L_M<0x25, "cvt">;
+
+def CVT_S_W : FFR1<0x20, 20, "cvt", "s.w", FGR32, FGR32>;
+
+let Predicates = [NotFP64bit] in {
+ def CVT_S_D32 : FFR1<0x20, 17, "cvt", "s.d", FGR32, AFGR64>;
+ def CVT_D32_W : FFR1<0x21, 20, "cvt", "d.w", AFGR64, FGR32>;
+ def CVT_D32_S : FFR1<0x21, 16, "cvt", "d.s", AFGR64, FGR32>;
+}
-let ft = 0 in {
- defm FLOOR_W : FFR1_1<0b001111, "floor.w">;
- defm CEIL_W : FFR1_1<0b001110, "ceil.w">;
- defm ROUND_W : FFR1_1<0b001100, "round.w">;
- defm TRUNC_W : FFR1_1<0b001101, "trunc.w">;
- defm CVTW : FFR1_1<0b100100, "cvt.w">;
-
- defm FABS : FFR1_2<0b000101, "abs", fabs>;
- defm FNEG : FFR1_2<0b000111, "neg", fneg>;
- defm FSQRT : FFR1_2<0b000100, "sqrt", fsqrt>;
-
- /// Convert to Single Precison
- def CVTS_W32 : FFR1_3<0b100000, 0x2, FGR32, FGR32, "cvt.s.w">;
-
- let Predicates = [IsNotSingleFloat] in {
- /// Ceil to long signed integer
- def CEIL_LS : FFR1_3<0b001010, 0x0, FGR32, FGR32, "ceil.l">;
- def CEIL_LD : FFR1_3<0b001010, 0x1, AFGR64, AFGR64, "ceil.l">;
-
- /// Round to long signed integer
- def ROUND_LS : FFR1_3<0b001000, 0x0, FGR32, FGR32, "round.l">;
- def ROUND_LD : FFR1_3<0b001000, 0x1, AFGR64, AFGR64, "round.l">;
-
- /// Floor to long signed integer
- def FLOOR_LS : FFR1_3<0b001011, 0x0, FGR32, FGR32, "floor.l">;
- def FLOOR_LD : FFR1_3<0b001011, 0x1, AFGR64, AFGR64, "floor.l">;
-
- /// Trunc to long signed integer
- def TRUNC_LS : FFR1_3<0b001001, 0x0, FGR32, FGR32, "trunc.l">;
- def TRUNC_LD : FFR1_3<0b001001, 0x1, AFGR64, AFGR64, "trunc.l">;
-
- /// Convert to long signed integer
- def CVTL_S : FFR1_3<0b100101, 0x0, FGR32, FGR32, "cvt.l">;
- def CVTL_D : FFR1_3<0b100101, 0x1, AFGR64, AFGR64, "cvt.l">;
-
- /// Convert to Double Precison
- def CVTD_S32 : FFR1_3<0b100001, 0x0, AFGR64, FGR32, "cvt.d.s">;
- def CVTD_W32 : FFR1_3<0b100001, 0x2, AFGR64, FGR32, "cvt.d.w">;
- def CVTD_L32 : FFR1_3<0b100001, 0x3, AFGR64, AFGR64, "cvt.d.l">;
-
- /// Convert to Single Precison
- def CVTS_D32 : FFR1_3<0b100000, 0x1, FGR32, AFGR64, "cvt.s.d">;
- def CVTS_L32 : FFR1_3<0b100000, 0x3, FGR32, AFGR64, "cvt.s.l">;
- }
+let Predicates = [IsFP64bit] in {
+ def CVT_S_D64 : FFR1<0x20, 17, "cvt", "s.d", FGR32, FGR64>;
+ def CVT_S_L : FFR1<0x20, 21, "cvt", "s.l", FGR32, FGR64>;
+ def CVT_D64_W : FFR1<0x21, 20, "cvt", "d.w", FGR64, FGR32>;
+ def CVT_D64_S : FFR1<0x21, 16, "cvt", "d.s", FGR64, FGR32>;
+ def CVT_D64_L : FFR1<0x21, 21, "cvt", "d.l", FGR64, FGR64>;
}
+defm FABS : FFR1P_M<0x5, "abs", fabs>;
+defm FNEG : FFR1P_M<0x7, "neg", fneg>;
+defm FSQRT : FFR1P_M<0x4, "sqrt", fsqrt>;
+
// The odd-numbered registers are only referenced when doing loads,
// stores, and moves between floating-point and integer registers.
// When defining instructions, we reference all 32-bit registers,
@@ -178,37 +167,46 @@ let fd = 0 in {
"ctc1\t$fs, $rt", []>;
def MFC1 : FFR<0x11, 0x00, 0x00, (outs CPURegs:$rt), (ins FGR32:$fs),
- "mfc1\t$rt, $fs", []>;
+ "mfc1\t$rt, $fs",
+ [(set CPURegs:$rt, (bitconvert FGR32:$fs))]>;
def MTC1 : FFR<0x11, 0x00, 0x04, (outs FGR32:$fs), (ins CPURegs:$rt),
- "mtc1\t$rt, $fs", []>;
+ "mtc1\t$rt, $fs",
+ [(set FGR32:$fs, (bitconvert CPURegs:$rt))]>;
}
-def FMOV_S32 : FFR<0x11, 0b000110, 0x0, (outs FGR32:$fd), (ins FGR32:$fs),
- "mov.s\t$fd, $fs", []>;
-def FMOV_D32 : FFR<0x11, 0b000110, 0x1, (outs AFGR64:$fd), (ins AFGR64:$fs),
- "mov.d\t$fd, $fs", []>;
+def FMOV_S : FFR1<0x6, 16, "mov", "s", FGR32, FGR32>;
+def FMOV_D32 : FFR1<0x6, 17, "mov", "d", AFGR64, AFGR64>,
+ Requires<[NotFP64bit]>;
+def FMOV_D64 : FFR1<0x6, 17, "mov", "d", FGR64, FGR64>,
+ Requires<[IsFP64bit]>;
/// Floating Point Memory Instructions
-let Predicates = [IsNotSingleFloat, IsNotMipsI] in {
- def LDC1 : FFI<0b110101, (outs AFGR64:$ft), (ins mem:$addr),
- "ldc1\t$ft, $addr", [(set AFGR64:$ft, (load addr:$addr))]>;
-
- def SDC1 : FFI<0b111101, (outs), (ins AFGR64:$ft, mem:$addr),
- "sdc1\t$ft, $addr", [(store AFGR64:$ft, addr:$addr)]>;
+let Predicates = [IsN64] in {
+ def LWC1_P8 : FPLoad<0x31, "lwc1", load, FGR32, mem64>;
+ def SWC1_P8 : FPStore<0x39, "swc1", store, FGR32, mem64>;
+ def LDC164_P8 : FPLoad<0x35, "ldc1", load, FGR64, mem64>;
+ def SDC164_P8 : FPStore<0x3d, "sdc1", store, FGR64, mem64>;
}
-// LWC1 and SWC1 can always be emitted with odd registers.
-def LWC1 : FFI<0b110001, (outs FGR32:$ft), (ins mem:$addr), "lwc1\t$ft, $addr",
- [(set FGR32:$ft, (load addr:$addr))]>;
-def SWC1 : FFI<0b111001, (outs), (ins FGR32:$ft, mem:$addr),
- "swc1\t$ft, $addr", [(store FGR32:$ft, addr:$addr)]>;
+let Predicates = [NotN64] in {
+ def LWC1 : FPLoad<0x31, "lwc1", load, FGR32, mem>;
+ def SWC1 : FPStore<0x39, "swc1", store, FGR32, mem>;
+ let Predicates = [HasMips64] in {
+ def LDC164 : FPLoad<0x35, "ldc1", load, FGR64, mem>;
+ def SDC164 : FPStore<0x3d, "sdc1", store, FGR64, mem>;
+ }
+ let Predicates = [NotMips64] in {
+ def LDC1 : FPLoad<0x35, "ldc1", load, AFGR64, mem>;
+ def SDC1 : FPStore<0x3d, "sdc1", store, AFGR64, mem>;
+ }
+}
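+
+// Note (illustrative): the _P8 defs above are the N64-ABI variants, where
+// pointers are 64-bit and hence take a mem64 operand; the NotN64 defs keep
+// the 32-bit mem operand and use HasMips64/NotMips64 to choose between the
+// FGR64 and AFGR64 forms of ldc1/sdc1.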
/// Floating-point Arithmetic
-defm FADD : FFR1_4<0x10, "add", fadd, 1>;
-defm FDIV : FFR1_4<0x03, "div", fdiv>;
-defm FMUL : FFR1_4<0x02, "mul", fmul, 1>;
-defm FSUB : FFR1_4<0x01, "sub", fsub>;
+defm FADD : FFR2P_M<0x10, "add", fadd, 1>;
+defm FDIV : FFR2P_M<0x03, "div", fdiv>;
+defm FMUL : FFR2P_M<0x02, "mul", fmul, 1>;
+defm FSUB : FFR2P_M<0x01, "sub", fsub>;
//===----------------------------------------------------------------------===//
// Floating Point Branch Codes
@@ -217,8 +215,6 @@ defm FSUB : FFR1_4<0x01, "sub", fsub>;
// They must be kept in synch.
def MIPS_BRANCH_F : PatLeaf<(i32 0)>;
def MIPS_BRANCH_T : PatLeaf<(i32 1)>;
-def MIPS_BRANCH_FL : PatLeaf<(i32 2)>;
-def MIPS_BRANCH_TL : PatLeaf<(i32 3)>;
/// Floating Point Branch of False/True (Likely)
let isBranch=1, isTerminator=1, hasDelaySlot=1, base=0x8, Uses=[FCR31] in
@@ -228,8 +224,6 @@ let isBranch=1, isTerminator=1, hasDelaySlot=1, base=0x8, Uses=[FCR31] in
def BC1F : FBRANCH<MIPS_BRANCH_F, "bc1f">;
def BC1T : FBRANCH<MIPS_BRANCH_T, "bc1t">;
-def BC1FL : FBRANCH<MIPS_BRANCH_FL, "bc1fl">;
-def BC1TL : FBRANCH<MIPS_BRANCH_TL, "bc1tl">;
//===----------------------------------------------------------------------===//
// Floating Point Flag Conditions
@@ -254,7 +248,7 @@ def MIPS_FCOND_LE : PatLeaf<(i32 14)>;
def MIPS_FCOND_NGT : PatLeaf<(i32 15)>;
/// Floating Point Compare
-let hasDelaySlot = 1, Defs=[FCR31] in {
+let Defs=[FCR31] in {
def FCMP_S32 : FCC<0x0, (outs), (ins FGR32:$fs, FGR32:$ft, condcode:$cc),
"c.$cc.s\t$fs, $ft",
[(MipsFPCmp FGR32:$fs, FGR32:$ft, imm:$cc)]>;
@@ -262,7 +256,7 @@ let hasDelaySlot = 1, Defs=[FCR31] in {
def FCMP_D32 : FCC<0x1, (outs), (ins AFGR64:$fs, AFGR64:$ft, condcode:$cc),
"c.$cc.d\t$fs, $ft",
[(MipsFPCmp AFGR64:$fs, AFGR64:$ft, imm:$cc)]>,
- Requires<[In32BitMode]>;
+ Requires<[NotFP64bit]>;
}
@@ -280,7 +274,7 @@ class CondMovIntFP<RegisterClass RC, bits<5> fmt, bits<6> func,
def MOVZ_S : CondMovIntFP<FGR32, 16, 18, "movz.s">;
def MOVN_S : CondMovIntFP<FGR32, 16, 19, "movn.s">;
-let Predicates = [In32BitMode] in {
+let Predicates = [NotFP64bit] in {
def MOVZ_D : CondMovIntFP<AFGR64, 17, 18, "movz.d">;
def MOVN_D : CondMovIntFP<AFGR64, 17, 19, "movn.d">;
}
@@ -288,7 +282,7 @@ let Predicates = [In32BitMode] in {
defm : MovzPats<FGR32, MOVZ_S>;
defm : MovnPats<FGR32, MOVN_S>;
-let Predicates = [In32BitMode] in {
+let Predicates = [NotFP64bit] in {
defm : MovzPats<AFGR64, MOVZ_D>;
defm : MovnPats<AFGR64, MOVN_D>;
}
@@ -313,7 +307,7 @@ def MOVF : CondMovFPInt<MipsCMovFP_F, 0, "movf">;
def MOVT_S : CondMovFPFP<FGR32, MipsCMovFP_T, 16, 1, "movt.s">;
def MOVF_S : CondMovFPFP<FGR32, MipsCMovFP_F, 16, 0, "movf.s">;
-let Predicates = [In32BitMode] in {
+let Predicates = [NotFP64bit] in {
def MOVT_D : CondMovFPFP<AFGR64, MipsCMovFP_T, 17, 1, "movt.d">;
def MOVF_D : CondMovFPFP<AFGR64, MipsCMovFP_F, 17, 0, "movf.d">;
}
@@ -353,22 +347,16 @@ def fpimm0neg : PatLeaf<(fpimm), [{
}]>;
def : Pat<(f32 fpimm0), (MTC1 ZERO)>;
-def : Pat<(f32 fpimm0neg), (FNEG_S32 (MTC1 ZERO))>;
+def : Pat<(f32 fpimm0neg), (FNEG_S (MTC1 ZERO))>;
-def : Pat<(f32 (sint_to_fp CPURegs:$src)), (CVTS_W32 (MTC1 CPURegs:$src))>;
-def : Pat<(f64 (sint_to_fp CPURegs:$src)), (CVTD_W32 (MTC1 CPURegs:$src))>;
+def : Pat<(f32 (sint_to_fp CPURegs:$src)), (CVT_S_W (MTC1 CPURegs:$src))>;
+def : Pat<(f64 (sint_to_fp CPURegs:$src)), (CVT_D32_W (MTC1 CPURegs:$src))>;
-def : Pat<(i32 (fp_to_sint FGR32:$src)), (MFC1 (TRUNC_W_S32 FGR32:$src))>;
+def : Pat<(i32 (fp_to_sint FGR32:$src)), (MFC1 (TRUNC_W_S FGR32:$src))>;
def : Pat<(i32 (fp_to_sint AFGR64:$src)), (MFC1 (TRUNC_W_D32 AFGR64:$src))>;
-def : Pat<(i32 (bitconvert FGR32:$src)), (MFC1 FGR32:$src)>;
-def : Pat<(f32 (bitconvert CPURegs:$src)), (MTC1 CPURegs:$src)>;
-
-let Predicates = [In32BitMode] in {
- def : Pat<(f32 (fround AFGR64:$src)), (CVTS_D32 AFGR64:$src)>;
- def : Pat<(f64 (fextend FGR32:$src)), (CVTD_S32 FGR32:$src)>;
+let Predicates = [NotFP64bit] in {
+ def : Pat<(f32 (fround AFGR64:$src)), (CVT_S_D32 AFGR64:$src)>;
+ def : Pat<(f64 (fextend FGR32:$src)), (CVT_D32_S FGR32:$src)>;
}
-// MipsFPRound is only emitted for MipsI targets.
-def : Pat<(f32 (MipsFPRound AFGR64:$src)), (CVTW_D32 AFGR64:$src)>;
-
diff --git a/lib/Target/Mips/MipsInstrFormats.td b/lib/Target/Mips/MipsInstrFormats.td
index 9f55fb3..d246a26 100644
--- a/lib/Target/Mips/MipsInstrFormats.td
+++ b/lib/Target/Mips/MipsInstrFormats.td
@@ -44,7 +44,9 @@ class MipsInst<dag outs, dag ins, string asmstr, list<dag> pattern,
// Mips Pseudo Instructions Format
class MipsPseudo<dag outs, dag ins, string asmstr, list<dag> pattern>:
- MipsInst<outs, ins, asmstr, pattern, IIPseudo>;
+ MipsInst<outs, ins, asmstr, pattern, IIPseudo> {
+ let isPseudo = 1;
+}
//===----------------------------------------------------------------------===//
// Format R instruction class in Mips : <|opcode|rs|rt|rd|shamt|funct|>
@@ -88,6 +90,21 @@ class FI<bits<6> op, dag outs, dag ins, string asmstr, list<dag> pattern,
let Inst{15-0} = imm16;
}
+class CBranchBase<bits<6> op, dag outs, dag ins, string asmstr,
+ list<dag> pattern, InstrItinClass itin>:
+ MipsInst<outs, ins, asmstr, pattern, itin>
+{
+ bits<5> rs;
+ bits<5> rt;
+ bits<16> imm16;
+
+ let opcode = op;
+
+ let Inst{25-21} = rs;
+ let Inst{20-16} = rt;
+ let Inst{15-0} = imm16;
+}
+
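CBranchBase pins down the standard MIPS I-format layout: opcode in bits 31-26, rs in 25-21, rt in 20-16, and the 16-bit immediate in 15-0, exactly as the Inst{...} assignments spell out. A standalone sketch of that field packing (plain C++, not LLVM's encoder):

    #include <cstdint>

    // Pack an I-format MIPS word: |opcode|rs|rt|imm16|.
    uint32_t encodeIFormat(uint32_t opcode, uint32_t rs, uint32_t rt,
                           uint32_t imm16) {
      return (opcode & 0x3f) << 26 | (rs & 0x1f) << 21 |
             (rt & 0x1f) << 16 | (imm16 & 0xffff);
    }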
//===----------------------------------------------------------------------===//
// Format J instruction class in Mips : <|opcode|address|>
//===----------------------------------------------------------------------===//
@@ -224,4 +241,27 @@ class FFCMOV<bits<5> _fmt, bits<1> _tf, dag outs, dag ins, string asmstr,
let Inst{15-11} = fs;
let Inst{10-6} = fd;
let Inst{5-0} = 17;
-}
\ No newline at end of file
+}
+
+// FP unary instructions without patterns.
+class FFR1<bits<6> funct, bits<5> fmt, string opstr, string fmtstr,
+ RegisterClass DstRC, RegisterClass SrcRC> :
+ FFR<0x11, funct, fmt, (outs DstRC:$fd), (ins SrcRC:$fs),
+ !strconcat(opstr, ".", fmtstr, "\t$fd, $fs"), []> {
+ let ft = 0;
+}
+
+// FP unary instructions with patterns.
+class FFR1P<bits<6> funct, bits<5> fmt, string opstr, string fmtstr,
+ RegisterClass DstRC, RegisterClass SrcRC, SDNode OpNode> :
+ FFR<0x11, funct, fmt, (outs DstRC:$fd), (ins SrcRC:$fs),
+ !strconcat(opstr, ".", fmtstr, "\t$fd, $fs"),
+ [(set DstRC:$fd, (OpNode SrcRC:$fs))]> {
+ let ft = 0;
+}
+
+class FFR2P<bits<6> funct, bits<5> fmt, string opstr,
+ string fmtstr, RegisterClass RC, SDNode OpNode> :
+ FFR<0x11, funct, fmt, (outs RC:$fd), (ins RC:$fs, RC:$ft),
+ !strconcat(opstr, ".", fmtstr, "\t$fd, $fs, $ft"),
+ [(set RC:$fd, (OpNode RC:$fs, RC:$ft))]>;
diff --git a/lib/Target/Mips/MipsInstrInfo.cpp b/lib/Target/Mips/MipsInstrInfo.cpp
index 0a7a7f2..559943a 100644
--- a/lib/Target/Mips/MipsInstrInfo.cpp
+++ b/lib/Target/Mips/MipsInstrInfo.cpp
@@ -17,8 +17,8 @@
#include "InstPrinter/MipsInstPrinter.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/Target/TargetRegistry.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/TargetRegistry.h"
#include "llvm/ADT/STLExtras.h"
#define GET_INSTRINFO_CTOR
@@ -28,7 +28,8 @@ using namespace llvm;
MipsInstrInfo::MipsInstrInfo(MipsTargetMachine &tm)
: MipsGenInstrInfo(Mips::ADJCALLSTACKDOWN, Mips::ADJCALLSTACKUP),
- TM(tm), RI(*TM.getSubtargetImpl(), *this) {}
+ TM(tm), IsN64(TM.getSubtarget<MipsSubtarget>().isABI_N64()),
+ RI(*TM.getSubtargetImpl(), *this) {}
const MipsRegisterInfo &MipsInstrInfo::getRegisterInfo() const {
@@ -47,8 +48,12 @@ static bool isZeroImm(const MachineOperand &op) {
unsigned MipsInstrInfo::
isLoadFromStackSlot(const MachineInstr *MI, int &FrameIndex) const
{
- if ((MI->getOpcode() == Mips::LW) || (MI->getOpcode() == Mips::LWC1) ||
- (MI->getOpcode() == Mips::LDC1)) {
+ unsigned Opc = MI->getOpcode();
+
+ if ((Opc == Mips::LW) || (Opc == Mips::LW_P8) || (Opc == Mips::LD) ||
+ (Opc == Mips::LD_P8) || (Opc == Mips::LWC1) || (Opc == Mips::LWC1_P8) ||
+ (Opc == Mips::LDC1) || (Opc == Mips::LDC164) ||
+ (Opc == Mips::LDC164_P8)) {
if ((MI->getOperand(1).isFI()) && // is a stack slot
(MI->getOperand(2).isImm()) && // the imm is zero
(isZeroImm(MI->getOperand(2)))) {
@@ -68,8 +73,12 @@ isLoadFromStackSlot(const MachineInstr *MI, int &FrameIndex) const
unsigned MipsInstrInfo::
isStoreToStackSlot(const MachineInstr *MI, int &FrameIndex) const
{
- if ((MI->getOpcode() == Mips::SW) || (MI->getOpcode() == Mips::SWC1) ||
- (MI->getOpcode() == Mips::SDC1)) {
+ unsigned Opc = MI->getOpcode();
+
+ if ((Opc == Mips::SW) || (Opc == Mips::SW_P8) || (Opc == Mips::SD) ||
+ (Opc == Mips::SD_P8) || (Opc == Mips::SWC1) || (Opc == Mips::SWC1_P8) ||
+ (Opc == Mips::SDC1) || (Opc == Mips::SDC164) ||
+ (Opc == Mips::SDC164_P8)) {
if ((MI->getOperand(1).isFI()) && // is a stack slot
(MI->getOperand(2).isImm()) && // the imm is zero
(isZeroImm(MI->getOperand(2)))) {
@@ -94,70 +103,63 @@ copyPhysReg(MachineBasicBlock &MBB,
MachineBasicBlock::iterator I, DebugLoc DL,
unsigned DestReg, unsigned SrcReg,
bool KillSrc) const {
- bool DestCPU = Mips::CPURegsRegClass.contains(DestReg);
- bool SrcCPU = Mips::CPURegsRegClass.contains(SrcReg);
-
- // CPU-CPU is the most common.
- if (DestCPU && SrcCPU) {
- BuildMI(MBB, I, DL, get(Mips::ADDu), DestReg).addReg(Mips::ZERO)
- .addReg(SrcReg, getKillRegState(KillSrc));
- return;
- }
+ unsigned Opc = 0, ZeroReg = 0;
- // Copy to CPU from other registers.
- if (DestCPU) {
- if (Mips::CCRRegClass.contains(SrcReg))
- BuildMI(MBB, I, DL, get(Mips::CFC1), DestReg)
- .addReg(SrcReg, getKillRegState(KillSrc));
+ if (Mips::CPURegsRegClass.contains(DestReg)) { // Copy to CPU Reg.
+ if (Mips::CPURegsRegClass.contains(SrcReg))
+ Opc = Mips::ADDu, ZeroReg = Mips::ZERO;
+ else if (Mips::CCRRegClass.contains(SrcReg))
+ Opc = Mips::CFC1;
else if (Mips::FGR32RegClass.contains(SrcReg))
- BuildMI(MBB, I, DL, get(Mips::MFC1), DestReg)
- .addReg(SrcReg, getKillRegState(KillSrc));
+ Opc = Mips::MFC1;
else if (SrcReg == Mips::HI)
- BuildMI(MBB, I, DL, get(Mips::MFHI), DestReg);
+ Opc = Mips::MFHI, SrcReg = 0;
else if (SrcReg == Mips::LO)
- BuildMI(MBB, I, DL, get(Mips::MFLO), DestReg);
- else
- llvm_unreachable("Copy to CPU from invalid register");
- return;
+ Opc = Mips::MFLO, SrcReg = 0;
}
-
- // Copy to other registers from CPU.
- if (SrcCPU) {
+ else if (Mips::CPURegsRegClass.contains(SrcReg)) { // Copy from CPU Reg.
if (Mips::CCRRegClass.contains(DestReg))
- BuildMI(MBB, I, DL, get(Mips::CTC1), DestReg)
- .addReg(SrcReg, getKillRegState(KillSrc));
+ Opc = Mips::CTC1;
else if (Mips::FGR32RegClass.contains(DestReg))
- BuildMI(MBB, I, DL, get(Mips::MTC1), DestReg)
- .addReg(SrcReg, getKillRegState(KillSrc));
+ Opc = Mips::MTC1;
else if (DestReg == Mips::HI)
- BuildMI(MBB, I, DL, get(Mips::MTHI))
- .addReg(SrcReg, getKillRegState(KillSrc));
+ Opc = Mips::MTHI, DestReg = 0;
else if (DestReg == Mips::LO)
- BuildMI(MBB, I, DL, get(Mips::MTLO))
- .addReg(SrcReg, getKillRegState(KillSrc));
- else
- llvm_unreachable("Copy from CPU to invalid register");
- return;
+ Opc = Mips::MTLO, DestReg = 0;
}
-
- if (Mips::FGR32RegClass.contains(DestReg, SrcReg)) {
- BuildMI(MBB, I, DL, get(Mips::FMOV_S32), DestReg)
- .addReg(SrcReg, getKillRegState(KillSrc));
- return;
+ else if (Mips::FGR32RegClass.contains(DestReg, SrcReg))
+ Opc = Mips::FMOV_S;
+ else if (Mips::AFGR64RegClass.contains(DestReg, SrcReg))
+ Opc = Mips::FMOV_D32;
+ else if (Mips::CCRRegClass.contains(DestReg, SrcReg))
+ Opc = Mips::MOVCCRToCCR;
+ else if (Mips::CPU64RegsRegClass.contains(DestReg)) { // Copy to CPU64 Reg.
+ if (Mips::CPU64RegsRegClass.contains(SrcReg))
+ Opc = Mips::DADDu, ZeroReg = Mips::ZERO_64;
+ else if (SrcReg == Mips::HI64)
+ Opc = Mips::MFHI64, SrcReg = 0;
+ else if (SrcReg == Mips::LO64)
+ Opc = Mips::MFLO64, SrcReg = 0;
}
-
- if (Mips::AFGR64RegClass.contains(DestReg, SrcReg)) {
- BuildMI(MBB, I, DL, get(Mips::FMOV_D32), DestReg)
- .addReg(SrcReg, getKillRegState(KillSrc));
- return;
+ else if (Mips::CPU64RegsRegClass.contains(SrcReg)) { // Copy from CPU64 Reg.
+ if (DestReg == Mips::HI64)
+ Opc = Mips::MTHI64, DestReg = 0;
+ else if (DestReg == Mips::LO64)
+ Opc = Mips::MTLO64, DestReg = 0;
}
- if (Mips::CCRRegClass.contains(DestReg, SrcReg)) {
- BuildMI(MBB, I, DL, get(Mips::MOVCCRToCCR), DestReg)
- .addReg(SrcReg, getKillRegState(KillSrc));
- return;
- }
- llvm_unreachable("Cannot copy registers");
+ assert(Opc && "Cannot copy registers");
+
+ MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(Opc));
+
+ if (DestReg)
+ MIB.addReg(DestReg, RegState::Define);
+
+ if (ZeroReg)
+ MIB.addReg(ZeroReg);
+
+ if (SrcReg)
+ MIB.addReg(SrcReg, getKillRegState(KillSrc));
}
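The rewritten copyPhysReg collapses the per-class BuildMI calls into one "classify, then emit" step: each case now only chooses an opcode (plus, for GPR-to-GPR copies, a zero register), and a DestReg/SrcReg of 0 marks an operand the chosen instruction does not take explicitly (the HI/LO moves). A simplified standalone model of that control flow, with stand-in types rather than the LLVM API:

    #include <cassert>

    enum Opc { NoOpc, ADDu, MFHI, MFLO /* remaining opcodes elided */ };

    struct CopyPlan {
      Opc opc;
      bool explicitDest, useZero, explicitSrc;
    };

    // Same shape as copyPhysReg above: decide everything first, build once.
    CopyPlan classifyCopy(bool destIsCPU, bool srcIsCPU, bool srcIsHI) {
      CopyPlan p{NoOpc, true, false, true};
      if (destIsCPU && srcIsCPU)     { p.opc = ADDu; p.useZero = true; }
      else if (destIsCPU && srcIsHI) { p.opc = MFHI; p.explicitSrc = false; }
      // ... other register-class pairs elided ...
      assert(p.opc != NoOpc && "Cannot copy registers");
      return p;
    }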
void MipsInstrInfo::
@@ -167,31 +169,22 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
const TargetRegisterInfo *TRI) const {
DebugLoc DL;
if (I != MBB.end()) DL = I->getDebugLoc();
+ unsigned Opc = 0;
if (RC == Mips::CPURegsRegisterClass)
- BuildMI(MBB, I, DL, get(Mips::SW)).addReg(SrcReg, getKillRegState(isKill))
- .addFrameIndex(FI).addImm(0);
+ Opc = IsN64 ? Mips::SW_P8 : Mips::SW;
+ else if (RC == Mips::CPU64RegsRegisterClass)
+ Opc = IsN64 ? Mips::SD_P8 : Mips::SD;
else if (RC == Mips::FGR32RegisterClass)
- BuildMI(MBB, I, DL, get(Mips::SWC1)).addReg(SrcReg, getKillRegState(isKill))
- .addFrameIndex(FI).addImm(0);
- else if (RC == Mips::AFGR64RegisterClass) {
- if (!TM.getSubtarget<MipsSubtarget>().isMips1()) {
- BuildMI(MBB, I, DL, get(Mips::SDC1))
- .addReg(SrcReg, getKillRegState(isKill))
- .addFrameIndex(FI).addImm(0);
- } else {
- const TargetRegisterInfo *TRI =
- MBB.getParent()->getTarget().getRegisterInfo();
- const unsigned *SubSet = TRI->getSubRegisters(SrcReg);
- BuildMI(MBB, I, DL, get(Mips::SWC1))
- .addReg(SubSet[0], getKillRegState(isKill))
- .addFrameIndex(FI).addImm(0);
- BuildMI(MBB, I, DL, get(Mips::SWC1))
- .addReg(SubSet[1], getKillRegState(isKill))
- .addFrameIndex(FI).addImm(4);
- }
- } else
- llvm_unreachable("Register class not handled!");
+ Opc = IsN64 ? Mips::SWC1_P8 : Mips::SWC1;
+ else if (RC == Mips::AFGR64RegisterClass)
+ Opc = Mips::SDC1;
+ else if (RC == Mips::FGR64RegisterClass)
+ Opc = IsN64 ? Mips::SDC164_P8 : Mips::SDC164;
+
+ assert(Opc && "Register class not handled!");
+ BuildMI(MBB, I, DL, get(Opc)).addReg(SrcReg, getKillRegState(isKill))
+ .addFrameIndex(FI).addImm(0);
}
void MipsInstrInfo::
@@ -202,25 +195,21 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
{
DebugLoc DL;
if (I != MBB.end()) DL = I->getDebugLoc();
+ unsigned Opc = 0;
if (RC == Mips::CPURegsRegisterClass)
- BuildMI(MBB, I, DL, get(Mips::LW), DestReg).addFrameIndex(FI).addImm(0);
+ Opc = IsN64 ? Mips::LW_P8 : Mips::LW;
+ else if (RC == Mips::CPU64RegsRegisterClass)
+ Opc = IsN64 ? Mips::LD_P8 : Mips::LD;
else if (RC == Mips::FGR32RegisterClass)
- BuildMI(MBB, I, DL, get(Mips::LWC1), DestReg).addFrameIndex(FI).addImm(0);
- else if (RC == Mips::AFGR64RegisterClass) {
- if (!TM.getSubtarget<MipsSubtarget>().isMips1()) {
- BuildMI(MBB, I, DL, get(Mips::LDC1), DestReg).addFrameIndex(FI).addImm(0);
- } else {
- const TargetRegisterInfo *TRI =
- MBB.getParent()->getTarget().getRegisterInfo();
- const unsigned *SubSet = TRI->getSubRegisters(DestReg);
- BuildMI(MBB, I, DL, get(Mips::LWC1), SubSet[0])
- .addFrameIndex(FI).addImm(0);
- BuildMI(MBB, I, DL, get(Mips::LWC1), SubSet[1])
- .addFrameIndex(FI).addImm(4);
- }
- } else
- llvm_unreachable("Register class not handled!");
+ Opc = IsN64 ? Mips::LWC1_P8 : Mips::LWC1;
+ else if (RC == Mips::AFGR64RegisterClass)
+ Opc = Mips::LDC1;
+ else if (RC == Mips::FGR64RegisterClass)
+ Opc = IsN64 ? Mips::LDC164_P8 : Mips::LDC164;
+
+ assert(Opc && "Register class not handled!");
+ BuildMI(MBB, I, DL, get(Opc), DestReg).addFrameIndex(FI).addImm(0);
}
MachineInstr*
@@ -237,9 +226,12 @@ MipsInstrInfo::emitFrameIndexDebugValue(MachineFunction &MF, int FrameIx,
//===----------------------------------------------------------------------===//
static unsigned GetAnalyzableBrOpc(unsigned Opc) {
- return (Opc == Mips::BEQ || Opc == Mips::BNE || Opc == Mips::BGTZ ||
- Opc == Mips::BGEZ || Opc == Mips::BLTZ || Opc == Mips::BLEZ ||
- Opc == Mips::BC1T || Opc == Mips::BC1F || Opc == Mips::J) ? Opc : 0;
+ return (Opc == Mips::BEQ || Opc == Mips::BNE || Opc == Mips::BGTZ ||
+ Opc == Mips::BGEZ || Opc == Mips::BLTZ || Opc == Mips::BLEZ ||
+ Opc == Mips::BEQ64 || Opc == Mips::BNE64 || Opc == Mips::BGTZ64 ||
+ Opc == Mips::BGEZ64 || Opc == Mips::BLTZ64 || Opc == Mips::BLEZ64 ||
+ Opc == Mips::BC1T || Opc == Mips::BC1F || Opc == Mips::J) ?
+ Opc : 0;
}
/// GetOppositeBranchOpc - Return the inverse of the specified
@@ -248,14 +240,20 @@ unsigned Mips::GetOppositeBranchOpc(unsigned Opc)
{
switch (Opc) {
default: llvm_unreachable("Illegal opcode!");
- case Mips::BEQ : return Mips::BNE;
- case Mips::BNE : return Mips::BEQ;
- case Mips::BGTZ : return Mips::BLEZ;
- case Mips::BGEZ : return Mips::BLTZ;
- case Mips::BLTZ : return Mips::BGEZ;
- case Mips::BLEZ : return Mips::BGTZ;
- case Mips::BC1T : return Mips::BC1F;
- case Mips::BC1F : return Mips::BC1T;
+ case Mips::BEQ : return Mips::BNE;
+ case Mips::BNE : return Mips::BEQ;
+ case Mips::BGTZ : return Mips::BLEZ;
+ case Mips::BGEZ : return Mips::BLTZ;
+ case Mips::BLTZ : return Mips::BGEZ;
+ case Mips::BLEZ : return Mips::BGTZ;
+ case Mips::BEQ64 : return Mips::BNE64;
+ case Mips::BNE64 : return Mips::BEQ64;
+ case Mips::BGTZ64 : return Mips::BLEZ64;
+ case Mips::BGEZ64 : return Mips::BLTZ64;
+ case Mips::BLTZ64 : return Mips::BGEZ64;
+ case Mips::BLEZ64 : return Mips::BGTZ64;
+ case Mips::BC1T : return Mips::BC1F;
+ case Mips::BC1F : return Mips::BC1T;
}
}
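One property the extended table keeps: branch inversion is an involution, so applying GetOppositeBranchOpc twice returns the original opcode (BEQ/BNE, BGTZ/BLEZ, BGEZ/BLTZ, in both 32- and 64-bit flavors, plus BC1T/BC1F). A tiny standalone check of the idea on a stand-in enum, not the real Mips:: opcodes:

    #include <cassert>
    #include <initializer_list>

    enum Br { BEQ, BNE, BGTZ, BLEZ, BGEZ, BLTZ };

    Br opposite(Br b) {
      switch (b) {
      case BEQ:  return BNE;   case BNE:  return BEQ;
      case BGTZ: return BLEZ;  case BLEZ: return BGTZ;
      case BGEZ: return BLTZ;  case BLTZ: return BGEZ;
      }
      return BEQ; // unreachable
    }

    int main() {
      for (Br b : {BEQ, BNE, BGTZ, BLEZ, BGEZ, BLTZ})
        assert(opposite(opposite(b)) == b); // inversion undoes itself
    }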
diff --git a/lib/Target/Mips/MipsInstrInfo.h b/lib/Target/Mips/MipsInstrInfo.h
index 4421c48..271d248 100644
--- a/lib/Target/Mips/MipsInstrInfo.h
+++ b/lib/Target/Mips/MipsInstrInfo.h
@@ -72,12 +72,47 @@ namespace MipsII {
/// MO_TPREL_HI/LO - Represents the hi and low part of the offset from
// the thread pointer (Local Exec TLS).
MO_TPREL_HI,
- MO_TPREL_LO
+ MO_TPREL_LO,
+
+ // N32/64 Flags.
+ MO_GPOFF_HI,
+ MO_GPOFF_LO,
+ MO_GOT_DISP,
+ MO_GOT_PAGE,
+ MO_GOT_OFST
+ };
+
+ enum {
+ //===------------------------------------------------------------------===//
+ // Instruction encodings. These are the standard/most common forms for
+ // Mips instructions.
+ //
+
+ // Pseudo - This represents an instruction that is a pseudo instruction
+ // or one that has not been implemented yet. It is illegal to code generate
+ // it, but tolerated for intermediate implementation stages.
+ Pseudo = 0,
+
+ /// FrmR - This form is for instructions of the format R.
+ FrmR = 1,
+ /// FrmI - This form is for instructions of the format I.
+ FrmI = 2,
+ /// FrmJ - This form is for instructions of the format J.
+ FrmJ = 3,
+ /// FrmFR - This form is for instructions of the format FR.
+ FrmFR = 4,
+ /// FrmFI - This form is for instructions of the format FI.
+ FrmFI = 5,
+ /// FrmOther - This form is for instructions that have no specific format.
+ FrmOther = 6,
+
+ FormMask = 15
};
}
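FormMask = 15 indicates the instruction's form is meant to live in the low four bits of its target-specific flag word, to be recovered with a mask. A hypothetical accessor written under that assumption; neither the function nor any flags layout beyond the mask is taken from the source:

    #include <cstdint>

    // Hypothetical: extract the MipsII format (FrmR, FrmI, ...) from the
    // low bits of a TSFlags-style word, per the FormMask = 15 above.
    unsigned getInstFormat(uint64_t tsFlags) {
      return static_cast<unsigned>(tsFlags & 15); // FormMask
    }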
class MipsInstrInfo : public MipsGenInstrInfo {
MipsTargetMachine &TM;
+ bool IsN64;
const MipsRegisterInfo RI;
public:
explicit MipsInstrInfo(MipsTargetMachine &TM);
diff --git a/lib/Target/Mips/MipsInstrInfo.td b/lib/Target/Mips/MipsInstrInfo.td
index d1a0587..06b7de7 100644
--- a/lib/Target/Mips/MipsInstrInfo.td
+++ b/lib/Target/Mips/MipsInstrInfo.td
@@ -34,13 +34,20 @@ def SDT_MipsMAddMSub : SDTypeProfile<0, 4,
SDTCisSameAs<1, 2>,
SDTCisSameAs<2, 3>]>;
def SDT_MipsDivRem : SDTypeProfile<0, 2,
- [SDTCisVT<0, i32>,
+ [SDTCisInt<0>,
SDTCisSameAs<0, 1>]>;
def SDT_MipsThreadPointer : SDTypeProfile<1, 0, [SDTCisPtrTy<0>]>;
def SDT_MipsDynAlloc : SDTypeProfile<1, 1, [SDTCisVT<0, i32>,
SDTCisVT<1, iPTR>]>;
+def SDT_Sync : SDTypeProfile<0, 1, [SDTCisVT<0, i32>]>;
+
+def SDT_Ext : SDTypeProfile<1, 3, [SDTCisInt<0>, SDTCisSameAs<0, 1>,
+ SDTCisVT<2, i32>, SDTCisSameAs<2, 3>]>;
+def SDT_Ins : SDTypeProfile<1, 4, [SDTCisInt<0>, SDTCisSameAs<0, 1>,
+ SDTCisVT<2, i32>, SDTCisSameAs<2, 3>,
+ SDTCisSameAs<0, 4>]>;
// Call
def MipsJmpLink : SDNode<"MipsISD::JmpLink",SDT_MipsJmpLink,
@@ -106,6 +113,11 @@ def MipsWrapperPIC : SDNode<"MipsISD::WrapperPIC", SDTIntUnaryOp>;
def MipsDynAlloc : SDNode<"MipsISD::DynAlloc", SDT_MipsDynAlloc,
[SDNPHasChain, SDNPInGlue]>;
+def MipsSync : SDNode<"MipsISD::Sync", SDT_Sync, [SDNPHasChain]>;
+
+def MipsExt : SDNode<"MipsISD::Ext", SDT_Ext>;
+def MipsIns : SDNode<"MipsISD::Ins", SDT_Ins>;
+
//===----------------------------------------------------------------------===//
// Mips Instruction Predicate Definitions.
//===----------------------------------------------------------------------===//
@@ -113,8 +125,13 @@ def HasSEInReg : Predicate<"Subtarget.hasSEInReg()">;
def HasBitCount : Predicate<"Subtarget.hasBitCount()">;
def HasSwap : Predicate<"Subtarget.hasSwap()">;
def HasCondMov : Predicate<"Subtarget.hasCondMov()">;
-def IsMips32 : Predicate<"Subtarget.isMips32()">;
-def IsMips32r2 : Predicate<"Subtarget.isMips32r2()">;
+def HasMips32 : Predicate<"Subtarget.hasMips32()">;
+def HasMips32r2 : Predicate<"Subtarget.hasMips32r2()">;
+def HasMips64 : Predicate<"Subtarget.hasMips64()">;
+def NotMips64 : Predicate<"!Subtarget.hasMips64()">;
+def HasMips64r2 : Predicate<"Subtarget.hasMips64r2()">;
+def IsN64 : Predicate<"Subtarget.isABI_N64()">;
+def NotN64 : Predicate<"!Subtarget.isABI_N64()">;
//===----------------------------------------------------------------------===//
// Mips Operand, Complex Patterns and Transformations Definitions.
@@ -124,6 +141,7 @@ def IsMips32r2 : Predicate<"Subtarget.isMips32r2()">;
def brtarget : Operand<OtherVT>;
def calltarget : Operand<i32>;
def simm16 : Operand<i32>;
+def simm16_64 : Operand<i64>;
def shamt : Operand<i32>;
// Unsigned Operand
@@ -137,6 +155,11 @@ def mem : Operand<i32> {
let MIOperandInfo = (ops CPURegs, simm16);
}
+def mem64 : Operand<i64> {
+ let PrintMethod = "printMemOperand";
+ let MIOperandInfo = (ops CPU64Regs, simm16_64);
+}
+
def mem_ea : Operand<i32> {
let PrintMethod = "printMemOperandEA";
let MIOperandInfo = (ops CPURegs, simm16);
@@ -177,36 +200,85 @@ def immZExt5 : PatLeaf<(imm), [{
def addr : ComplexPattern<iPTR, 2, "SelectAddr", [frameindex], []>;
//===----------------------------------------------------------------------===//
+// Pattern fragments for load/store
+//===----------------------------------------------------------------------===//
+class UnalignedLoad<PatFrag Node> : PatFrag<(ops node:$ptr), (Node node:$ptr), [{
+ LoadSDNode *LD = cast<LoadSDNode>(N);
+ return LD->getMemoryVT().getSizeInBits()/8 > LD->getAlignment();
+}]>;
+
+class AlignedLoad<PatFrag Node> : PatFrag<(ops node:$ptr), (Node node:$ptr), [{
+ LoadSDNode *LD = cast<LoadSDNode>(N);
+ return LD->getMemoryVT().getSizeInBits()/8 <= LD->getAlignment();
+}]>;
+
+class UnalignedStore<PatFrag Node> : PatFrag<(ops node:$val, node:$ptr),
+ (Node node:$val, node:$ptr), [{
+ StoreSDNode *SD = cast<StoreSDNode>(N);
+ return SD->getMemoryVT().getSizeInBits()/8 > SD->getAlignment();
+}]>;
+
+class AlignedStore<PatFrag Node> : PatFrag<(ops node:$val, node:$ptr),
+ (Node node:$val, node:$ptr), [{
+ StoreSDNode *SD = cast<StoreSDNode>(N);
+ return SD->getMemoryVT().getSizeInBits()/8 <= SD->getAlignment();
+}]>;
+
+// Load/Store PatFrags.
+def sextloadi16_a : AlignedLoad<sextloadi16>;
+def zextloadi16_a : AlignedLoad<zextloadi16>;
+def extloadi16_a : AlignedLoad<extloadi16>;
+def load_a : AlignedLoad<load>;
+def sextloadi32_a : AlignedLoad<sextloadi32>;
+def zextloadi32_a : AlignedLoad<zextloadi32>;
+def extloadi32_a : AlignedLoad<extloadi32>;
+def truncstorei16_a : AlignedStore<truncstorei16>;
+def store_a : AlignedStore<store>;
+def truncstorei32_a : AlignedStore<truncstorei32>;
+def sextloadi16_u : UnalignedLoad<sextloadi16>;
+def zextloadi16_u : UnalignedLoad<zextloadi16>;
+def extloadi16_u : UnalignedLoad<extloadi16>;
+def load_u : UnalignedLoad<load>;
+def sextloadi32_u : UnalignedLoad<sextloadi32>;
+def zextloadi32_u : UnalignedLoad<zextloadi32>;
+def extloadi32_u : UnalignedLoad<extloadi32>;
+def truncstorei16_u : UnalignedStore<truncstorei16>;
+def store_u : UnalignedStore<store>;
+def truncstorei32_u : UnalignedStore<truncstorei32>;
+
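The _a/_u fragment pairs split every memory operation on a single test: whether the access's natural width fits within the operand's known alignment. The C++ between the [{ }] braces is the entire predicate; restated over plain integers:

    // An access is "aligned" for the *_a patterns when its size in bytes
    // does not exceed the alignment of the address; otherwise the *_u
    // patterns (and the ulw/ulh-style pseudos) take over.
    bool isAlignedAccess(unsigned sizeInBits, unsigned alignInBytes) {
      return sizeInBits / 8 <= alignInBytes;
    }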
+//===----------------------------------------------------------------------===//
// Instructions specific format
//===----------------------------------------------------------------------===//
-// Arithmetic 3 register operands
-class ArithR<bits<6> op, bits<6> func, string instr_asm, SDNode OpNode,
- InstrItinClass itin, bit isComm = 0>:
- FR<op, func, (outs CPURegs:$dst), (ins CPURegs:$b, CPURegs:$c),
- !strconcat(instr_asm, "\t$dst, $b, $c"),
- [(set CPURegs:$dst, (OpNode CPURegs:$b, CPURegs:$c))], itin> {
+// Arithmetic and logical instructions with 3 register operands.
+class ArithLogicR<bits<6> op, bits<6> func, string instr_asm, SDNode OpNode,
+ InstrItinClass itin, RegisterClass RC, bit isComm = 0>:
+ FR<op, func, (outs RC:$rd), (ins RC:$rs, RC:$rt),
+ !strconcat(instr_asm, "\t$rd, $rs, $rt"),
+ [(set RC:$rd, (OpNode RC:$rs, RC:$rt))], itin> {
+ let shamt = 0;
let isCommutable = isComm;
}
class ArithOverflowR<bits<6> op, bits<6> func, string instr_asm,
- bit isComm = 0>:
- FR<op, func, (outs CPURegs:$dst), (ins CPURegs:$b, CPURegs:$c),
- !strconcat(instr_asm, "\t$dst, $b, $c"), [], IIAlu> {
+ InstrItinClass itin, RegisterClass RC, bit isComm = 0>:
+ FR<op, func, (outs RC:$rd), (ins RC:$rs, RC:$rt),
+ !strconcat(instr_asm, "\t$rd, $rs, $rt"), [], itin> {
+ let shamt = 0;
let isCommutable = isComm;
}
-// Arithmetic 2 register operands
-class ArithI<bits<6> op, string instr_asm, SDNode OpNode,
- Operand Od, PatLeaf imm_type> :
- FI<op, (outs CPURegs:$dst), (ins CPURegs:$b, Od:$c),
- !strconcat(instr_asm, "\t$dst, $b, $c"),
- [(set CPURegs:$dst, (OpNode CPURegs:$b, imm_type:$c))], IIAlu>;
+// Arithmetic and logical instructions with 2 register operands.
+class ArithLogicI<bits<6> op, string instr_asm, SDNode OpNode,
+ Operand Od, PatLeaf imm_type, RegisterClass RC> :
+ FI<op, (outs RC:$rt), (ins RC:$rs, Od:$i),
+ !strconcat(instr_asm, "\t$rt, $rs, $i"),
+ [(set RC:$rt, (OpNode RC:$rs, imm_type:$i))], IIAlu>;
class ArithOverflowI<bits<6> op, string instr_asm, SDNode OpNode,
- Operand Od, PatLeaf imm_type> :
- FI<op, (outs CPURegs:$dst), (ins CPURegs:$b, Od:$c),
- !strconcat(instr_asm, "\t$dst, $b, $c"), [], IIAlu>;
+ Operand Od, PatLeaf imm_type, RegisterClass RC> :
+ FI<op, (outs RC:$rt), (ins RC:$rs, Od:$i),
+ !strconcat(instr_asm, "\t$rt, $rs, $i"), [], IIAlu>;
// Arithmetic Multiply ADD/SUB
let rd = 0, shamt = 0, Defs = [HI, LO], Uses = [HI, LO] in
@@ -214,92 +286,134 @@ class MArithR<bits<6> func, string instr_asm, SDNode op, bit isComm = 0> :
FR<0x1c, func, (outs), (ins CPURegs:$rs, CPURegs:$rt),
!strconcat(instr_asm, "\t$rs, $rt"),
[(op CPURegs:$rs, CPURegs:$rt, LO, HI)], IIImul> {
+ let rd = 0;
+ let shamt = 0;
let isCommutable = isComm;
}
// Logical
-let isCommutable = 1 in
-class LogicR<bits<6> func, string instr_asm, SDNode OpNode>:
- FR<0x00, func, (outs CPURegs:$dst), (ins CPURegs:$b, CPURegs:$c),
- !strconcat(instr_asm, "\t$dst, $b, $c"),
- [(set CPURegs:$dst, (OpNode CPURegs:$b, CPURegs:$c))], IIAlu>;
-
-class LogicI<bits<6> op, string instr_asm, SDNode OpNode>:
- FI<op, (outs CPURegs:$dst), (ins CPURegs:$b, uimm16:$c),
- !strconcat(instr_asm, "\t$dst, $b, $c"),
- [(set CPURegs:$dst, (OpNode CPURegs:$b, immZExt16:$c))], IIAlu>;
-
-let isCommutable = 1 in
-class LogicNOR<bits<6> op, bits<6> func, string instr_asm>:
- FR<op, func, (outs CPURegs:$dst), (ins CPURegs:$b, CPURegs:$c),
- !strconcat(instr_asm, "\t$dst, $b, $c"),
- [(set CPURegs:$dst, (not (or CPURegs:$b, CPURegs:$c)))], IIAlu>;
+class LogicNOR<bits<6> op, bits<6> func, string instr_asm, RegisterClass RC>:
+ FR<op, func, (outs RC:$rd), (ins RC:$rs, RC:$rt),
+ !strconcat(instr_asm, "\t$rd, $rs, $rt"),
+ [(set RC:$rd, (not (or RC:$rs, RC:$rt)))], IIAlu> {
+ let shamt = 0;
+ let isCommutable = 1;
+}
// Shifts
class LogicR_shift_rotate_imm<bits<6> func, bits<5> _rs, string instr_asm,
SDNode OpNode>:
- FR<0x00, func, (outs CPURegs:$dst), (ins CPURegs:$b, shamt:$c),
- !strconcat(instr_asm, "\t$dst, $b, $c"),
- [(set CPURegs:$dst, (OpNode CPURegs:$b, immZExt5:$c))], IIAlu> {
+ FR<0x00, func, (outs CPURegs:$rd), (ins CPURegs:$rt, shamt:$shamt),
+ !strconcat(instr_asm, "\t$rd, $rt, $shamt"),
+ [(set CPURegs:$rd, (OpNode CPURegs:$rt, (i32 immZExt5:$shamt)))], IIAlu> {
let rs = _rs;
}
-class LogicR_shift_rotate_reg<bits<6> func, bits<5> _shamt, string instr_asm,
+class LogicR_shift_rotate_reg<bits<6> func, bits<5> isRotate, string instr_asm,
SDNode OpNode>:
- FR<0x00, func, (outs CPURegs:$dst), (ins CPURegs:$c, CPURegs:$b),
- !strconcat(instr_asm, "\t$dst, $b, $c"),
- [(set CPURegs:$dst, (OpNode CPURegs:$b, CPURegs:$c))], IIAlu> {
- let shamt = _shamt;
+ FR<0x00, func, (outs CPURegs:$rd), (ins CPURegs:$rs, CPURegs:$rt),
+ !strconcat(instr_asm, "\t$rd, $rt, $rs"),
+ [(set CPURegs:$rd, (OpNode CPURegs:$rt, CPURegs:$rs))], IIAlu> {
+ let shamt = isRotate;
}
// Load Upper Immediate
class LoadUpper<bits<6> op, string instr_asm>:
- FI< op,
- (outs CPURegs:$dst),
- (ins uimm16:$imm),
- !strconcat(instr_asm, "\t$dst, $imm"),
- [], IIAlu>;
+ FI<op, (outs CPURegs:$rt), (ins uimm16:$imm),
+ !strconcat(instr_asm, "\t$rt, $imm"), [], IIAlu> {
+ let rs = 0;
+}
// Memory Load/Store
-let canFoldAsLoad = 1, hasDelaySlot = 1 in
-class LoadM<bits<6> op, string instr_asm, PatFrag OpNode>:
- FI<op, (outs CPURegs:$dst), (ins mem:$addr),
- !strconcat(instr_asm, "\t$dst, $addr"),
- [(set CPURegs:$dst, (OpNode addr:$addr))], IILoad>;
+let canFoldAsLoad = 1 in
+class LoadM<bits<6> op, string instr_asm, PatFrag OpNode, RegisterClass RC,
+ Operand MemOpnd, bit Pseudo>:
+ FI<op, (outs RC:$rt), (ins MemOpnd:$addr),
+ !strconcat(instr_asm, "\t$rt, $addr"),
+ [(set RC:$rt, (OpNode addr:$addr))], IILoad> {
+ let isPseudo = Pseudo;
+}
-class StoreM<bits<6> op, string instr_asm, PatFrag OpNode>:
- FI<op, (outs), (ins CPURegs:$dst, mem:$addr),
- !strconcat(instr_asm, "\t$dst, $addr"),
- [(OpNode CPURegs:$dst, addr:$addr)], IIStore>;
+class StoreM<bits<6> op, string instr_asm, PatFrag OpNode, RegisterClass RC,
+ Operand MemOpnd, bit Pseudo>:
+ FI<op, (outs), (ins RC:$rt, MemOpnd:$addr),
+ !strconcat(instr_asm, "\t$rt, $addr"),
+ [(OpNode RC:$rt, addr:$addr)], IIStore> {
+ let isPseudo = Pseudo;
+}
+
+// 32-bit load.
+multiclass LoadM32<bits<6> op, string instr_asm, PatFrag OpNode,
+ bit Pseudo = 0> {
+ def #NAME# : LoadM<op, instr_asm, OpNode, CPURegs, mem, Pseudo>,
+ Requires<[NotN64]>;
+ def _P8 : LoadM<op, instr_asm, OpNode, CPURegs, mem64, Pseudo>,
+ Requires<[IsN64]>;
+}
+
+// 64-bit load.
+multiclass LoadM64<bits<6> op, string instr_asm, PatFrag OpNode,
+ bit Pseudo = 0> {
+ def #NAME# : LoadM<op, instr_asm, OpNode, CPU64Regs, mem, Pseudo>,
+ Requires<[NotN64]>;
+ def _P8 : LoadM<op, instr_asm, OpNode, CPU64Regs, mem64, Pseudo>,
+ Requires<[IsN64]>;
+}
+
+// 32-bit store.
+multiclass StoreM32<bits<6> op, string instr_asm, PatFrag OpNode,
+ bit Pseudo = 0> {
+ def #NAME# : StoreM<op, instr_asm, OpNode, CPURegs, mem, Pseudo>,
+ Requires<[NotN64]>;
+ def _P8 : StoreM<op, instr_asm, OpNode, CPURegs, mem64, Pseudo>,
+ Requires<[IsN64]>;
+}
+
+// 64-bit store.
+multiclass StoreM64<bits<6> op, string instr_asm, PatFrag OpNode,
+ bit Pseudo = 0> {
+ def #NAME# : StoreM<op, instr_asm, OpNode, CPU64Regs, mem, Pseudo>,
+ Requires<[NotN64]>;
+ def _P8 : StoreM<op, instr_asm, OpNode, CPU64Regs, mem64, Pseudo>,
+ Requires<[IsN64]>;
+}
// Conditional Branch
-let isBranch = 1, isTerminator=1, hasDelaySlot = 1 in {
-class CBranch<bits<6> op, string instr_asm, PatFrag cond_op>:
- FI<op, (outs), (ins CPURegs:$a, CPURegs:$b, brtarget:$offset),
- !strconcat(instr_asm, "\t$a, $b, $offset"),
- [(brcond (cond_op CPURegs:$a, CPURegs:$b), bb:$offset)],
- IIBranch>;
+class CBranch<bits<6> op, string instr_asm, PatFrag cond_op, RegisterClass RC>:
+ CBranchBase<op, (outs), (ins RC:$rs, RC:$rt, brtarget:$offset),
+ !strconcat(instr_asm, "\t$rs, $rt, $offset"),
+ [(brcond (i32 (cond_op RC:$rs, RC:$rt)), bb:$offset)], IIBranch> {
+ let isBranch = 1;
+ let isTerminator = 1;
+ let hasDelaySlot = 1;
+}
-class CBranchZero<bits<6> op, string instr_asm, PatFrag cond_op>:
- FI<op, (outs), (ins CPURegs:$src, brtarget:$offset),
- !strconcat(instr_asm, "\t$src, $offset"),
- [(brcond (cond_op CPURegs:$src, 0), bb:$offset)],
- IIBranch>;
+class CBranchZero<bits<6> op, bits<5> _rt, string instr_asm, PatFrag cond_op,
+ RegisterClass RC>:
+ CBranchBase<op, (outs), (ins RC:$rs, brtarget:$offset),
+ !strconcat(instr_asm, "\t$rs, $offset"),
+ [(brcond (i32 (cond_op RC:$rs, 0)), bb:$offset)], IIBranch> {
+ let rt = _rt;
+ let isBranch = 1;
+ let isTerminator = 1;
+ let hasDelaySlot = 1;
}
// SetCC
-class SetCC_R<bits<6> op, bits<6> func, string instr_asm,
- PatFrag cond_op>:
- FR<op, func, (outs CPURegs:$dst), (ins CPURegs:$b, CPURegs:$c),
- !strconcat(instr_asm, "\t$dst, $b, $c"),
- [(set CPURegs:$dst, (cond_op CPURegs:$b, CPURegs:$c))],
- IIAlu>;
+class SetCC_R<bits<6> op, bits<6> func, string instr_asm, PatFrag cond_op,
+ RegisterClass RC>:
+ FR<op, func, (outs CPURegs:$rd), (ins RC:$rs, RC:$rt),
+ !strconcat(instr_asm, "\t$rd, $rs, $rt"),
+ [(set CPURegs:$rd, (cond_op RC:$rs, RC:$rt))],
+ IIAlu> {
+ let shamt = 0;
+}
-class SetCC_I<bits<6> op, string instr_asm, PatFrag cond_op,
- Operand Od, PatLeaf imm_type>:
- FI<op, (outs CPURegs:$dst), (ins CPURegs:$b, Od:$c),
- !strconcat(instr_asm, "\t$dst, $b, $c"),
- [(set CPURegs:$dst, (cond_op CPURegs:$b, imm_type:$c))],
+class SetCC_I<bits<6> op, string instr_asm, PatFrag cond_op, Operand Od,
+ PatLeaf imm_type, RegisterClass RC>:
+ FI<op, (outs CPURegs:$rd), (ins RC:$rs, Od:$i),
+ !strconcat(instr_asm, "\t$rd, $rs, $i"),
+ [(set CPURegs:$rd, (cond_op RC:$rs, imm_type:$i))],
IIAlu>;
// Unconditional branch
@@ -310,8 +424,12 @@ class JumpFJ<bits<6> op, string instr_asm>:
let isBranch=1, isTerminator=1, isBarrier=1, rd=0, hasDelaySlot = 1 in
class JumpFR<bits<6> op, bits<6> func, string instr_asm>:
- FR<op, func, (outs), (ins CPURegs:$target),
- !strconcat(instr_asm, "\t$target"), [(brind CPURegs:$target)], IIBranch>;
+ FR<op, func, (outs), (ins CPURegs:$rs),
+ !strconcat(instr_asm, "\t$rs"), [(brind CPURegs:$rs)], IIBranch> {
+ let rt = 0;
+ let rd = 0;
+ let shamt = 0;
+}
// Jump and Link (Call)
let isCall=1, hasDelaySlot=1,
@@ -323,76 +441,124 @@ let isCall=1, hasDelaySlot=1,
!strconcat(instr_asm, "\t$target"), [(MipsJmpLink imm:$target)],
IIBranch>;
- let rd=31 in
class JumpLinkReg<bits<6> op, bits<6> func, string instr_asm>:
FR<op, func, (outs), (ins CPURegs:$rs, variable_ops),
- !strconcat(instr_asm, "\t$rs"), [(MipsJmpLink CPURegs:$rs)], IIBranch>;
+ !strconcat(instr_asm, "\t$rs"), [(MipsJmpLink CPURegs:$rs)], IIBranch> {
+ let rt = 0;
+ let rd = 31;
+ let shamt = 0;
+ }
class BranchLink<string instr_asm>:
FI<0x1, (outs), (ins CPURegs:$rs, brtarget:$target, variable_ops),
- !strconcat(instr_asm, "\t$rs, $target"), [], IIBranch>;
+ !strconcat(instr_asm, "\t$rs, $target"), [], IIBranch> {
+ let rt = 0;
+ }
}
// Mul, Div
-let Defs = [HI, LO] in {
- let isCommutable = 1 in
- class Mul<bits<6> func, string instr_asm, InstrItinClass itin>:
- FR<0x00, func, (outs), (ins CPURegs:$a, CPURegs:$b),
- !strconcat(instr_asm, "\t$a, $b"), [], itin>;
+class Mul<bits<6> func, string instr_asm, InstrItinClass itin>:
+ FR<0x00, func, (outs), (ins CPURegs:$rs, CPURegs:$rt),
+ !strconcat(instr_asm, "\t$rs, $rt"), [], itin> {
+ let rd = 0;
+ let shamt = 0;
+ let isCommutable = 1;
+ let Defs = [HI, LO];
+}
- class Div<SDNode op, bits<6> func, string instr_asm, InstrItinClass itin>:
- FR<0x00, func, (outs), (ins CPURegs:$a, CPURegs:$b),
- !strconcat(instr_asm, "\t$$zero, $a, $b"),
- [(op CPURegs:$a, CPURegs:$b)], itin>;
+class Div<SDNode op, bits<6> func, string instr_asm, InstrItinClass itin>:
+ FR<0x00, func, (outs), (ins CPURegs:$rs, CPURegs:$rt),
+ !strconcat(instr_asm, "\t$$zero, $rs, $rt"),
+ [(op CPURegs:$rs, CPURegs:$rt)], itin> {
+ let rd = 0;
+ let shamt = 0;
+ let Defs = [HI, LO];
}
// Move from Hi/Lo
class MoveFromLOHI<bits<6> func, string instr_asm>:
- FR<0x00, func, (outs CPURegs:$dst), (ins),
- !strconcat(instr_asm, "\t$dst"), [], IIHiLo>;
+ FR<0x00, func, (outs CPURegs:$rd), (ins),
+ !strconcat(instr_asm, "\t$rd"), [], IIHiLo> {
+ let rs = 0;
+ let rt = 0;
+ let shamt = 0;
+}
class MoveToLOHI<bits<6> func, string instr_asm>:
- FR<0x00, func, (outs), (ins CPURegs:$src),
- !strconcat(instr_asm, "\t$src"), [], IIHiLo>;
+ FR<0x00, func, (outs), (ins CPURegs:$rs),
+ !strconcat(instr_asm, "\t$rs"), [], IIHiLo> {
+ let rt = 0;
+ let rd = 0;
+ let shamt = 0;
+}
class EffectiveAddress<string instr_asm> :
- FI<0x09, (outs CPURegs:$dst), (ins mem_ea:$addr),
- instr_asm, [(set CPURegs:$dst, addr:$addr)], IIAlu>;
+ FI<0x09, (outs CPURegs:$rt), (ins mem_ea:$addr),
+ instr_asm, [(set CPURegs:$rt, addr:$addr)], IIAlu>;
// Count Leading Ones/Zeros in Word
class CountLeading<bits<6> func, string instr_asm, list<dag> pattern>:
- FR<0x1c, func, (outs CPURegs:$dst), (ins CPURegs:$src),
- !strconcat(instr_asm, "\t$dst, $src"), pattern, IIAlu>,
+ FR<0x1c, func, (outs CPURegs:$rd), (ins CPURegs:$rs),
+ !strconcat(instr_asm, "\t$rd, $rs"), pattern, IIAlu>,
Requires<[HasBitCount]> {
let shamt = 0;
let rt = rd;
}
// Sign Extend in Register.
-class SignExtInReg<bits<6> func, string instr_asm, ValueType vt>:
- FR<0x3f, func, (outs CPURegs:$dst), (ins CPURegs:$src),
- !strconcat(instr_asm, "\t$dst, $src"),
- [(set CPURegs:$dst, (sext_inreg CPURegs:$src, vt))], NoItinerary>;
+class SignExtInReg<bits<5> sa, string instr_asm, ValueType vt>:
+ FR<0x3f, 0x20, (outs CPURegs:$rd), (ins CPURegs:$rt),
+ !strconcat(instr_asm, "\t$rd, $rt"),
+ [(set CPURegs:$rd, (sext_inreg CPURegs:$rt, vt))], NoItinerary> {
+ let rs = 0;
+ let shamt = sa;
+ let Predicates = [HasSEInReg];
+}
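The rewritten SignExtInReg hardwires funct 0x20 and folds the shamt/rs settings and the HasSEInReg predicate into the class, so the SEB/SEH definitions below shrink to one line each. Semantically, sext_inreg of i8/i16 is just a narrowing-then-widening cast; in C++ terms:

    #include <cstdint>

    // seb: sign-extend the low 8 bits of a GPR in place; seh: low 16 bits.
    int32_t seb(int32_t x) { return static_cast<int8_t>(x);  }
    int32_t seh(int32_t x) { return static_cast<int16_t>(x); }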
// Byte Swap
-class ByteSwap<bits<6> func, string instr_asm>:
- FR<0x1f, func, (outs CPURegs:$dst), (ins CPURegs:$src),
- !strconcat(instr_asm, "\t$dst, $src"),
- [(set CPURegs:$dst, (bswap CPURegs:$src))], NoItinerary>;
-
-// Conditional Move
-class CondMov<bits<6> func, string instr_asm, PatLeaf MovCode>:
- FR<0x00, func, (outs CPURegs:$dst), (ins CPURegs:$F, CPURegs:$T,
- CPURegs:$cond), !strconcat(instr_asm, "\t$dst, $T, $cond"),
- [], NoItinerary>;
+class ByteSwap<bits<6> func, bits<5> sa, string instr_asm>:
+ FR<0x1f, func, (outs CPURegs:$rd), (ins CPURegs:$rt),
+ !strconcat(instr_asm, "\t$rd, $rt"),
+ [(set CPURegs:$rd, (bswap CPURegs:$rt))], NoItinerary> {
+ let rs = 0;
+ let shamt = sa;
+ let Predicates = [HasSwap];
+}
// Read Hardware
-class ReadHardware: FR<0x1f, 0x3b, (outs CPURegs:$dst), (ins HWRegs:$src),
- "rdhwr\t$dst, $src", [], IIAlu> {
+class ReadHardware: FR<0x1f, 0x3b, (outs CPURegs:$rt), (ins HWRegs:$rd),
+ "rdhwr\t$rt, $rd", [], IIAlu> {
let rs = 0;
let shamt = 0;
}
+// Ext and Ins
+class ExtIns<bits<6> _funct, string instr_asm, dag outs, dag ins,
+ list<dag> pattern, InstrItinClass itin>:
+ FR<0x1f, _funct, outs, ins, !strconcat(instr_asm, " $rt, $rs, $pos, $sz"),
+ pattern, itin>, Requires<[HasMips32r2]> {
+ bits<5> pos;
+ bits<5> sz;
+ let rd = sz;
+ let shamt = pos;
+}
+
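Assuming the standard MIPS32r2 semantics for ext/ins (the class above only wires up the encoding: sz goes in the rd field, pos in shamt), MipsExt extracts a bit-field and MipsIns inserts one. A standalone sketch of those semantics:

    #include <cstdint>

    // ext rt, rs, pos, sz: take sz bits of rs starting at bit pos.
    uint32_t mipsExt(uint32_t rs, unsigned pos, unsigned sz) {
      uint32_t mask = sz < 32 ? (1u << sz) - 1 : ~0u;
      return (rs >> pos) & mask;
    }

    // ins rt, rs, pos, sz: replace sz bits of rt at bit pos with the low
    // sz bits of rs (hence the tied $src = $rt constraint on INS below).
    uint32_t mipsIns(uint32_t rt, uint32_t rs, unsigned pos, unsigned sz) {
      uint32_t mask = sz < 32 ? (1u << sz) - 1 : ~0u;
      return (rt & ~(mask << pos)) | ((rs & mask) << pos);
    }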
+// Atomic instructions with 2 source operands (ATOMIC_SWAP & ATOMIC_LOAD_*).
+class Atomic2Ops<PatFrag Op, string Opstr> :
+ MipsPseudo<(outs CPURegs:$dst), (ins CPURegs:$ptr, CPURegs:$incr),
+ !strconcat("atomic_", Opstr, "\t$dst, $ptr, $incr"),
+ [(set CPURegs:$dst,
+ (Op CPURegs:$ptr, CPURegs:$incr))]>;
+
+// Atomic Compare & Swap.
+class AtomicCmpSwap<PatFrag Op, string Width> :
+ MipsPseudo<(outs CPURegs:$dst),
+ (ins CPURegs:$ptr, CPURegs:$cmp, CPURegs:$swap),
+ !strconcat("atomic_cmp_swap_", Width,
+ "\t$dst, $ptr, $cmp, $swap"),
+ [(set CPURegs:$dst,
+ (Op CPURegs:$ptr, CPURegs:$cmp, CPURegs:$swap))]>;
+
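AtomicCmpSwap's pattern pins down the value the pseudo produces: the classic compare-and-swap contract, with the real atomicity supplied later by the custom inserter (usesCustomInserter below; the LL/SC instructions it can build with appear further down in this patch). A deliberately non-atomic sketch of just the computed result:

    #include <cstdint>

    // Returns the old value; the store happens only on a match. Not
    // atomic as written; the target expansion relies on ll/sc for that.
    uint32_t cmpSwap32(uint32_t *ptr, uint32_t cmp, uint32_t swap) {
      uint32_t old = *ptr;
      if (old == cmp)
        *ptr = swap;
      return old;
    }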
//===----------------------------------------------------------------------===//
// Pseudo instructions
//===----------------------------------------------------------------------===//
@@ -427,112 +593,32 @@ def CPLOAD : MipsPseudo<(outs), (ins CPURegs:$picreg), ".cpload\t$picreg", []>;
def CPRESTORE : MipsPseudo<(outs), (ins i32imm:$loc), ".cprestore\t$loc", []>;
let usesCustomInserter = 1 in {
- def ATOMIC_LOAD_ADD_I8 : MipsPseudo<
- (outs CPURegs:$dst), (ins CPURegs:$ptr, CPURegs:$incr),
- "atomic_load_add_8\t$dst, $ptr, $incr",
- [(set CPURegs:$dst, (atomic_load_add_8 CPURegs:$ptr, CPURegs:$incr))]>;
- def ATOMIC_LOAD_ADD_I16 : MipsPseudo<
- (outs CPURegs:$dst), (ins CPURegs:$ptr, CPURegs:$incr),
- "atomic_load_add_16\t$dst, $ptr, $incr",
- [(set CPURegs:$dst, (atomic_load_add_16 CPURegs:$ptr, CPURegs:$incr))]>;
- def ATOMIC_LOAD_ADD_I32 : MipsPseudo<
- (outs CPURegs:$dst), (ins CPURegs:$ptr, CPURegs:$incr),
- "atomic_load_add_32\t$dst, $ptr, $incr",
- [(set CPURegs:$dst, (atomic_load_add_32 CPURegs:$ptr, CPURegs:$incr))]>;
-
- def ATOMIC_LOAD_SUB_I8 : MipsPseudo<
- (outs CPURegs:$dst), (ins CPURegs:$ptr, CPURegs:$incr),
- "atomic_load_sub_8\t$dst, $ptr, $incr",
- [(set CPURegs:$dst, (atomic_load_sub_8 CPURegs:$ptr, CPURegs:$incr))]>;
- def ATOMIC_LOAD_SUB_I16 : MipsPseudo<
- (outs CPURegs:$dst), (ins CPURegs:$ptr, CPURegs:$incr),
- "atomic_load_sub_16\t$dst, $ptr, $incr",
- [(set CPURegs:$dst, (atomic_load_sub_16 CPURegs:$ptr, CPURegs:$incr))]>;
- def ATOMIC_LOAD_SUB_I32 : MipsPseudo<
- (outs CPURegs:$dst), (ins CPURegs:$ptr, CPURegs:$incr),
- "atomic_load_sub_32\t$dst, $ptr, $incr",
- [(set CPURegs:$dst, (atomic_load_sub_32 CPURegs:$ptr, CPURegs:$incr))]>;
-
- def ATOMIC_LOAD_AND_I8 : MipsPseudo<
- (outs CPURegs:$dst), (ins CPURegs:$ptr, CPURegs:$incr),
- "atomic_load_and_8\t$dst, $ptr, $incr",
- [(set CPURegs:$dst, (atomic_load_and_8 CPURegs:$ptr, CPURegs:$incr))]>;
- def ATOMIC_LOAD_AND_I16 : MipsPseudo<
- (outs CPURegs:$dst), (ins CPURegs:$ptr, CPURegs:$incr),
- "atomic_load_and_16\t$dst, $ptr, $incr",
- [(set CPURegs:$dst, (atomic_load_and_16 CPURegs:$ptr, CPURegs:$incr))]>;
- def ATOMIC_LOAD_AND_I32 : MipsPseudo<
- (outs CPURegs:$dst), (ins CPURegs:$ptr, CPURegs:$incr),
- "atomic_load_and_32\t$dst, $ptr, $incr",
- [(set CPURegs:$dst, (atomic_load_and_32 CPURegs:$ptr, CPURegs:$incr))]>;
-
- def ATOMIC_LOAD_OR_I8 : MipsPseudo<
- (outs CPURegs:$dst), (ins CPURegs:$ptr, CPURegs:$incr),
- "atomic_load_or_8\t$dst, $ptr, $incr",
- [(set CPURegs:$dst, (atomic_load_or_8 CPURegs:$ptr, CPURegs:$incr))]>;
- def ATOMIC_LOAD_OR_I16 : MipsPseudo<
- (outs CPURegs:$dst), (ins CPURegs:$ptr, CPURegs:$incr),
- "atomic_load_or_16\t$dst, $ptr, $incr",
- [(set CPURegs:$dst, (atomic_load_or_16 CPURegs:$ptr, CPURegs:$incr))]>;
- def ATOMIC_LOAD_OR_I32 : MipsPseudo<
- (outs CPURegs:$dst), (ins CPURegs:$ptr, CPURegs:$incr),
- "atomic_load_or_32\t$dst, $ptr, $incr",
- [(set CPURegs:$dst, (atomic_load_or_32 CPURegs:$ptr, CPURegs:$incr))]>;
-
- def ATOMIC_LOAD_XOR_I8 : MipsPseudo<
- (outs CPURegs:$dst), (ins CPURegs:$ptr, CPURegs:$incr),
- "atomic_load_xor_8\t$dst, $ptr, $incr",
- [(set CPURegs:$dst, (atomic_load_xor_8 CPURegs:$ptr, CPURegs:$incr))]>;
- def ATOMIC_LOAD_XOR_I16 : MipsPseudo<
- (outs CPURegs:$dst), (ins CPURegs:$ptr, CPURegs:$incr),
- "atomic_load_xor_16\t$dst, $ptr, $incr",
- [(set CPURegs:$dst, (atomic_load_xor_16 CPURegs:$ptr, CPURegs:$incr))]>;
- def ATOMIC_LOAD_XOR_I32 : MipsPseudo<
- (outs CPURegs:$dst), (ins CPURegs:$ptr, CPURegs:$incr),
- "atomic_load_xor_32\t$dst, $ptr, $incr",
- [(set CPURegs:$dst, (atomic_load_xor_32 CPURegs:$ptr, CPURegs:$incr))]>;
-
- def ATOMIC_LOAD_NAND_I8 : MipsPseudo<
- (outs CPURegs:$dst), (ins CPURegs:$ptr, CPURegs:$incr),
- "atomic_load_nand_8\t$dst, $ptr, $incr",
- [(set CPURegs:$dst, (atomic_load_nand_8 CPURegs:$ptr, CPURegs:$incr))]>;
- def ATOMIC_LOAD_NAND_I16 : MipsPseudo<
- (outs CPURegs:$dst), (ins CPURegs:$ptr, CPURegs:$incr),
- "atomic_load_nand_16\t$dst, $ptr, $incr",
- [(set CPURegs:$dst, (atomic_load_nand_16 CPURegs:$ptr, CPURegs:$incr))]>;
- def ATOMIC_LOAD_NAND_I32 : MipsPseudo<
- (outs CPURegs:$dst), (ins CPURegs:$ptr, CPURegs:$incr),
- "atomic_load_nand_32\t$dst, $ptr, $incr",
- [(set CPURegs:$dst, (atomic_load_nand_32 CPURegs:$ptr, CPURegs:$incr))]>;
-
- def ATOMIC_SWAP_I8 : MipsPseudo<
- (outs CPURegs:$dst), (ins CPURegs:$ptr, CPURegs:$val),
- "atomic_swap_8\t$dst, $ptr, $val",
- [(set CPURegs:$dst, (atomic_swap_8 CPURegs:$ptr, CPURegs:$val))]>;
- def ATOMIC_SWAP_I16 : MipsPseudo<
- (outs CPURegs:$dst), (ins CPURegs:$ptr, CPURegs:$val),
- "atomic_swap_16\t$dst, $ptr, $val",
- [(set CPURegs:$dst, (atomic_swap_16 CPURegs:$ptr, CPURegs:$val))]>;
- def ATOMIC_SWAP_I32 : MipsPseudo<
- (outs CPURegs:$dst), (ins CPURegs:$ptr, CPURegs:$val),
- "atomic_swap_32\t$dst, $ptr, $val",
- [(set CPURegs:$dst, (atomic_swap_32 CPURegs:$ptr, CPURegs:$val))]>;
-
- def ATOMIC_CMP_SWAP_I8 : MipsPseudo<
- (outs CPURegs:$dst), (ins CPURegs:$ptr, CPURegs:$oldval, CPURegs:$newval),
- "atomic_cmp_swap_8\t$dst, $ptr, $oldval, $newval",
- [(set CPURegs:$dst,
- (atomic_cmp_swap_8 CPURegs:$ptr, CPURegs:$oldval, CPURegs:$newval))]>;
- def ATOMIC_CMP_SWAP_I16 : MipsPseudo<
- (outs CPURegs:$dst), (ins CPURegs:$ptr, CPURegs:$oldval, CPURegs:$newval),
- "atomic_cmp_swap_16\t$dst, $ptr, $oldval, $newval",
- [(set CPURegs:$dst,
- (atomic_cmp_swap_16 CPURegs:$ptr, CPURegs:$oldval, CPURegs:$newval))]>;
- def ATOMIC_CMP_SWAP_I32 : MipsPseudo<
- (outs CPURegs:$dst), (ins CPURegs:$ptr, CPURegs:$oldval, CPURegs:$newval),
- "atomic_cmp_swap_32\t$dst, $ptr, $oldval, $newval",
- [(set CPURegs:$dst,
- (atomic_cmp_swap_32 CPURegs:$ptr, CPURegs:$oldval, CPURegs:$newval))]>;
+ def ATOMIC_LOAD_ADD_I8 : Atomic2Ops<atomic_load_add_8, "load_add_8">;
+ def ATOMIC_LOAD_ADD_I16 : Atomic2Ops<atomic_load_add_16, "load_add_16">;
+ def ATOMIC_LOAD_ADD_I32 : Atomic2Ops<atomic_load_add_32, "load_add_32">;
+ def ATOMIC_LOAD_SUB_I8 : Atomic2Ops<atomic_load_sub_8, "load_sub_8">;
+ def ATOMIC_LOAD_SUB_I16 : Atomic2Ops<atomic_load_sub_16, "load_sub_16">;
+ def ATOMIC_LOAD_SUB_I32 : Atomic2Ops<atomic_load_sub_32, "load_sub_32">;
+ def ATOMIC_LOAD_AND_I8 : Atomic2Ops<atomic_load_and_8, "load_and_8">;
+ def ATOMIC_LOAD_AND_I16 : Atomic2Ops<atomic_load_and_16, "load_and_16">;
+ def ATOMIC_LOAD_AND_I32 : Atomic2Ops<atomic_load_and_32, "load_and_32">;
+ def ATOMIC_LOAD_OR_I8 : Atomic2Ops<atomic_load_or_8, "load_or_8">;
+ def ATOMIC_LOAD_OR_I16 : Atomic2Ops<atomic_load_or_16, "load_or_16">;
+ def ATOMIC_LOAD_OR_I32 : Atomic2Ops<atomic_load_or_32, "load_or_32">;
+ def ATOMIC_LOAD_XOR_I8 : Atomic2Ops<atomic_load_xor_8, "load_xor_8">;
+ def ATOMIC_LOAD_XOR_I16 : Atomic2Ops<atomic_load_xor_16, "load_xor_16">;
+ def ATOMIC_LOAD_XOR_I32 : Atomic2Ops<atomic_load_xor_32, "load_xor_32">;
+ def ATOMIC_LOAD_NAND_I8 : Atomic2Ops<atomic_load_nand_8, "load_nand_8">;
+ def ATOMIC_LOAD_NAND_I16 : Atomic2Ops<atomic_load_nand_16, "load_nand_16">;
+ def ATOMIC_LOAD_NAND_I32 : Atomic2Ops<atomic_load_nand_32, "load_nand_32">;
+
+ def ATOMIC_SWAP_I8 : Atomic2Ops<atomic_swap_8, "swap_8">;
+ def ATOMIC_SWAP_I16 : Atomic2Ops<atomic_swap_16, "swap_16">;
+ def ATOMIC_SWAP_I32 : Atomic2Ops<atomic_swap_32, "swap_32">;
+
+ def ATOMIC_CMP_SWAP_I8 : AtomicCmpSwap<atomic_cmp_swap_8, "8">;
+ def ATOMIC_CMP_SWAP_I16 : AtomicCmpSwap<atomic_cmp_swap_16, "16">;
+ def ATOMIC_CMP_SWAP_I32 : AtomicCmpSwap<atomic_cmp_swap_32, "32">;
}
//===----------------------------------------------------------------------===//
@@ -544,26 +630,26 @@ let usesCustomInserter = 1 in {
//===----------------------------------------------------------------------===//
/// Arithmetic Instructions (ALU Immediate)
-def ADDiu : ArithI<0x09, "addiu", add, simm16, immSExt16>;
-def ADDi : ArithOverflowI<0x08, "addi", add, simm16, immSExt16>;
-def SLTi : SetCC_I<0x0a, "slti", setlt, simm16, immSExt16>;
-def SLTiu : SetCC_I<0x0b, "sltiu", setult, simm16, immSExt16>;
-def ANDi : LogicI<0x0c, "andi", and>;
-def ORi : LogicI<0x0d, "ori", or>;
-def XORi : LogicI<0x0e, "xori", xor>;
+def ADDiu : ArithLogicI<0x09, "addiu", add, simm16, immSExt16, CPURegs>;
+def ADDi : ArithOverflowI<0x08, "addi", add, simm16, immSExt16, CPURegs>;
+def SLTi : SetCC_I<0x0a, "slti", setlt, simm16, immSExt16, CPURegs>;
+def SLTiu : SetCC_I<0x0b, "sltiu", setult, simm16, immSExt16, CPURegs>;
+def ANDi : ArithLogicI<0x0c, "andi", and, uimm16, immZExt16, CPURegs>;
+def ORi : ArithLogicI<0x0d, "ori", or, uimm16, immZExt16, CPURegs>;
+def XORi : ArithLogicI<0x0e, "xori", xor, uimm16, immZExt16, CPURegs>;
def LUi : LoadUpper<0x0f, "lui">;
/// Arithmetic Instructions (3-Operand, R-Type)
-def ADDu : ArithR<0x00, 0x21, "addu", add, IIAlu, 1>;
-def SUBu : ArithR<0x00, 0x23, "subu", sub, IIAlu>;
-def ADD : ArithOverflowR<0x00, 0x20, "add", 1>;
-def SUB : ArithOverflowR<0x00, 0x22, "sub">;
-def SLT : SetCC_R<0x00, 0x2a, "slt", setlt>;
-def SLTu : SetCC_R<0x00, 0x2b, "sltu", setult>;
-def AND : LogicR<0x24, "and", and>;
-def OR : LogicR<0x25, "or", or>;
-def XOR : LogicR<0x26, "xor", xor>;
-def NOR : LogicNOR<0x00, 0x27, "nor">;
+def ADDu : ArithLogicR<0x00, 0x21, "addu", add, IIAlu, CPURegs, 1>;
+def SUBu : ArithLogicR<0x00, 0x23, "subu", sub, IIAlu, CPURegs>;
+def ADD : ArithOverflowR<0x00, 0x20, "add", IIAlu, CPURegs, 1>;
+def SUB : ArithOverflowR<0x00, 0x22, "sub", IIAlu, CPURegs>;
+def SLT : SetCC_R<0x00, 0x2a, "slt", setlt, CPURegs>;
+def SLTu : SetCC_R<0x00, 0x2b, "sltu", setult, CPURegs>;
+def AND : ArithLogicR<0x00, 0x24, "and", and, IIAlu, CPURegs, 1>;
+def OR : ArithLogicR<0x00, 0x25, "or", or, IIAlu, CPURegs, 1>;
+def XOR : ArithLogicR<0x00, 0x26, "xor", xor, IIAlu, CPURegs, 1>;
+def NOR : LogicNOR<0x00, 0x27, "nor", CPURegs>;
/// Shift Instructions
def SLL : LogicR_shift_rotate_imm<0x00, 0x00, "sll", shl>;
@@ -574,45 +660,58 @@ def SRLV : LogicR_shift_rotate_reg<0x06, 0x00, "srlv", srl>;
def SRAV : LogicR_shift_rotate_reg<0x07, 0x00, "srav", sra>;
// Rotate Instructions
-let Predicates = [IsMips32r2] in {
+let Predicates = [HasMips32r2] in {
def ROTR : LogicR_shift_rotate_imm<0x02, 0x01, "rotr", rotr>;
def ROTRV : LogicR_shift_rotate_reg<0x06, 0x01, "rotrv", rotr>;
}
/// Load and Store Instructions
-def LB : LoadM<0x20, "lb", sextloadi8>;
-def LBu : LoadM<0x24, "lbu", zextloadi8>;
-def LH : LoadM<0x21, "lh", sextloadi16>;
-def LHu : LoadM<0x25, "lhu", zextloadi16>;
-def LW : LoadM<0x23, "lw", load>;
-def SB : StoreM<0x28, "sb", truncstorei8>;
-def SH : StoreM<0x29, "sh", truncstorei16>;
-def SW : StoreM<0x2b, "sw", store>;
+/// aligned
+defm LB : LoadM32<0x20, "lb", sextloadi8>;
+defm LBu : LoadM32<0x24, "lbu", zextloadi8>;
+defm LH : LoadM32<0x21, "lh", sextloadi16_a>;
+defm LHu : LoadM32<0x25, "lhu", zextloadi16_a>;
+defm LW : LoadM32<0x23, "lw", load_a>;
+defm SB : StoreM32<0x28, "sb", truncstorei8>;
+defm SH : StoreM32<0x29, "sh", truncstorei16_a>;
+defm SW : StoreM32<0x2b, "sw", store_a>;
+
+/// unaligned
+defm ULH : LoadM32<0x21, "ulh", sextloadi16_u, 1>;
+defm ULHu : LoadM32<0x25, "ulhu", zextloadi16_u, 1>;
+defm ULW : LoadM32<0x23, "ulw", load_u, 1>;
+defm USH : StoreM32<0x29, "ush", truncstorei16_u, 1>;
+defm USW : StoreM32<0x2b, "usw", store_u, 1>;
+
+let hasSideEffects = 1 in
+def SYNC : MipsInst<(outs), (ins i32imm:$stype), "sync $stype",
+ [(MipsSync imm:$stype)], NoItinerary>
+{
+ let opcode = 0;
+ let Inst{25-11} = 0;
+ let Inst{5-0} = 15;
+}
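The new SYNC definition is the MIPS memory barrier (a zero major opcode with funct 0xf), reached through the MipsSync node. For orientation, a sequentially consistent fence is the kind of source-level construct one would expect to lower through it; our example, not something from this patch:

    #include <atomic>

    // A full memory fence; on MIPS this is expected to come out as a
    // `sync` instruction via the MipsSync node defined above.
    void full_fence() {
      std::atomic_thread_fence(std::memory_order_seq_cst);
    }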
/// Load-linked, Store-conditional
-let hasDelaySlot = 1 in
+let mayLoad = 1 in
def LL : FI<0x30, (outs CPURegs:$dst), (ins mem:$addr),
"ll\t$dst, $addr", [], IILoad>;
-let Constraints = "$src = $dst" in
+let mayStore = 1, Constraints = "$src = $dst" in
def SC : FI<0x38, (outs CPURegs:$dst), (ins CPURegs:$src, mem:$addr),
"sc\t$src, $addr", [], IIStore>;
/// Jump and Branch Instructions
def J : JumpFJ<0x02, "j">;
-def JR : JumpFR<0x00, 0x08, "jr">;
+let isIndirectBranch = 1 in
+ def JR : JumpFR<0x00, 0x08, "jr">;
def JAL : JumpLink<0x03, "jal">;
def JALR : JumpLinkReg<0x00, 0x09, "jalr">;
-def BEQ : CBranch<0x04, "beq", seteq>;
-def BNE : CBranch<0x05, "bne", setne>;
-
-let rt=1 in
- def BGEZ : CBranchZero<0x01, "bgez", setge>;
-
-let rt=0 in {
- def BGTZ : CBranchZero<0x07, "bgtz", setgt>;
- def BLEZ : CBranchZero<0x07, "blez", setle>;
- def BLTZ : CBranchZero<0x01, "bltz", setlt>;
-}
+def BEQ : CBranch<0x04, "beq", seteq, CPURegs>;
+def BNE : CBranch<0x05, "bne", setne, CPURegs>;
+def BGEZ : CBranchZero<0x01, 1, "bgez", setge, CPURegs>;
+def BGTZ : CBranchZero<0x07, 0, "bgtz", setgt, CPURegs>;
+def BLEZ : CBranchZero<0x07, 0, "blez", setle, CPURegs>;
+def BLTZ : CBranchZero<0x01, 0, "bltz", setlt, CPURegs>;
def BGEZAL : BranchLink<"bgezal">;
def BLTZAL : BranchLink<"bltzal">;
@@ -639,40 +738,31 @@ let Uses = [LO] in
def MFLO : MoveFromLOHI<0x12, "mflo">;
/// Sign Ext In Register Instructions.
-let Predicates = [HasSEInReg] in {
- let shamt = 0x10, rs = 0 in
- def SEB : SignExtInReg<0x21, "seb", i8>;
-
- let shamt = 0x18, rs = 0 in
- def SEH : SignExtInReg<0x20, "seh", i16>;
-}
+def SEB : SignExtInReg<0x10, "seb", i8>;
+def SEH : SignExtInReg<0x18, "seh", i16>;
/// Count Leading
-def CLZ : CountLeading<0b100000, "clz",
- [(set CPURegs:$dst, (ctlz CPURegs:$src))]>;
-def CLO : CountLeading<0b100001, "clo",
- [(set CPURegs:$dst, (ctlz (not CPURegs:$src)))]>;
+def CLZ : CountLeading<0x20, "clz",
+ [(set CPURegs:$rd, (ctlz CPURegs:$rs))]>;
+def CLO : CountLeading<0x21, "clo",
+ [(set CPURegs:$rd, (ctlz (not CPURegs:$rs)))]>;
/// Byte Swap
-let Predicates = [HasSwap] in {
- let shamt = 0x3, rs = 0 in
- def WSBW : ByteSwap<0x20, "wsbw">;
-}
-
-/// Conditional Move
-def MIPS_CMOV_ZERO : PatLeaf<(i32 0)>;
-def MIPS_CMOV_NZERO : PatLeaf<(i32 1)>;
+def WSBW : ByteSwap<0x20, 0x2, "wsbw">;
// Conditional moves:
// These instructions are expanded in
// MipsISelLowering::EmitInstrWithCustomInserter if the target does not have
// conditional move instructions.
// flag:int, data:int
-let usesCustomInserter = 1, shamt = 0, Constraints = "$F = $dst" in
- class CondMovIntInt<bits<6> funct, string instr_asm> :
- FR<0, funct, (outs CPURegs:$dst),
- (ins CPURegs:$T, CPURegs:$cond, CPURegs:$F),
- !strconcat(instr_asm, "\t$dst, $T, $cond"), [], NoItinerary>;
+class CondMovIntInt<bits<6> funct, string instr_asm> :
+ FR<0, funct, (outs CPURegs:$rd),
+ (ins CPURegs:$rs, CPURegs:$rt, CPURegs:$F),
+ !strconcat(instr_asm, "\t$rd, $rs, $rt"), [], NoItinerary> {
+ let shamt = 0;
+ let usesCustomInserter = 1;
+ let Constraints = "$F = $rd";
+}
def MOVZ_I : CondMovIntInt<0x0a, "movz">;
def MOVN_I : CondMovIntInt<0x0b, "movn">;
@@ -685,13 +775,13 @@ let addr=0 in
// instructions. The same does not happen for stack address copies, so an
// add op with mem ComplexPattern is used and the stack address copy
// can be matched. It's similar to Sparc LEA_ADDRi
-def LEA_ADDiu : EffectiveAddress<"addiu\t$dst, $addr">;
+def LEA_ADDiu : EffectiveAddress<"addiu\t$rt, $addr">;
// DynAlloc node points to dynamically allocated stack space.
// $sp is added to the list of implicitly used registers to prevent dead code
// elimination from removing instructions that modify $sp.
let Uses = [SP] in
-def DynAlloc : EffectiveAddress<"addiu\t$dst, $addr">;
+def DynAlloc : EffectiveAddress<"addiu\t$rt, $addr">;
// MADD*/MSUB*
def MADD : MArithR<0, "madd", MipsMAdd, 1>;
@@ -701,10 +791,25 @@ def MSUBU : MArithR<5, "msubu", MipsMSubu>;
// MUL is an assembly macro in the currently used ISAs. In recent ISAs
// it is a real instruction.
-def MUL : ArithR<0x1c, 0x02, "mul", mul, IIImul, 1>, Requires<[IsMips32]>;
+def MUL : ArithLogicR<0x1c, 0x02, "mul", mul, IIImul, CPURegs, 1>,
+ Requires<[HasMips32]>;
def RDHWR : ReadHardware;
+def EXT : ExtIns<0, "ext", (outs CPURegs:$rt),
+ (ins CPURegs:$rs, uimm16:$pos, uimm16:$sz),
+ [(set CPURegs:$rt,
+ (MipsExt CPURegs:$rs, immZExt5:$pos, immZExt5:$sz))],
+ NoItinerary>;
+
+let Constraints = "$src = $rt" in
+def INS : ExtIns<4, "ins", (outs CPURegs:$rt),
+ (ins CPURegs:$rs, uimm16:$pos, uimm16:$sz, CPURegs:$src),
+ [(set CPURegs:$rt,
+ (MipsIns CPURegs:$rs, immZExt5:$pos, immZExt5:$sz,
+ CPURegs:$src))],
+ NoItinerary>;
+
//===----------------------------------------------------------------------===//
// Arbitrary patterns that map to one or more instructions
//===----------------------------------------------------------------------===//
@@ -738,16 +843,20 @@ def : Pat<(MipsJmpLink (i32 texternalsym:$dst)),
// hi/lo relocs
def : Pat<(MipsHi tglobaladdr:$in), (LUi tglobaladdr:$in)>;
def : Pat<(MipsHi tblockaddress:$in), (LUi tblockaddress:$in)>;
+def : Pat<(MipsLo tglobaladdr:$in), (ADDiu ZERO, tglobaladdr:$in)>;
+def : Pat<(MipsLo tblockaddress:$in), (ADDiu ZERO, tblockaddress:$in)>;
def : Pat<(add CPURegs:$hi, (MipsLo tglobaladdr:$lo)),
(ADDiu CPURegs:$hi, tglobaladdr:$lo)>;
def : Pat<(add CPURegs:$hi, (MipsLo tblockaddress:$lo)),
(ADDiu CPURegs:$hi, tblockaddress:$lo)>;
def : Pat<(MipsHi tjumptable:$in), (LUi tjumptable:$in)>;
+def : Pat<(MipsLo tjumptable:$in), (ADDiu ZERO, tjumptable:$in)>;
def : Pat<(add CPURegs:$hi, (MipsLo tjumptable:$lo)),
(ADDiu CPURegs:$hi, tjumptable:$lo)>;
def : Pat<(MipsHi tconstpool:$in), (LUi tconstpool:$in)>;
+def : Pat<(MipsLo tconstpool:$in), (ADDiu ZERO, tconstpool:$in)>;
def : Pat<(add CPURegs:$hi, (MipsLo tconstpool:$lo)),
(ADDiu CPURegs:$hi, tconstpool:$lo)>;
@@ -763,6 +872,7 @@ def : Pat<(add CPURegs:$gp, (MipsTlsGd tglobaltlsaddr:$in)),
// tprel hi/lo
def : Pat<(MipsTprelHi tglobaltlsaddr:$in), (LUi tglobaltlsaddr:$in)>;
+def : Pat<(MipsTprelLo tglobaltlsaddr:$in), (ADDiu ZERO, tglobaltlsaddr:$in)>;
def : Pat<(add CPURegs:$hi, (MipsTprelLo tglobaltlsaddr:$lo)),
(ADDiu CPURegs:$hi, tglobaltlsaddr:$lo)>;
@@ -784,60 +894,67 @@ def : Pat<(not CPURegs:$in),
// extended load and stores
def : Pat<(extloadi1 addr:$src), (LBu addr:$src)>;
def : Pat<(extloadi8 addr:$src), (LBu addr:$src)>;
-def : Pat<(extloadi16 addr:$src), (LHu addr:$src)>;
+def : Pat<(extloadi16_a addr:$src), (LHu addr:$src)>;
+def : Pat<(extloadi16_u addr:$src), (ULHu addr:$src)>;
// peepholes
def : Pat<(store (i32 0), addr:$dst), (SW ZERO, addr:$dst)>;
// brcond patterns
-def : Pat<(brcond (setne CPURegs:$lhs, 0), bb:$dst),
- (BNE CPURegs:$lhs, ZERO, bb:$dst)>;
-def : Pat<(brcond (seteq CPURegs:$lhs, 0), bb:$dst),
- (BEQ CPURegs:$lhs, ZERO, bb:$dst)>;
-
-def : Pat<(brcond (setge CPURegs:$lhs, CPURegs:$rhs), bb:$dst),
- (BEQ (SLT CPURegs:$lhs, CPURegs:$rhs), ZERO, bb:$dst)>;
-def : Pat<(brcond (setuge CPURegs:$lhs, CPURegs:$rhs), bb:$dst),
- (BEQ (SLTu CPURegs:$lhs, CPURegs:$rhs), ZERO, bb:$dst)>;
-def : Pat<(brcond (setge CPURegs:$lhs, immSExt16:$rhs), bb:$dst),
- (BEQ (SLTi CPURegs:$lhs, immSExt16:$rhs), ZERO, bb:$dst)>;
-def : Pat<(brcond (setuge CPURegs:$lhs, immSExt16:$rhs), bb:$dst),
- (BEQ (SLTiu CPURegs:$lhs, immSExt16:$rhs), ZERO, bb:$dst)>;
-
-def : Pat<(brcond (setle CPURegs:$lhs, CPURegs:$rhs), bb:$dst),
- (BEQ (SLT CPURegs:$rhs, CPURegs:$lhs), ZERO, bb:$dst)>;
-def : Pat<(brcond (setule CPURegs:$lhs, CPURegs:$rhs), bb:$dst),
- (BEQ (SLTu CPURegs:$rhs, CPURegs:$lhs), ZERO, bb:$dst)>;
-
-def : Pat<(brcond CPURegs:$cond, bb:$dst),
- (BNE CPURegs:$cond, ZERO, bb:$dst)>;
+multiclass BrcondPats<RegisterClass RC, Instruction BEQOp, Instruction BNEOp,
+ Instruction SLTOp, Instruction SLTuOp, Instruction SLTiOp,
+ Instruction SLTiuOp, Register ZEROReg> {
+def : Pat<(brcond (i32 (setne RC:$lhs, 0)), bb:$dst),
+ (BNEOp RC:$lhs, ZEROReg, bb:$dst)>;
+def : Pat<(brcond (i32 (seteq RC:$lhs, 0)), bb:$dst),
+ (BEQOp RC:$lhs, ZEROReg, bb:$dst)>;
+
+def : Pat<(brcond (i32 (setge RC:$lhs, RC:$rhs)), bb:$dst),
+          (BEQOp (SLTOp RC:$lhs, RC:$rhs), ZEROReg, bb:$dst)>;
+def : Pat<(brcond (i32 (setuge RC:$lhs, RC:$rhs)), bb:$dst),
+          (BEQOp (SLTuOp RC:$lhs, RC:$rhs), ZEROReg, bb:$dst)>;
+def : Pat<(brcond (i32 (setge RC:$lhs, immSExt16:$rhs)), bb:$dst),
+          (BEQOp (SLTiOp RC:$lhs, immSExt16:$rhs), ZEROReg, bb:$dst)>;
+def : Pat<(brcond (i32 (setuge RC:$lhs, immSExt16:$rhs)), bb:$dst),
+          (BEQOp (SLTiuOp RC:$lhs, immSExt16:$rhs), ZEROReg, bb:$dst)>;
+
+def : Pat<(brcond (i32 (setle RC:$lhs, RC:$rhs)), bb:$dst),
+          (BEQOp (SLTOp RC:$rhs, RC:$lhs), ZEROReg, bb:$dst)>;
+def : Pat<(brcond (i32 (setule RC:$lhs, RC:$rhs)), bb:$dst),
+          (BEQOp (SLTuOp RC:$rhs, RC:$lhs), ZEROReg, bb:$dst)>;
+
+def : Pat<(brcond RC:$cond, bb:$dst),
+ (BNEOp RC:$cond, ZEROReg, bb:$dst)>;
+}
+
+defm : BrcondPats<CPURegs, BEQ, BNE, SLT, SLTu, SLTi, SLTiu, ZERO>;
// select patterns
multiclass MovzPats<RegisterClass RC, Instruction MOVZInst> {
- def : Pat<(select (setge CPURegs:$lhs, CPURegs:$rhs), RC:$T, RC:$F),
+ def : Pat<(select (i32 (setge CPURegs:$lhs, CPURegs:$rhs)), RC:$T, RC:$F),
(MOVZInst RC:$T, (SLT CPURegs:$lhs, CPURegs:$rhs), RC:$F)>;
- def : Pat<(select (setuge CPURegs:$lhs, CPURegs:$rhs), RC:$T, RC:$F),
+ def : Pat<(select (i32 (setuge CPURegs:$lhs, CPURegs:$rhs)), RC:$T, RC:$F),
(MOVZInst RC:$T, (SLTu CPURegs:$lhs, CPURegs:$rhs), RC:$F)>;
- def : Pat<(select (setge CPURegs:$lhs, immSExt16:$rhs), RC:$T, RC:$F),
+ def : Pat<(select (i32 (setge CPURegs:$lhs, immSExt16:$rhs)), RC:$T, RC:$F),
(MOVZInst RC:$T, (SLTi CPURegs:$lhs, immSExt16:$rhs), RC:$F)>;
- def : Pat<(select (setuge CPURegs:$lh, immSExt16:$rh), RC:$T, RC:$F),
+ def : Pat<(select (i32 (setuge CPURegs:$lh, immSExt16:$rh)), RC:$T, RC:$F),
(MOVZInst RC:$T, (SLTiu CPURegs:$lh, immSExt16:$rh), RC:$F)>;
- def : Pat<(select (setle CPURegs:$lhs, CPURegs:$rhs), RC:$T, RC:$F),
+ def : Pat<(select (i32 (setle CPURegs:$lhs, CPURegs:$rhs)), RC:$T, RC:$F),
(MOVZInst RC:$T, (SLT CPURegs:$rhs, CPURegs:$lhs), RC:$F)>;
- def : Pat<(select (setule CPURegs:$lhs, CPURegs:$rhs), RC:$T, RC:$F),
+ def : Pat<(select (i32 (setule CPURegs:$lhs, CPURegs:$rhs)), RC:$T, RC:$F),
(MOVZInst RC:$T, (SLTu CPURegs:$rhs, CPURegs:$lhs), RC:$F)>;
- def : Pat<(select (seteq CPURegs:$lhs, CPURegs:$rhs), RC:$T, RC:$F),
+ def : Pat<(select (i32 (seteq CPURegs:$lhs, CPURegs:$rhs)), RC:$T, RC:$F),
(MOVZInst RC:$T, (XOR CPURegs:$lhs, CPURegs:$rhs), RC:$F)>;
- def : Pat<(select (seteq CPURegs:$lhs, 0), RC:$T, RC:$F),
+ def : Pat<(select (i32 (seteq CPURegs:$lhs, 0)), RC:$T, RC:$F),
(MOVZInst RC:$T, CPURegs:$lhs, RC:$F)>;
}
multiclass MovnPats<RegisterClass RC, Instruction MOVNInst> {
- def : Pat<(select (setne CPURegs:$lhs, CPURegs:$rhs), RC:$T, RC:$F),
+ def : Pat<(select (i32 (setne CPURegs:$lhs, CPURegs:$rhs)), RC:$T, RC:$F),
(MOVNInst RC:$T, (XOR CPURegs:$lhs, CPURegs:$rhs), RC:$F)>;
def : Pat<(select CPURegs:$cond, RC:$T, RC:$F),
(MOVNInst RC:$T, CPURegs:$cond, RC:$F)>;
- def : Pat<(select (setne CPURegs:$lhs, 0), RC:$T, RC:$F),
+ def : Pat<(select (i32 (setne CPURegs:$lhs, 0)), RC:$T, RC:$F),
(MOVNInst RC:$T, CPURegs:$lhs, RC:$F)>;
}
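These multiclasses lower selects onto the MOVZ/MOVN conditional moves: MOVZ copies its source into the destination only when the test register is zero (MOVN when it is nonzero), so an equality test can be fed through XOR, which yields zero exactly when its operands are equal. A small model of the seteq case (illustrative helpers, not backend code):

    #include <cassert>
    #include <cstdint>

    // MOVZ rd, rs, rt: rd gets rs if rt == 0, otherwise rd keeps its value.
    uint32_t movz(uint32_t rd, uint32_t rs, uint32_t rt) {
      return rt == 0 ? rs : rd;
    }

    // select (seteq a, b), T, F  ==>  MOVZ T, (XOR a, b), F
    uint32_t select_eq(uint32_t a, uint32_t b, uint32_t T, uint32_t F) {
      return movz(F, T, a ^ b); // F preloaded into the destination, then MOVZ
    }

    int main() {
      assert(select_eq(3, 3, 10, 20) == 10); // a == b: take T
      assert(select_eq(3, 4, 10, 20) == 20); // a != b: keep F
      return 0;
    }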
@@ -845,30 +962,48 @@ defm : MovzPats<CPURegs, MOVZ_I>;
defm : MovnPats<CPURegs, MOVN_I>;
// setcc patterns
-def : Pat<(seteq CPURegs:$lhs, CPURegs:$rhs),
- (SLTu (XOR CPURegs:$lhs, CPURegs:$rhs), 1)>;
-def : Pat<(setne CPURegs:$lhs, CPURegs:$rhs),
- (SLTu ZERO, (XOR CPURegs:$lhs, CPURegs:$rhs))>;
-
-def : Pat<(setle CPURegs:$lhs, CPURegs:$rhs),
- (XORi (SLT CPURegs:$rhs, CPURegs:$lhs), 1)>;
-def : Pat<(setule CPURegs:$lhs, CPURegs:$rhs),
- (XORi (SLTu CPURegs:$rhs, CPURegs:$lhs), 1)>;
-
-def : Pat<(setgt CPURegs:$lhs, CPURegs:$rhs),
- (SLT CPURegs:$rhs, CPURegs:$lhs)>;
-def : Pat<(setugt CPURegs:$lhs, CPURegs:$rhs),
- (SLTu CPURegs:$rhs, CPURegs:$lhs)>;
-
-def : Pat<(setge CPURegs:$lhs, CPURegs:$rhs),
- (XORi (SLT CPURegs:$lhs, CPURegs:$rhs), 1)>;
-def : Pat<(setuge CPURegs:$lhs, CPURegs:$rhs),
- (XORi (SLTu CPURegs:$lhs, CPURegs:$rhs), 1)>;
-
-def : Pat<(setge CPURegs:$lhs, immSExt16:$rhs),
- (XORi (SLTi CPURegs:$lhs, immSExt16:$rhs), 1)>;
-def : Pat<(setuge CPURegs:$lhs, immSExt16:$rhs),
- (XORi (SLTiu CPURegs:$lhs, immSExt16:$rhs), 1)>;
+multiclass SeteqPats<RegisterClass RC, Instruction SLTiuOp, Instruction XOROp,
+ Instruction SLTuOp, Register ZEROReg> {
+ def : Pat<(seteq RC:$lhs, RC:$rhs),
+ (SLTiuOp (XOROp RC:$lhs, RC:$rhs), 1)>;
+ def : Pat<(setne RC:$lhs, RC:$rhs),
+ (SLTuOp ZEROReg, (XOROp RC:$lhs, RC:$rhs))>;
+}
+
+multiclass SetlePats<RegisterClass RC, Instruction SLTOp, Instruction SLTuOp> {
+ def : Pat<(setle RC:$lhs, RC:$rhs),
+ (XORi (SLTOp RC:$rhs, RC:$lhs), 1)>;
+ def : Pat<(setule RC:$lhs, RC:$rhs),
+ (XORi (SLTuOp RC:$rhs, RC:$lhs), 1)>;
+}
+
+multiclass SetgtPats<RegisterClass RC, Instruction SLTOp, Instruction SLTuOp> {
+ def : Pat<(setgt RC:$lhs, RC:$rhs),
+ (SLTOp RC:$rhs, RC:$lhs)>;
+ def : Pat<(setugt RC:$lhs, RC:$rhs),
+ (SLTuOp RC:$rhs, RC:$lhs)>;
+}
+
+multiclass SetgePats<RegisterClass RC, Instruction SLTOp, Instruction SLTuOp> {
+ def : Pat<(setge RC:$lhs, RC:$rhs),
+ (XORi (SLTOp RC:$lhs, RC:$rhs), 1)>;
+ def : Pat<(setuge RC:$lhs, RC:$rhs),
+ (XORi (SLTuOp RC:$lhs, RC:$rhs), 1)>;
+}
+
+multiclass SetgeImmPats<RegisterClass RC, Instruction SLTiOp,
+ Instruction SLTiuOp> {
+ def : Pat<(setge RC:$lhs, immSExt16:$rhs),
+ (XORi (SLTiOp RC:$lhs, immSExt16:$rhs), 1)>;
+ def : Pat<(setuge RC:$lhs, immSExt16:$rhs),
+ (XORi (SLTiuOp RC:$lhs, immSExt16:$rhs), 1)>;
+}
+
+defm : SeteqPats<CPURegs, SLTiu, XOR, SLTu, ZERO>;
+defm : SetlePats<CPURegs, SLT, SLTu>;
+defm : SetgtPats<CPURegs, SLT, SLTu>;
+defm : SetgePats<CPURegs, SLT, SLTu>;
+defm : SetgeImmPats<CPURegs, SLTi, SLTiu>;
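The setcc multiclasses parameterize the old hard-coded patterns over a register class and its compare instructions, evidently so that a 64-bit variant can instantiate them later. They all rest on a few SLT/SLTu identities: a == b iff (a ^ b) <u 1, a != b iff 0 <u (a ^ b), a <= b iff !(b < a), a > b iff b < a, and a >= b iff !(a < b). A quick sanity check of those identities:

    #include <cassert>
    #include <cstdint>

    int slt(int32_t a, int32_t b)    { return a < b; }   // SLT
    int sltu(uint32_t a, uint32_t b) { return a < b; }   // SLTu

    void check(int32_t a, int32_t b) {
      assert((a == b) == sltu(a ^ b, 1));       // SeteqPats: seteq
      assert((a != b) == sltu(0, a ^ b));       // SeteqPats: setne
      assert((a <= b) == (slt(b, a) ^ 1));      // SetlePats: XORi(SLT, 1)
      assert((a >  b) == slt(b, a));            // SetgtPats
      assert((a >= b) == (slt(a, b) ^ 1));      // SetgePats
    }

    int main() {
      int32_t vals[] = {-5, -1, 0, 1, 7};
      for (int32_t a : vals)
        for (int32_t b : vals)
          check(a, b);
      return 0;
    }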
// select MipsDynAlloc
def : Pat<(MipsDynAlloc addr:$f), (DynAlloc addr:$f)>;
@@ -878,4 +1013,5 @@ def : Pat<(MipsDynAlloc addr:$f), (DynAlloc addr:$f)>;
//===----------------------------------------------------------------------===//
include "MipsInstrFPU.td"
+include "Mips64InstrInfo.td"
diff --git a/lib/Target/Mips/MipsJITInfo.cpp b/lib/Target/Mips/MipsJITInfo.cpp
new file mode 100644
index 0000000..28c2b48
--- /dev/null
+++ b/lib/Target/Mips/MipsJITInfo.cpp
@@ -0,0 +1,230 @@
+//===- MipsJITInfo.cpp - Implement the JIT interfaces for the Mips target -===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the JIT interfaces for the Mips target.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "jit"
+#include "MipsJITInfo.h"
+#include "MipsInstrInfo.h"
+#include "MipsRelocations.h"
+#include "MipsSubtarget.h"
+#include "llvm/Function.h"
+#include "llvm/CodeGen/JITCodeEmitter.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/Memory.h"
+#include <cstdlib>
+using namespace llvm;
+
+void MipsJITInfo::replaceMachineCodeForFunction(void *Old, void *New) {
+ report_fatal_error("MipsJITInfo::replaceMachineCodeForFunction");
+}
+
+/// JITCompilerFunction - This contains the address of the JIT function used to
+/// compile a function lazily.
+static TargetJITInfo::JITCompilerFn JITCompilerFunction;
+
+// Get the ASMPREFIX for the current host. This is often '_'.
+#ifndef __USER_LABEL_PREFIX__
+#define __USER_LABEL_PREFIX__
+#endif
+#define GETASMPREFIX2(X) #X
+#define GETASMPREFIX(X) GETASMPREFIX2(X)
+#define ASMPREFIX GETASMPREFIX(__USER_LABEL_PREFIX__)
+
+// CompilationCallback stub - We can't use a C function with inline assembly in
+// it, because the prolog/epilog inserted by GCC won't work for us. Instead,
+// write our own wrapper, which does things our way, so we have complete control
+// over register saving and restoring. This code saves registers, calls
+// MipsCompilationCallbackC and restores registers.
+extern "C" {
+#if defined (__mips__)
+void MipsCompilationCallback();
+
+ asm(
+ ".text\n"
+ ".align 2\n"
+ ".globl " ASMPREFIX "MipsCompilationCallback\n"
+ ASMPREFIX "MipsCompilationCallback:\n"
+ ".ent " ASMPREFIX "MipsCompilationCallback\n"
+ ".frame $29, 32, $31\n"
+ ".set noreorder\n"
+ ".cpload $t9\n"
+
+ "addiu $sp, $sp, -60\n"
+ ".cprestore 16\n"
+
+ // Save the argument registers a0, a1, a2, a3, f12 and f14, since they may
+ // hold arguments for the real target function. We have to act as if this
+ // whole compilation callback doesn't exist as far as the caller is
+ // concerned. We also save the ra register, which holds the original return
+ // address, and the t8 register, which holds the address of the end of the
+ // function stub.
+ "sw $a0, 20($sp)\n"
+ "sw $a1, 24($sp)\n"
+ "sw $a2, 28($sp)\n"
+ "sw $a3, 32($sp)\n"
+ "sw $ra, 36($sp)\n"
+ "sw $t8, 40($sp)\n"
+ "sdc1 $f12, 44($sp)\n"
+ "sdc1 $f14, 52($sp)\n"
+
+ // t8 points at the end of the function stub. Pass the beginning of the stub
+ // to MipsCompilationCallbackC.
+ "addiu $a0, $t8, -16\n"
+ "jal " ASMPREFIX "MipsCompilationCallbackC\n"
+ "nop\n"
+
+ // Restore registers.
+ "lw $a0, 20($sp)\n"
+ "lw $a1, 24($sp)\n"
+ "lw $a2, 28($sp)\n"
+ "lw $a3, 32($sp)\n"
+ "lw $ra, 36($sp)\n"
+ "lw $t8, 40($sp)\n"
+ "ldc1 $f12, 44($sp)\n"
+ "ldc1 $f14, 52($sp)\n"
+ "addiu $sp, $sp, 60\n"
+
+ // Jump to the (newly modified) stub to invoke the real function.
+ "addiu $t8, $t8, -16\n"
+ "jr $t8\n"
+ "nop\n"
+
+ ".set reorder\n"
+ ".end " ASMPREFIX "MipsCompilationCallback\n"
+ );
+#else // host != Mips
+ void MipsCompilationCallback() {
+ llvm_unreachable(
+ "Cannot call MipsCompilationCallback() on a non-Mips arch!");
+ }
+#endif
+}
+
+/// MipsCompilationCallbackC - This is the target-specific function invoked
+/// by the function stub when we did not know the real target of a call.
+/// This function must locate the start of the stub or call site and pass
+/// it into the JIT compiler function.
+extern "C" void MipsCompilationCallbackC(intptr_t StubAddr) {
+ // Get the address of the compiled code for this function.
+ intptr_t NewVal = (intptr_t) JITCompilerFunction((void*) StubAddr);
+
+ // Rewrite the function stub so that we don't end up here every time we
+ // execute the call. We're replacing the first four instructions of the
+ // stub with code that jumps to the compiled function:
+ // lui $t9, %hi(NewVal)
+ // addiu $t9, $t9, %lo(NewVal)
+ // jr $t9
+ // nop
+
+ int Hi = ((unsigned)NewVal & 0xffff0000) >> 16;
+ if ((NewVal & 0x8000) != 0)
+ Hi++;
+ int Lo = (int)(NewVal & 0xffff);
+
+ *(intptr_t *)(StubAddr) = 0xf << 26 | 25 << 16 | Hi;
+ *(intptr_t *)(StubAddr + 4) = 9 << 26 | 25 << 21 | 25 << 16 | Lo;
+ *(intptr_t *)(StubAddr + 8) = 25 << 21 | 8;
+ *(intptr_t *)(StubAddr + 12) = 0;
+
+ sys::Memory::InvalidateInstructionCache((void*) StubAddr, 16);
+}
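The Hi/Lo computation above compensates for the fact that addiu sign-extends its 16-bit immediate: the stub rebuilds the address as (Hi << 16) + sign_extend(Lo), so when bit 15 of the address is set, Lo contributes a negative value and Hi must be pre-incremented. A sketch of the round trip:

    #include <cassert>
    #include <cstdint>

    // Split a 32-bit address into %hi/%lo halves the way the callback does,
    // then rebuild it the way lui/addiu would at run time.
    void check_hilo(uint32_t addr) {
      uint32_t hi = addr >> 16;
      if (addr & 0x8000)          // low half will sign-extend negative,
        ++hi;                     // so pre-increment the high half
      int32_t lo = (int16_t)(addr & 0xffff);  // addiu sign-extends
      assert((uint32_t)((hi << 16) + lo) == addr);
    }

    int main() {
      check_hilo(0x00400000);   // low half positive: no adjustment
      check_hilo(0x00408000);   // low half negative: hi becomes 0x0041
      check_hilo(0xffffffff);
      return 0;
    }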
+
+TargetJITInfo::LazyResolverFn MipsJITInfo::getLazyResolverFunction(
+ JITCompilerFn F) {
+ JITCompilerFunction = F;
+ return MipsCompilationCallback;
+}
+
+TargetJITInfo::StubLayout MipsJITInfo::getStubLayout() {
+ // The stub contains 4 4-byte instructions, aligned at 4 bytes. See
+ // emitFunctionStub for details.
+ StubLayout Result = { 4*4, 4 };
+ return Result;
+}
+
+void *MipsJITInfo::emitFunctionStub(const Function* F, void *Fn,
+ JITCodeEmitter &JCE) {
+ JCE.emitAlignment(4);
+ void *Addr = (void*) (JCE.getCurrentPCValue());
+ if (!sys::Memory::setRangeWritable(Addr, 16))
+ llvm_unreachable("ERROR: Unable to mark stub writable.");
+
+ intptr_t EmittedAddr;
+ if (Fn != (void*)(intptr_t)MipsCompilationCallback)
+ EmittedAddr = (intptr_t)Fn;
+ else
+ EmittedAddr = (intptr_t)MipsCompilationCallback;
+
+ int Hi = ((unsigned)EmittedAddr & 0xffff0000) >> 16;
+ if ((EmittedAddr & 0x8000) != 0)
+ Hi++;
+ int Lo = (int)(EmittedAddr & 0xffff);
+
+ // lui t9, %hi(EmittedAddr)
+ // addiu t9, t9, %lo(EmittedAddr)
+ // jalr t8, t9
+ // nop
+ JCE.emitWordLE(0xf << 26 | 25 << 16 | Hi);
+ JCE.emitWordLE(9 << 26 | 25 << 21 | 25 << 16 | Lo);
+ JCE.emitWordLE(25 << 21 | 24 << 11 | 9);
+ JCE.emitWordLE(0);
+
+ sys::Memory::InvalidateInstructionCache(Addr, 16);
+ if (!sys::Memory::setRangeExecutable(Addr, 16))
+ llvm_unreachable("ERROR: Unable to mark stub executable.");
+
+ return Addr;
+}
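The emitWordLE literals above are hand-assembled MIPS machine words: lui and addiu use the I-type layout (opcode, rs, rt, 16-bit immediate) and jalr uses the R-type layout with funct 9, with $t9 = GPR 25 and $t8 = GPR 24 as in the stub comment. A small decomposition of those literals (encoder helpers are illustrative):

    #include <cassert>
    #include <cstdint>

    // I-type: opcode(6) | rs(5) | rt(5) | imm(16)
    uint32_t iType(unsigned op, unsigned rs, unsigned rt, uint16_t imm) {
      return op << 26 | rs << 21 | rt << 16 | imm;
    }

    // R-type: 0(6) | rs(5) | rt(5) | rd(5) | shamt(5) | funct(6)
    uint32_t rType(unsigned rs, unsigned rt, unsigned rd, unsigned funct) {
      return rs << 21 | rt << 16 | rd << 11 | funct;
    }

    int main() {
      const unsigned T8 = 24, T9 = 25;
      uint16_t Hi = 0x1234, Lo = 0x5678;
      // lui $t9, Hi
      assert(iType(0x0f, 0, T9, Hi) == (0xfu << 26 | 25 << 16 | Hi));
      // addiu $t9, $t9, Lo
      assert(iType(0x09, T9, T9, Lo) == (9u << 26 | 25 << 21 | 25 << 16 | Lo));
      // jalr $t8, $t9
      assert(rType(T9, 0, T8, 9) == (25u << 21 | 24 << 11 | 9));
      return 0;
    }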
+
+/// relocate - Before the JIT can run a block of code that has been emitted,
+/// it must rewrite the code to contain the actual addresses of any
+/// referenced global symbols.
+void MipsJITInfo::relocate(void *Function, MachineRelocation *MR,
+ unsigned NumRelocs, unsigned char* GOTBase) {
+ for (unsigned i = 0; i != NumRelocs; ++i, ++MR) {
+
+ void *RelocPos = (char*) Function + MR->getMachineCodeOffset();
+ intptr_t ResultPtr = (intptr_t) MR->getResultPointer();
+
+ switch ((Mips::RelocationType) MR->getRelocationType()) {
+ case Mips::reloc_mips_branch:
+ ResultPtr = (((ResultPtr - (intptr_t) RelocPos) - 4) >> 2) & 0xffff;
+ *((unsigned*) RelocPos) |= (unsigned) ResultPtr;
+ break;
+
+ case Mips::reloc_mips_26:
+ ResultPtr = (ResultPtr & 0x0fffffff) >> 2;
+ *((unsigned*) RelocPos) |= (unsigned) ResultPtr;
+ break;
+
+ case Mips::reloc_mips_hi:
+ ResultPtr = ResultPtr >> 16;
+ if ((((intptr_t) (MR->getResultPointer()) & 0xffff) >> 15) == 1) {
+ ResultPtr += 1;
+ }
+ *((unsigned*) RelocPos) |= (unsigned) ResultPtr;
+ break;
+
+ case Mips::reloc_mips_lo:
+ ResultPtr = ResultPtr & 0xffff;
+ *((unsigned*) RelocPos) |= (unsigned) ResultPtr;
+ break;
+
+ default:
+ llvm_unreachable("ERROR: Unknown Mips relocation.");
+ }
+ }
+}
diff --git a/lib/Target/Mips/MipsJITInfo.h b/lib/Target/Mips/MipsJITInfo.h
new file mode 100644
index 0000000..41f32a3
--- /dev/null
+++ b/lib/Target/Mips/MipsJITInfo.h
@@ -0,0 +1,70 @@
+//===- MipsJITInfo.h - Mips implementation of the JIT interface -*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the declaration of the MipsJITInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MIPSJITINFO_H
+#define MIPSJITINFO_H
+
+#include "MipsMachineFunction.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineJumpTableInfo.h"
+#include "llvm/Target/TargetJITInfo.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallVector.h"
+
+namespace llvm {
+class MipsTargetMachine;
+
+class MipsJITInfo : public TargetJITInfo {
+
+ bool IsPIC;
+
+ public:
+ explicit MipsJITInfo() :
+ IsPIC(false) {}
+
+ /// replaceMachineCodeForFunction - Make it so that calling the function
+ /// whose machine code is at OLD turns into a call to NEW, perhaps by
+ /// overwriting OLD with a branch to NEW. This is used for self-modifying
+ /// code.
+ ///
+ virtual void replaceMachineCodeForFunction(void *Old, void *New);
+
+ // getStubLayout - Returns the size and alignment of the largest call stub
+ // on Mips.
+ virtual StubLayout getStubLayout();
+
+ /// emitFunctionStub - Use the specified JITCodeEmitter object to emit a
+ /// small native function that simply calls the function at the specified
+ /// address.
+ virtual void *emitFunctionStub(const Function* F, void *Fn,
+ JITCodeEmitter &JCE);
+
+ /// getLazyResolverFunction - Expose the lazy resolver to the JIT.
+ virtual LazyResolverFn getLazyResolverFunction(JITCompilerFn);
+
+ /// relocate - Before the JIT can run a block of code that has been emitted,
+ /// it must rewrite the code to contain the actual addresses of any
+ /// referenced global symbols.
+ virtual void relocate(void *Function, MachineRelocation *MR,
+ unsigned NumRelocs, unsigned char* GOTBase);
+
+ /// Initialize - Initialize internal state for the function being JITted.
+ void Initialize(const MachineFunction &MF, bool isPIC) {
+ IsPIC = isPIC;
+ }
+
+};
+}
+
+#endif
diff --git a/lib/Target/Mips/MipsMCInstLower.cpp b/lib/Target/Mips/MipsMCInstLower.cpp
index f5cc3aa..608a7d2 100644
--- a/lib/Target/Mips/MipsMCInstLower.cpp
+++ b/lib/Target/Mips/MipsMCInstLower.cpp
@@ -29,10 +29,10 @@ MipsMCInstLower::MipsMCInstLower(Mangler *mang, const MachineFunction &mf,
: Ctx(mf.getContext()), Mang(mang), AsmPrinter(asmprinter) {}
MCOperand MipsMCInstLower::LowerSymbolOperand(const MachineOperand &MO,
- MachineOperandType MOTy) const {
+ MachineOperandType MOTy,
+ unsigned Offset) const {
MipsMCSymbolRefExpr::VariantKind Kind;
const MCSymbol *Symbol;
- int Offset = 0;
switch(MO.getTargetFlags()) {
default: assert(0 && "Invalid target flag!");
@@ -46,6 +46,11 @@ MCOperand MipsMCInstLower::LowerSymbolOperand(const MachineOperand &MO,
case MipsII::MO_GOTTPREL: Kind = MipsMCSymbolRefExpr::VK_Mips_GOTTPREL; break;
case MipsII::MO_TPREL_HI: Kind = MipsMCSymbolRefExpr::VK_Mips_TPREL_HI; break;
case MipsII::MO_TPREL_LO: Kind = MipsMCSymbolRefExpr::VK_Mips_TPREL_LO; break;
+ case MipsII::MO_GPOFF_HI: Kind = MipsMCSymbolRefExpr::VK_Mips_GPOFF_HI; break;
+ case MipsII::MO_GPOFF_LO: Kind = MipsMCSymbolRefExpr::VK_Mips_GPOFF_LO; break;
+ case MipsII::MO_GOT_DISP: Kind = MipsMCSymbolRefExpr::VK_Mips_GOT_DISP; break;
+ case MipsII::MO_GOT_PAGE: Kind = MipsMCSymbolRefExpr::VK_Mips_GOT_PAGE; break;
+ case MipsII::MO_GOT_OFST: Kind = MipsMCSymbolRefExpr::VK_Mips_GOT_OFST; break;
}
switch (MOTy) {
@@ -72,7 +77,7 @@ MCOperand MipsMCInstLower::LowerSymbolOperand(const MachineOperand &MO,
case MachineOperand::MO_ConstantPoolIndex:
Symbol = AsmPrinter.GetCPISymbol(MO.getIndex());
if (MO.getOffset())
- Offset = MO.getOffset();
+ Offset += MO.getOffset();
break;
default:
@@ -83,36 +88,39 @@ MCOperand MipsMCInstLower::LowerSymbolOperand(const MachineOperand &MO,
Ctx));
}
+MCOperand MipsMCInstLower::LowerOperand(const MachineOperand& MO) const {
+ MachineOperandType MOTy = MO.getType();
+
+ switch (MOTy) {
+ default:
+ assert(0 && "unknown operand type");
+ break;
+ case MachineOperand::MO_Register:
+ // Ignore all implicit register operands.
+ if (MO.isImplicit()) break;
+ return MCOperand::CreateReg(MO.getReg());
+ case MachineOperand::MO_Immediate:
+ return MCOperand::CreateImm(MO.getImm());
+ case MachineOperand::MO_MachineBasicBlock:
+ case MachineOperand::MO_GlobalAddress:
+ case MachineOperand::MO_ExternalSymbol:
+ case MachineOperand::MO_JumpTableIndex:
+ case MachineOperand::MO_ConstantPoolIndex:
+ case MachineOperand::MO_BlockAddress:
+ return LowerSymbolOperand(MO, MOTy, 0);
+ }
+
+ return MCOperand();
+}
+
void MipsMCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const {
OutMI.setOpcode(MI->getOpcode());
for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
const MachineOperand &MO = MI->getOperand(i);
- MCOperand MCOp;
- MachineOperandType MOTy = MO.getType();
+ MCOperand MCOp = LowerOperand(MO);
- switch (MOTy) {
- default:
- MI->dump();
- llvm_unreachable("unknown operand type");
- case MachineOperand::MO_Register:
- // Ignore all implicit register operands.
- if (MO.isImplicit()) continue;
- MCOp = MCOperand::CreateReg(MO.getReg());
- break;
- case MachineOperand::MO_Immediate:
- MCOp = MCOperand::CreateImm(MO.getImm());
- break;
- case MachineOperand::MO_MachineBasicBlock:
- case MachineOperand::MO_GlobalAddress:
- case MachineOperand::MO_ExternalSymbol:
- case MachineOperand::MO_JumpTableIndex:
- case MachineOperand::MO_ConstantPoolIndex:
- case MachineOperand::MO_BlockAddress:
- MCOp = LowerSymbolOperand(MO, MOTy);
- break;
- }
-
- OutMI.addOperand(MCOp);
+ if (MCOp.isValid())
+ OutMI.addOperand(MCOp);
}
}
diff --git a/lib/Target/Mips/MipsMCInstLower.h b/lib/Target/Mips/MipsMCInstLower.h
index ec5201b..223f23a 100644
--- a/lib/Target/Mips/MipsMCInstLower.h
+++ b/lib/Target/Mips/MipsMCInstLower.h
@@ -1,4 +1,4 @@
-//===-- MipsMCInstLower.h - Lower MachineInstr to MCInst -------------------===//
+//===-- MipsMCInstLower.h - Lower MachineInstr to MCInst -------------------==//
//
// The LLVM Compiler Infrastructure
//
@@ -36,7 +36,8 @@ public:
void Lower(const MachineInstr *MI, MCInst &OutMI) const;
private:
MCOperand LowerSymbolOperand(const MachineOperand &MO,
- MachineOperandType MOTy) const;
+ MachineOperandType MOTy, unsigned Offset) const;
+ MCOperand LowerOperand(const MachineOperand& MO) const;
};
}
diff --git a/lib/Target/Mips/MipsMCSymbolRefExpr.cpp b/lib/Target/Mips/MipsMCSymbolRefExpr.cpp
index 9a2bdae..a0a242c 100644
--- a/lib/Target/Mips/MipsMCSymbolRefExpr.cpp
+++ b/lib/Target/Mips/MipsMCSymbolRefExpr.cpp
@@ -33,6 +33,11 @@ void MipsMCSymbolRefExpr::PrintImpl(raw_ostream &OS) const {
case VK_Mips_GOTTPREL: OS << "%gottprel("; break;
case VK_Mips_TPREL_HI: OS << "%tprel_hi("; break;
case VK_Mips_TPREL_LO: OS << "%tprel_lo("; break;
+ case VK_Mips_GPOFF_HI: OS << "%hi(%neg(%gp_rel("; break;
+ case VK_Mips_GPOFF_LO: OS << "%lo(%neg(%gp_rel("; break;
+ case VK_Mips_GOT_DISP: OS << "%got_disp("; break;
+ case VK_Mips_GOT_PAGE: OS << "%got_page("; break;
+ case VK_Mips_GOT_OFST: OS << "%got_ofst("; break;
}
OS << *Symbol;
@@ -43,7 +48,9 @@ void MipsMCSymbolRefExpr::PrintImpl(raw_ostream &OS) const {
OS << Offset;
}
- if (Kind != VK_Mips_None)
+ if (Kind == VK_Mips_GPOFF_HI || Kind == VK_Mips_GPOFF_LO)
+ OS << ")))";
+ else if (Kind != VK_Mips_None)
OS << ')';
}
diff --git a/lib/Target/Mips/MipsMCSymbolRefExpr.h b/lib/Target/Mips/MipsMCSymbolRefExpr.h
index 3e69596..55e85a7 100644
--- a/lib/Target/Mips/MipsMCSymbolRefExpr.h
+++ b/lib/Target/Mips/MipsMCSymbolRefExpr.h
@@ -25,7 +25,12 @@ public:
VK_Mips_TLSGD,
VK_Mips_GOTTPREL,
VK_Mips_TPREL_HI,
- VK_Mips_TPREL_LO
+ VK_Mips_TPREL_LO,
+ VK_Mips_GPOFF_HI,
+ VK_Mips_GPOFF_LO,
+ VK_Mips_GOT_DISP,
+ VK_Mips_GOT_PAGE,
+ VK_Mips_GOT_OFST
};
private:
diff --git a/lib/Target/Mips/MipsMachineFunction.h b/lib/Target/Mips/MipsMachineFunction.h
index dbb7a67..bc30b6b 100644
--- a/lib/Target/Mips/MipsMachineFunction.h
+++ b/lib/Target/Mips/MipsMachineFunction.h
@@ -51,16 +51,12 @@ private:
mutable int DynAllocFI; // Frame index of dynamically allocated stack area.
unsigned MaxCallFrameSize;
- /// AtomicFrameIndex - To implement atomic.swap and atomic.cmp.swap
- /// intrinsics, it is necessary to use a temporary stack location.
- /// This field holds the frame index of this location.
- int AtomicFrameIndex;
public:
MipsFunctionInfo(MachineFunction& MF)
: MF(MF), SRetReturnReg(0), GlobalBaseReg(0),
VarArgsFrameIndex(0), InArgFIRange(std::make_pair(-1, 0)),
OutArgFIRange(std::make_pair(-1, 0)), GPFI(0), DynAllocFI(0),
- MaxCallFrameSize(0), AtomicFrameIndex(-1)
+ MaxCallFrameSize(0)
{}
bool isInArgFI(int FI) const {
@@ -104,9 +100,6 @@ public:
unsigned getMaxCallFrameSize() const { return MaxCallFrameSize; }
void setMaxCallFrameSize(unsigned S) { MaxCallFrameSize = S; }
-
- int getAtomicFrameIndex() const { return AtomicFrameIndex; }
- void setAtomicFrameIndex(int Index) { AtomicFrameIndex = Index; }
};
} // end of namespace llvm
diff --git a/lib/Target/Mips/MipsRegisterInfo.cpp b/lib/Target/Mips/MipsRegisterInfo.cpp
index 24390da..f8c0fda 100644
--- a/lib/Target/Mips/MipsRegisterInfo.cpp
+++ b/lib/Target/Mips/MipsRegisterInfo.cpp
@@ -24,7 +24,6 @@
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
-#include "llvm/CodeGen/MachineLocation.h"
#include "llvm/Target/TargetFrameLowering.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
@@ -44,7 +43,7 @@ using namespace llvm;
MipsRegisterInfo::MipsRegisterInfo(const MipsSubtarget &ST,
const TargetInstrInfo &tii)
- : MipsGenRegisterInfo(), Subtarget(ST), TII(tii) {}
+ : MipsGenRegisterInfo(Mips::RA), Subtarget(ST), TII(tii) {}
/// getRegisterNumbering - Given the enum value for some register, e.g.
/// Mips::RA, return the number that it corresponds to (e.g. 31).
@@ -52,39 +51,87 @@ unsigned MipsRegisterInfo::
getRegisterNumbering(unsigned RegEnum)
{
switch (RegEnum) {
- case Mips::ZERO : case Mips::F0 : case Mips::D0 : return 0;
- case Mips::AT : case Mips::F1 : return 1;
- case Mips::V0 : case Mips::F2 : case Mips::D1 : return 2;
- case Mips::V1 : case Mips::F3 : return 3;
- case Mips::A0 : case Mips::F4 : case Mips::D2 : return 4;
- case Mips::A1 : case Mips::F5 : return 5;
- case Mips::A2 : case Mips::F6 : case Mips::D3 : return 6;
- case Mips::A3 : case Mips::F7 : return 7;
- case Mips::T0 : case Mips::F8 : case Mips::D4 : return 8;
- case Mips::T1 : case Mips::F9 : return 9;
- case Mips::T2 : case Mips::F10: case Mips::D5: return 10;
- case Mips::T3 : case Mips::F11: return 11;
- case Mips::T4 : case Mips::F12: case Mips::D6: return 12;
- case Mips::T5 : case Mips::F13: return 13;
- case Mips::T6 : case Mips::F14: case Mips::D7: return 14;
- case Mips::T7 : case Mips::F15: return 15;
- case Mips::S0 : case Mips::F16: case Mips::D8: return 16;
- case Mips::S1 : case Mips::F17: return 17;
- case Mips::S2 : case Mips::F18: case Mips::D9: return 18;
- case Mips::S3 : case Mips::F19: return 19;
- case Mips::S4 : case Mips::F20: case Mips::D10: return 20;
- case Mips::S5 : case Mips::F21: return 21;
- case Mips::S6 : case Mips::F22: case Mips::D11: return 22;
- case Mips::S7 : case Mips::F23: return 23;
- case Mips::T8 : case Mips::F24: case Mips::D12: return 24;
- case Mips::T9 : case Mips::F25: return 25;
- case Mips::K0 : case Mips::F26: case Mips::D13: return 26;
- case Mips::K1 : case Mips::F27: return 27;
- case Mips::GP : case Mips::F28: case Mips::D14: return 28;
- case Mips::SP : case Mips::F29: return 29;
- case Mips::FP : case Mips::F30: case Mips::D15: return 30;
- case Mips::RA : case Mips::F31: return 31;
- default: llvm_unreachable("Unknown register number!");
+ case Mips::ZERO: case Mips::ZERO_64: case Mips::F0: case Mips::D0_64:
+ case Mips::D0:
+ return 0;
+ case Mips::AT: case Mips::AT_64: case Mips::F1: case Mips::D1_64:
+ return 1;
+ case Mips::V0: case Mips::V0_64: case Mips::F2: case Mips::D2_64:
+ case Mips::D1:
+ return 2;
+ case Mips::V1: case Mips::V1_64: case Mips::F3: case Mips::D3_64:
+ return 3;
+ case Mips::A0: case Mips::A0_64: case Mips::F4: case Mips::D4_64:
+ case Mips::D2:
+ return 4;
+ case Mips::A1: case Mips::A1_64: case Mips::F5: case Mips::D5_64:
+ return 5;
+ case Mips::A2: case Mips::A2_64: case Mips::F6: case Mips::D6_64:
+ case Mips::D3:
+ return 6;
+ case Mips::A3: case Mips::A3_64: case Mips::F7: case Mips::D7_64:
+ return 7;
+ case Mips::T0: case Mips::T0_64: case Mips::F8: case Mips::D8_64:
+ case Mips::D4:
+ return 8;
+ case Mips::T1: case Mips::T1_64: case Mips::F9: case Mips::D9_64:
+ return 9;
+ case Mips::T2: case Mips::T2_64: case Mips::F10: case Mips::D10_64:
+ case Mips::D5:
+ return 10;
+ case Mips::T3: case Mips::T3_64: case Mips::F11: case Mips::D11_64:
+ return 11;
+ case Mips::T4: case Mips::T4_64: case Mips::F12: case Mips::D12_64:
+ case Mips::D6:
+ return 12;
+ case Mips::T5: case Mips::T5_64: case Mips::F13: case Mips::D13_64:
+ return 13;
+ case Mips::T6: case Mips::T6_64: case Mips::F14: case Mips::D14_64:
+ case Mips::D7:
+ return 14;
+ case Mips::T7: case Mips::T7_64: case Mips::F15: case Mips::D15_64:
+ return 15;
+ case Mips::S0: case Mips::S0_64: case Mips::F16: case Mips::D16_64:
+ case Mips::D8:
+ return 16;
+ case Mips::S1: case Mips::S1_64: case Mips::F17: case Mips::D17_64:
+ return 17;
+ case Mips::S2: case Mips::S2_64: case Mips::F18: case Mips::D18_64:
+ case Mips::D9:
+ return 18;
+ case Mips::S3: case Mips::S3_64: case Mips::F19: case Mips::D19_64:
+ return 19;
+ case Mips::S4: case Mips::S4_64: case Mips::F20: case Mips::D20_64:
+ case Mips::D10:
+ return 20;
+ case Mips::S5: case Mips::S5_64: case Mips::F21: case Mips::D21_64:
+ return 21;
+ case Mips::S6: case Mips::S6_64: case Mips::F22: case Mips::D22_64:
+ case Mips::D11:
+ return 22;
+ case Mips::S7: case Mips::S7_64: case Mips::F23: case Mips::D23_64:
+ return 23;
+ case Mips::T8: case Mips::T8_64: case Mips::F24: case Mips::D24_64:
+ case Mips::D12:
+ return 24;
+ case Mips::T9: case Mips::T9_64: case Mips::F25: case Mips::D25_64:
+ return 25;
+ case Mips::K0: case Mips::K0_64: case Mips::F26: case Mips::D26_64:
+ case Mips::D13:
+ return 26;
+ case Mips::K1: case Mips::K1_64: case Mips::F27: case Mips::D27_64:
+ return 27;
+ case Mips::GP: case Mips::GP_64: case Mips::F28: case Mips::D28_64:
+ case Mips::D14:
+ return 28;
+ case Mips::SP: case Mips::SP_64: case Mips::F29: case Mips::D29_64:
+ return 29;
+ case Mips::FP: case Mips::FP_64: case Mips::F30: case Mips::D30_64:
+ case Mips::D15:
+ return 30;
+ case Mips::RA: case Mips::RA_64: case Mips::F31: case Mips::D31_64:
+ return 31;
+ default: llvm_unreachable("Unknown register number!");
}
return 0; // Not reached
}
@@ -101,7 +148,7 @@ getCalleeSavedRegs(const MachineFunction *MF) const
{
// Mips callee-save register range is $16-$23, $f20-$f31
static const unsigned SingleFloatOnlyCalleeSavedRegs[] = {
- Mips::F30, Mips::F29, Mips::F28, Mips::F27, Mips::F26,
+ Mips::F31, Mips::F30, Mips::F29, Mips::F28, Mips::F27, Mips::F26,
Mips::F25, Mips::F24, Mips::F23, Mips::F22, Mips::F21, Mips::F20,
Mips::RA, Mips::FP, Mips::S7, Mips::S6, Mips::S5, Mips::S4,
Mips::S3, Mips::S2, Mips::S1, Mips::S0, 0
@@ -113,31 +160,71 @@ getCalleeSavedRegs(const MachineFunction *MF) const
Mips::S3, Mips::S2, Mips::S1, Mips::S0, 0
};
+ static const unsigned N32CalleeSavedRegs[] = {
+ Mips::D31_64, Mips::D29_64, Mips::D27_64, Mips::D25_64, Mips::D23_64,
+ Mips::D21_64,
+ Mips::RA_64, Mips::FP_64, Mips::GP_64, Mips::S7_64, Mips::S6_64,
+ Mips::S5_64, Mips::S4_64, Mips::S3_64, Mips::S2_64, Mips::S1_64,
+ Mips::S0_64, 0
+ };
+
+ static const unsigned N64CalleeSavedRegs[] = {
+ Mips::D31_64, Mips::D30_64, Mips::D29_64, Mips::D28_64, Mips::D27_64,
+ Mips::D26_64, Mips::D25_64, Mips::D24_64,
+ Mips::RA_64, Mips::FP_64, Mips::GP_64, Mips::S7_64, Mips::S6_64,
+ Mips::S5_64, Mips::S4_64, Mips::S3_64, Mips::S2_64, Mips::S1_64,
+ Mips::S0_64, 0
+ };
+
if (Subtarget.isSingleFloat())
return SingleFloatOnlyCalleeSavedRegs;
- else
+ else if (!Subtarget.hasMips64())
return Mips32CalleeSavedRegs;
+ else if (Subtarget.isABI_N32())
+ return N32CalleeSavedRegs;
+
+ assert(Subtarget.isABI_N64());
+ return N64CalleeSavedRegs;
}
BitVector MipsRegisterInfo::
getReservedRegs(const MachineFunction &MF) const {
+ static const unsigned ReservedCPURegs[] = {
+ Mips::ZERO, Mips::AT, Mips::K0, Mips::K1,
+ Mips::GP, Mips::SP, Mips::FP, Mips::RA, 0
+ };
+
+ static const unsigned ReservedCPU64Regs[] = {
+ Mips::ZERO_64, Mips::AT_64, Mips::K0_64, Mips::K1_64,
+ Mips::GP_64, Mips::SP_64, Mips::FP_64, Mips::RA_64, 0
+ };
+
BitVector Reserved(getNumRegs());
- Reserved.set(Mips::ZERO);
- Reserved.set(Mips::AT);
- Reserved.set(Mips::K0);
- Reserved.set(Mips::K1);
- Reserved.set(Mips::GP);
- Reserved.set(Mips::SP);
- Reserved.set(Mips::FP);
- Reserved.set(Mips::RA);
- Reserved.set(Mips::F31);
- Reserved.set(Mips::D15);
-
- // SRV4 requires that odd register can't be used.
- if (!Subtarget.isSingleFloat() && !Subtarget.isMips32())
- for (unsigned FReg=(Mips::F0)+1; FReg < Mips::F30; FReg+=2)
- Reserved.set(FReg);
+ typedef TargetRegisterClass::iterator RegIter;
+
+ for (const unsigned *Reg = ReservedCPURegs; *Reg; ++Reg)
+ Reserved.set(*Reg);
+ if (Subtarget.hasMips64()) {
+ for (const unsigned *Reg = ReservedCPU64Regs; *Reg; ++Reg)
+ Reserved.set(*Reg);
+
+ // Reserve all registers in AFGR64.
+ for (RegIter Reg = Mips::AFGR64RegisterClass->begin();
+ Reg != Mips::AFGR64RegisterClass->end(); ++Reg)
+ Reserved.set(*Reg);
+ }
+ else {
+ // Reserve all registers in CPU64Regs & FGR64.
+ for (RegIter Reg = Mips::CPU64RegsRegisterClass->begin();
+ Reg != Mips::CPU64RegsRegisterClass->end(); ++Reg)
+ Reserved.set(*Reg);
+
+ for (RegIter Reg = Mips::FGR64RegisterClass->begin();
+ Reg != Mips::FGR64RegisterClass->end(); ++Reg)
+ Reserved.set(*Reg);
+ }
+
return Reserved;
}
@@ -245,11 +332,6 @@ eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj,
}
unsigned MipsRegisterInfo::
-getRARegister() const {
- return Mips::RA;
-}
-
-unsigned MipsRegisterInfo::
getFrameRegister(const MachineFunction &MF) const {
const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
@@ -267,12 +349,3 @@ getEHHandlerRegister() const {
llvm_unreachable("What is the exception handler register");
return 0;
}
-
-int MipsRegisterInfo::
-getDwarfRegNum(unsigned RegNum, bool isEH) const {
- return MipsGenRegisterInfo::getDwarfRegNumFull(RegNum, 0);
-}
-
-int MipsRegisterInfo::getLLVMRegNum(unsigned DwarfRegNo, bool isEH) const {
- return MipsGenRegisterInfo::getLLVMRegNumFull(DwarfRegNo,0);
-}
diff --git a/lib/Target/Mips/MipsRegisterInfo.h b/lib/Target/Mips/MipsRegisterInfo.h
index 646369b..67e57dd 100644
--- a/lib/Target/Mips/MipsRegisterInfo.h
+++ b/lib/Target/Mips/MipsRegisterInfo.h
@@ -57,15 +57,11 @@ struct MipsRegisterInfo : public MipsGenRegisterInfo {
void processFunctionBeforeFrameFinalized(MachineFunction &MF) const;
/// Debug information queries.
- unsigned getRARegister() const;
unsigned getFrameRegister(const MachineFunction &MF) const;
/// Exception handling queries.
unsigned getEHExceptionRegister() const;
unsigned getEHHandlerRegister() const;
-
- int getDwarfRegNum(unsigned RegNum, bool isEH) const;
- int getLLVMRegNum(unsigned RegNum, bool isEH) const;
};
} // end namespace llvm
diff --git a/lib/Target/Mips/MipsRegisterInfo.td b/lib/Target/Mips/MipsRegisterInfo.td
index f0db518..925ad9e 100644
--- a/lib/Target/Mips/MipsRegisterInfo.td
+++ b/lib/Target/Mips/MipsRegisterInfo.td
@@ -10,6 +10,11 @@
//===----------------------------------------------------------------------===//
// Declarations that describe the MIPS register file
//===----------------------------------------------------------------------===//
+let Namespace = "Mips" in {
+def sub_fpeven : SubRegIndex;
+def sub_fpodd : SubRegIndex;
+def sub_32 : SubRegIndex;
+}
// We have banks of 32 registers each.
class MipsReg<string n> : Register<n> {
@@ -28,22 +33,31 @@ class MipsGPRReg<bits<5> num, string n> : MipsReg<n> {
let Num = num;
}
+// Mips 64-bit CPU Registers
+class Mips64GPRReg<bits<5> num, string n, list<Register> subregs>
+ : MipsRegWithSubRegs<n, subregs> {
+ let Num = num;
+ let SubRegIndices = [sub_32];
+}
+
// Mips 32-bit FPU Registers
class FPR<bits<5> num, string n> : MipsReg<n> {
let Num = num;
}
// Mips 64-bit (aliased) FPU Registers
-let Namespace = "Mips" in {
-def sub_fpeven : SubRegIndex;
-def sub_fpodd : SubRegIndex;
-}
class AFPR<bits<5> num, string n, list<Register> subregs>
: MipsRegWithSubRegs<n, subregs> {
let Num = num;
let SubRegIndices = [sub_fpeven, sub_fpodd];
}
+class AFPR64<bits<5> num, string n, list<Register> subregs>
+ : MipsRegWithSubRegs<n, subregs> {
+ let Num = num;
+ let SubRegIndices = [sub_32];
+}
+
// Mips Hardware Registers
class HWR<bits<5> num, string n> : MipsReg<n> {
let Num = num;
@@ -54,6 +68,7 @@ class HWR<bits<5> num, string n> : MipsReg<n> {
//===----------------------------------------------------------------------===//
let Namespace = "Mips" in {
+ // FIXME: Fix DwarfRegNum.
// General Purpose Registers
def ZERO : MipsGPRReg< 0, "ZERO">, DwarfRegNum<[0]>;
@@ -89,6 +104,40 @@ let Namespace = "Mips" in {
def FP : MipsGPRReg< 30, "FP">, DwarfRegNum<[30]>;
def RA : MipsGPRReg< 31, "RA">, DwarfRegNum<[31]>;
+ // General Purpose 64-bit Registers
+ def ZERO_64 : Mips64GPRReg< 0, "ZERO", [ZERO]>;
+ def AT_64 : Mips64GPRReg< 1, "AT", [AT]>;
+ def V0_64 : Mips64GPRReg< 2, "2", [V0]>;
+ def V1_64 : Mips64GPRReg< 3, "3", [V1]>;
+ def A0_64 : Mips64GPRReg< 4, "4", [A0]>;
+ def A1_64 : Mips64GPRReg< 5, "5", [A1]>;
+ def A2_64 : Mips64GPRReg< 6, "6", [A2]>;
+ def A3_64 : Mips64GPRReg< 7, "7", [A3]>;
+ def T0_64 : Mips64GPRReg< 8, "8", [T0]>;
+ def T1_64 : Mips64GPRReg< 9, "9", [T1]>;
+ def T2_64 : Mips64GPRReg< 10, "10", [T2]>;
+ def T3_64 : Mips64GPRReg< 11, "11", [T3]>;
+ def T4_64 : Mips64GPRReg< 12, "12", [T4]>;
+ def T5_64 : Mips64GPRReg< 13, "13", [T5]>;
+ def T6_64 : Mips64GPRReg< 14, "14", [T6]>;
+ def T7_64 : Mips64GPRReg< 15, "15", [T7]>;
+ def S0_64 : Mips64GPRReg< 16, "16", [S0]>;
+ def S1_64 : Mips64GPRReg< 17, "17", [S1]>;
+ def S2_64 : Mips64GPRReg< 18, "18", [S2]>;
+ def S3_64 : Mips64GPRReg< 19, "19", [S3]>;
+ def S4_64 : Mips64GPRReg< 20, "20", [S4]>;
+ def S5_64 : Mips64GPRReg< 21, "21", [S5]>;
+ def S6_64 : Mips64GPRReg< 22, "22", [S6]>;
+ def S7_64 : Mips64GPRReg< 23, "23", [S7]>;
+ def T8_64 : Mips64GPRReg< 24, "24", [T8]>;
+ def T9_64 : Mips64GPRReg< 25, "25", [T9]>;
+ def K0_64 : Mips64GPRReg< 26, "26", [K0]>;
+ def K1_64 : Mips64GPRReg< 27, "27", [K1]>;
+ def GP_64 : Mips64GPRReg< 28, "GP", [GP]>;
+ def SP_64 : Mips64GPRReg< 29, "SP", [SP]>;
+ def FP_64 : Mips64GPRReg< 30, "FP", [FP]>;
+ def RA_64 : Mips64GPRReg< 31, "RA", [RA]>;
+
/// Mips single precision FPU Registers
def F0 : FPR< 0, "F0">, DwarfRegNum<[32]>;
def F1 : FPR< 1, "F1">, DwarfRegNum<[33]>;
@@ -142,10 +191,49 @@ let Namespace = "Mips" in {
def D14 : AFPR<28, "F28", [F28, F29]>;
def D15 : AFPR<30, "F30", [F30, F31]>;
+ /// Mips double precision FPU Registers in MFP64 mode.
+ def D0_64 : AFPR64<0, "F0", [F0]>;
+ def D1_64 : AFPR64<1, "F1", [F1]>;
+ def D2_64 : AFPR64<2, "F2", [F2]>;
+ def D3_64 : AFPR64<3, "F3", [F3]>;
+ def D4_64 : AFPR64<4, "F4", [F4]>;
+ def D5_64 : AFPR64<5, "F5", [F5]>;
+ def D6_64 : AFPR64<6, "F6", [F6]>;
+ def D7_64 : AFPR64<7, "F7", [F7]>;
+ def D8_64 : AFPR64<8, "F8", [F8]>;
+ def D9_64 : AFPR64<9, "F9", [F9]>;
+ def D10_64 : AFPR64<10, "F10", [F10]>;
+ def D11_64 : AFPR64<11, "F11", [F11]>;
+ def D12_64 : AFPR64<12, "F12", [F12]>;
+ def D13_64 : AFPR64<13, "F13", [F13]>;
+ def D14_64 : AFPR64<14, "F14", [F14]>;
+ def D15_64 : AFPR64<15, "F15", [F15]>;
+ def D16_64 : AFPR64<16, "F16", [F16]>;
+ def D17_64 : AFPR64<17, "F17", [F17]>;
+ def D18_64 : AFPR64<18, "F18", [F18]>;
+ def D19_64 : AFPR64<19, "F19", [F19]>;
+ def D20_64 : AFPR64<20, "F20", [F20]>;
+ def D21_64 : AFPR64<21, "F21", [F21]>;
+ def D22_64 : AFPR64<22, "F22", [F22]>;
+ def D23_64 : AFPR64<23, "F23", [F23]>;
+ def D24_64 : AFPR64<24, "F24", [F24]>;
+ def D25_64 : AFPR64<25, "F25", [F25]>;
+ def D26_64 : AFPR64<26, "F26", [F26]>;
+ def D27_64 : AFPR64<27, "F27", [F27]>;
+ def D28_64 : AFPR64<28, "F28", [F28]>;
+ def D29_64 : AFPR64<29, "F29", [F29]>;
+ def D30_64 : AFPR64<30, "F30", [F30]>;
+ def D31_64 : AFPR64<31, "F31", [F31]>;
+
// Hi/Lo registers
def HI : Register<"hi">, DwarfRegNum<[64]>;
def LO : Register<"lo">, DwarfRegNum<[65]>;
+ let SubRegIndices = [sub_32] in {
+ def HI64 : RegisterWithSubRegs<"hi", [HI]>;
+ def LO64 : RegisterWithSubRegs<"lo", [LO]>;
+ }
+
// Status flags register
def FCR31 : Register<"31">;
@@ -167,6 +255,18 @@ def CPURegs : RegisterClass<"Mips", [i32], 32, (add
// Reserved
ZERO, AT, K0, K1, GP, SP, FP, RA)>;
+def CPU64Regs : RegisterClass<"Mips", [i64], 64, (add
+ // Return Values and Arguments
+ V0_64, V1_64, A0_64, A1_64, A2_64, A3_64,
+ // Not preserved across procedure calls
+ T0_64, T1_64, T2_64, T3_64, T4_64, T5_64, T6_64, T7_64, T8_64, T9_64,
+ // Callee save
+ S0_64, S1_64, S2_64, S3_64, S4_64, S5_64, S6_64, S7_64,
+ // Reserved
+ ZERO_64, AT_64, K0_64, K1_64, GP_64, SP_64, FP_64, RA_64)> {
+ let SubRegClasses = [(CPURegs sub_32)];
+}
+
// 64-bit fp:
// * FGR64  - 32 64-bit registers
// * AFGR64 - 16 64-bit registers, each aliasing an even/odd pair of
//            32-bit registers (32-bit FP mode)
@@ -182,17 +282,22 @@ def AFGR64 : RegisterClass<"Mips", [f64], 64, (add
// Not preserved across procedure calls
D2, D3, D4, D5, D8, D9,
// Callee save
- D10, D11, D12, D13, D14,
- // Reserved
- D15)> {
+ D10, D11, D12, D13, D14, D15)> {
let SubRegClasses = [(FGR32 sub_fpeven, sub_fpodd)];
}
+def FGR64 : RegisterClass<"Mips", [f64], 64, (sequence "D%u_64", 0, 31)> {
+ let SubRegClasses = [(FGR32 sub_32)];
+}
+
// Condition Register for floating point operations
def CCR : RegisterClass<"Mips", [i32], 32, (add FCR31)>;
// Hi/Lo Registers
def HILO : RegisterClass<"Mips", [i32], 32, (add HI, LO)>;
+def HILO64 : RegisterClass<"Mips", [i64], 64, (add HI64, LO64)> {
+ let SubRegClasses = [(HILO sub_32)];
+}
// Hardware registers
def HWRegs : RegisterClass<"Mips", [i32], 32, (add HWR29)>;
diff --git a/lib/Target/Mips/MipsRelocations.h b/lib/Target/Mips/MipsRelocations.h
new file mode 100644
index 0000000..66d1bfd
--- /dev/null
+++ b/lib/Target/Mips/MipsRelocations.h
@@ -0,0 +1,41 @@
+//===- MipsRelocations.h - Mips Code Relocations ---------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===---------------------------------------------------------------------===//
+//
+// This file defines the Mips target-specific relocation types
+// (for relocation-model=static).
+//
+//===---------------------------------------------------------------------===//
+
+#ifndef MIPSRELOCATIONS_H_
+#define MIPSRELOCATIONS_H_
+
+#include "llvm/CodeGen/MachineRelocation.h"
+
+namespace llvm {
+ namespace Mips{
+ enum RelocationType {
+ // reloc_mips_branch - pc relative relocation for branches. The lower 18
+ // bits of the difference between the branch target and the branch
+ // instruction, shifted right by 2.
+ reloc_mips_branch = 1,
+
+      // reloc_mips_hi - upper 16 bits of the address, incremented by 1 if the
+      // lower 16 bits of the address are negative (i.e. bit 15 is set).
+ reloc_mips_hi = 2,
+
+ // reloc_mips_lo - lower 16 bits of the address.
+ reloc_mips_lo = 3,
+
+ // reloc_mips_26 - lower 28 bits of the address, shifted right by 2.
+ reloc_mips_26 = 4
+ };
+ }
+}
+
+#endif /* MIPSRELOCATIONS_H_ */
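reloc_mips_26 matches the MIPS J-type jump format: the instruction carries bits 27..2 of the target, and the hardware forms the destination by combining that field, shifted left by 2, with the top four bits of the address of the instruction after the jump. A sketch of the pack/unpack round trip (valid only while both ends share one 256 MB region):

    #include <cassert>
    #include <cstdint>

    // Pack a jump target into the 26-bit field of a J-type instruction
    // (reloc_mips_26: lower 28 bits of the address, shifted right by 2).
    uint32_t packJ(uint32_t target) {
      return (target & 0x0fffffff) >> 2;
    }

    // What the hardware computes: the 26-bit field shifted left by 2, OR'd
    // with the top 4 bits of the address of the instruction after the jump.
    uint32_t unpackJ(uint32_t field, uint32_t pcOfDelaySlot) {
      return (pcOfDelaySlot & 0xf0000000) | (field << 2);
    }

    int main() {
      uint32_t target = 0x00400120, jumpAddr = 0x00400050;
      assert(unpackJ(packJ(target), jumpAddr + 4) == target);
      return 0;
    }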
diff --git a/lib/Target/Mips/MipsSubtarget.cpp b/lib/Target/Mips/MipsSubtarget.cpp
index 6eee333..016d449 100644
--- a/lib/Target/Mips/MipsSubtarget.cpp
+++ b/lib/Target/Mips/MipsSubtarget.cpp
@@ -13,7 +13,7 @@
#include "MipsSubtarget.h"
#include "Mips.h"
-#include "llvm/Target/TargetRegistry.h"
+#include "llvm/Support/TargetRegistry.h"
#define GET_SUBTARGETINFO_TARGET_DESC
#define GET_SUBTARGETINFO_CTOR
@@ -24,15 +24,14 @@ using namespace llvm;
MipsSubtarget::MipsSubtarget(const std::string &TT, const std::string &CPU,
const std::string &FS, bool little) :
MipsGenSubtargetInfo(TT, CPU, FS),
- MipsArchVersion(Mips1), MipsABI(O32), IsLittle(little), IsSingleFloat(false),
- IsFP64bit(false), IsGP64bit(false), HasVFPU(false), IsLinux(true),
- HasSEInReg(false), HasCondMov(false), HasMulDivAdd(false), HasMinMax(false),
- HasSwap(false), HasBitCount(false)
+ MipsArchVersion(Mips32), MipsABI(UnknownABI), IsLittle(little),
+ IsSingleFloat(false), IsFP64bit(false), IsGP64bit(false), HasVFPU(false),
+ IsLinux(true), HasSEInReg(false), HasCondMov(false), HasMulDivAdd(false),
+ HasMinMax(false), HasSwap(false), HasBitCount(false)
{
std::string CPUName = CPU;
if (CPUName.empty())
- CPUName = "mips1";
- MipsArchVersion = Mips1;
+ CPUName = "mips32r1";
// Parse features string.
ParseSubtargetFeatures(CPUName, FS);
@@ -40,23 +39,16 @@ MipsSubtarget::MipsSubtarget(const std::string &TT, const std::string &CPU,
// Initialize scheduling itinerary for the specified CPU.
InstrItins = getInstrItineraryForCPU(CPUName);
+ // Set MipsABI if it hasn't been set yet.
+ if (MipsABI == UnknownABI)
+ MipsABI = hasMips64() ? N64 : O32;
+
+ // Check if Architecture and ABI are compatible.
+ assert(((!hasMips64() && (isABI_O32() || isABI_EABI())) ||
+ (hasMips64() && (isABI_N32() || isABI_N64()))) &&
+ "Invalid Arch & ABI pair.");
+
// Is the target system Linux?
if (TT.find("linux") == std::string::npos)
IsLinux = false;
-
- // When only the target triple is specified and is
- // a allegrex target, set the features. We also match
- // big and little endian allegrex cores (dont really
- // know if a big one exists)
- if (TT.find("mipsallegrex") != std::string::npos ||
- TT.find("psp") != std::string::npos) {
- MipsABI = EABI;
- IsSingleFloat = true;
- MipsArchVersion = Mips2;
- HasVFPU = true; // Enables Allegrex Vector FPU (not supported yet)
- HasSEInReg = true;
- HasBitCount = true;
- HasSwap = true;
- HasCondMov = true;
- }
}
diff --git a/lib/Target/Mips/MipsSubtarget.h b/lib/Target/Mips/MipsSubtarget.h
index 533d4af..d9dddad 100644
--- a/lib/Target/Mips/MipsSubtarget.h
+++ b/lib/Target/Mips/MipsSubtarget.h
@@ -27,14 +27,15 @@ class StringRef;
class MipsSubtarget : public MipsGenSubtargetInfo {
public:
+ // NOTE: O64 will not be supported.
enum MipsABIEnum {
- O32, O64, N32, N64, EABI
+ UnknownABI, O32, N32, N64, EABI
};
protected:
enum MipsArchEnum {
- Mips1, Mips2, Mips3, Mips4, Mips32, Mips32r2
+ Mips32, Mips32r2, Mips64, Mips64r2
};
// Mips architecture version
@@ -90,6 +91,8 @@ public:
/// ABI predicates.
bool isABI_EABI() const { return MipsABI == EABI; }
+ bool isABI_N64() const { return MipsABI == N64; }
+ bool isABI_N32() const { return MipsABI == N32; }
bool isABI_O32() const { return MipsABI == O32; }
unsigned getTargetABI() const { return MipsABI; }
@@ -102,9 +105,11 @@ public:
/// subtarget options. Definition of function is auto generated by tblgen.
void ParseSubtargetFeatures(StringRef CPU, StringRef FS);
- bool isMips1() const { return MipsArchVersion == Mips1; }
- bool isMips32() const { return MipsArchVersion >= Mips32; }
- bool isMips32r2() const { return MipsArchVersion == Mips32r2; }
+ bool hasMips32() const { return MipsArchVersion >= Mips32; }
+ bool hasMips32r2() const { return MipsArchVersion == Mips32r2 ||
+ MipsArchVersion == Mips64r2; }
+ bool hasMips64() const { return MipsArchVersion >= Mips64; }
+ bool hasMips64r2() const { return MipsArchVersion == Mips64r2; }
bool isLittle() const { return IsLittle; }
bool isFP64bit() const { return IsFP64bit; }
diff --git a/lib/Target/Mips/MipsTargetMachine.cpp b/lib/Target/Mips/MipsTargetMachine.cpp
index 20b9f4e..6480da3 100644
--- a/lib/Target/Mips/MipsTargetMachine.cpp
+++ b/lib/Target/Mips/MipsTargetMachine.cpp
@@ -14,13 +14,15 @@
#include "Mips.h"
#include "MipsTargetMachine.h"
#include "llvm/PassManager.h"
-#include "llvm/Target/TargetRegistry.h"
+#include "llvm/Support/TargetRegistry.h"
using namespace llvm;
extern "C" void LLVMInitializeMipsTarget() {
// Register the target.
- RegisterTargetMachine<MipsTargetMachine> X(TheMipsTarget);
+ RegisterTargetMachine<MipsebTargetMachine> X(TheMipsTarget);
RegisterTargetMachine<MipselTargetMachine> Y(TheMipselTarget);
+ RegisterTargetMachine<Mips64ebTargetMachine> A(TheMips64Target);
+ RegisterTargetMachine<Mips64elTargetMachine> B(TheMips64elTarget);
}
// DataLayout --> Big-endian, 32-bit pointer/ABI/alignment
@@ -31,30 +33,47 @@ extern "C" void LLVMInitializeMipsTarget() {
// an easier handling.
// Using CodeModel::Large enables different CALL behavior.
MipsTargetMachine::
-MipsTargetMachine(const Target &T, const std::string &TT,
- const std::string &CPU, const std::string &FS,
- bool isLittle=false):
- LLVMTargetMachine(T, TT, CPU, FS),
+MipsTargetMachine(const Target &T, StringRef TT,
+ StringRef CPU, StringRef FS,
+ Reloc::Model RM, CodeModel::Model CM,
+ bool isLittle):
+ LLVMTargetMachine(T, TT, CPU, FS, RM, CM),
Subtarget(TT, CPU, FS, isLittle),
- DataLayout(isLittle ?
- std::string("e-p:32:32:32-i8:8:32-i16:16:32-i64:64:64-n32") :
- std::string("E-p:32:32:32-i8:8:32-i16:16:32-i64:64:64-n32")),
+ DataLayout(isLittle ?
+ (Subtarget.isABI_N64() ?
+ "e-p:64:64:64-i8:8:32-i16:16:32-i64:64:64-f128:128:128-n32" :
+ "e-p:32:32:32-i8:8:32-i16:16:32-i64:64:64-n32") :
+ (Subtarget.isABI_N64() ?
+ "E-p:64:64:64-i8:8:32-i16:16:32-i64:64:64-f128:128:128-n32" :
+ "E-p:32:32:32-i8:8:32-i16:16:32-i64:64:64-n32")),
InstrInfo(*this),
FrameLowering(Subtarget),
- TLInfo(*this), TSInfo(*this) {
- // Abicall enables PIC by default
- if (getRelocationModel() == Reloc::Default) {
- if (Subtarget.isABI_O32())
- setRelocationModel(Reloc::PIC_);
- else
- setRelocationModel(Reloc::Static);
- }
+ TLInfo(*this), TSInfo(*this), JITInfo() {
}
+MipsebTargetMachine::
+MipsebTargetMachine(const Target &T, StringRef TT,
+ StringRef CPU, StringRef FS,
+ Reloc::Model RM, CodeModel::Model CM) :
+ MipsTargetMachine(T, TT, CPU, FS, RM, CM, false) {}
+
MipselTargetMachine::
-MipselTargetMachine(const Target &T, const std::string &TT,
- const std::string &CPU, const std::string &FS) :
- MipsTargetMachine(T, TT, CPU, FS, true) {}
+MipselTargetMachine(const Target &T, StringRef TT,
+ StringRef CPU, StringRef FS,
+ Reloc::Model RM, CodeModel::Model CM) :
+ MipsTargetMachine(T, TT, CPU, FS, RM, CM, true) {}
+
+Mips64ebTargetMachine::
+Mips64ebTargetMachine(const Target &T, StringRef TT,
+ StringRef CPU, StringRef FS,
+ Reloc::Model RM, CodeModel::Model CM) :
+ MipsTargetMachine(T, TT, CPU, FS, RM, CM, false) {}
+
+Mips64elTargetMachine::
+Mips64elTargetMachine(const Target &T, StringRef TT,
+ StringRef CPU, StringRef FS,
+ Reloc::Model RM, CodeModel::Model CM) :
+ MipsTargetMachine(T, TT, CPU, FS, RM, CM, true) {}
// Install an instruction selector pass using
// the ISelDag to gen Mips code.
@@ -77,7 +96,10 @@ addPreEmitPass(PassManagerBase &PM, CodeGenOpt::Level OptLevel)
bool MipsTargetMachine::
addPreRegAlloc(PassManagerBase &PM, CodeGenOpt::Level OptLevel) {
- PM.add(createMipsEmitGPRestorePass(*this));
+ // Do not restore $gp if target is Mips64.
+ // In N32/64, $gp is a callee-saved register.
+ if (!Subtarget.hasMips64())
+ PM.add(createMipsEmitGPRestorePass(*this));
return true;
}
@@ -86,3 +108,12 @@ addPostRegAlloc(PassManagerBase &PM, CodeGenOpt::Level OptLevel) {
PM.add(createMipsExpandPseudoPass(*this));
return true;
}
+
+bool MipsTargetMachine::addCodeEmitter(PassManagerBase &PM,
+ CodeGenOpt::Level OptLevel,
+ JITCodeEmitter &JCE) {
+ // Machine code emitter pass for Mips.
+ PM.add(createMipsJITCodeEmitterPass(*this, JCE));
+ return false;
+}
+
diff --git a/lib/Target/Mips/MipsTargetMachine.h b/lib/Target/Mips/MipsTargetMachine.h
index a021af2..118ed10 100644
--- a/lib/Target/Mips/MipsTargetMachine.h
+++ b/lib/Target/Mips/MipsTargetMachine.h
@@ -22,6 +22,7 @@
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Target/TargetFrameLowering.h"
+#include "MipsJITInfo.h"
namespace llvm {
class formatted_raw_ostream;
@@ -33,9 +34,12 @@ namespace llvm {
MipsFrameLowering FrameLowering;
MipsTargetLowering TLInfo;
MipsSelectionDAGInfo TSInfo;
+ MipsJITInfo JITInfo;
+
public:
- MipsTargetMachine(const Target &T, const std::string &TT,
- const std::string &CPU, const std::string &FS,
+ MipsTargetMachine(const Target &T, StringRef TT,
+ StringRef CPU, StringRef FS,
+ Reloc::Model RM, CodeModel::Model CM,
bool isLittle);
virtual const MipsInstrInfo *getInstrInfo() const
@@ -46,6 +50,9 @@ namespace llvm {
{ return &Subtarget; }
virtual const TargetData *getTargetData() const
{ return &DataLayout;}
+ virtual MipsJITInfo *getJITInfo()
+ { return &JITInfo; }
+
virtual const MipsRegisterInfo *getRegisterInfo() const {
return &InstrInfo.getRegisterInfo();
@@ -67,16 +74,47 @@ namespace llvm {
virtual bool addPreRegAlloc(PassManagerBase &PM,
CodeGenOpt::Level OptLevel);
virtual bool addPostRegAlloc(PassManagerBase &, CodeGenOpt::Level);
+ virtual bool addCodeEmitter(PassManagerBase &PM,
+ CodeGenOpt::Level OptLevel,
+ JITCodeEmitter &JCE);
+
};
-/// MipselTargetMachine - Mipsel target machine.
+/// MipsebTargetMachine - Mips32 big endian target machine.
+///
+class MipsebTargetMachine : public MipsTargetMachine {
+public:
+ MipsebTargetMachine(const Target &T, StringRef TT,
+ StringRef CPU, StringRef FS,
+ Reloc::Model RM, CodeModel::Model CM);
+};
+
+/// MipselTargetMachine - Mips32 little endian target machine.
///
class MipselTargetMachine : public MipsTargetMachine {
public:
- MipselTargetMachine(const Target &T, const std::string &TT,
- const std::string &CPU, const std::string &FS);
+ MipselTargetMachine(const Target &T, StringRef TT,
+ StringRef CPU, StringRef FS,
+ Reloc::Model RM, CodeModel::Model CM);
};
+/// Mips64ebTargetMachine - Mips64 big endian target machine.
+///
+class Mips64ebTargetMachine : public MipsTargetMachine {
+public:
+ Mips64ebTargetMachine(const Target &T, StringRef TT,
+ StringRef CPU, StringRef FS,
+ Reloc::Model RM, CodeModel::Model CM);
+};
+
+/// Mips64elTargetMachine - Mips64 little endian target machine.
+///
+class Mips64elTargetMachine : public MipsTargetMachine {
+public:
+ Mips64elTargetMachine(const Target &T, StringRef TT,
+ StringRef CPU, StringRef FS,
+ Reloc::Model RM, CodeModel::Model CM);
+};
} // End llvm namespace
#endif
diff --git a/lib/Target/Mips/MipsTargetObjectFile.cpp b/lib/Target/Mips/MipsTargetObjectFile.cpp
index cf5d1b5..05c46f5 100644
--- a/lib/Target/Mips/MipsTargetObjectFile.cpp
+++ b/lib/Target/Mips/MipsTargetObjectFile.cpp
@@ -79,7 +79,7 @@ IsGlobalInSmallSection(const GlobalValue *GV, const TargetMachine &TM,
if (Kind.isMergeable1ByteCString())
return false;
- const Type *Ty = GV->getType()->getElementType();
+ Type *Ty = GV->getType()->getElementType();
return IsInSmallSection(TM.getTargetData()->getTypeAllocSize(Ty));
}
diff --git a/lib/Target/Mips/TargetInfo/CMakeLists.txt b/lib/Target/Mips/TargetInfo/CMakeLists.txt
index 6e5d56b..5692604 100644
--- a/lib/Target/Mips/TargetInfo/CMakeLists.txt
+++ b/lib/Target/Mips/TargetInfo/CMakeLists.txt
@@ -4,4 +4,10 @@ add_llvm_library(LLVMMipsInfo
MipsTargetInfo.cpp
)
-add_dependencies(LLVMMipsInfo MipsCodeGenTable_gen)
+add_llvm_library_dependencies(LLVMMipsInfo
+ LLVMMC
+ LLVMSupport
+ LLVMTarget
+ )
+
+add_dependencies(LLVMMipsInfo MipsCommonTableGen)
diff --git a/lib/Target/Mips/TargetInfo/MipsTargetInfo.cpp b/lib/Target/Mips/TargetInfo/MipsTargetInfo.cpp
index a8d6fe9..243632b 100644
--- a/lib/Target/Mips/TargetInfo/MipsTargetInfo.cpp
+++ b/lib/Target/Mips/TargetInfo/MipsTargetInfo.cpp
@@ -9,13 +9,23 @@
#include "Mips.h"
#include "llvm/Module.h"
-#include "llvm/Target/TargetRegistry.h"
+#include "llvm/Support/TargetRegistry.h"
using namespace llvm;
Target llvm::TheMipsTarget, llvm::TheMipselTarget;
+Target llvm::TheMips64Target, llvm::TheMips64elTarget;
extern "C" void LLVMInitializeMipsTargetInfo() {
- RegisterTarget<Triple::mips> X(TheMipsTarget, "mips", "Mips");
+ RegisterTarget<Triple::mips,
+ /*HasJIT=*/true> X(TheMipsTarget, "mips", "Mips");
- RegisterTarget<Triple::mipsel> Y(TheMipselTarget, "mipsel", "Mipsel");
+ RegisterTarget<Triple::mipsel,
+ /*HasJIT=*/true> Y(TheMipselTarget, "mipsel", "Mipsel");
+
+ RegisterTarget<Triple::mips64,
+ /*HasJIT=*/false> A(TheMips64Target, "mips64", "Mips64 [experimental]");
+
+ RegisterTarget<Triple::mips64el,
+ /*HasJIT=*/false> B(TheMips64elTarget,
+ "mips64el", "Mips64el [experimental]");
}
diff --git a/lib/Target/PTX/CMakeLists.txt b/lib/Target/PTX/CMakeLists.txt
index ce08916..6e87b17 100644
--- a/lib/Target/PTX/CMakeLists.txt
+++ b/lib/Target/PTX/CMakeLists.txt
@@ -1,24 +1,44 @@
set(LLVM_TARGET_DEFINITIONS PTX.td)
-tablegen(PTXGenAsmWriter.inc -gen-asm-writer)
-tablegen(PTXGenCallingConv.inc -gen-callingconv)
-tablegen(PTXGenDAGISel.inc -gen-dag-isel)
-tablegen(PTXGenInstrInfo.inc -gen-instr-info)
-tablegen(PTXGenRegisterInfo.inc -gen-register-info)
-tablegen(PTXGenSubtargetInfo.inc -gen-subtarget)
+llvm_tablegen(PTXGenAsmWriter.inc -gen-asm-writer)
+llvm_tablegen(PTXGenDAGISel.inc -gen-dag-isel)
+llvm_tablegen(PTXGenInstrInfo.inc -gen-instr-info)
+llvm_tablegen(PTXGenRegisterInfo.inc -gen-register-info)
+llvm_tablegen(PTXGenSubtargetInfo.inc -gen-subtarget)
+add_public_tablegen_target(PTXCommonTableGen)
add_llvm_target(PTXCodeGen
PTXAsmPrinter.cpp
PTXISelDAGToDAG.cpp
PTXISelLowering.cpp
PTXInstrInfo.cpp
+ PTXFPRoundingModePass.cpp
PTXFrameLowering.cpp
PTXMCAsmStreamer.cpp
+ PTXMCInstLower.cpp
PTXMFInfoExtract.cpp
+ PTXParamManager.cpp
+ PTXRegAlloc.cpp
PTXRegisterInfo.cpp
+ PTXSelectionDAGInfo.cpp
PTXSubtarget.cpp
PTXTargetMachine.cpp
)
+add_llvm_library_dependencies(LLVMPTXCodeGen
+ LLVMAnalysis
+ LLVMAsmPrinter
+ LLVMCodeGen
+ LLVMCore
+ LLVMMC
+ LLVMPTXDesc
+ LLVMPTXInfo
+ LLVMSelectionDAG
+ LLVMSupport
+ LLVMTarget
+ )
+
add_subdirectory(TargetInfo)
+add_subdirectory(InstPrinter)
add_subdirectory(MCTargetDesc)
+
diff --git a/lib/Target/PTX/InstPrinter/CMakeLists.txt b/lib/Target/PTX/InstPrinter/CMakeLists.txt
new file mode 100644
index 0000000..029d060
--- /dev/null
+++ b/lib/Target/PTX/InstPrinter/CMakeLists.txt
@@ -0,0 +1,13 @@
+include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. )
+
+add_llvm_library(LLVMPTXAsmPrinter
+ PTXInstPrinter.cpp
+ )
+
+add_dependencies(LLVMPTXAsmPrinter PTXCommonTableGen)
+
+add_llvm_library_dependencies(LLVMPTXAsmPrinter
+ LLVMMC
+ LLVMSupport
+ )
+
diff --git a/lib/Target/PTX/InstPrinter/Makefile b/lib/Target/PTX/InstPrinter/Makefile
new file mode 100644
index 0000000..0ccfe44
--- /dev/null
+++ b/lib/Target/PTX/InstPrinter/Makefile
@@ -0,0 +1,16 @@
+##===- lib/Target/PTX/InstPrinter/Makefile -----------------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+LEVEL = ../../../..
+LIBRARYNAME = LLVMPTXAsmPrinter
+
+# Hack: we need to include 'main' ptx target directory to grab private headers
+CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
+
+include $(LEVEL)/Makefile.common
+
diff --git a/lib/Target/PTX/InstPrinter/PTXInstPrinter.cpp b/lib/Target/PTX/InstPrinter/PTXInstPrinter.cpp
new file mode 100644
index 0000000..aabb404
--- /dev/null
+++ b/lib/Target/PTX/InstPrinter/PTXInstPrinter.cpp
@@ -0,0 +1,192 @@
+//===-- PTXInstPrinter.cpp - Convert PTX MCInst to assembly syntax --------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This class prints a PTX MCInst to a .ptx file.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "asm-printer"
+#include "PTXInstPrinter.h"
+#include "MCTargetDesc/PTXBaseInfo.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+#define GET_INSTRUCTION_NAME
+#include "PTXGenAsmWriter.inc"
+
+PTXInstPrinter::PTXInstPrinter(const MCAsmInfo &MAI,
+ const MCSubtargetInfo &STI) :
+ MCInstPrinter(MAI) {
+ // Initialize the set of available features.
+ setAvailableFeatures(STI.getFeatureBits());
+}
+
+StringRef PTXInstPrinter::getOpcodeName(unsigned Opcode) const {
+ return getInstructionName(Opcode);
+}
+
+void PTXInstPrinter::printRegName(raw_ostream &OS, unsigned RegNo) const {
+ OS << getRegisterName(RegNo);
+}
+
+void PTXInstPrinter::printInst(const MCInst *MI, raw_ostream &O,
+ StringRef Annot) {
+ printPredicate(MI, O);
+ switch (MI->getOpcode()) {
+ default:
+ printInstruction(MI, O);
+ break;
+ case PTX::CALL:
+ printCall(MI, O);
+ }
+ O << ";";
+ printAnnotation(O, Annot);
+}
+
+void PTXInstPrinter::printPredicate(const MCInst *MI, raw_ostream &O) {
+  // The predicate operands are normally the last two operands; for CALL
+  // they are the first two.
+ int RegIndex;
+ int OpIndex;
+
+ if (MI->getOpcode() == PTX::CALL) {
+ RegIndex = 0;
+ OpIndex = 1;
+ } else {
+ RegIndex = MI->getNumOperands()-2;
+ OpIndex = MI->getNumOperands()-1;
+ }
+
+ int PredOp = MI->getOperand(OpIndex).getImm();
+ if (PredOp == PTXPredicate::None)
+ return;
+
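+  // Print the guard: '!' for a negated predicate, '@' for a normal one,
+  // followed by the predicate register itself.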
+ if (PredOp == PTXPredicate::Negate)
+ O << '!';
+ else
+ O << '@';
+
+ printOperand(MI, RegIndex, O);
+}
+
+void PTXInstPrinter::printCall(const MCInst *MI, raw_ostream &O) {
+ O << "\tcall.uni\t";
+  // The first two operands hold the predicate, which printPredicate has
+  // already emitted.
+ unsigned Index = 2;
+ unsigned NumRets = MI->getOperand(Index++).getImm();
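+  // The printed form is e.g. "call.uni (%ret0), target, (%arg0, %arg1);",
+  // with the return list omitted when there are no return values.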
+
+ if (NumRets > 0) {
+ O << "(";
+ printOperand(MI, Index++, O);
+ for (unsigned i = 1; i < NumRets; ++i) {
+ O << ", ";
+ printOperand(MI, Index++, O);
+ }
+ O << "), ";
+ }
+
+ O << *(MI->getOperand(Index++).getExpr()) << ", (";
+
+ unsigned NumArgs = MI->getOperand(Index++).getImm();
+ if (NumArgs > 0) {
+ printOperand(MI, Index++, O);
+ for (unsigned i = 1; i < NumArgs; ++i) {
+ O << ", ";
+ printOperand(MI, Index++, O);
+ }
+ }
+ O << ")";
+}
+
+void PTXInstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ const MCOperand &Op = MI->getOperand(OpNo);
+ if (Op.isImm()) {
+ O << Op.getImm();
+ } else if (Op.isFPImm()) {
+ double Imm = Op.getFPImm();
+ APFloat FPImm(Imm);
+ APInt FPIntImm = FPImm.bitcastToAPInt();
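+    // PTX spells a double immediate as "0D" followed by all 16 hex digits,
+    // e.g. 1.0 becomes 0D3FF0000000000000.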
+ O << "0D";
+ // PTX requires us to output the full 64 bits, even if the number is zero
+ if (FPIntImm.getZExtValue() > 0) {
+ O << FPIntImm.toString(16, false);
+ } else {
+ O << "0000000000000000";
+ }
+ } else {
+ assert(Op.isExpr() && "unknown operand kind in printOperand");
+ const MCExpr *Expr = Op.getExpr();
+ if (const MCSymbolRefExpr *SymRefExpr = dyn_cast<MCSymbolRefExpr>(Expr)) {
+ const MCSymbol &Sym = SymRefExpr->getSymbol();
+ O << Sym.getName();
+ } else {
+ O << *Op.getExpr();
+ }
+ }
+}
+
+void PTXInstPrinter::printMemOperand(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ // By definition, operand OpNo+1 is an i32imm
+ const MCOperand &Op2 = MI->getOperand(OpNo+1);
+ printOperand(MI, OpNo, O);
+ if (Op2.getImm() == 0)
+ return; // don't print "+0"
+ O << "+" << Op2.getImm();
+}
+
+void PTXInstPrinter::printRoundingMode(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ const MCOperand &Op = MI->getOperand(OpNo);
+  assert(Op.isImm() && "Rounding modes must be immediate values");
+ switch (Op.getImm()) {
+ default:
+ llvm_unreachable("Unknown rounding mode!");
+ case PTXRoundingMode::RndDefault:
+ llvm_unreachable("FP rounding-mode pass did not handle instruction!");
+ break;
+ case PTXRoundingMode::RndNone:
+ // Do not print anything.
+ break;
+ case PTXRoundingMode::RndNearestEven:
+ O << ".rn";
+ break;
+ case PTXRoundingMode::RndTowardsZero:
+ O << ".rz";
+ break;
+ case PTXRoundingMode::RndNegInf:
+ O << ".rm";
+ break;
+ case PTXRoundingMode::RndPosInf:
+ O << ".rp";
+ break;
+ case PTXRoundingMode::RndApprox:
+ O << ".approx";
+ break;
+ case PTXRoundingMode::RndNearestEvenInt:
+ O << ".rni";
+ break;
+ case PTXRoundingMode::RndTowardsZeroInt:
+ O << ".rzi";
+ break;
+ case PTXRoundingMode::RndNegInfInt:
+ O << ".rmi";
+ break;
+ case PTXRoundingMode::RndPosInfInt:
+ O << ".rpi";
+ break;
+ }
+}
+
diff --git a/lib/Target/PTX/InstPrinter/PTXInstPrinter.h b/lib/Target/PTX/InstPrinter/PTXInstPrinter.h
new file mode 100644
index 0000000..86dfd48
--- /dev/null
+++ b/lib/Target/PTX/InstPrinter/PTXInstPrinter.h
@@ -0,0 +1,47 @@
+//===-- PTXInstPrinter.h - Convert PTX MCInst to assembly syntax ----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This class prints a PTX MCInst to a .ptx file.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef PTXINSTPRINTER_H
+#define PTXINSTPRINTER_H
+
+#include "llvm/MC/MCInstPrinter.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+
+namespace llvm {
+
+class MCOperand;
+
+class PTXInstPrinter : public MCInstPrinter {
+public:
+ PTXInstPrinter(const MCAsmInfo &MAI, const MCSubtargetInfo &STI);
+
+ virtual void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot);
+ virtual StringRef getOpcodeName(unsigned Opcode) const;
+ virtual void printRegName(raw_ostream &OS, unsigned RegNo) const;
+
+ static const char *getInstructionName(unsigned Opcode);
+
+ // Autogenerated by tblgen.
+ void printInstruction(const MCInst *MI, raw_ostream &O);
+ static const char *getRegisterName(unsigned RegNo);
+
+ void printPredicate(const MCInst *MI, raw_ostream &O);
+ void printCall(const MCInst *MI, raw_ostream &O);
+ void printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+ void printMemOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+ void printRoundingMode(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+};
+}
+
+#endif
+
diff --git a/lib/Target/PTX/MCTargetDesc/CMakeLists.txt b/lib/Target/PTX/MCTargetDesc/CMakeLists.txt
index df0f63f..811ef4b 100644
--- a/lib/Target/PTX/MCTargetDesc/CMakeLists.txt
+++ b/lib/Target/PTX/MCTargetDesc/CMakeLists.txt
@@ -2,3 +2,12 @@ add_llvm_library(LLVMPTXDesc
PTXMCTargetDesc.cpp
PTXMCAsmInfo.cpp
)
+
+add_llvm_library_dependencies(LLVMPTXDesc
+ LLVMMC
+ LLVMPTXInfo
+ LLVMPTXAsmPrinter
+ LLVMSupport
+ )
+
+add_dependencies(LLVMPTXDesc PTXCommonTableGen)
diff --git a/lib/Target/PTX/MCTargetDesc/PTXBaseInfo.h b/lib/Target/PTX/MCTargetDesc/PTXBaseInfo.h
new file mode 100644
index 0000000..c6094be
--- /dev/null
+++ b/lib/Target/PTX/MCTargetDesc/PTXBaseInfo.h
@@ -0,0 +1,63 @@
+//===-- PTXBaseInfo.h - Top level definitions for PTX -------- --*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains small standalone helper functions and enum definitions for
+// the PTX target useful for the compiler back-end and the MC libraries.
+// As such, it deliberately does not include references to LLVM core
+// code gen types, passes, etc.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef PTXBASEINFO_H
+#define PTXBASEINFO_H
+
+#include "PTXMCTargetDesc.h"
+
+namespace llvm {
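+  // These state-space numbers double as LLVM address-space numbers on
+  // pointer types (see the .ptr handling in PTXAsmPrinter).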
+ namespace PTXStateSpace {
+ enum {
+ Global = 0, // default to global state space
+ Constant = 1,
+ Local = 2,
+ Parameter = 3,
+ Shared = 4
+ };
+ } // namespace PTXStateSpace
+
+ namespace PTXPredicate {
+ enum {
+ Normal = 0,
+ Negate = 1,
+ None = 2
+ };
+ } // namespace PTXPredicate
+
+ /// Namespace to hold all target-specific flags.
+ namespace PTXRoundingMode {
+ // Instruction Flags
+ enum {
+ // Rounding Mode Flags
+ RndMask = 15,
+ RndDefault = 0, // ---
+ RndNone = 1, // <NONE>
+ RndNearestEven = 2, // .rn
+ RndTowardsZero = 3, // .rz
+ RndNegInf = 4, // .rm
+ RndPosInf = 5, // .rp
+ RndApprox = 6, // .approx
+ RndNearestEvenInt = 7, // .rni
+ RndTowardsZeroInt = 8, // .rzi
+ RndNegInfInt = 9, // .rmi
+ RndPosInfInt = 10 // .rpi
+ };
+  } // namespace PTXRoundingMode
+} // namespace llvm
+
+#endif
+
diff --git a/lib/Target/PTX/MCTargetDesc/PTXMCTargetDesc.cpp b/lib/Target/PTX/MCTargetDesc/PTXMCTargetDesc.cpp
index 23f70bd..a5af3b8 100644
--- a/lib/Target/PTX/MCTargetDesc/PTXMCTargetDesc.cpp
+++ b/lib/Target/PTX/MCTargetDesc/PTXMCTargetDesc.cpp
@@ -13,10 +13,12 @@
#include "PTXMCTargetDesc.h"
#include "PTXMCAsmInfo.h"
+#include "InstPrinter/PTXInstPrinter.h"
+#include "llvm/MC/MCCodeGenInfo.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSubtargetInfo.h"
-#include "llvm/Target/TargetRegistry.h"
+#include "llvm/Support/TargetRegistry.h"
#define GET_INSTRINFO_MC_DESC
#include "PTXGenInstrInfo.inc"
@@ -35,9 +37,11 @@ static MCInstrInfo *createPTXMCInstrInfo() {
return X;
}
-extern "C" void LLVMInitializePTXMCInstrInfo() {
- TargetRegistry::RegisterMCInstrInfo(ThePTX32Target, createPTXMCInstrInfo);
- TargetRegistry::RegisterMCInstrInfo(ThePTX64Target, createPTXMCInstrInfo);
+static MCRegisterInfo *createPTXMCRegisterInfo(StringRef TT) {
+ MCRegisterInfo *X = new MCRegisterInfo();
+ // PTX does not have a return address register.
+ InitPTXMCRegisterInfo(X, 0);
+ return X;
}
static MCSubtargetInfo *createPTXMCSubtargetInfo(StringRef TT, StringRef CPU,
@@ -47,14 +51,45 @@ static MCSubtargetInfo *createPTXMCSubtargetInfo(StringRef TT, StringRef CPU,
return X;
}
-extern "C" void LLVMInitializePTXMCSubtargetInfo() {
+static MCCodeGenInfo *createPTXMCCodeGenInfo(StringRef TT, Reloc::Model RM,
+ CodeModel::Model CM) {
+ MCCodeGenInfo *X = new MCCodeGenInfo();
+ X->InitMCCodeGenInfo(RM, CM);
+ return X;
+}
+
+static MCInstPrinter *createPTXMCInstPrinter(const Target &T,
+ unsigned SyntaxVariant,
+ const MCAsmInfo &MAI,
+ const MCSubtargetInfo &STI) {
+ assert(SyntaxVariant == 0 && "We only have one syntax variant");
+ return new PTXInstPrinter(MAI, STI);
+}
+
+extern "C" void LLVMInitializePTXTargetMC() {
+ // Register the MC asm info.
+ RegisterMCAsmInfo<PTXMCAsmInfo> X(ThePTX32Target);
+ RegisterMCAsmInfo<PTXMCAsmInfo> Y(ThePTX64Target);
+
+ // Register the MC codegen info.
+ TargetRegistry::RegisterMCCodeGenInfo(ThePTX32Target, createPTXMCCodeGenInfo);
+ TargetRegistry::RegisterMCCodeGenInfo(ThePTX64Target, createPTXMCCodeGenInfo);
+
+ // Register the MC instruction info.
+ TargetRegistry::RegisterMCInstrInfo(ThePTX32Target, createPTXMCInstrInfo);
+ TargetRegistry::RegisterMCInstrInfo(ThePTX64Target, createPTXMCInstrInfo);
+
+ // Register the MC register info.
+ TargetRegistry::RegisterMCRegInfo(ThePTX32Target, createPTXMCRegisterInfo);
+ TargetRegistry::RegisterMCRegInfo(ThePTX64Target, createPTXMCRegisterInfo);
+
+ // Register the MC subtarget info.
TargetRegistry::RegisterMCSubtargetInfo(ThePTX32Target,
createPTXMCSubtargetInfo);
TargetRegistry::RegisterMCSubtargetInfo(ThePTX64Target,
createPTXMCSubtargetInfo);
-}
-extern "C" void LLVMInitializePTXMCAsmInfo() {
- RegisterMCAsmInfo<PTXMCAsmInfo> X(ThePTX32Target);
- RegisterMCAsmInfo<PTXMCAsmInfo> Y(ThePTX64Target);
+ // Register the MCInstPrinter.
+ TargetRegistry::RegisterMCInstPrinter(ThePTX32Target, createPTXMCInstPrinter);
+ TargetRegistry::RegisterMCInstPrinter(ThePTX64Target, createPTXMCInstPrinter);
}
diff --git a/lib/Target/PTX/Makefile b/lib/Target/PTX/Makefile
index 93dd38a..fa09634 100644
--- a/lib/Target/PTX/Makefile
+++ b/lib/Target/PTX/Makefile
@@ -13,12 +13,11 @@ TARGET = PTX
# Make sure that tblgen is run, first thing.
BUILT_SOURCES = PTXGenAsmWriter.inc \
- PTXGenCallingConv.inc \
PTXGenDAGISel.inc \
PTXGenInstrInfo.inc \
PTXGenRegisterInfo.inc \
PTXGenSubtargetInfo.inc
-DIRS = TargetInfo MCTargetDesc
+DIRS = InstPrinter TargetInfo MCTargetDesc
include $(LEVEL)/Makefile.common
diff --git a/lib/Target/PTX/PTX.h b/lib/Target/PTX/PTX.h
index 28cab24..7d46cce 100644
--- a/lib/Target/PTX/PTX.h
+++ b/lib/Target/PTX/PTX.h
@@ -15,34 +15,30 @@
#ifndef PTX_H
#define PTX_H
-#include "MCTargetDesc/PTXMCTargetDesc.h"
+#include "MCTargetDesc/PTXBaseInfo.h"
#include "llvm/Target/TargetMachine.h"
namespace llvm {
+ class MachineInstr;
+ class MCInst;
+ class PTXAsmPrinter;
class PTXTargetMachine;
class FunctionPass;
- namespace PTX {
- enum StateSpace {
- GLOBAL = 0, // default to global state space
- CONSTANT = 1,
- LOCAL = 2,
- PARAMETER = 3,
- SHARED = 4
- };
-
- enum Predicate {
- PRED_NORMAL = 0,
- PRED_NEGATE = 1
- };
- } // namespace PTX
-
FunctionPass *createPTXISelDag(PTXTargetMachine &TM,
CodeGenOpt::Level OptLevel);
FunctionPass *createPTXMFInfoExtract(PTXTargetMachine &TM,
CodeGenOpt::Level OptLevel);
+ FunctionPass *createPTXFPRoundingModePass(PTXTargetMachine &TM,
+ CodeGenOpt::Level OptLevel);
+
+ FunctionPass *createPTXRegisterAllocator();
+
+ void LowerPTXMachineInstrToMCInst(const MachineInstr *MI, MCInst &OutMI,
+ PTXAsmPrinter &AP);
+
} // namespace llvm;
#endif // PTX_H
diff --git a/lib/Target/PTX/PTX.td b/lib/Target/PTX/PTX.td
index f6fbe9f..693bb9c 100644
--- a/lib/Target/PTX/PTX.td
+++ b/lib/Target/PTX/PTX.td
@@ -52,13 +52,13 @@ def FeatureSM12 : SubtargetFeature<"sm12", "PTXTarget", "PTX_SM_1_2",
def FeatureSM13 : SubtargetFeature<"sm13", "PTXTarget", "PTX_SM_1_3",
"Use Shader Model 1.3">;
def FeatureSM20 : SubtargetFeature<"sm20", "PTXTarget", "PTX_SM_2_0",
- "Use Shader Model 2.0">;
+ "Use Shader Model 2.0", [FeatureDouble]>;
def FeatureSM21 : SubtargetFeature<"sm21", "PTXTarget", "PTX_SM_2_1",
- "Use Shader Model 2.1">;
+ "Use Shader Model 2.1", [FeatureDouble]>;
def FeatureSM22 : SubtargetFeature<"sm22", "PTXTarget", "PTX_SM_2_2",
- "Use Shader Model 2.2">;
+ "Use Shader Model 2.2", [FeatureDouble]>;
def FeatureSM23 : SubtargetFeature<"sm23", "PTXTarget", "PTX_SM_2_3",
- "Use Shader Model 2.3">;
+ "Use Shader Model 2.3", [FeatureDouble]>;
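+// Shader Model 2.x hardware supports double precision, hence the added
+// FeatureDouble dependency on each SM 2.x feature.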
def FeatureCOMPUTE10 : SubtargetFeature<"compute10", "PTXTarget",
"PTX_COMPUTE_1_0",
@@ -74,7 +74,8 @@ def FeatureCOMPUTE13 : SubtargetFeature<"compute13", "PTXTarget",
"Use Compute Compatibility 1.3">;
def FeatureCOMPUTE20 : SubtargetFeature<"compute20", "PTXTarget",
"PTX_COMPUTE_2_0",
- "Use Compute Compatibility 2.0">;
+ "Use Compute Compatibility 2.0",
+ [FeatureDouble]>;
//===----------------------------------------------------------------------===//
// PTX supported processors
@@ -113,12 +114,6 @@ def : Proc<"fermi", [FeatureSM20, FeatureDouble]>;
include "PTXRegisterInfo.td"
//===----------------------------------------------------------------------===//
-// Calling Conventions
-//===----------------------------------------------------------------------===//
-
-include "PTXCallingConv.td"
-
-//===----------------------------------------------------------------------===//
// Instruction Descriptions
//===----------------------------------------------------------------------===//
@@ -127,9 +122,20 @@ include "PTXInstrInfo.td"
def PTXInstrInfo : InstrInfo;
//===----------------------------------------------------------------------===//
+// Assembly printer
+//===----------------------------------------------------------------------===//
+// PTX uses the MC printer for asm output, so make sure the TableGen
+// AsmWriter bits get associated with the correct class.
+def PTXAsmWriter : AsmWriter {
+ string AsmWriterClassName = "InstPrinter";
+ bit isMCAsmWriter = 1;
+}
+
+//===----------------------------------------------------------------------===//
// Target Declaration
//===----------------------------------------------------------------------===//
def PTX : Target {
let InstructionSet = PTXInstrInfo;
+ let AssemblyWriters = [PTXAsmWriter];
}
diff --git a/lib/Target/PTX/PTXAsmPrinter.cpp b/lib/Target/PTX/PTXAsmPrinter.cpp
index 2848d54..733744b 100644
--- a/lib/Target/PTX/PTXAsmPrinter.cpp
+++ b/lib/Target/PTX/PTXAsmPrinter.cpp
@@ -15,9 +15,14 @@
#define DEBUG_TYPE "ptx-asm-printer"
#include "PTX.h"
+#include "PTXAsmPrinter.h"
#include "PTXMachineFunctionInfo.h"
+#include "PTXParamManager.h"
+#include "PTXRegisterInfo.h"
#include "PTXTargetMachine.h"
+#include "llvm/Argument.h"
#include "llvm/DerivedTypes.h"
+#include "llvm/Function.h"
#include "llvm/Module.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/StringExtras.h"
@@ -28,69 +33,32 @@
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/Target/Mangler.h"
#include "llvm/Target/TargetLoweringObjectFile.h"
-#include "llvm/Target/TargetRegistry.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/Path.h"
+#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
-namespace {
-class PTXAsmPrinter : public AsmPrinter {
-public:
- explicit PTXAsmPrinter(TargetMachine &TM, MCStreamer &Streamer)
- : AsmPrinter(TM, Streamer) {}
-
- const char *getPassName() const { return "PTX Assembly Printer"; }
-
- bool doFinalization(Module &M);
-
- virtual void EmitStartOfAsmFile(Module &M);
-
- virtual bool runOnMachineFunction(MachineFunction &MF);
-
- virtual void EmitFunctionBodyStart();
- virtual void EmitFunctionBodyEnd() { OutStreamer.EmitRawText(Twine("}")); }
-
- virtual void EmitInstruction(const MachineInstr *MI);
-
- void printOperand(const MachineInstr *MI, int opNum, raw_ostream &OS);
- void printMemOperand(const MachineInstr *MI, int opNum, raw_ostream &OS,
- const char *Modifier = 0);
- void printParamOperand(const MachineInstr *MI, int opNum, raw_ostream &OS,
- const char *Modifier = 0);
- void printReturnOperand(const MachineInstr *MI, int opNum, raw_ostream &OS,
- const char *Modifier = 0);
- void printPredicateOperand(const MachineInstr *MI, raw_ostream &O);
-
- unsigned GetOrCreateSourceID(StringRef FileName,
- StringRef DirName);
-
- // autogen'd.
- void printInstruction(const MachineInstr *MI, raw_ostream &OS);
- static const char *getRegisterName(unsigned RegNo);
-
-private:
- void EmitVariableDeclaration(const GlobalVariable *gv);
- void EmitFunctionDeclaration();
-
- StringMap<unsigned> SourceIdMap;
-}; // class PTXAsmPrinter
-} // namespace
-
static const char PARAM_PREFIX[] = "__param_";
static const char RETURN_PREFIX[] = "__ret_";
-static const char *getRegisterTypeName(unsigned RegNo) {
-#define TEST_REGCLS(cls, clsstr) \
- if (PTX::cls ## RegisterClass->contains(RegNo)) return # clsstr;
+static const char *getRegisterTypeName(unsigned RegNo,
+ const MachineRegisterInfo& MRI) {
+ const TargetRegisterClass *TRC = MRI.getRegClass(RegNo);
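+  // Map the register's class to the PTX type suffix used in ".reg"
+  // declarations (pred, b16, b32, b64, f32, f64).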
+
+#define TEST_REGCLS(cls, clsstr) \
+ if (PTX::cls ## RegisterClass == TRC) return # clsstr;
+
TEST_REGCLS(RegPred, pred);
TEST_REGCLS(RegI16, b16);
TEST_REGCLS(RegI32, b32);
@@ -106,16 +74,16 @@ static const char *getRegisterTypeName(unsigned RegNo) {
static const char *getStateSpaceName(unsigned addressSpace) {
switch (addressSpace) {
default: llvm_unreachable("Unknown state space");
- case PTX::GLOBAL: return "global";
- case PTX::CONSTANT: return "const";
- case PTX::LOCAL: return "local";
- case PTX::PARAMETER: return "param";
- case PTX::SHARED: return "shared";
+ case PTXStateSpace::Global: return "global";
+ case PTXStateSpace::Constant: return "const";
+ case PTXStateSpace::Local: return "local";
+ case PTXStateSpace::Parameter: return "param";
+ case PTXStateSpace::Shared: return "shared";
}
return NULL;
}
-static const char *getTypeName(const Type* type) {
+static const char *getTypeName(Type* type) {
while (true) {
switch (type->getTypeID()) {
default: llvm_unreachable("Unknown type");
@@ -130,7 +98,7 @@ static const char *getTypeName(const Type* type) {
}
case Type::ArrayTyID:
case Type::PointerTyID:
- type = dyn_cast<const SequentialType>(type)->getElementType();
+ type = dyn_cast<SequentialType>(type)->getElementType();
break;
}
}
@@ -170,6 +138,7 @@ void PTXAsmPrinter::EmitStartOfAsmFile(Module &M)
{
const PTXSubtarget& ST = TM.getSubtarget<PTXSubtarget>();
+ // Emit the PTX .version and .target attributes
OutStreamer.EmitRawText(Twine("\t.version " + ST.getPTXVersionString()));
OutStreamer.EmitRawText(Twine("\t.target " + ST.getTargetString() +
(ST.supportsDouble() ? ""
@@ -203,177 +172,118 @@ void PTXAsmPrinter::EmitStartOfAsmFile(Module &M)
EmitVariableDeclaration(i);
}
-bool PTXAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
- SetupMachineFunction(MF);
- EmitFunctionDeclaration();
- EmitFunctionBody();
- return false;
-}
-
void PTXAsmPrinter::EmitFunctionBodyStart() {
OutStreamer.EmitRawText(Twine("{"));
const PTXMachineFunctionInfo *MFI = MF->getInfo<PTXMachineFunctionInfo>();
+ const PTXParamManager &PM = MFI->getParamManager();
+
+ // Print register definitions
+ std::string regDefs;
+ unsigned numRegs;
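+
+  // Each class becomes one parameterized bank declaration, e.g.
+  // ".reg .pred %p<N>;" defines predicates %p0 through %p(N-1).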
+
+ // pred
+ numRegs = MFI->getNumRegistersForClass(PTX::RegPredRegisterClass);
+ if(numRegs > 0) {
+ regDefs += "\t.reg .pred %p<";
+ regDefs += utostr(numRegs);
+ regDefs += ">;\n";
+ }
+
+ // i16
+ numRegs = MFI->getNumRegistersForClass(PTX::RegI16RegisterClass);
+ if(numRegs > 0) {
+ regDefs += "\t.reg .b16 %rh<";
+ regDefs += utostr(numRegs);
+ regDefs += ">;\n";
+ }
+
+ // i32
+ numRegs = MFI->getNumRegistersForClass(PTX::RegI32RegisterClass);
+ if(numRegs > 0) {
+ regDefs += "\t.reg .b32 %r<";
+ regDefs += utostr(numRegs);
+ regDefs += ">;\n";
+ }
+
+ // i64
+ numRegs = MFI->getNumRegistersForClass(PTX::RegI64RegisterClass);
+ if(numRegs > 0) {
+ regDefs += "\t.reg .b64 %rd<";
+ regDefs += utostr(numRegs);
+ regDefs += ">;\n";
+ }
+
+ // f32
+ numRegs = MFI->getNumRegistersForClass(PTX::RegF32RegisterClass);
+ if(numRegs > 0) {
+ regDefs += "\t.reg .f32 %f<";
+ regDefs += utostr(numRegs);
+ regDefs += ">;\n";
+ }
+
+ // f64
+ numRegs = MFI->getNumRegistersForClass(PTX::RegF64RegisterClass);
+ if(numRegs > 0) {
+ regDefs += "\t.reg .f64 %fd<";
+ regDefs += utostr(numRegs);
+ regDefs += ">;\n";
+ }
- // Print local variable definition
- for (PTXMachineFunctionInfo::reg_iterator
- i = MFI->localVarRegBegin(), e = MFI->localVarRegEnd(); i != e; ++ i) {
- unsigned reg = *i;
-
- std::string def = "\t.reg .";
- def += getRegisterTypeName(reg);
- def += ' ';
- def += getRegisterName(reg);
- def += ';';
- OutStreamer.EmitRawText(Twine(def));
+ // Local params
+ for (PTXParamManager::param_iterator i = PM.local_begin(), e = PM.local_end();
+ i != e; ++i) {
+ regDefs += "\t.param .b";
+ regDefs += utostr(PM.getParamSize(*i));
+ regDefs += " ";
+ regDefs += PM.getParamName(*i);
+ regDefs += ";\n";
}
+ OutStreamer.EmitRawText(Twine(regDefs));
+
+
const MachineFrameInfo* FrameInfo = MF->getFrameInfo();
DEBUG(dbgs() << "Have " << FrameInfo->getNumObjects()
<< " frame object(s)\n");
for (unsigned i = 0, e = FrameInfo->getNumObjects(); i != e; ++i) {
DEBUG(dbgs() << "Size of object: " << FrameInfo->getObjectSize(i) << "\n");
if (FrameInfo->getObjectSize(i) > 0) {
- std::string def = "\t.reg .b";
- def += utostr(FrameInfo->getObjectSize(i)*8); // Convert to bits
- def += " s";
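+      // Emit the object as a byte array in local space, e.g.
+      // ".local .align 4 .b8 __local0[16];".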
+ std::string def = "\t.local .align ";
+ def += utostr(FrameInfo->getObjectAlignment(i));
+ def += " .b8";
+ def += " __local";
def += utostr(i);
+ def += "[";
+      def += utostr(FrameInfo->getObjectSize(i)); // Size in bytes
+ def += "]";
def += ";";
OutStreamer.EmitRawText(Twine(def));
}
}
-}
-
-void PTXAsmPrinter::EmitInstruction(const MachineInstr *MI) {
- std::string str;
- str.reserve(64);
-
- raw_string_ostream OS(str);
-
- DebugLoc DL = MI->getDebugLoc();
- if (!DL.isUnknown()) {
-
- const MDNode *S = DL.getScope(MF->getFunction()->getContext());
-
- // This is taken from DwarfDebug.cpp, which is conveniently not a public
- // LLVM class.
- StringRef Fn;
- StringRef Dir;
- unsigned Src = 1;
- if (S) {
- DIDescriptor Scope(S);
- if (Scope.isCompileUnit()) {
- DICompileUnit CU(S);
- Fn = CU.getFilename();
- Dir = CU.getDirectory();
- } else if (Scope.isFile()) {
- DIFile F(S);
- Fn = F.getFilename();
- Dir = F.getDirectory();
- } else if (Scope.isSubprogram()) {
- DISubprogram SP(S);
- Fn = SP.getFilename();
- Dir = SP.getDirectory();
- } else if (Scope.isLexicalBlock()) {
- DILexicalBlock DB(S);
- Fn = DB.getFilename();
- Dir = DB.getDirectory();
- } else
- assert(0 && "Unexpected scope info");
-
- Src = GetOrCreateSourceID(Fn, Dir);
- }
- OutStreamer.EmitDwarfLocDirective(Src, DL.getLine(), DL.getCol(),
- 0, 0, 0, Fn);
-
- const MCDwarfLoc& MDL = OutContext.getCurrentDwarfLoc();
-
- OS << "\t.loc ";
- OS << utostr(MDL.getFileNum());
- OS << " ";
- OS << utostr(MDL.getLine());
- OS << " ";
- OS << utostr(MDL.getColumn());
- OS << "\n";
- }
-
-
- // Emit predicate
- printPredicateOperand(MI, OS);
-
- // Write instruction to str
- printInstruction(MI, OS);
- OS << ';';
- OS.flush();
-
- StringRef strref = StringRef(str);
- OutStreamer.EmitRawText(strref);
-}
-
-void PTXAsmPrinter::printOperand(const MachineInstr *MI, int opNum,
- raw_ostream &OS) {
- const MachineOperand &MO = MI->getOperand(opNum);
-
- switch (MO.getType()) {
- default:
- llvm_unreachable("<unknown operand type>");
- break;
- case MachineOperand::MO_GlobalAddress:
- OS << *Mang->getSymbol(MO.getGlobal());
- break;
- case MachineOperand::MO_Immediate:
- OS << (long) MO.getImm();
- break;
- case MachineOperand::MO_MachineBasicBlock:
- OS << *MO.getMBB()->getSymbol();
- break;
- case MachineOperand::MO_Register:
- OS << getRegisterName(MO.getReg());
- break;
- case MachineOperand::MO_FPImmediate:
- APInt constFP = MO.getFPImm()->getValueAPF().bitcastToAPInt();
- bool isFloat = MO.getFPImm()->getType()->getTypeID() == Type::FloatTyID;
- // Emit 0F for 32-bit floats and 0D for 64-bit doubles.
- if (isFloat) {
- OS << "0F";
- }
- else {
- OS << "0D";
- }
- // Emit the encoded floating-point value.
- if (constFP.getZExtValue() > 0) {
- OS << constFP.toString(16, false);
- }
- else {
- OS << "00000000";
- // If We have a double-precision zero, pad to 8-bytes.
- if (!isFloat) {
- OS << "00000000";
- }
- }
- break;
- }
-}
-
-void PTXAsmPrinter::printMemOperand(const MachineInstr *MI, int opNum,
- raw_ostream &OS, const char *Modifier) {
- printOperand(MI, opNum, OS);
- if (MI->getOperand(opNum+1).isImm() && MI->getOperand(opNum+1).getImm() == 0)
- return; // don't print "+0"
-
- OS << "+";
- printOperand(MI, opNum+1, OS);
+ //unsigned Index = 1;
+ // Print parameter passing params
+ //for (PTXMachineFunctionInfo::param_iterator
+ // i = MFI->paramBegin(), e = MFI->paramEnd(); i != e; ++i) {
+ // std::string def = "\t.param .b";
+ // def += utostr(*i);
+ // def += " __ret_";
+ // def += utostr(Index);
+ // Index++;
+ // def += ";";
+ // OutStreamer.EmitRawText(Twine(def));
+ //}
}
-void PTXAsmPrinter::printParamOperand(const MachineInstr *MI, int opNum,
- raw_ostream &OS, const char *Modifier) {
- OS << PARAM_PREFIX << (int) MI->getOperand(opNum).getImm() + 1;
+void PTXAsmPrinter::EmitFunctionBodyEnd() {
+ OutStreamer.EmitRawText(Twine("}"));
}
-void PTXAsmPrinter::printReturnOperand(const MachineInstr *MI, int opNum,
- raw_ostream &OS, const char *Modifier) {
- OS << RETURN_PREFIX << (int) MI->getOperand(opNum).getImm() + 1;
+void PTXAsmPrinter::EmitInstruction(const MachineInstr *MI) {
+ MCInst TmpInst;
+ LowerPTXMachineInstrToMCInst(MI, TmpInst, *this);
+ OutStreamer.EmitInstruction(TmpInst);
}
void PTXAsmPrinter::EmitVariableDeclaration(const GlobalVariable *gv) {
@@ -400,14 +310,14 @@ void PTXAsmPrinter::EmitVariableDeclaration(const GlobalVariable *gv) {
unsigned alignment = gv->getAlignment();
if (alignment != 0) {
decl += ".align ";
- decl += utostr(Log2_32(gv->getAlignment()));
+ decl += utostr(gv->getAlignment());
decl += " ";
}
if (PointerType::classof(gv->getType())) {
- const PointerType* pointerTy = dyn_cast<const PointerType>(gv->getType());
- const Type* elementTy = pointerTy->getElementType();
+ PointerType* pointerTy = dyn_cast<PointerType>(gv->getType());
+ Type* elementTy = pointerTy->getElementType();
decl += ".b8 ";
decl += gvsym->getName();
@@ -417,14 +327,14 @@ void PTXAsmPrinter::EmitVariableDeclaration(const GlobalVariable *gv) {
{
assert(elementTy->isArrayTy() && "Only pointers to arrays are supported");
- const ArrayType* arrayTy = dyn_cast<const ArrayType>(elementTy);
+ ArrayType* arrayTy = dyn_cast<ArrayType>(elementTy);
elementTy = arrayTy->getElementType();
unsigned numElements = arrayTy->getNumElements();
while (elementTy->isArrayTy()) {
- arrayTy = dyn_cast<const ArrayType>(elementTy);
+ arrayTy = dyn_cast<ArrayType>(elementTy);
elementTy = arrayTy->getElementType();
numElements *= arrayTy->getNumElements();
@@ -447,7 +357,7 @@ void PTXAsmPrinter::EmitVariableDeclaration(const GlobalVariable *gv) {
if (gv->hasInitializer())
{
- const Constant *C = gv->getInitializer();
+ const Constant *C = gv->getInitializer();
if (const ConstantArray *CA = dyn_cast<ConstantArray>(C))
{
decl += " = {";
@@ -484,7 +394,7 @@ void PTXAsmPrinter::EmitVariableDeclaration(const GlobalVariable *gv) {
OutStreamer.AddBlankLine();
}
-void PTXAsmPrinter::EmitFunctionDeclaration() {
+void PTXAsmPrinter::EmitFunctionEntryLabel() {
// The function label could have already been emitted if two symbols end up
// conflicting due to asm renaming. Detect this and emit an error.
if (!CurrentFnSym->isUndefined()) {
@@ -494,25 +404,39 @@ void PTXAsmPrinter::EmitFunctionDeclaration() {
}
const PTXMachineFunctionInfo *MFI = MF->getInfo<PTXMachineFunctionInfo>();
+ const PTXParamManager &PM = MFI->getParamManager();
const bool isKernel = MFI->isKernel();
const PTXSubtarget& ST = TM.getSubtarget<PTXSubtarget>();
+ const MachineRegisterInfo& MRI = MF->getRegInfo();
std::string decl = isKernel ? ".entry" : ".func";
- unsigned cnt = 0;
-
if (!isKernel) {
decl += " (";
- for (PTXMachineFunctionInfo::ret_iterator
- i = MFI->retRegBegin(), e = MFI->retRegEnd(), b = i;
- i != e; ++i) {
- if (i != b) {
- decl += ", ";
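+    // Return values are declared either in .param space or as registers,
+    // depending on how the subtarget passes device-function results.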
+ if (ST.useParamSpaceForDeviceArgs()) {
+ for (PTXParamManager::param_iterator i = PM.ret_begin(), e = PM.ret_end(),
+ b = i; i != e; ++i) {
+ if (i != b) {
+ decl += ", ";
+ }
+
+ decl += ".param .b";
+ decl += utostr(PM.getParamSize(*i));
+ decl += " ";
+ decl += PM.getParamName(*i);
+ }
+ } else {
+ for (PTXMachineFunctionInfo::reg_iterator
+ i = MFI->retreg_begin(), e = MFI->retreg_end(), b = i;
+ i != e; ++i) {
+ if (i != b) {
+ decl += ", ";
+ }
+ decl += ".reg .";
+ decl += getRegisterTypeName(*i, MRI);
+ decl += " ";
+ decl += MFI->getRegisterName(*i);
}
- decl += ".reg .";
- decl += getRegisterTypeName(*i);
- decl += " ";
- decl += getRegisterName(*i);
}
decl += ")";
}
@@ -523,26 +447,65 @@ void PTXAsmPrinter::EmitFunctionDeclaration() {
decl += " (";
- cnt = 0;
+ const Function *F = MF->getFunction();
// Print parameters
- for (PTXMachineFunctionInfo::reg_iterator
- i = MFI->argRegBegin(), e = MFI->argRegEnd(), b = i;
- i != e; ++i) {
- if (i != b) {
- decl += ", ";
- }
- if (isKernel || ST.useParamSpaceForDeviceArgs()) {
+ if (isKernel || ST.useParamSpaceForDeviceArgs()) {
+ /*for (PTXParamManager::param_iterator i = PM.arg_begin(), e = PM.arg_end(),
+ b = i; i != e; ++i) {
+ if (i != b) {
+ decl += ", ";
+ }
+
decl += ".param .b";
- decl += utostr(*i);
+ decl += utostr(PM.getParamSize(*i));
decl += " ";
- decl += PARAM_PREFIX;
- decl += utostr(++cnt);
- } else {
+ decl += PM.getParamName(*i);
+ }*/
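+  // Each kernel argument is declared as e.g. ".param .b64 .ptr .global
+  // __param_1"; the .ptr attribute is emitted only for pointer arguments
+  // on subtargets that support it.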
+ int Counter = 1;
+ for (Function::const_arg_iterator i = F->arg_begin(), e = F->arg_end(),
+ b = i; i != e; ++i) {
+ if (i != b)
+ decl += ", ";
+ const Type *ArgType = (*i).getType();
+ decl += ".param .b";
+ if (ArgType->isPointerTy()) {
+ if (ST.is64Bit())
+ decl += "64";
+ else
+ decl += "32";
+ } else {
+ decl += utostr(ArgType->getPrimitiveSizeInBits());
+ }
+ if (ArgType->isPointerTy() && ST.emitPtrAttribute()) {
+ const PointerType *PtrType = dyn_cast<const PointerType>(ArgType);
+ decl += " .ptr";
+ switch (PtrType->getAddressSpace()) {
+ default:
+ llvm_unreachable("Unknown address space in argument");
+ case PTXStateSpace::Global:
+ decl += " .global";
+ break;
+ case PTXStateSpace::Shared:
+ decl += " .shared";
+ break;
+ }
+ }
+ decl += " __param_";
+ decl += utostr(Counter++);
+ }
+ } else {
+ for (PTXMachineFunctionInfo::reg_iterator
+ i = MFI->argreg_begin(), e = MFI->argreg_end(), b = i;
+ i != e; ++i) {
+ if (i != b) {
+ decl += ", ";
+ }
+
decl += ".reg .";
- decl += getRegisterTypeName(*i);
+ decl += getRegisterTypeName(*i, MRI);
decl += " ";
- decl += getRegisterName(*i);
+ decl += MFI->getRegisterName(*i);
}
}
decl += ")";
@@ -550,25 +513,6 @@ void PTXAsmPrinter::EmitFunctionDeclaration() {
OutStreamer.EmitRawText(Twine(decl));
}
-void PTXAsmPrinter::
-printPredicateOperand(const MachineInstr *MI, raw_ostream &O) {
- int i = MI->findFirstPredOperandIdx();
- if (i == -1)
- llvm_unreachable("missing predicate operand");
-
- unsigned reg = MI->getOperand(i).getReg();
- int predOp = MI->getOperand(i+1).getImm();
-
- DEBUG(dbgs() << "predicate: (" << reg << ", " << predOp << ")\n");
-
- if (reg != PTX::NoRegister) {
- O << '@';
- if (predOp == PTX::PRED_NEGATE)
- O << '!';
- O << getRegisterName(reg);
- }
-}
-
unsigned PTXAsmPrinter::GetOrCreateSourceID(StringRef FileName,
StringRef DirName) {
// If FE did not provide a file name, then assume stdin.
@@ -596,10 +540,58 @@ unsigned PTXAsmPrinter::GetOrCreateSourceID(StringRef FileName,
return SrcId;
}
-#include "PTXGenAsmWriter.inc"
+MCOperand PTXAsmPrinter::GetSymbolRef(const MachineOperand &MO,
+ const MCSymbol *Symbol) {
+ const MCExpr *Expr;
+ Expr = MCSymbolRefExpr::Create(Symbol, MCSymbolRefExpr::VK_None, OutContext);
+ return MCOperand::CreateExpr(Expr);
+}
+
+MCOperand PTXAsmPrinter::lowerOperand(const MachineOperand &MO) {
+ MCOperand MCOp;
+ const PTXMachineFunctionInfo *MFI = MF->getInfo<PTXMachineFunctionInfo>();
+ const MCExpr *Expr;
+ const char *RegSymbolName;
+ switch (MO.getType()) {
+ default:
+ llvm_unreachable("Unknown operand type");
+ case MachineOperand::MO_Register:
+ // We create register operands as symbols, since the PTXInstPrinter class
+ // has no way to map virtual registers back to a name without some ugly
+ // hacks.
+ // FIXME: Figure out a better way to handle virtual register naming.
+ RegSymbolName = MFI->getRegisterName(MO.getReg());
+ Expr = MCSymbolRefExpr::Create(RegSymbolName, MCSymbolRefExpr::VK_None,
+ OutContext);
+ MCOp = MCOperand::CreateExpr(Expr);
+ break;
+ case MachineOperand::MO_Immediate:
+ MCOp = MCOperand::CreateImm(MO.getImm());
+ break;
+ case MachineOperand::MO_MachineBasicBlock:
+ MCOp = MCOperand::CreateExpr(MCSymbolRefExpr::Create(
+ MO.getMBB()->getSymbol(), OutContext));
+ break;
+ case MachineOperand::MO_GlobalAddress:
+ MCOp = GetSymbolRef(MO, Mang->getSymbol(MO.getGlobal()));
+ break;
+ case MachineOperand::MO_ExternalSymbol:
+ MCOp = GetSymbolRef(MO, GetExternalSymbolSymbol(MO.getSymbolName()));
+ break;
+ case MachineOperand::MO_FPImmediate:
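+    // MCOperand can only carry a double, so convert the APFloat up front.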
+ APFloat Val = MO.getFPImm()->getValueAPF();
+ bool ignored;
+ Val.convert(APFloat::IEEEdouble, APFloat::rmTowardZero, &ignored);
+ MCOp = MCOperand::CreateFPImm(Val.convertToDouble());
+ break;
+ }
+
+ return MCOp;
+}
// Force static initialization.
extern "C" void LLVMInitializePTXAsmPrinter() {
RegisterAsmPrinter<PTXAsmPrinter> X(ThePTX32Target);
RegisterAsmPrinter<PTXAsmPrinter> Y(ThePTX64Target);
}
+
diff --git a/lib/Target/PTX/PTXAsmPrinter.h b/lib/Target/PTX/PTXAsmPrinter.h
new file mode 100644
index 0000000..538c080
--- /dev/null
+++ b/lib/Target/PTX/PTXAsmPrinter.h
@@ -0,0 +1,57 @@
+//===-- PTXAsmPrinter.h - Print machine code to a PTX file ----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// PTX Assembly printer class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef PTXASMPRINTER_H
+#define PTXASMPRINTER_H
+
+#include "PTX.h"
+#include "PTXTargetMachine.h"
+#include "llvm/ADT/StringMap.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/Support/Compiler.h"
+
+namespace llvm {
+
+class MCOperand;
+
+class LLVM_LIBRARY_VISIBILITY PTXAsmPrinter : public AsmPrinter {
+public:
+ explicit PTXAsmPrinter(TargetMachine &TM, MCStreamer &Streamer)
+ : AsmPrinter(TM, Streamer) {}
+
+ const char *getPassName() const { return "PTX Assembly Printer"; }
+
+ bool doFinalization(Module &M);
+
+ virtual void EmitStartOfAsmFile(Module &M);
+ virtual void EmitFunctionBodyStart();
+ virtual void EmitFunctionBodyEnd();
+ virtual void EmitFunctionEntryLabel();
+ virtual void EmitInstruction(const MachineInstr *MI);
+
+ unsigned GetOrCreateSourceID(StringRef FileName,
+ StringRef DirName);
+
+ MCOperand GetSymbolRef(const MachineOperand &MO, const MCSymbol *Symbol);
+ MCOperand lowerOperand(const MachineOperand &MO);
+
+private:
+ void EmitVariableDeclaration(const GlobalVariable *gv);
+ void EmitFunctionDeclaration();
+
+ StringMap<unsigned> SourceIdMap;
+}; // class PTXAsmPrinter
+} // namespace llvm
+
+#endif
+
diff --git a/lib/Target/PTX/PTXCallingConv.td b/lib/Target/PTX/PTXCallingConv.td
deleted file mode 100644
index 3e3ff48..0000000
--- a/lib/Target/PTX/PTXCallingConv.td
+++ /dev/null
@@ -1,29 +0,0 @@
-
-//===--- PTXCallingConv.td - Calling Conventions -----------*- tablegen -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This describes the calling conventions for the PTX architecture.
-//
-//===----------------------------------------------------------------------===//
-
-// PTX Formal Parameter Calling Convention
-def CC_PTX : CallingConv<[
- CCIfType<[i1], CCAssignToReg<[P12, P13, P14, P15, P16, P17, P18, P19, P20, P21, P22, P23, P24, P25, P26, P27, P28, P29, P30, P31, P32, P33, P34, P35, P36, P37, P38, P39, P40, P41, P42, P43, P44, P45, P46, P47, P48, P49, P50, P51, P52, P53, P54, P55, P56, P57, P58, P59, P60, P61, P62, P63, P64, P65, P66, P67, P68, P69, P70, P71, P72, P73, P74, P75, P76, P77, P78, P79, P80, P81, P82, P83, P84, P85, P86, P87, P88, P89, P90, P91, P92, P93, P94, P95, P96, P97, P98, P99, P100, P101, P102, P103, P104, P105, P106, P107, P108, P109, P110, P111, P112, P113, P114, P115, P116, P117, P118, P119, P120, P121, P122, P123, P124, P125, P126, P127]>>,
- CCIfType<[i16], CCAssignToReg<[RH12, RH13, RH14, RH15, RH16, RH17, RH18, RH19, RH20, RH21, RH22, RH23, RH24, RH25, RH26, RH27, RH28, RH29, RH30, RH31, RH32, RH33, RH34, RH35, RH36, RH37, RH38, RH39, RH40, RH41, RH42, RH43, RH44, RH45, RH46, RH47, RH48, RH49, RH50, RH51, RH52, RH53, RH54, RH55, RH56, RH57, RH58, RH59, RH60, RH61, RH62, RH63, RH64, RH65, RH66, RH67, RH68, RH69, RH70, RH71, RH72, RH73, RH74, RH75, RH76, RH77, RH78, RH79, RH80, RH81, RH82, RH83, RH84, RH85, RH86, RH87, RH88, RH89, RH90, RH91, RH92, RH93, RH94, RH95, RH96, RH97, RH98, RH99, RH100, RH101, RH102, RH103, RH104, RH105, RH106, RH107, RH108, RH109, RH110, RH111, RH112, RH113, RH114, RH115, RH116, RH117, RH118, RH119, RH120, RH121, RH122, RH123, RH124, RH125, RH126, RH127]>>,
- CCIfType<[i32,f32], CCAssignToReg<[R12, R13, R14, R15, R16, R17, R18, R19, R20, R21, R22, R23, R24, R25, R26, R27, R28, R29, R30, R31, R32, R33, R34, R35, R36, R37, R38, R39, R40, R41, R42, R43, R44, R45, R46, R47, R48, R49, R50, R51, R52, R53, R54, R55, R56, R57, R58, R59, R60, R61, R62, R63, R64, R65, R66, R67, R68, R69, R70, R71, R72, R73, R74, R75, R76, R77, R78, R79, R80, R81, R82, R83, R84, R85, R86, R87, R88, R89, R90, R91, R92, R93, R94, R95, R96, R97, R98, R99, R100, R101, R102, R103, R104, R105, R106, R107, R108, R109, R110, R111, R112, R113, R114, R115, R116, R117, R118, R119, R120, R121, R122, R123, R124, R125, R126, R127]>>,
- CCIfType<[i64,f64], CCAssignToReg<[RD12, RD13, RD14, RD15, RD16, RD17, RD18, RD19, RD20, RD21, RD22, RD23, RD24, RD25, RD26, RD27, RD28, RD29, RD30, RD31, RD32, RD33, RD34, RD35, RD36, RD37, RD38, RD39, RD40, RD41, RD42, RD43, RD44, RD45, RD46, RD47, RD48, RD49, RD50, RD51, RD52, RD53, RD54, RD55, RD56, RD57, RD58, RD59, RD60, RD61, RD62, RD63, RD64, RD65, RD66, RD67, RD68, RD69, RD70, RD71, RD72, RD73, RD74, RD75, RD76, RD77, RD78, RD79, RD80, RD81, RD82, RD83, RD84, RD85, RD86, RD87, RD88, RD89, RD90, RD91, RD92, RD93, RD94, RD95, RD96, RD97, RD98, RD99, RD100, RD101, RD102, RD103, RD104, RD105, RD106, RD107, RD108, RD109, RD110, RD111, RD112, RD113, RD114, RD115, RD116, RD117, RD118, RD119, RD120, RD121, RD122, RD123, RD124, RD125, RD126, RD127]>>
-]>;
-
-// PTX Return Value Calling Convention
-def RetCC_PTX : CallingConv<[
- CCIfType<[i1], CCAssignToReg<[P0, P1, P2, P3, P4, P5, P6, P7, P8, P9, P10, P11]>>,
- CCIfType<[i16], CCAssignToReg<[RH0, RH1, RH2, RH3, RH4, RH5, RH6, RH7, RH8, RH9, RH10, RH11]>>,
- CCIfType<[i32,f32], CCAssignToReg<[R0, R1, R2, R3, R4, R5, R6, R7, R8, R9, R10, R11]>>,
- CCIfType<[i64,f64], CCAssignToReg<[RD0, RD1, RD2, RD3, RD4, RD5, RD6, RD7, RD8, RD9, RD10, RD11]>>
-]>;
diff --git a/lib/Target/PTX/PTXFPRoundingModePass.cpp b/lib/Target/PTX/PTXFPRoundingModePass.cpp
new file mode 100644
index 0000000..0b653e0
--- /dev/null
+++ b/lib/Target/PTX/PTXFPRoundingModePass.cpp
@@ -0,0 +1,179 @@
+//===-- PTXFPRoundingModePass.cpp - Assign rounding modes pass ------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines a machine function pass that sets appropriate FP rounding
+// modes for all relevant instructions.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "ptx-fp-rounding-mode"
+
+#include "PTX.h"
+#include "PTXTargetMachine.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+
+// NOTE: PTXFPRoundingModePass should be executed just before emission.
+
+namespace llvm {
+ /// PTXFPRoundingModePass - Pass to assign appropriate FP rounding modes to
+ /// all FP instructions. Essentially, this pass just looks for all FP
+ /// instructions that have a rounding mode set to RndDefault, and sets an
+ /// appropriate rounding mode based on the target device.
+ ///
+ class PTXFPRoundingModePass : public MachineFunctionPass {
+ private:
+ static char ID;
+
+ typedef std::pair<unsigned, unsigned> RndModeDesc;
+
+ PTXTargetMachine& TargetMachine;
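+    // Instrs maps an opcode to (rounding-mode operand index, mode to use
+    // when the instruction still carries RndDefault).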
+ DenseMap<unsigned, RndModeDesc> Instrs;
+
+ public:
+ PTXFPRoundingModePass(PTXTargetMachine &TM, CodeGenOpt::Level OptLevel)
+ : MachineFunctionPass(ID),
+ TargetMachine(TM) {
+ initializeMap();
+ }
+
+ virtual bool runOnMachineFunction(MachineFunction &MF);
+
+ virtual const char *getPassName() const {
+ return "PTX FP Rounding Mode Pass";
+ }
+
+ private:
+
+ void initializeMap();
+ void processInstruction(MachineInstr &MI);
+ }; // class PTXFPRoundingModePass
+} // namespace llvm
+
+using namespace llvm;
+
+char PTXFPRoundingModePass::ID = 0;
+
+bool PTXFPRoundingModePass::runOnMachineFunction(MachineFunction &MF) {
+ // Look at each basic block
+ for (MachineFunction::iterator bbi = MF.begin(), bbe = MF.end(); bbi != bbe;
+ ++bbi) {
+ MachineBasicBlock &MBB = *bbi;
+ // Look at each instruction
+ for (MachineBasicBlock::iterator ii = MBB.begin(), ie = MBB.end();
+ ii != ie; ++ii) {
+ MachineInstr &MI = *ii;
+ processInstruction(MI);
+ }
+ }
+ return false;
+}
+
+void PTXFPRoundingModePass::initializeMap() {
+ using namespace PTXRoundingMode;
+ const PTXSubtarget& ST = TargetMachine.getSubtarget<PTXSubtarget>();
+
+ // Build a map of default rounding mode for all instructions that need a
+ // rounding mode.
+ Instrs[PTX::FADDrr32] = std::make_pair(1U, (unsigned)RndNearestEven);
+ Instrs[PTX::FADDri32] = std::make_pair(1U, (unsigned)RndNearestEven);
+ Instrs[PTX::FADDrr64] = std::make_pair(1U, (unsigned)RndNearestEven);
+ Instrs[PTX::FADDri64] = std::make_pair(1U, (unsigned)RndNearestEven);
+ Instrs[PTX::FSUBrr32] = std::make_pair(1U, (unsigned)RndNearestEven);
+ Instrs[PTX::FSUBri32] = std::make_pair(1U, (unsigned)RndNearestEven);
+ Instrs[PTX::FSUBrr64] = std::make_pair(1U, (unsigned)RndNearestEven);
+ Instrs[PTX::FSUBri64] = std::make_pair(1U, (unsigned)RndNearestEven);
+ Instrs[PTX::FMULrr32] = std::make_pair(1U, (unsigned)RndNearestEven);
+ Instrs[PTX::FMULri32] = std::make_pair(1U, (unsigned)RndNearestEven);
+ Instrs[PTX::FMULrr64] = std::make_pair(1U, (unsigned)RndNearestEven);
+ Instrs[PTX::FMULri64] = std::make_pair(1U, (unsigned)RndNearestEven);
+
+ Instrs[PTX::FNEGrr32] = std::make_pair(1U, (unsigned)RndNone);
+ Instrs[PTX::FNEGri32] = std::make_pair(1U, (unsigned)RndNone);
+ Instrs[PTX::FNEGrr64] = std::make_pair(1U, (unsigned)RndNone);
+ Instrs[PTX::FNEGri64] = std::make_pair(1U, (unsigned)RndNone);
+
+ unsigned FDivRndMode = ST.fdivNeedsRoundingMode() ? RndNearestEven : RndNone;
+ Instrs[PTX::FDIVrr32] = std::make_pair(1U, FDivRndMode);
+ Instrs[PTX::FDIVri32] = std::make_pair(1U, FDivRndMode);
+ Instrs[PTX::FDIVrr64] = std::make_pair(1U, FDivRndMode);
+ Instrs[PTX::FDIVri64] = std::make_pair(1U, FDivRndMode);
+
+ unsigned FMADRndMode = ST.fmadNeedsRoundingMode() ? RndNearestEven : RndNone;
+ Instrs[PTX::FMADrrr32] = std::make_pair(1U, FMADRndMode);
+ Instrs[PTX::FMADrri32] = std::make_pair(1U, FMADRndMode);
+ Instrs[PTX::FMADrii32] = std::make_pair(1U, FMADRndMode);
+ Instrs[PTX::FMADrrr64] = std::make_pair(1U, FMADRndMode);
+ Instrs[PTX::FMADrri64] = std::make_pair(1U, FMADRndMode);
+ Instrs[PTX::FMADrii64] = std::make_pair(1U, FMADRndMode);
+
+ Instrs[PTX::FSQRTrr32] = std::make_pair(1U, (unsigned)RndNearestEven);
+ Instrs[PTX::FSQRTri32] = std::make_pair(1U, (unsigned)RndNearestEven);
+ Instrs[PTX::FSQRTrr64] = std::make_pair(1U, (unsigned)RndNearestEven);
+ Instrs[PTX::FSQRTri64] = std::make_pair(1U, (unsigned)RndNearestEven);
+
+ Instrs[PTX::FSINrr32] = std::make_pair(1U, (unsigned)RndApprox);
+ Instrs[PTX::FSINri32] = std::make_pair(1U, (unsigned)RndApprox);
+ Instrs[PTX::FSINrr64] = std::make_pair(1U, (unsigned)RndApprox);
+ Instrs[PTX::FSINri64] = std::make_pair(1U, (unsigned)RndApprox);
+ Instrs[PTX::FCOSrr32] = std::make_pair(1U, (unsigned)RndApprox);
+ Instrs[PTX::FCOSri32] = std::make_pair(1U, (unsigned)RndApprox);
+ Instrs[PTX::FCOSrr64] = std::make_pair(1U, (unsigned)RndApprox);
+ Instrs[PTX::FCOSri64] = std::make_pair(1U, (unsigned)RndApprox);
+
+ Instrs[PTX::CVTu16f32] = std::make_pair(1U, (unsigned)RndTowardsZeroInt);
+ Instrs[PTX::CVTs16f32] = std::make_pair(1U, (unsigned)RndTowardsZeroInt);
+ Instrs[PTX::CVTu16f64] = std::make_pair(1U, (unsigned)RndTowardsZeroInt);
+ Instrs[PTX::CVTs16f64] = std::make_pair(1U, (unsigned)RndTowardsZeroInt);
+ Instrs[PTX::CVTu32f32] = std::make_pair(1U, (unsigned)RndTowardsZeroInt);
+ Instrs[PTX::CVTs32f32] = std::make_pair(1U, (unsigned)RndTowardsZeroInt);
+ Instrs[PTX::CVTu32f64] = std::make_pair(1U, (unsigned)RndTowardsZeroInt);
+ Instrs[PTX::CVTs32f64] = std::make_pair(1U, (unsigned)RndTowardsZeroInt);
+ Instrs[PTX::CVTu64f32] = std::make_pair(1U, (unsigned)RndTowardsZeroInt);
+ Instrs[PTX::CVTs64f32] = std::make_pair(1U, (unsigned)RndTowardsZeroInt);
+ Instrs[PTX::CVTu64f64] = std::make_pair(1U, (unsigned)RndTowardsZeroInt);
+ Instrs[PTX::CVTs64f64] = std::make_pair(1U, (unsigned)RndTowardsZeroInt);
+
+ Instrs[PTX::CVTf32u16] = std::make_pair(1U, (unsigned)RndNearestEven);
+ Instrs[PTX::CVTf32s16] = std::make_pair(1U, (unsigned)RndNearestEven);
+ Instrs[PTX::CVTf32u32] = std::make_pair(1U, (unsigned)RndNearestEven);
+ Instrs[PTX::CVTf32s32] = std::make_pair(1U, (unsigned)RndNearestEven);
+ Instrs[PTX::CVTf32u64] = std::make_pair(1U, (unsigned)RndNearestEven);
+ Instrs[PTX::CVTf32s64] = std::make_pair(1U, (unsigned)RndNearestEven);
+ Instrs[PTX::CVTf32f64] = std::make_pair(1U, (unsigned)RndNearestEven);
+ Instrs[PTX::CVTf64u16] = std::make_pair(1U, (unsigned)RndNearestEven);
+ Instrs[PTX::CVTf64s16] = std::make_pair(1U, (unsigned)RndNearestEven);
+ Instrs[PTX::CVTf64u32] = std::make_pair(1U, (unsigned)RndNearestEven);
+ Instrs[PTX::CVTf64s32] = std::make_pair(1U, (unsigned)RndNearestEven);
+ Instrs[PTX::CVTf64u64] = std::make_pair(1U, (unsigned)RndNearestEven);
+ Instrs[PTX::CVTf64s64] = std::make_pair(1U, (unsigned)RndNearestEven);
+}
+
+void PTXFPRoundingModePass::processInstruction(MachineInstr &MI) {
+ // Is this an instruction that needs a rounding mode?
+ if (Instrs.count(MI.getOpcode())) {
+ const RndModeDesc &Desc = Instrs[MI.getOpcode()];
+ // Get the rounding mode operand
+ MachineOperand &Op = MI.getOperand(Desc.first);
+ // Update the rounding mode if needed
+ if (Op.getImm() == PTXRoundingMode::RndDefault) {
+ Op.setImm(Desc.second);
+ }
+ }
+}
+
+FunctionPass *llvm::createPTXFPRoundingModePass(PTXTargetMachine &TM,
+ CodeGenOpt::Level OptLevel) {
+ return new PTXFPRoundingModePass(TM, OptLevel);
+}
+
diff --git a/lib/Target/PTX/PTXISelDAGToDAG.cpp b/lib/Target/PTX/PTXISelDAGToDAG.cpp
index 9adfa62..5c7ee29 100644
--- a/lib/Target/PTX/PTXISelDAGToDAG.cpp
+++ b/lib/Target/PTX/PTXISelDAGToDAG.cpp
@@ -12,7 +12,9 @@
//===----------------------------------------------------------------------===//
#include "PTX.h"
+#include "PTXMachineFunctionInfo.h"
#include "PTXTargetMachine.h"
+#include "llvm/ADT/StringExtras.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
#include "llvm/DerivedTypes.h"
#include "llvm/Support/Debug.h"
@@ -37,6 +39,7 @@ class PTXDAGToDAGISel : public SelectionDAGISel {
bool SelectADDRrr(SDValue &Addr, SDValue &R1, SDValue &R2);
bool SelectADDRri(SDValue &Addr, SDValue &Base, SDValue &Offset);
bool SelectADDRii(SDValue &Addr, SDValue &Base, SDValue &Offset);
+ bool SelectADDRlocal(SDValue &Addr, SDValue &Base, SDValue &Offset);
// Include the pieces auto'gened from the target description
#include "PTXGenDAGISel.inc"
@@ -46,6 +49,10 @@ class PTXDAGToDAGISel : public SelectionDAGISel {
// pattern (PTXbrcond bb:$d, ...) in PTXInstrInfo.td
SDNode *SelectBRCOND(SDNode *Node);
+ SDNode *SelectREADPARAM(SDNode *Node);
+ SDNode *SelectWRITEPARAM(SDNode *Node);
+ SDNode *SelectFrameIndex(SDNode *Node);
+
bool isImm(const SDValue &operand);
bool SelectImm(const SDValue &operand, SDValue &imm);
@@ -68,6 +75,12 @@ SDNode *PTXDAGToDAGISel::Select(SDNode *Node) {
switch (Node->getOpcode()) {
case ISD::BRCOND:
return SelectBRCOND(Node);
+ case PTXISD::READ_PARAM:
+ return SelectREADPARAM(Node);
+ case PTXISD::WRITE_PARAM:
+ return SelectWRITEPARAM(Node);
+ case ISD::FrameIndex:
+ return SelectFrameIndex(Node);
default:
return SelectCode(Node);
}
@@ -79,7 +92,7 @@ SDNode *PTXDAGToDAGISel::SelectBRCOND(SDNode *Node) {
SDValue Chain = Node->getOperand(0);
SDValue Pred = Node->getOperand(1);
SDValue Target = Node->getOperand(2); // branch target
- SDValue PredOp = CurDAG->getTargetConstant(PTX::PRED_NORMAL, MVT::i32);
+ SDValue PredOp = CurDAG->getTargetConstant(PTXPredicate::Normal, MVT::i32);
DebugLoc dl = Node->getDebugLoc();
assert(Target.getOpcode() == ISD::BasicBlock);
@@ -90,6 +103,97 @@ SDNode *PTXDAGToDAGISel::SelectBRCOND(SDNode *Node) {
return CurDAG->getMachineNode(PTX::BRAdp, dl, MVT::Other, Ops, 4);
}
+SDNode *PTXDAGToDAGISel::SelectREADPARAM(SDNode *Node) {
+ SDValue Chain = Node->getOperand(0);
+ SDValue Index = Node->getOperand(1);
+
+ int OpCode;
+
+ // Get the type of parameter we are reading
+ EVT VT = Node->getValueType(0);
+ assert(VT.isSimple() && "READ_PARAM only implemented for MVT types");
+
+ MVT Type = VT.getSimpleVT();
+
+ if (Type == MVT::i1)
+ OpCode = PTX::READPARAMPRED;
+ else if (Type == MVT::i16)
+ OpCode = PTX::READPARAMI16;
+ else if (Type == MVT::i32)
+ OpCode = PTX::READPARAMI32;
+ else if (Type == MVT::i64)
+ OpCode = PTX::READPARAMI64;
+ else if (Type == MVT::f32)
+ OpCode = PTX::READPARAMF32;
+ else {
+ assert(Type == MVT::f64 && "Unexpected type!");
+ OpCode = PTX::READPARAMF64;
+ }
+
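+  // Parameter reads are never predicated: attach a null predicate register
+  // and PTXPredicate::None to match the instruction's operand pattern.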
+ SDValue Pred = CurDAG->getRegister(PTX::NoRegister, MVT::i1);
+ SDValue PredOp = CurDAG->getTargetConstant(PTXPredicate::None, MVT::i32);
+ DebugLoc dl = Node->getDebugLoc();
+
+ SDValue Ops[] = { Index, Pred, PredOp, Chain };
+ return CurDAG->getMachineNode(OpCode, dl, VT, Ops, 4);
+}
+
+SDNode *PTXDAGToDAGISel::SelectWRITEPARAM(SDNode *Node) {
+
+ SDValue Chain = Node->getOperand(0);
+ SDValue Value = Node->getOperand(1);
+
+ int OpCode;
+
+ //Node->dumpr(CurDAG);
+
+ // Get the type of parameter we are writing
+ EVT VT = Value->getValueType(0);
+ assert(VT.isSimple() && "WRITE_PARAM only implemented for MVT types");
+
+ MVT Type = VT.getSimpleVT();
+
+ if (Type == MVT::i1)
+ OpCode = PTX::WRITEPARAMPRED;
+ else if (Type == MVT::i16)
+ OpCode = PTX::WRITEPARAMI16;
+ else if (Type == MVT::i32)
+ OpCode = PTX::WRITEPARAMI32;
+ else if (Type == MVT::i64)
+ OpCode = PTX::WRITEPARAMI64;
+ else if (Type == MVT::f32)
+ OpCode = PTX::WRITEPARAMF32;
+ else if (Type == MVT::f64)
+ OpCode = PTX::WRITEPARAMF64;
+ else
+ llvm_unreachable("Invalid type in SelectWRITEPARAM");
+
+ SDValue Pred = CurDAG->getRegister(PTX::NoRegister, MVT::i1);
+ SDValue PredOp = CurDAG->getTargetConstant(PTXPredicate::None, MVT::i32);
+ DebugLoc dl = Node->getDebugLoc();
+
+ SDValue Ops[] = { Value, Pred, PredOp, Chain };
+ SDNode* Ret = CurDAG->getMachineNode(OpCode, dl, MVT::Other, Ops, 4);
+
+ //dbgs() << "SelectWRITEPARAM produced:\n\t";
+ //Ret->dumpr(CurDAG);
+
+ return Ret;
+}
+
+SDNode *PTXDAGToDAGISel::SelectFrameIndex(SDNode *Node) {
+ int FI = cast<FrameIndexSDNode>(Node)->getIndex();
+ //dbgs() << "Selecting FrameIndex at index " << FI << "\n";
+ //SDValue TFI = CurDAG->getTargetFrameIndex(FI, Node->getValueType(0));
+
+ PTXMachineFunctionInfo *MFI = MF->getInfo<PTXMachineFunctionInfo>();
+
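+  // Replace the frame index with the symbol that names its ".local" array
+  // (see PTXMachineFunctionInfo::getFrameSymbol).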
+ SDValue FrameSymbol = CurDAG->getTargetExternalSymbol(MFI->getFrameSymbol(FI),
+ Node->getValueType(0));
+
+ return FrameSymbol.getNode();
+}
+
// Match memory operand of the form [reg+reg]
bool PTXDAGToDAGISel::SelectADDRrr(SDValue &Addr, SDValue &R1, SDValue &R2) {
if (Addr.getOpcode() != ISD::ADD || Addr.getNumOperands() < 2 ||
@@ -107,14 +211,54 @@ bool PTXDAGToDAGISel::SelectADDRrr(SDValue &Addr, SDValue &R1, SDValue &R2) {
// Match memory operand of the form [reg], [imm+reg], and [reg+imm]
bool PTXDAGToDAGISel::SelectADDRri(SDValue &Addr, SDValue &Base,
SDValue &Offset) {
- if (Addr.getOpcode() != ISD::ADD) {
+ // FrameIndex addresses are handled separately
+ //errs() << "SelectADDRri: ";
+ //Addr.getNode()->dumpr();
+ if (isa<FrameIndexSDNode>(Addr)) {
+ //errs() << "Failure\n";
+ return false;
+ }
+
+ if (CurDAG->isBaseWithConstantOffset(Addr)) {
+ Base = Addr.getOperand(0);
+ if (isa<FrameIndexSDNode>(Base)) {
+ //errs() << "Failure\n";
+ return false;
+ }
+    // isBaseWithConstantOffset guarantees that operand 1 is a constant, so
+    // cast<> is safe here; dyn_cast<> left CN unchecked before the deref.
+    ConstantSDNode *CN = cast<ConstantSDNode>(Addr.getOperand(1));
+    Offset = CurDAG->getTargetConstant(CN->getZExtValue(), MVT::i32);
+ //errs() << "Success\n";
+ return true;
+ }
+
+ /*if (Addr.getNumOperands() == 1) {
+ Base = Addr;
+ Offset = CurDAG->getTargetConstant(0, Addr.getValueType().getSimpleVT());
+ errs() << "Success\n";
+ return true;
+ }*/
+
+ //errs() << "SelectADDRri fails on: ";
+ //Addr.getNode()->dumpr();
+
+ if (isImm(Addr)) {
+ //errs() << "Failure\n";
+ return false;
+ }
+
+ Base = Addr;
+ Offset = CurDAG->getTargetConstant(0, Addr.getValueType().getSimpleVT());
+
+ //errs() << "Success\n";
+ return true;
+
+ /*if (Addr.getOpcode() != ISD::ADD) {
// let SelectADDRii handle the [imm] case
if (isImm(Addr))
return false;
// it is [reg]
assert(Addr.getValueType().isSimple() && "Type must be simple");
-
Base = Addr;
Offset = CurDAG->getTargetConstant(0, Addr.getValueType().getSimpleVT());
@@ -136,7 +280,7 @@ bool PTXDAGToDAGISel::SelectADDRri(SDValue &Addr, SDValue &Base,
}
// neither [reg+imm] nor [imm+reg]
- return false;
+ return false;*/
}
// Match memory operand of the form [imm+imm] and [imm]
@@ -160,6 +304,36 @@ bool PTXDAGToDAGISel::SelectADDRii(SDValue &Addr, SDValue &Base,
return false;
}
+// Match memory operand of the form [reg], [imm+reg], and [reg+imm]
+bool PTXDAGToDAGISel::SelectADDRlocal(SDValue &Addr, SDValue &Base,
+ SDValue &Offset) {
+ //errs() << "SelectADDRlocal: ";
+ //Addr.getNode()->dumpr();
+ if (isa<FrameIndexSDNode>(Addr)) {
+ Base = Addr;
+ Offset = CurDAG->getTargetConstant(0, Addr.getValueType().getSimpleVT());
+ //errs() << "Success\n";
+ return true;
+ }
+
+ if (CurDAG->isBaseWithConstantOffset(Addr)) {
+ Base = Addr.getOperand(0);
+ if (!isa<FrameIndexSDNode>(Base)) {
+ //errs() << "Failure\n";
+ return false;
+ }
+    // As above, operand 1 is guaranteed to be a constant here, so use cast<>.
+    ConstantSDNode *CN = cast<ConstantSDNode>(Addr.getOperand(1));
+    Offset = CurDAG->getTargetConstant(CN->getZExtValue(), MVT::i32);
+ //errs() << "Offset: ";
+ //Offset.getNode()->dumpr();
+ //errs() << "Success\n";
+ return true;
+ }
+
+ //errs() << "Failure\n";
+ return false;
+}
+
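Taken together, the checks above split local (frame) addresses off from the generic register+immediate form: SelectADDRri matches [reg] and [reg+imm] while rejecting frame-index bases, SelectADDRlocal accepts only [frame_index] and [frame_index+imm], and SelectADDRii (above) keeps handling [imm] and [imm+imm]. A minimal sketch of the resulting predicate (isLocalFrameAddress is a hypothetical helper, not part of this patch):

static bool isLocalFrameAddress(SelectionDAG *CurDAG, SDValue Addr) {
  if (isa<FrameIndexSDNode>(Addr))
    return true;                                        // [frame_index]
  if (CurDAG->isBaseWithConstantOffset(Addr))
    return isa<FrameIndexSDNode>(Addr.getOperand(0));   // [frame_index+imm]
  return false;                                         // not a local address
}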
bool PTXDAGToDAGISel::isImm(const SDValue &operand) {
return ConstantSDNode::classof(operand.getNode());
}
diff --git a/lib/Target/PTX/PTXISelLowering.cpp b/lib/Target/PTX/PTXISelLowering.cpp
index 6fcf710..3307d91 100644
--- a/lib/Target/PTX/PTXISelLowering.cpp
+++ b/lib/Target/PTX/PTXISelLowering.cpp
@@ -16,23 +16,19 @@
#include "PTXMachineFunctionInfo.h"
#include "PTXRegisterInfo.h"
#include "PTXSubtarget.h"
+#include "llvm/Function.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
+#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
//===----------------------------------------------------------------------===//
-// Calling Convention Implementation
-//===----------------------------------------------------------------------===//
-
-#include "PTXGenCallingConv.inc"
-
-//===----------------------------------------------------------------------===//
// TargetLowering Implementation
//===----------------------------------------------------------------------===//
@@ -47,57 +43,58 @@ PTXTargetLowering::PTXTargetLowering(TargetMachine &TM)
addRegisterClass(MVT::f64, PTX::RegF64RegisterClass);
setBooleanContents(ZeroOrOneBooleanContent);
+ setBooleanVectorContents(ZeroOrOneBooleanContent); // FIXME: Is this correct?
setMinFunctionAlignment(2);
-
+
////////////////////////////////////
/////////// Expansion //////////////
////////////////////////////////////
-
+
// (any/zero/sign) extload => load + (any/zero/sign) extend
-
+
setLoadExtAction(ISD::EXTLOAD, MVT::i16, Expand);
setLoadExtAction(ISD::ZEXTLOAD, MVT::i16, Expand);
setLoadExtAction(ISD::SEXTLOAD, MVT::i16, Expand);
-
+
// f32 extload => load + fextend
-
- setLoadExtAction(ISD::EXTLOAD, MVT::f32, Expand);
-
+
+ setLoadExtAction(ISD::EXTLOAD, MVT::f32, Expand);
+
// f64 truncstore => trunc + store
-
- setTruncStoreAction(MVT::f64, MVT::f32, Expand);
-
+
+ setTruncStoreAction(MVT::f64, MVT::f32, Expand);
+
// sign_extend_inreg => sign_extend
-
+
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
-
+
// br_cc => brcond
-
+
setOperationAction(ISD::BR_CC, MVT::Other, Expand);
// select_cc => setcc
-
+
setOperationAction(ISD::SELECT_CC, MVT::Other, Expand);
setOperationAction(ISD::SELECT_CC, MVT::f32, Expand);
setOperationAction(ISD::SELECT_CC, MVT::f64, Expand);
-
+
////////////////////////////////////
//////////// Legal /////////////////
////////////////////////////////////
-
+
setOperationAction(ISD::ConstantFP, MVT::f32, Legal);
setOperationAction(ISD::ConstantFP, MVT::f64, Legal);
-
+
////////////////////////////////////
//////////// Custom ////////////////
////////////////////////////////////
-
+
// customise setcc to use bitwise logic if possible
-
+
setOperationAction(ISD::SETCC, MVT::i1, Custom);
// customize translation of memory addresses
-
+
setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
setOperationAction(ISD::GlobalAddress, MVT::i64, Custom);
@@ -105,7 +102,7 @@ PTXTargetLowering::PTXTargetLowering(TargetMachine &TM)
computeRegisterProperties();
}
-MVT::SimpleValueType PTXTargetLowering::getSetCCResultType(EVT VT) const {
+EVT PTXTargetLowering::getSetCCResultType(EVT VT) const {
return MVT::i1;
}
@@ -130,10 +127,16 @@ const char *PTXTargetLowering::getTargetNodeName(unsigned Opcode) const {
return "PTXISD::LOAD_PARAM";
case PTXISD::STORE_PARAM:
return "PTXISD::STORE_PARAM";
+ case PTXISD::READ_PARAM:
+ return "PTXISD::READ_PARAM";
+ case PTXISD::WRITE_PARAM:
+ return "PTXISD::WRITE_PARAM";
case PTXISD::EXIT:
return "PTXISD::EXIT";
case PTXISD::RET:
return "PTXISD::RET";
+ case PTXISD::CALL:
+ return "PTXISD::CALL";
}
}
@@ -149,7 +152,7 @@ SDValue PTXTargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
DebugLoc dl = Op.getDebugLoc();
ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
- // Look for X == 0, X == 1, X != 0, or X != 1
+ // Look for X == 0, X == 1, X != 0, or X != 1
// We can simplify these to bitwise logic
if (Op1.getOpcode() == ISD::Constant &&
@@ -197,6 +200,7 @@ SDValue PTXTargetLowering::
MachineFunction &MF = DAG.getMachineFunction();
const PTXSubtarget& ST = getTargetMachine().getSubtarget<PTXSubtarget>();
PTXMachineFunctionInfo *MFI = MF.getInfo<PTXMachineFunctionInfo>();
+ PTXParamManager &PM = MFI->getParamManager();
switch (CallConv) {
default:
@@ -216,68 +220,34 @@ SDValue PTXTargetLowering::
if (MFI->isKernel() || ST.useParamSpaceForDeviceArgs()) {
// We just need to emit the proper LOAD_PARAM ISDs
for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
-
assert((!MFI->isKernel() || Ins[i].VT != MVT::i1) &&
"Kernels cannot take pred operands");
+ unsigned ParamSize = Ins[i].VT.getStoreSizeInBits();
+ unsigned Param = PM.addArgumentParam(ParamSize);
+ const std::string &ParamName = PM.getParamName(Param);
+ SDValue ParamValue = DAG.getTargetExternalSymbol(ParamName.c_str(),
+ MVT::Other);
SDValue ArgValue = DAG.getNode(PTXISD::LOAD_PARAM, dl, Ins[i].VT, Chain,
- DAG.getTargetConstant(i, MVT::i32));
+ ParamValue);
InVals.push_back(ArgValue);
-
- // Instead of storing a physical register in our argument list, we just
- // store the total size of the parameter, in bits. The ASM printer
- // knows how to process this.
- MFI->addArgReg(Ins[i].VT.getStoreSizeInBits());
}
}
else {
- // For device functions, we use the PTX calling convention to do register
- // assignments then create CopyFromReg ISDs for the allocated registers
-
- SmallVector<CCValAssign, 16> ArgLocs;
- CCState CCInfo(CallConv, isVarArg, MF, getTargetMachine(), ArgLocs,
- *DAG.getContext());
-
- CCInfo.AnalyzeFormalArguments(Ins, CC_PTX);
-
- for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
-
- CCValAssign& VA = ArgLocs[i];
- EVT RegVT = VA.getLocVT();
- TargetRegisterClass* TRC = 0;
-
- assert(VA.isRegLoc() && "CCValAssign must be RegLoc");
-
- // Determine which register class we need
- if (RegVT == MVT::i1) {
- TRC = PTX::RegPredRegisterClass;
- }
- else if (RegVT == MVT::i16) {
- TRC = PTX::RegI16RegisterClass;
- }
- else if (RegVT == MVT::i32) {
- TRC = PTX::RegI32RegisterClass;
- }
- else if (RegVT == MVT::i64) {
- TRC = PTX::RegI64RegisterClass;
- }
- else if (RegVT == MVT::f32) {
- TRC = PTX::RegF32RegisterClass;
- }
- else if (RegVT == MVT::f64) {
- TRC = PTX::RegF64RegisterClass;
- }
- else {
- llvm_unreachable("Unknown parameter type");
- }
+ for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
+ EVT RegVT = Ins[i].VT;
+ TargetRegisterClass* TRC = getRegClassFor(RegVT);
+ // Use a unique index in the instruction to prevent instruction folding.
+ // Yes, this is a hack.
+ SDValue Index = DAG.getTargetConstant(i, MVT::i32);
unsigned Reg = MF.getRegInfo().createVirtualRegister(TRC);
- MF.getRegInfo().addLiveIn(VA.getLocReg(), Reg);
+ SDValue ArgValue = DAG.getNode(PTXISD::READ_PARAM, dl, RegVT, Chain,
+ Index);
- SDValue ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, RegVT);
InVals.push_back(ArgValue);
- MFI->addArgReg(VA.getLocReg());
+ MFI->addArgReg(Reg);
}
}
@@ -301,41 +271,66 @@ SDValue PTXTargetLowering::
assert(Outs.size() == 0 && "Kernel must return void.");
return DAG.getNode(PTXISD::EXIT, dl, MVT::Other, Chain);
case CallingConv::PTX_Device:
- //assert(Outs.size() <= 1 && "Can at most return one value.");
+    assert(Outs.size() <= 1 && "Can return at most one value.");
break;
}
MachineFunction& MF = DAG.getMachineFunction();
PTXMachineFunctionInfo *MFI = MF.getInfo<PTXMachineFunctionInfo>();
+ PTXParamManager &PM = MFI->getParamManager();
SDValue Flag;
+ const PTXSubtarget& ST = getTargetMachine().getSubtarget<PTXSubtarget>();
- // Even though we could use the .param space for return arguments for
- // device functions if SM >= 2.0 and the number of return arguments is
- // only 1, we just always use registers since this makes the codegen
- // easier.
- SmallVector<CCValAssign, 16> RVLocs;
- CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
- getTargetMachine(), RVLocs, *DAG.getContext());
-
- CCInfo.AnalyzeReturn(Outs, RetCC_PTX);
-
- for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) {
- CCValAssign& VA = RVLocs[i];
-
- assert(VA.isRegLoc() && "CCValAssign must be RegLoc");
+ if (ST.useParamSpaceForDeviceArgs()) {
+ assert(Outs.size() < 2 && "Device functions can return at most one value");
+
+ if (Outs.size() == 1) {
+ unsigned ParamSize = OutVals[0].getValueType().getSizeInBits();
+ unsigned Param = PM.addReturnParam(ParamSize);
+ const std::string &ParamName = PM.getParamName(Param);
+ SDValue ParamValue = DAG.getTargetExternalSymbol(ParamName.c_str(),
+ MVT::Other);
+ Chain = DAG.getNode(PTXISD::STORE_PARAM, dl, MVT::Other, Chain,
+ ParamValue, OutVals[0]);
+ }
+ } else {
+ for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
+ EVT RegVT = Outs[i].VT;
+ TargetRegisterClass* TRC = 0;
- unsigned Reg = VA.getLocReg();
+ // Determine which register class we need
+ if (RegVT == MVT::i1) {
+ TRC = PTX::RegPredRegisterClass;
+ }
+ else if (RegVT == MVT::i16) {
+ TRC = PTX::RegI16RegisterClass;
+ }
+ else if (RegVT == MVT::i32) {
+ TRC = PTX::RegI32RegisterClass;
+ }
+ else if (RegVT == MVT::i64) {
+ TRC = PTX::RegI64RegisterClass;
+ }
+ else if (RegVT == MVT::f32) {
+ TRC = PTX::RegF32RegisterClass;
+ }
+ else if (RegVT == MVT::f64) {
+ TRC = PTX::RegF64RegisterClass;
+ }
+ else {
+ llvm_unreachable("Unknown parameter type");
+ }
- DAG.getMachineFunction().getRegInfo().addLiveOut(Reg);
+ unsigned Reg = MF.getRegInfo().createVirtualRegister(TRC);
- Chain = DAG.getCopyToReg(Chain, dl, Reg, OutVals[i], Flag);
+ SDValue Copy = DAG.getCopyToReg(Chain, dl, Reg, OutVals[i]/*, Flag*/);
+ SDValue OutReg = DAG.getRegister(Reg, RegVT);
- // Guarantee that all emitted copies are stuck together,
- // avoiding something bad
- Flag = Chain.getValue(1);
+ Chain = DAG.getNode(PTXISD::WRITE_PARAM, dl, MVT::Other, Copy, OutReg);
- MFI->addRetReg(Reg);
+ MFI->addRetReg(Reg);
+ }
}
if (Flag.getNode() == 0) {
@@ -345,3 +340,83 @@ SDValue PTXTargetLowering::
return DAG.getNode(PTXISD::RET, dl, MVT::Other, Chain, Flag);
}
}
+
+SDValue
+PTXTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
+ CallingConv::ID CallConv, bool isVarArg,
+ bool &isTailCall,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const {
+
+ MachineFunction& MF = DAG.getMachineFunction();
+ PTXMachineFunctionInfo *MFI = MF.getInfo<PTXMachineFunctionInfo>();
+ PTXParamManager &PM = MFI->getParamManager();
+
+ assert(getTargetMachine().getSubtarget<PTXSubtarget>().callsAreHandled() &&
+ "Calls are not handled for the target device");
+
+ std::vector<SDValue> Ops;
+ // The layout of the ops will be [Chain, #Ins, Ins, Callee, #Outs, Outs]
+ Ops.resize(Outs.size() + Ins.size() + 4);
+
+ Ops[0] = Chain;
+
+ // Identify the callee function
+ const GlobalValue *GV = cast<GlobalAddressSDNode>(Callee)->getGlobal();
+ assert(cast<Function>(GV)->getCallingConv() == CallingConv::PTX_Device &&
+ "PTX function calls must be to PTX device functions");
+ Callee = DAG.getTargetGlobalAddress(GV, dl, getPointerTy());
+ Ops[Ins.size()+2] = Callee;
+
+ // Generate STORE_PARAM nodes for each function argument. In PTX, function
+ // arguments are explicitly stored into .param variables and passed as
+ // arguments. There is no register/stack-based calling convention in PTX.
+ Ops[Ins.size()+3] = DAG.getTargetConstant(OutVals.size(), MVT::i32);
+ for (unsigned i = 0; i != OutVals.size(); ++i) {
+ unsigned Size = OutVals[i].getValueType().getSizeInBits();
+ unsigned Param = PM.addLocalParam(Size);
+ const std::string &ParamName = PM.getParamName(Param);
+ SDValue ParamValue = DAG.getTargetExternalSymbol(ParamName.c_str(),
+ MVT::Other);
+ Chain = DAG.getNode(PTXISD::STORE_PARAM, dl, MVT::Other, Chain,
+ ParamValue, OutVals[i]);
+ Ops[i+Ins.size()+4] = ParamValue;
+ }
+
+ std::vector<SDValue> InParams;
+
+ // Generate list of .param variables to hold the return value(s).
+ Ops[1] = DAG.getTargetConstant(Ins.size(), MVT::i32);
+ for (unsigned i = 0; i < Ins.size(); ++i) {
+ unsigned Size = Ins[i].VT.getStoreSizeInBits();
+ unsigned Param = PM.addLocalParam(Size);
+ const std::string &ParamName = PM.getParamName(Param);
+ SDValue ParamValue = DAG.getTargetExternalSymbol(ParamName.c_str(),
+ MVT::Other);
+ Ops[i+2] = ParamValue;
+ InParams.push_back(ParamValue);
+ }
+
+  // Re-store the chain: the STORE_PARAM nodes above updated it, so the value
+  // assigned at the top of this function is stale.
+  Ops[0] = Chain;
+
+ // Create the CALL node.
+ Chain = DAG.getNode(PTXISD::CALL, dl, MVT::Other, &Ops[0], Ops.size());
+
+ // Create the LOAD_PARAM nodes that retrieve the function return value(s).
+ for (unsigned i = 0; i < Ins.size(); ++i) {
+ SDValue Load = DAG.getNode(PTXISD::LOAD_PARAM, dl, Ins[i].VT, Chain,
+ InParams[i]);
+ InVals.push_back(Load);
+ }
+
+ return Chain;
+}
+
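For concreteness, with one return value and two arguments (Ins.size() == 1, Outs.size() == 2) the layout comment above works out as follows; the __ret/__arg names stand in for whatever PTXParamManager actually generates:

// Ops.size() == Outs.size() + Ins.size() + 4 == 7
// Ops[0] = Chain     (re-stored after the STORE_PARAM nodes update it)
// Ops[1] = 1         (#Ins, i32 target constant)
// Ops[2] = __ret0    (return-value .param symbol)
// Ops[3] = Callee    (Ops[Ins.size()+2])
// Ops[4] = 2         (#Outs, Ops[Ins.size()+3])
// Ops[5] = __arg0    (Ops[0 + Ins.size() + 4])
// Ops[6] = __arg1    (Ops[1 + Ins.size() + 4])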
+unsigned PTXTargetLowering::getNumRegisters(LLVMContext &Context, EVT VT) {
+ // All arguments consist of one "register," regardless of the type.
+ return 1;
+}
+
diff --git a/lib/Target/PTX/PTXISelLowering.h b/lib/Target/PTX/PTXISelLowering.h
index 4318541..4d25665 100644
--- a/lib/Target/PTX/PTXISelLowering.h
+++ b/lib/Target/PTX/PTXISelLowering.h
@@ -26,9 +26,12 @@ namespace PTXISD {
FIRST_NUMBER = ISD::BUILTIN_OP_END,
LOAD_PARAM,
STORE_PARAM,
+ READ_PARAM,
+ WRITE_PARAM,
EXIT,
RET,
- COPY_ADDRESS
+ COPY_ADDRESS,
+ CALL
};
} // namespace PTXISD
@@ -60,7 +63,19 @@ class PTXTargetLowering : public TargetLowering {
DebugLoc dl,
SelectionDAG &DAG) const;
- virtual MVT::SimpleValueType getSetCCResultType(EVT VT) const;
+ virtual SDValue
+ LowerCall(SDValue Chain, SDValue Callee,
+ CallingConv::ID CallConv, bool isVarArg,
+ bool &isTailCall,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const;
+
+ virtual EVT getSetCCResultType(EVT VT) const;
+
+ virtual unsigned getNumRegisters(LLVMContext &Context, EVT VT);
private:
SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
diff --git a/lib/Target/PTX/PTXInstrFormats.td b/lib/Target/PTX/PTXInstrFormats.td
index 8cee351..397fdc3 100644
--- a/lib/Target/PTX/PTXInstrFormats.td
+++ b/lib/Target/PTX/PTXInstrFormats.td
@@ -7,12 +7,39 @@
//
//===----------------------------------------------------------------------===//
-// PTX Predicate operand, default to (0, 0) = (zero-reg, always).
+
+// Rounding Mode Specifier
+/*class RoundingMode<bits<3> val> {
+ bits<3> Value = val;
+}
+
+def RndDefault : RoundingMode<0>;
+def RndNearestEven : RoundingMode<1>;
+def RndNearestZero : RoundingMode<2>;
+def RndNegInf : RoundingMode<3>;
+def RndPosInf : RoundingMode<4>;
+def RndApprox : RoundingMode<5>;*/
+
+
+// Rounding Mode Operand
+def RndMode : Operand<i32> {
+ let PrintMethod = "printRoundingMode";
+}
+
+def RndDefault : PatLeaf<(i32 0)>;
+
+// PTX Predicate operand; defaults to (zero_reg, 2) = (no register, PTXPredicate::None).
// Leave PrintMethod empty; predicate printing is defined elsewhere.
def pred : PredicateOperand<OtherVT, (ops RegPred, i32imm),
- (ops (i1 zero_reg), (i32 0))>;
+ (ops (i1 zero_reg), (i32 2))>;
+def RndModeOperand : Operand<OtherVT> {
+ let MIOperandInfo = (ops i32imm);
+}
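The RndMode operand prints through a printRoundingMode method on the asm printer. A sketch of what such a method could look like, assuming the encoding hinted at by the commented-out RoundingMode defs above (0 = default, 1 = nearest-even, ...); the signature and the exact mapping here are assumptions, not taken from this patch:

// Hypothetical printer; the real method lives in the PTX asm printer.
void PTXAsmPrinter::printRoundingMode(const MachineInstr *MI, int opNum,
                                      raw_ostream &OS) {
  switch (MI->getOperand(opNum).getImm()) {
  case 0: break;                 // RndDefault: print no suffix
  case 1: OS << ".rn"; break;    // round to nearest even
  case 2: OS << ".rz"; break;    // round toward zero
  case 3: OS << ".rm"; break;    // round toward minus infinity
  case 4: OS << ".rp"; break;    // round toward plus infinity
  case 5: OS << ".approx"; break;
  default: llvm_unreachable("Unknown rounding mode");
  }
}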
+
+// Instruction Types
let Namespace = "PTX" in {
+
class InstPTX<dag oops, dag iops, string asmstr, list<dag> pattern>
: Instruction {
dag OutOperandList = oops;
diff --git a/lib/Target/PTX/PTXInstrInfo.cpp b/lib/Target/PTX/PTXInstrInfo.cpp
index 425265a..1b947a5 100644
--- a/lib/Target/PTX/PTXInstrInfo.cpp
+++ b/lib/Target/PTX/PTXInstrInfo.cpp
@@ -16,10 +16,11 @@
#include "PTX.h"
#include "PTXInstrInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
-#include "llvm/Target/TargetRegistry.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/raw_ostream.h"
#define GET_INSTRINFO_CTOR
@@ -47,8 +48,13 @@ void PTXInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
MachineBasicBlock::iterator I, DebugLoc DL,
unsigned DstReg, unsigned SrcReg,
bool KillSrc) const {
- for (int i = 0, e = sizeof(map)/sizeof(map[0]); i != e; ++ i) {
- if (map[i].cls->contains(DstReg, SrcReg)) {
+
+ const MachineRegisterInfo& MRI = MBB.getParent()->getRegInfo();
+ //assert(MRI.getRegClass(SrcReg) == MRI.getRegClass(DstReg) &&
+ // "Invalid register copy between two register classes");
+
+ for (int i = 0, e = sizeof(map)/sizeof(map[0]); i != e; ++i) {
+ if (map[i].cls == MRI.getRegClass(DstReg)) {
const MCInstrDesc &MCID = get(map[i].opcode);
MachineInstr *MI = BuildMI(MBB, I, DL, MCID, DstReg).
addReg(SrcReg, getKillRegState(KillSrc));
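The loop above now keys the copy map on the destination's register class from MachineRegisterInfo rather than testing both registers for class membership. The map itself is defined elsewhere in this file; the entries below are an assumed sketch of its shape (opcode names other than MOVPREDrr are guesses), not the actual table:

// Hypothetical shape of the class-to-opcode copy map used by copyPhysReg.
static const struct {
  const TargetRegisterClass *cls;
  int opcode;
} map[] = {
  { PTX::RegPredRegisterClass, PTX::MOVPREDrr },
  { PTX::RegI16RegisterClass,  PTX::MOVU16rr  },
  { PTX::RegI32RegisterClass,  PTX::MOVU32rr  },
  { PTX::RegI64RegisterClass,  PTX::MOVU64rr  },
  { PTX::RegF32RegisterClass,  PTX::MOVF32rr  },
  { PTX::RegF64RegisterClass,  PTX::MOVF64rr  },
};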
@@ -161,7 +167,7 @@ DefinesPredicate(MachineInstr *MI,
return false;
Pred.push_back(MO);
- Pred.push_back(MachineOperand::CreateImm(PTX::PRED_NORMAL));
+ Pred.push_back(MachineOperand::CreateImm(PTXPredicate::None));
return true;
}
@@ -277,7 +283,7 @@ InsertBranch(MachineBasicBlock &MBB,
BuildMI(&MBB, DL, get(PTX::BRAdp))
.addMBB(TBB).addReg(Cond[0].getReg()).addImm(Cond[1].getImm());
BuildMI(&MBB, DL, get(PTX::BRAd))
- .addMBB(FBB).addReg(PTX::NoRegister).addImm(PTX::PRED_NORMAL);
+ .addMBB(FBB).addReg(PTX::NoRegister).addImm(PTXPredicate::None);
return 2;
} else if (Cond.size()) {
BuildMI(&MBB, DL, get(PTX::BRAdp))
@@ -285,7 +291,7 @@ InsertBranch(MachineBasicBlock &MBB,
return 1;
} else {
BuildMI(&MBB, DL, get(PTX::BRAd))
- .addMBB(TBB).addReg(PTX::NoRegister).addImm(PTX::PRED_NORMAL);
+ .addMBB(TBB).addReg(PTX::NoRegister).addImm(PTXPredicate::None);
return 1;
}
}
@@ -296,34 +302,7 @@ void PTXInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
unsigned SrcReg, bool isKill, int FrameIdx,
const TargetRegisterClass *RC,
const TargetRegisterInfo *TRI) const {
- MachineInstr& MI = *MII;
- DebugLoc DL = MI.getDebugLoc();
-
- DEBUG(dbgs() << "storeRegToStackSlot: " << MI);
-
- int OpCode;
-
- // Select the appropriate opcode based on the register class
- if (RC == PTX::RegI16RegisterClass) {
- OpCode = PTX::STACKSTOREI16;
- } else if (RC == PTX::RegI32RegisterClass) {
- OpCode = PTX::STACKSTOREI32;
- } else if (RC == PTX::RegI64RegisterClass) {
- OpCode = PTX::STACKSTOREI32;
- } else if (RC == PTX::RegF32RegisterClass) {
- OpCode = PTX::STACKSTOREF32;
- } else if (RC == PTX::RegF64RegisterClass) {
- OpCode = PTX::STACKSTOREF64;
- } else {
- llvm_unreachable("Unknown PTX register class!");
- }
-
- // Build the store instruction (really a mov)
- MachineInstrBuilder MIB = BuildMI(MBB, MII, DL, get(OpCode));
- MIB.addFrameIndex(FrameIdx);
- MIB.addReg(SrcReg);
-
- AddDefaultPredicate(MIB);
+  llvm_unreachable("storeRegToStackSlot should not be called for PTX");
}
void PTXInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
@@ -331,34 +310,7 @@ void PTXInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
unsigned DestReg, int FrameIdx,
const TargetRegisterClass *RC,
const TargetRegisterInfo *TRI) const {
- MachineInstr& MI = *MII;
- DebugLoc DL = MI.getDebugLoc();
-
- DEBUG(dbgs() << "loadRegToStackSlot: " << MI);
-
- int OpCode;
-
- // Select the appropriate opcode based on the register class
- if (RC == PTX::RegI16RegisterClass) {
- OpCode = PTX::STACKLOADI16;
- } else if (RC == PTX::RegI32RegisterClass) {
- OpCode = PTX::STACKLOADI32;
- } else if (RC == PTX::RegI64RegisterClass) {
- OpCode = PTX::STACKLOADI32;
- } else if (RC == PTX::RegF32RegisterClass) {
- OpCode = PTX::STACKLOADF32;
- } else if (RC == PTX::RegF64RegisterClass) {
- OpCode = PTX::STACKLOADF64;
- } else {
- llvm_unreachable("Unknown PTX register class!");
- }
-
- // Build the load instruction (really a mov)
- MachineInstrBuilder MIB = BuildMI(MBB, MII, DL, get(OpCode));
- MIB.addReg(DestReg);
- MIB.addFrameIndex(FrameIdx);
-
- AddDefaultPredicate(MIB);
+  llvm_unreachable("loadRegFromStackSlot should not be called for PTX");
}
// static helper routines
@@ -367,7 +319,7 @@ MachineSDNode *PTXInstrInfo::
GetPTXMachineNode(SelectionDAG *DAG, unsigned Opcode,
DebugLoc dl, EVT VT, SDValue Op1) {
SDValue predReg = DAG->getRegister(PTX::NoRegister, MVT::i1);
- SDValue predOp = DAG->getTargetConstant(PTX::PRED_NORMAL, MVT::i32);
+ SDValue predOp = DAG->getTargetConstant(PTXPredicate::None, MVT::i32);
SDValue ops[] = { Op1, predReg, predOp };
return DAG->getMachineNode(Opcode, dl, VT, ops, array_lengthof(ops));
}
@@ -376,7 +328,7 @@ MachineSDNode *PTXInstrInfo::
GetPTXMachineNode(SelectionDAG *DAG, unsigned Opcode,
DebugLoc dl, EVT VT, SDValue Op1, SDValue Op2) {
SDValue predReg = DAG->getRegister(PTX::NoRegister, MVT::i1);
- SDValue predOp = DAG->getTargetConstant(PTX::PRED_NORMAL, MVT::i32);
+ SDValue predOp = DAG->getTargetConstant(PTXPredicate::None, MVT::i32);
SDValue ops[] = { Op1, Op2, predReg, predOp };
return DAG->getMachineNode(Opcode, dl, VT, ops, array_lengthof(ops));
}
@@ -384,7 +336,7 @@ GetPTXMachineNode(SelectionDAG *DAG, unsigned Opcode,
void PTXInstrInfo::AddDefaultPredicate(MachineInstr *MI) {
if (MI->findFirstPredOperandIdx() == -1) {
MI->addOperand(MachineOperand::CreateReg(PTX::NoRegister, /*IsDef=*/false));
- MI->addOperand(MachineOperand::CreateImm(PTX::PRED_NORMAL));
+ MI->addOperand(MachineOperand::CreateImm(PTXPredicate::None));
}
}
diff --git a/lib/Target/PTX/PTXInstrInfo.td b/lib/Target/PTX/PTXInstrInfo.td
index 6bfe906..a3fcea9 100644
--- a/lib/Target/PTX/PTXInstrInfo.td
+++ b/lib/Target/PTX/PTXInstrInfo.td
@@ -21,10 +21,6 @@ include "PTXInstrFormats.td"
// Code Generation Predicates
//===----------------------------------------------------------------------===//
-// Addressing
-def Use32BitAddresses : Predicate<"!getSubtarget().is64Bit()">;
-def Use64BitAddresses : Predicate<"getSubtarget().is64Bit()">;
-
// Shader Model Support
def FDivNeedsRoundingMode : Predicate<"getSubtarget().fdivNeedsRoundingMode()">;
def FDivNoRoundingMode : Predicate<"!getSubtarget().fdivNeedsRoundingMode()">;
@@ -43,130 +39,19 @@ def DoesNotSupportPTX23 : Predicate<"!getSubtarget().supportsPTX23()">;
def SupportsFMA : Predicate<"getSubtarget().supportsFMA()">;
def DoesNotSupportFMA : Predicate<"!getSubtarget().supportsFMA()">;
-//===----------------------------------------------------------------------===//
-// Instruction Pattern Stuff
-//===----------------------------------------------------------------------===//
-def load_global : PatFrag<(ops node:$ptr), (load node:$ptr), [{
- const Value *Src;
- const PointerType *PT;
- if ((Src = cast<LoadSDNode>(N)->getSrcValue()) &&
- (PT = dyn_cast<PointerType>(Src->getType())))
- return PT->getAddressSpace() == PTX::GLOBAL;
- return false;
-}]>;
-
-def load_constant : PatFrag<(ops node:$ptr), (load node:$ptr), [{
- const Value *Src;
- const PointerType *PT;
- if ((Src = cast<LoadSDNode>(N)->getSrcValue()) &&
- (PT = dyn_cast<PointerType>(Src->getType())))
- return PT->getAddressSpace() == PTX::CONSTANT;
- return false;
-}]>;
-
-def load_local : PatFrag<(ops node:$ptr), (load node:$ptr), [{
- const Value *Src;
- const PointerType *PT;
- if ((Src = cast<LoadSDNode>(N)->getSrcValue()) &&
- (PT = dyn_cast<PointerType>(Src->getType())))
- return PT->getAddressSpace() == PTX::LOCAL;
- return false;
-}]>;
-
-def load_parameter : PatFrag<(ops node:$ptr), (load node:$ptr), [{
- const Value *Src;
- const PointerType *PT;
- if ((Src = cast<LoadSDNode>(N)->getSrcValue()) &&
- (PT = dyn_cast<PointerType>(Src->getType())))
- return PT->getAddressSpace() == PTX::PARAMETER;
- return false;
-}]>;
-
-def load_shared : PatFrag<(ops node:$ptr), (load node:$ptr), [{
- const Value *Src;
- const PointerType *PT;
- if ((Src = cast<LoadSDNode>(N)->getSrcValue()) &&
- (PT = dyn_cast<PointerType>(Src->getType())))
- return PT->getAddressSpace() == PTX::SHARED;
- return false;
-}]>;
-
-def store_global
- : PatFrag<(ops node:$d, node:$ptr), (store node:$d, node:$ptr), [{
- const Value *Src;
- const PointerType *PT;
- if ((Src = cast<StoreSDNode>(N)->getSrcValue()) &&
- (PT = dyn_cast<PointerType>(Src->getType())))
- return PT->getAddressSpace() == PTX::GLOBAL;
- return false;
-}]>;
-
-def store_local
- : PatFrag<(ops node:$d, node:$ptr), (store node:$d, node:$ptr), [{
- const Value *Src;
- const PointerType *PT;
- if ((Src = cast<StoreSDNode>(N)->getSrcValue()) &&
- (PT = dyn_cast<PointerType>(Src->getType())))
- return PT->getAddressSpace() == PTX::LOCAL;
- return false;
-}]>;
-
-def store_parameter
- : PatFrag<(ops node:$d, node:$ptr), (store node:$d, node:$ptr), [{
- const Value *Src;
- const PointerType *PT;
- if ((Src = cast<StoreSDNode>(N)->getSrcValue()) &&
- (PT = dyn_cast<PointerType>(Src->getType())))
- return PT->getAddressSpace() == PTX::PARAMETER;
- return false;
-}]>;
-
-def store_shared
- : PatFrag<(ops node:$d, node:$ptr), (store node:$d, node:$ptr), [{
- const Value *Src;
- const PointerType *PT;
- if ((Src = cast<StoreSDNode>(N)->getSrcValue()) &&
- (PT = dyn_cast<PointerType>(Src->getType())))
- return PT->getAddressSpace() == PTX::SHARED;
- return false;
-}]>;
-
-// Addressing modes.
-def ADDRrr32 : ComplexPattern<i32, 2, "SelectADDRrr", [], []>;
-def ADDRrr64 : ComplexPattern<i64, 2, "SelectADDRrr", [], []>;
-def ADDRri32 : ComplexPattern<i32, 2, "SelectADDRri", [], []>;
-def ADDRri64 : ComplexPattern<i64, 2, "SelectADDRri", [], []>;
-def ADDRii32 : ComplexPattern<i32, 2, "SelectADDRii", [], []>;
-def ADDRii64 : ComplexPattern<i64, 2, "SelectADDRii", [], []>;
-
-// Address operands
-def MEMri32 : Operand<i32> {
- let PrintMethod = "printMemOperand";
- let MIOperandInfo = (ops RegI32, i32imm);
-}
-def MEMri64 : Operand<i64> {
- let PrintMethod = "printMemOperand";
- let MIOperandInfo = (ops RegI64, i64imm);
-}
-def MEMii32 : Operand<i32> {
- let PrintMethod = "printMemOperand";
- let MIOperandInfo = (ops i32imm, i32imm);
-}
-def MEMii64 : Operand<i64> {
- let PrintMethod = "printMemOperand";
- let MIOperandInfo = (ops i64imm, i64imm);
-}
-// The operand here does not correspond to an actual address, so we
-// can use i32 in 64-bit address modes.
-def MEMpi : Operand<i32> {
- let PrintMethod = "printParamOperand";
- let MIOperandInfo = (ops i32imm);
-}
-def MEMret : Operand<i32> {
- let PrintMethod = "printReturnOperand";
- let MIOperandInfo = (ops i32imm);
-}
+
+// def SDT_PTXCallSeqStart : SDCallSeqStart<[SDTCisVT<0, i32>]>;
+// def SDT_PTXCallSeqEnd : SDCallSeqEnd<[SDTCisVT<0, i32>, SDTCisVT<1, i32>]>;
+
+// def PTXcallseq_start : SDNode<"ISD::CALLSEQ_START", SDT_PTXCallSeqStart,
+// [SDNPHasChain, SDNPOutGlue]>;
+// def PTXcallseq_end : SDNode<"ISD::CALLSEQ_END", SDT_PTXCallSeqEnd,
+// [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>;
+
+def PTXcall : SDNode<"PTXISD::CALL", SDTNone,
+ [SDNPHasChain, SDNPVariadic, SDNPOptInGlue, SDNPOutGlue]>;
+
// Branch & call targets have OtherVT type.
def brtarget : Operand<OtherVT>;
@@ -189,87 +74,73 @@ def PTXret
def PTXcopyaddress
: SDNode<"PTXISD::COPY_ADDRESS", SDTypeProfile<1, 1, []>, []>;
-// Load/store .param space
-def PTXloadparam
- : SDNode<"PTXISD::LOAD_PARAM", SDTypeProfile<1, 1, [SDTCisVT<1, i32>]>,
- [SDNPHasChain, SDNPOutGlue, SDNPOptInGlue]>;
-def PTXstoreparam
- : SDNode<"PTXISD::STORE_PARAM", SDTypeProfile<0, 2, [SDTCisVT<0, i32>]>,
- [SDNPHasChain, SDNPOutGlue, SDNPOptInGlue]>;
+
//===----------------------------------------------------------------------===//
// Instruction Class Templates
//===----------------------------------------------------------------------===//
+// For floating-point instructions, we cannot simply embed the pattern in the
+// instruction definition: the rounding mode must be supplied as an explicit
+// operand, and there is no obvious way to insert constant operands into
+// instructions directly from pattern matches.
+
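With the patterns dropped, instruction selection has to supply the RndMode operand (and the default predicate) explicitly when it builds one of these instructions. A minimal sketch, assuming FADDrr32 from the FADD multiclass defined below and the predicate convention used in PTXISelDAGToDAG.cpp above; this is not the actual selection code from this patch:

// Hypothetical: build an f32 add with an explicit default rounding mode.
SDNode *SelectFADDrr32(SelectionDAG *CurDAG, SDNode *Node) {
  DebugLoc dl = Node->getDebugLoc();
  SDValue RndMode = CurDAG->getTargetConstant(0, MVT::i32); // RndDefault
  SDValue Pred    = CurDAG->getRegister(PTX::NoRegister, MVT::i1);
  SDValue PredOp  = CurDAG->getTargetConstant(PTXPredicate::None, MVT::i32);
  SDValue Ops[]   = { RndMode, Node->getOperand(0), Node->getOperand(1),
                      Pred, PredOp };
  return CurDAG->getMachineNode(PTX::FADDrr32, dl, MVT::f32, Ops, 5);
}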
//===- Floating-Point Instructions - 2 Operand Form -----------------------===//
-multiclass PTX_FLOAT_2OP<string opcstr, SDNode opnode> {
+multiclass PTX_FLOAT_2OP<string opcstr> {
def rr32 : InstPTX<(outs RegF32:$d),
- (ins RegF32:$a),
- !strconcat(opcstr, ".f32\t$d, $a"),
- [(set RegF32:$d, (opnode RegF32:$a))]>;
+ (ins RndMode:$r, RegF32:$a),
+ !strconcat(opcstr, "$r.f32\t$d, $a"), []>;
def ri32 : InstPTX<(outs RegF32:$d),
- (ins f32imm:$a),
- !strconcat(opcstr, ".f32\t$d, $a"),
- [(set RegF32:$d, (opnode fpimm:$a))]>;
+ (ins RndMode:$r, f32imm:$a),
+ !strconcat(opcstr, "$r.f32\t$d, $a"), []>;
def rr64 : InstPTX<(outs RegF64:$d),
- (ins RegF64:$a),
- !strconcat(opcstr, ".f64\t$d, $a"),
- [(set RegF64:$d, (opnode RegF64:$a))]>;
+ (ins RndMode:$r, RegF64:$a),
+ !strconcat(opcstr, "$r.f64\t$d, $a"), []>;
def ri64 : InstPTX<(outs RegF64:$d),
- (ins f64imm:$a),
- !strconcat(opcstr, ".f64\t$d, $a"),
- [(set RegF64:$d, (opnode fpimm:$a))]>;
+ (ins RndMode:$r, f64imm:$a),
+ !strconcat(opcstr, "$r.f64\t$d, $a"), []>;
}
//===- Floating-Point Instructions - 3 Operand Form -----------------------===//
-multiclass PTX_FLOAT_3OP<string opcstr, SDNode opnode> {
+multiclass PTX_FLOAT_3OP<string opcstr> {
def rr32 : InstPTX<(outs RegF32:$d),
- (ins RegF32:$a, RegF32:$b),
- !strconcat(opcstr, ".f32\t$d, $a, $b"),
- [(set RegF32:$d, (opnode RegF32:$a, RegF32:$b))]>;
+ (ins RndMode:$r, RegF32:$a, RegF32:$b),
+ !strconcat(opcstr, "$r.f32\t$d, $a, $b"), []>;
def ri32 : InstPTX<(outs RegF32:$d),
- (ins RegF32:$a, f32imm:$b),
- !strconcat(opcstr, ".f32\t$d, $a, $b"),
- [(set RegF32:$d, (opnode RegF32:$a, fpimm:$b))]>;
+ (ins RndMode:$r, RegF32:$a, f32imm:$b),
+ !strconcat(opcstr, "$r.f32\t$d, $a, $b"), []>;
def rr64 : InstPTX<(outs RegF64:$d),
- (ins RegF64:$a, RegF64:$b),
- !strconcat(opcstr, ".f64\t$d, $a, $b"),
- [(set RegF64:$d, (opnode RegF64:$a, RegF64:$b))]>;
+ (ins RndMode:$r, RegF64:$a, RegF64:$b),
+ !strconcat(opcstr, "$r.f64\t$d, $a, $b"), []>;
def ri64 : InstPTX<(outs RegF64:$d),
- (ins RegF64:$a, f64imm:$b),
- !strconcat(opcstr, ".f64\t$d, $a, $b"),
- [(set RegF64:$d, (opnode RegF64:$a, fpimm:$b))]>;
+ (ins RndMode:$r, RegF64:$a, f64imm:$b),
+ !strconcat(opcstr, "$r.f64\t$d, $a, $b"), []>;
}
//===- Floating-Point Instructions - 4 Operand Form -----------------------===//
-multiclass PTX_FLOAT_4OP<string opcstr, SDNode opnode1, SDNode opnode2> {
+multiclass PTX_FLOAT_4OP<string opcstr> {
def rrr32 : InstPTX<(outs RegF32:$d),
- (ins RegF32:$a, RegF32:$b, RegF32:$c),
- !strconcat(opcstr, ".f32\t$d, $a, $b, $c"),
- [(set RegF32:$d, (opnode2 (opnode1 RegF32:$a,
- RegF32:$b),
- RegF32:$c))]>;
+ (ins RndMode:$r, RegF32:$a, RegF32:$b, RegF32:$c),
+ !strconcat(opcstr, "$r.f32\t$d, $a, $b, $c"), []>;
def rri32 : InstPTX<(outs RegF32:$d),
- (ins RegF32:$a, RegF32:$b, f32imm:$c),
- !strconcat(opcstr, ".f32\t$d, $a, $b, $c"),
- [(set RegF32:$d, (opnode2 (opnode1 RegF32:$a,
- RegF32:$b),
- fpimm:$c))]>;
+ (ins RndMode:$r, RegF32:$a, RegF32:$b, f32imm:$c),
+ !strconcat(opcstr, "$r.f32\t$d, $a, $b, $c"), []>;
+ def rii32 : InstPTX<(outs RegF32:$d),
+ (ins RndMode:$r, RegF32:$a, f32imm:$b, f32imm:$c),
+ !strconcat(opcstr, "$r.f32\t$d, $a, $b, $c"), []>;
def rrr64 : InstPTX<(outs RegF64:$d),
- (ins RegF64:$a, RegF64:$b, RegF64:$c),
- !strconcat(opcstr, ".f64\t$d, $a, $b, $c"),
- [(set RegF64:$d, (opnode2 (opnode1 RegF64:$a,
- RegF64:$b),
- RegF64:$c))]>;
+ (ins RndMode:$r, RegF64:$a, RegF64:$b, RegF64:$c),
+ !strconcat(opcstr, "$r.f64\t$d, $a, $b, $c"), []>;
def rri64 : InstPTX<(outs RegF64:$d),
- (ins RegF64:$a, RegF64:$b, f64imm:$c),
- !strconcat(opcstr, ".f64\t$d, $a, $b, $c"),
- [(set RegF64:$d, (opnode2 (opnode1 RegF64:$a,
- RegF64:$b),
- fpimm:$c))]>;
+ (ins RndMode:$r, RegF64:$a, RegF64:$b, f64imm:$c),
+ !strconcat(opcstr, "$r.f64\t$d, $a, $b, $c"), []>;
+ def rii64 : InstPTX<(outs RegF64:$d),
+ (ins RndMode:$r, RegF64:$a, f64imm:$b, f64imm:$c),
+ !strconcat(opcstr, "$r.f64\t$d, $a, $b, $c"), []>;
}
-multiclass INT3<string opcstr, SDNode opnode> {
+//===- Integer Instructions - 3 Operand Form ------------------------------===//
+multiclass PTX_INT3<string opcstr, SDNode opnode> {
def rr16 : InstPTX<(outs RegI16:$d),
(ins RegI16:$a, RegI16:$b),
!strconcat(opcstr, ".u16\t$d, $a, $b"),
@@ -296,6 +167,35 @@ multiclass INT3<string opcstr, SDNode opnode> {
[(set RegI64:$d, (opnode RegI64:$a, imm:$b))]>;
}
+//===- Integer Instructions - 3 Operand Form (Signed) ---------------------===//
+multiclass PTX_INT3_SIGNED<string opcstr, SDNode opnode> {
+ def rr16 : InstPTX<(outs RegI16:$d),
+ (ins RegI16:$a, RegI16:$b),
+ !strconcat(opcstr, ".s16\t$d, $a, $b"),
+ [(set RegI16:$d, (opnode RegI16:$a, RegI16:$b))]>;
+ def ri16 : InstPTX<(outs RegI16:$d),
+ (ins RegI16:$a, i16imm:$b),
+ !strconcat(opcstr, ".s16\t$d, $a, $b"),
+ [(set RegI16:$d, (opnode RegI16:$a, imm:$b))]>;
+ def rr32 : InstPTX<(outs RegI32:$d),
+ (ins RegI32:$a, RegI32:$b),
+ !strconcat(opcstr, ".s32\t$d, $a, $b"),
+ [(set RegI32:$d, (opnode RegI32:$a, RegI32:$b))]>;
+ def ri32 : InstPTX<(outs RegI32:$d),
+ (ins RegI32:$a, i32imm:$b),
+ !strconcat(opcstr, ".s32\t$d, $a, $b"),
+ [(set RegI32:$d, (opnode RegI32:$a, imm:$b))]>;
+ def rr64 : InstPTX<(outs RegI64:$d),
+ (ins RegI64:$a, RegI64:$b),
+ !strconcat(opcstr, ".s64\t$d, $a, $b"),
+ [(set RegI64:$d, (opnode RegI64:$a, RegI64:$b))]>;
+ def ri64 : InstPTX<(outs RegI64:$d),
+ (ins RegI64:$a, i64imm:$b),
+ !strconcat(opcstr, ".s64\t$d, $a, $b"),
+ [(set RegI64:$d, (opnode RegI64:$a, imm:$b))]>;
+}
+
+//===- Bitwise Logic Instructions - 3 Operand Form ------------------------===//
multiclass PTX_LOGIC<string opcstr, SDNode opnode> {
def ripreds : InstPTX<(outs RegPred:$d),
(ins RegPred:$a, i1imm:$b),
@@ -331,7 +231,8 @@ multiclass PTX_LOGIC<string opcstr, SDNode opnode> {
[(set RegI64:$d, (opnode RegI64:$a, imm:$b))]>;
}
-multiclass INT3ntnc<string opcstr, SDNode opnode> {
+//===- Integer Shift Instructions - 3 Operand Form ------------------------===//
+multiclass PTX_INT3ntnc<string opcstr, SDNode opnode> {
def rr16 : InstPTX<(outs RegI16:$d),
(ins RegI16:$a, RegI16:$b),
!strconcat(opcstr, "16\t$d, $a, $b"),
@@ -370,6 +271,7 @@ multiclass INT3ntnc<string opcstr, SDNode opnode> {
[(set RegI64:$d, (opnode imm:$a, RegI64:$b))]>;
}
+//===- Set Predicate Instructions (Int) - 3/4 Operand Forms ---------------===//
multiclass PTX_SETP_I<RegisterClass RC, string regclsname, Operand immcls,
CondCode cmp, string cmpstr> {
// TODO support 5-operand format: p|q, a, b, c
@@ -385,56 +287,77 @@ multiclass PTX_SETP_I<RegisterClass RC, string regclsname, Operand immcls,
def rr_and_r
: InstPTX<(outs RegPred:$p), (ins RC:$a, RC:$b, RegPred:$c),
- !strconcat("setp.", cmpstr, ".and.", regclsname, "\t$p, $a, $b, $c"),
+ !strconcat("setp.", cmpstr, ".and.", regclsname,
+ "\t$p, $a, $b, $c"),
[(set RegPred:$p, (and (setcc RC:$a, RC:$b, cmp), RegPred:$c))]>;
def ri_and_r
: InstPTX<(outs RegPred:$p), (ins RC:$a, immcls:$b, RegPred:$c),
- !strconcat("setp.", cmpstr, ".and.", regclsname, "\t$p, $a, $b, $c"),
- [(set RegPred:$p, (and (setcc RC:$a, imm:$b, cmp), RegPred:$c))]>;
+ !strconcat("setp.", cmpstr, ".and.", regclsname,
+ "\t$p, $a, $b, $c"),
+ [(set RegPred:$p, (and (setcc RC:$a, imm:$b, cmp),
+ RegPred:$c))]>;
def rr_or_r
: InstPTX<(outs RegPred:$p), (ins RC:$a, RC:$b, RegPred:$c),
- !strconcat("setp.", cmpstr, ".or.", regclsname, "\t$p, $a, $b, $c"),
+ !strconcat("setp.", cmpstr, ".or.", regclsname,
+ "\t$p, $a, $b, $c"),
[(set RegPred:$p, (or (setcc RC:$a, RC:$b, cmp), RegPred:$c))]>;
def ri_or_r
: InstPTX<(outs RegPred:$p), (ins RC:$a, immcls:$b, RegPred:$c),
- !strconcat("setp.", cmpstr, ".or.", regclsname, "\t$p, $a, $b, $c"),
+ !strconcat("setp.", cmpstr, ".or.", regclsname,
+ "\t$p, $a, $b, $c"),
[(set RegPred:$p, (or (setcc RC:$a, imm:$b, cmp), RegPred:$c))]>;
def rr_xor_r
: InstPTX<(outs RegPred:$p), (ins RC:$a, RC:$b, RegPred:$c),
- !strconcat("setp.", cmpstr, ".xor.", regclsname, "\t$p, $a, $b, $c"),
+ !strconcat("setp.", cmpstr, ".xor.", regclsname,
+ "\t$p, $a, $b, $c"),
[(set RegPred:$p, (xor (setcc RC:$a, RC:$b, cmp), RegPred:$c))]>;
def ri_xor_r
: InstPTX<(outs RegPred:$p), (ins RC:$a, immcls:$b, RegPred:$c),
- !strconcat("setp.", cmpstr, ".xor.", regclsname, "\t$p, $a, $b, $c"),
- [(set RegPred:$p, (xor (setcc RC:$a, imm:$b, cmp), RegPred:$c))]>;
+ !strconcat("setp.", cmpstr, ".xor.", regclsname,
+ "\t$p, $a, $b, $c"),
+ [(set RegPred:$p, (xor (setcc RC:$a, imm:$b, cmp),
+ RegPred:$c))]>;
def rr_and_not_r
: InstPTX<(outs RegPred:$p), (ins RC:$a, RC:$b, RegPred:$c),
- !strconcat("setp.", cmpstr, ".and.", regclsname, "\t$p, $a, $b, !$c"),
- [(set RegPred:$p, (and (setcc RC:$a, RC:$b, cmp), (not RegPred:$c)))]>;
+ !strconcat("setp.", cmpstr, ".and.", regclsname,
+ "\t$p, $a, $b, !$c"),
+ [(set RegPred:$p, (and (setcc RC:$a, RC:$b, cmp),
+ (not RegPred:$c)))]>;
def ri_and_not_r
: InstPTX<(outs RegPred:$p), (ins RC:$a, immcls:$b, RegPred:$c),
- !strconcat("setp.", cmpstr, ".and.", regclsname, "\t$p, $a, $b, !$c"),
- [(set RegPred:$p, (and (setcc RC:$a, imm:$b, cmp), (not RegPred:$c)))]>;
+ !strconcat("setp.", cmpstr, ".and.", regclsname,
+ "\t$p, $a, $b, !$c"),
+ [(set RegPred:$p, (and (setcc RC:$a, imm:$b, cmp),
+ (not RegPred:$c)))]>;
def rr_or_not_r
: InstPTX<(outs RegPred:$p), (ins RC:$a, RC:$b, RegPred:$c),
- !strconcat("setp.", cmpstr, ".or.", regclsname, "\t$p, $a, $b, !$c"),
- [(set RegPred:$p, (or (setcc RC:$a, RC:$b, cmp), (not RegPred:$c)))]>;
+ !strconcat("setp.", cmpstr, ".or.", regclsname,
+ "\t$p, $a, $b, !$c"),
+ [(set RegPred:$p, (or (setcc RC:$a, RC:$b, cmp),
+ (not RegPred:$c)))]>;
def ri_or_not_r
: InstPTX<(outs RegPred:$p), (ins RC:$a, immcls:$b, RegPred:$c),
- !strconcat("setp.", cmpstr, ".or.", regclsname, "\t$p, $a, $b, !$c"),
- [(set RegPred:$p, (or (setcc RC:$a, imm:$b, cmp), (not RegPred:$c)))]>;
+ !strconcat("setp.", cmpstr, ".or.", regclsname,
+ "\t$p, $a, $b, !$c"),
+ [(set RegPred:$p, (or (setcc RC:$a, imm:$b, cmp),
+ (not RegPred:$c)))]>;
def rr_xor_not_r
: InstPTX<(outs RegPred:$p), (ins RC:$a, RC:$b, RegPred:$c),
- !strconcat("setp.", cmpstr, ".xor.", regclsname, "\t$p, $a, $b, !$c"),
- [(set RegPred:$p, (xor (setcc RC:$a, RC:$b, cmp), (not RegPred:$c)))]>;
+ !strconcat("setp.", cmpstr, ".xor.", regclsname,
+ "\t$p, $a, $b, !$c"),
+ [(set RegPred:$p, (xor (setcc RC:$a, RC:$b, cmp),
+ (not RegPred:$c)))]>;
def ri_xor_not_r
: InstPTX<(outs RegPred:$p), (ins RC:$a, immcls:$b, RegPred:$c),
- !strconcat("setp.", cmpstr, ".xor.", regclsname, "\t$p, $a, $b, !$c"),
- [(set RegPred:$p, (xor (setcc RC:$a, imm:$b, cmp), (not RegPred:$c)))]>;
+ !strconcat("setp.", cmpstr, ".xor.", regclsname,
+ "\t$p, $a, $b, !$c"),
+ [(set RegPred:$p, (xor (setcc RC:$a, imm:$b, cmp),
+ (not RegPred:$c)))]>;
}
-multiclass PTX_SETP_FP<RegisterClass RC, string regclsname,
+//===- Set Predicate Instructions (FP) - 3/4 Operand Form -----------------===//
+multiclass PTX_SETP_FP<RegisterClass RC, string regclsname, Operand immcls,
CondCode ucmp, CondCode ocmp, string cmpstr> {
// TODO support 5-operand format: p|q, a, b, c
@@ -447,137 +370,110 @@ multiclass PTX_SETP_FP<RegisterClass RC, string regclsname,
!strconcat("setp.", cmpstr, ".", regclsname, "\t$p, $a, $b"),
[(set RegPred:$p, (setcc RC:$a, RC:$b, ocmp))]>;
+ def ri_u
+ : InstPTX<(outs RegPred:$p), (ins RC:$a, immcls:$b),
+ !strconcat("setp.", cmpstr, "u.", regclsname, "\t$p, $a, $b"),
+ [(set RegPred:$p, (setcc RC:$a, fpimm:$b, ucmp))]>;
+ def ri_o
+ : InstPTX<(outs RegPred:$p), (ins RC:$a, immcls:$b),
+ !strconcat("setp.", cmpstr, ".", regclsname, "\t$p, $a, $b"),
+ [(set RegPred:$p, (setcc RC:$a, fpimm:$b, ocmp))]>;
+
def rr_and_r_u
: InstPTX<(outs RegPred:$p), (ins RC:$a, RC:$b, RegPred:$c),
- !strconcat("setp.", cmpstr, "u.and.", regclsname, "\t$p, $a, $b, $c"),
- [(set RegPred:$p, (and (setcc RC:$a, RC:$b, ucmp), RegPred:$c))]>;
+ !strconcat("setp.", cmpstr, "u.and.", regclsname,
+ "\t$p, $a, $b, $c"),
+ [(set RegPred:$p, (and (setcc RC:$a, RC:$b, ucmp),
+ RegPred:$c))]>;
def rr_and_r_o
: InstPTX<(outs RegPred:$p), (ins RC:$a, RC:$b, RegPred:$c),
- !strconcat("setp.", cmpstr, ".and.", regclsname, "\t$p, $a, $b, $c"),
- [(set RegPred:$p, (and (setcc RC:$a, RC:$b, ocmp), RegPred:$c))]>;
+ !strconcat("setp.", cmpstr, ".and.", regclsname,
+ "\t$p, $a, $b, $c"),
+ [(set RegPred:$p, (and (setcc RC:$a, RC:$b, ocmp),
+ RegPred:$c))]>;
def rr_or_r_u
: InstPTX<(outs RegPred:$p), (ins RC:$a, RC:$b, RegPred:$c),
- !strconcat("setp.", cmpstr, "u.or.", regclsname, "\t$p, $a, $b, $c"),
+ !strconcat("setp.", cmpstr, "u.or.", regclsname,
+ "\t$p, $a, $b, $c"),
[(set RegPred:$p, (or (setcc RC:$a, RC:$b, ucmp), RegPred:$c))]>;
def rr_or_r_o
: InstPTX<(outs RegPred:$p), (ins RC:$a, RC:$b, RegPred:$c),
- !strconcat("setp.", cmpstr, ".or.", regclsname, "\t$p, $a, $b, $c"),
+ !strconcat("setp.", cmpstr, ".or.", regclsname,
+ "\t$p, $a, $b, $c"),
[(set RegPred:$p, (or (setcc RC:$a, RC:$b, ocmp), RegPred:$c))]>;
def rr_xor_r_u
: InstPTX<(outs RegPred:$p), (ins RC:$a, RC:$b, RegPred:$c),
- !strconcat("setp.", cmpstr, "u.xor.", regclsname, "\t$p, $a, $b, $c"),
- [(set RegPred:$p, (xor (setcc RC:$a, RC:$b, ucmp), RegPred:$c))]>;
+ !strconcat("setp.", cmpstr, "u.xor.", regclsname,
+ "\t$p, $a, $b, $c"),
+ [(set RegPred:$p, (xor (setcc RC:$a, RC:$b, ucmp),
+ RegPred:$c))]>;
def rr_xor_r_o
: InstPTX<(outs RegPred:$p), (ins RC:$a, RC:$b, RegPred:$c),
- !strconcat("setp.", cmpstr, ".xor.", regclsname, "\t$p, $a, $b, $c"),
- [(set RegPred:$p, (xor (setcc RC:$a, RC:$b, ocmp), RegPred:$c))]>;
+ !strconcat("setp.", cmpstr, ".xor.", regclsname,
+ "\t$p, $a, $b, $c"),
+ [(set RegPred:$p, (xor (setcc RC:$a, RC:$b, ocmp),
+ RegPred:$c))]>;
def rr_and_not_r_u
: InstPTX<(outs RegPred:$p), (ins RC:$a, RC:$b, RegPred:$c),
- !strconcat("setp.", cmpstr, "u.and.", regclsname, "\t$p, $a, $b, !$c"),
- [(set RegPred:$p, (and (setcc RC:$a, RC:$b, ucmp), (not RegPred:$c)))]>;
+ !strconcat("setp.", cmpstr, "u.and.", regclsname,
+ "\t$p, $a, $b, !$c"),
+ [(set RegPred:$p, (and (setcc RC:$a, RC:$b, ucmp),
+ (not RegPred:$c)))]>;
def rr_and_not_r_o
: InstPTX<(outs RegPred:$p), (ins RC:$a, RC:$b, RegPred:$c),
- !strconcat("setp.", cmpstr, ".and.", regclsname, "\t$p, $a, $b, !$c"),
- [(set RegPred:$p, (and (setcc RC:$a, RC:$b, ocmp), (not RegPred:$c)))]>;
+ !strconcat("setp.", cmpstr, ".and.", regclsname,
+ "\t$p, $a, $b, !$c"),
+ [(set RegPred:$p, (and (setcc RC:$a, RC:$b, ocmp),
+ (not RegPred:$c)))]>;
def rr_or_not_r_u
: InstPTX<(outs RegPred:$p), (ins RC:$a, RC:$b, RegPred:$c),
- !strconcat("setp.", cmpstr, "u.or.", regclsname, "\t$p, $a, $b, !$c"),
- [(set RegPred:$p, (or (setcc RC:$a, RC:$b, ucmp), (not RegPred:$c)))]>;
+ !strconcat("setp.", cmpstr, "u.or.", regclsname,
+ "\t$p, $a, $b, !$c"),
+ [(set RegPred:$p, (or (setcc RC:$a, RC:$b, ucmp),
+ (not RegPred:$c)))]>;
def rr_or_not_r_o
: InstPTX<(outs RegPred:$p), (ins RC:$a, RC:$b, RegPred:$c),
- !strconcat("setp.", cmpstr, ".or.", regclsname, "\t$p, $a, $b, !$c"),
- [(set RegPred:$p, (or (setcc RC:$a, RC:$b, ocmp), (not RegPred:$c)))]>;
+ !strconcat("setp.", cmpstr, ".or.", regclsname,
+ "\t$p, $a, $b, !$c"),
+ [(set RegPred:$p, (or (setcc RC:$a, RC:$b, ocmp),
+ (not RegPred:$c)))]>;
def rr_xor_not_r_u
: InstPTX<(outs RegPred:$p), (ins RC:$a, RC:$b, RegPred:$c),
- !strconcat("setp.", cmpstr, "u.xor.", regclsname, "\t$p, $a, $b, !$c"),
- [(set RegPred:$p, (xor (setcc RC:$a, RC:$b, ucmp), (not RegPred:$c)))]>;
+ !strconcat("setp.", cmpstr, "u.xor.", regclsname,
+ "\t$p, $a, $b, !$c"),
+ [(set RegPred:$p, (xor (setcc RC:$a, RC:$b, ucmp),
+ (not RegPred:$c)))]>;
def rr_xor_not_r_o
: InstPTX<(outs RegPred:$p), (ins RC:$a, RC:$b, RegPred:$c),
- !strconcat("setp.", cmpstr, ".xor.", regclsname, "\t$p, $a, $b, !$c"),
- [(set RegPred:$p, (xor (setcc RC:$a, RC:$b, ocmp), (not RegPred:$c)))]>;
+ !strconcat("setp.", cmpstr, ".xor.", regclsname,
+ "\t$p, $a, $b, !$c"),
+ [(set RegPred:$p, (xor (setcc RC:$a, RC:$b, ocmp),
+ (not RegPred:$c)))]>;
}
-multiclass PTX_SELP<RegisterClass RC, string regclsname> {
+//===- Select Predicate Instructions - 4 Operand Form ---------------------===//
+multiclass PTX_SELP<RegisterClass RC, string regclsname, Operand immcls,
+ SDNode immnode> {
def rr
: InstPTX<(outs RC:$r), (ins RegPred:$a, RC:$b, RC:$c),
!strconcat("selp.", regclsname, "\t$r, $b, $c, $a"),
[(set RC:$r, (select RegPred:$a, RC:$b, RC:$c))]>;
+ def ri
+ : InstPTX<(outs RC:$r), (ins RegPred:$a, RC:$b, immcls:$c),
+ !strconcat("selp.", regclsname, "\t$r, $b, $c, $a"),
+ [(set RC:$r, (select RegPred:$a, RC:$b, immnode:$c))]>;
+ def ii
+ : InstPTX<(outs RC:$r), (ins RegPred:$a, immcls:$b, immcls:$c),
+ !strconcat("selp.", regclsname, "\t$r, $b, $c, $a"),
+ [(set RC:$r, (select RegPred:$a, immnode:$b, immnode:$c))]>;
}
-multiclass PTX_LD<string opstr, string typestr, RegisterClass RC, PatFrag pat_load> {
- def rr32 : InstPTX<(outs RC:$d),
- (ins MEMri32:$a),
- !strconcat(opstr, !strconcat(typestr, "\t$d, [$a]")),
- [(set RC:$d, (pat_load ADDRrr32:$a))]>, Requires<[Use32BitAddresses]>;
- def rr64 : InstPTX<(outs RC:$d),
- (ins MEMri64:$a),
- !strconcat(opstr, !strconcat(typestr, "\t$d, [$a]")),
- [(set RC:$d, (pat_load ADDRrr64:$a))]>, Requires<[Use64BitAddresses]>;
- def ri32 : InstPTX<(outs RC:$d),
- (ins MEMri32:$a),
- !strconcat(opstr, !strconcat(typestr, "\t$d, [$a]")),
- [(set RC:$d, (pat_load ADDRri32:$a))]>, Requires<[Use32BitAddresses]>;
- def ri64 : InstPTX<(outs RC:$d),
- (ins MEMri64:$a),
- !strconcat(opstr, !strconcat(typestr, "\t$d, [$a]")),
- [(set RC:$d, (pat_load ADDRri64:$a))]>, Requires<[Use64BitAddresses]>;
- def ii32 : InstPTX<(outs RC:$d),
- (ins MEMii32:$a),
- !strconcat(opstr, !strconcat(typestr, "\t$d, [$a]")),
- [(set RC:$d, (pat_load ADDRii32:$a))]>, Requires<[Use32BitAddresses]>;
- def ii64 : InstPTX<(outs RC:$d),
- (ins MEMii64:$a),
- !strconcat(opstr, !strconcat(typestr, "\t$d, [$a]")),
- [(set RC:$d, (pat_load ADDRii64:$a))]>, Requires<[Use64BitAddresses]>;
-}
-
-multiclass PTX_LD_ALL<string opstr, PatFrag pat_load> {
- defm u16 : PTX_LD<opstr, ".u16", RegI16, pat_load>;
- defm u32 : PTX_LD<opstr, ".u32", RegI32, pat_load>;
- defm u64 : PTX_LD<opstr, ".u64", RegI64, pat_load>;
- defm f32 : PTX_LD<opstr, ".f32", RegF32, pat_load>;
- defm f64 : PTX_LD<opstr, ".f64", RegF64, pat_load>;
-}
-
-multiclass PTX_ST<string opstr, string typestr, RegisterClass RC, PatFrag pat_store> {
- def rr32 : InstPTX<(outs),
- (ins RC:$d, MEMri32:$a),
- !strconcat(opstr, !strconcat(typestr, "\t[$a], $d")),
- [(pat_store RC:$d, ADDRrr32:$a)]>, Requires<[Use32BitAddresses]>;
- def rr64 : InstPTX<(outs),
- (ins RC:$d, MEMri64:$a),
- !strconcat(opstr, !strconcat(typestr, "\t[$a], $d")),
- [(pat_store RC:$d, ADDRrr64:$a)]>, Requires<[Use64BitAddresses]>;
- def ri32 : InstPTX<(outs),
- (ins RC:$d, MEMri32:$a),
- !strconcat(opstr, !strconcat(typestr, "\t[$a], $d")),
- [(pat_store RC:$d, ADDRri32:$a)]>, Requires<[Use32BitAddresses]>;
- def ri64 : InstPTX<(outs),
- (ins RC:$d, MEMri64:$a),
- !strconcat(opstr, !strconcat(typestr, "\t[$a], $d")),
- [(pat_store RC:$d, ADDRri64:$a)]>, Requires<[Use64BitAddresses]>;
- def ii32 : InstPTX<(outs),
- (ins RC:$d, MEMii32:$a),
- !strconcat(opstr, !strconcat(typestr, "\t[$a], $d")),
- [(pat_store RC:$d, ADDRii32:$a)]>, Requires<[Use32BitAddresses]>;
- def ii64 : InstPTX<(outs),
- (ins RC:$d, MEMii64:$a),
- !strconcat(opstr, !strconcat(typestr, "\t[$a], $d")),
- [(pat_store RC:$d, ADDRii64:$a)]>, Requires<[Use64BitAddresses]>;
-}
-multiclass PTX_ST_ALL<string opstr, PatFrag pat_store> {
- defm u16 : PTX_ST<opstr, ".u16", RegI16, pat_store>;
- defm u32 : PTX_ST<opstr, ".u32", RegI32, pat_store>;
- defm u64 : PTX_ST<opstr, ".u64", RegI64, pat_store>;
- defm f32 : PTX_ST<opstr, ".f32", RegF32, pat_store>;
- defm f64 : PTX_ST<opstr, ".f64", RegF64, pat_store>;
-}
//===----------------------------------------------------------------------===//
// Instructions
@@ -585,118 +481,61 @@ multiclass PTX_ST_ALL<string opstr, PatFrag pat_store> {
///===- Integer Arithmetic Instructions -----------------------------------===//
-defm ADD : INT3<"add", add>;
-defm SUB : INT3<"sub", sub>;
-defm MUL : INT3<"mul.lo", mul>; // FIXME: Allow 32x32 -> 64 multiplies
-defm DIV : INT3<"div", udiv>;
-defm REM : INT3<"rem", urem>;
+defm ADD : PTX_INT3<"add", add>;
+defm SUB : PTX_INT3<"sub", sub>;
+defm MUL : PTX_INT3<"mul.lo", mul>; // FIXME: Allow 32x32 -> 64 multiplies
+defm DIV : PTX_INT3<"div", udiv>;
+defm SDIV : PTX_INT3_SIGNED<"div", sdiv>;
+defm REM : PTX_INT3<"rem", urem>;
///===- Floating-Point Arithmetic Instructions ----------------------------===//
-// Standard Unary Operations
-defm FNEG : PTX_FLOAT_2OP<"neg", fneg>;
+// FNEG
+defm FNEG : PTX_FLOAT_2OP<"neg">;
// Standard Binary Operations
-defm FADD : PTX_FLOAT_3OP<"add.rn", fadd>;
-defm FSUB : PTX_FLOAT_3OP<"sub.rn", fsub>;
-defm FMUL : PTX_FLOAT_3OP<"mul.rn", fmul>;
-
-// For floating-point division:
-// SM_13+ defaults to .rn for f32 and f64,
-// SM10 must *not* provide a rounding
-
-// TODO:
-// - Allow user selection of rounding modes for fdiv
-// - Add support for -prec-div=false (.approx)
-
-def FDIVrr32SM13 : InstPTX<(outs RegF32:$d),
- (ins RegF32:$a, RegF32:$b),
- "div.rn.f32\t$d, $a, $b",
- [(set RegF32:$d, (fdiv RegF32:$a, RegF32:$b))]>,
- Requires<[FDivNeedsRoundingMode]>;
-def FDIVri32SM13 : InstPTX<(outs RegF32:$d),
- (ins RegF32:$a, f32imm:$b),
- "div.rn.f32\t$d, $a, $b",
- [(set RegF32:$d, (fdiv RegF32:$a, fpimm:$b))]>,
- Requires<[FDivNeedsRoundingMode]>;
-def FDIVrr32SM10 : InstPTX<(outs RegF32:$d),
- (ins RegF32:$a, RegF32:$b),
- "div.f32\t$d, $a, $b",
- [(set RegF32:$d, (fdiv RegF32:$a, RegF32:$b))]>,
- Requires<[FDivNoRoundingMode]>;
-def FDIVri32SM10 : InstPTX<(outs RegF32:$d),
- (ins RegF32:$a, f32imm:$b),
- "div.f32\t$d, $a, $b",
- [(set RegF32:$d, (fdiv RegF32:$a, fpimm:$b))]>,
- Requires<[FDivNoRoundingMode]>;
-
-def FDIVrr64SM13 : InstPTX<(outs RegF64:$d),
- (ins RegF64:$a, RegF64:$b),
- "div.rn.f64\t$d, $a, $b",
- [(set RegF64:$d, (fdiv RegF64:$a, RegF64:$b))]>,
- Requires<[FDivNeedsRoundingMode]>;
-def FDIVri64SM13 : InstPTX<(outs RegF64:$d),
- (ins RegF64:$a, f64imm:$b),
- "div.rn.f64\t$d, $a, $b",
- [(set RegF64:$d, (fdiv RegF64:$a, fpimm:$b))]>,
- Requires<[FDivNeedsRoundingMode]>;
-def FDIVrr64SM10 : InstPTX<(outs RegF64:$d),
- (ins RegF64:$a, RegF64:$b),
- "div.f64\t$d, $a, $b",
- [(set RegF64:$d, (fdiv RegF64:$a, RegF64:$b))]>,
- Requires<[FDivNoRoundingMode]>;
-def FDIVri64SM10 : InstPTX<(outs RegF64:$d),
- (ins RegF64:$a, f64imm:$b),
- "div.f64\t$d, $a, $b",
- [(set RegF64:$d, (fdiv RegF64:$a, fpimm:$b))]>,
- Requires<[FDivNoRoundingMode]>;
-
-
+defm FADD : PTX_FLOAT_3OP<"add">;
+defm FSUB : PTX_FLOAT_3OP<"sub">;
+defm FMUL : PTX_FLOAT_3OP<"mul">;
+defm FDIV : PTX_FLOAT_3OP<"div">;
// Multi-operation hybrid instructions
+defm FMAD : PTX_FLOAT_4OP<"mad">, Requires<[SupportsFMA]>;
-// The selection of mad/fma is tricky. In some cases, they are the *same*
-// instruction, but in other cases we may prefer one or the other. Also,
-// different PTX versions differ on whether rounding mode flags are required.
-// In the short term, mad is supported on all PTX versions and we use a
-// default rounding mode no matter what shader model or PTX version.
-// TODO: Allow the rounding mode to be selectable through llc.
-defm FMADSM13 : PTX_FLOAT_4OP<"mad.rn", fmul, fadd>,
- Requires<[FMadNeedsRoundingMode, SupportsFMA]>;
-defm FMAD : PTX_FLOAT_4OP<"mad", fmul, fadd>,
- Requires<[FMadNoRoundingMode, SupportsFMA]>;
///===- Floating-Point Intrinsic Instructions -----------------------------===//
-def FSQRT32 : InstPTX<(outs RegF32:$d),
- (ins RegF32:$a),
- "sqrt.rn.f32\t$d, $a",
- [(set RegF32:$d, (fsqrt RegF32:$a))]>;
-
-def FSQRT64 : InstPTX<(outs RegF64:$d),
- (ins RegF64:$a),
- "sqrt.rn.f64\t$d, $a",
- [(set RegF64:$d, (fsqrt RegF64:$a))]>;
-
-def FSIN32 : InstPTX<(outs RegF32:$d),
- (ins RegF32:$a),
- "sin.approx.f32\t$d, $a",
- [(set RegF32:$d, (fsin RegF32:$a))]>;
+// SQRT
+def FSQRTrr32 : InstPTX<(outs RegF32:$d), (ins RndMode:$r, RegF32:$a),
+ "sqrt$r.f32\t$d, $a", []>;
+def FSQRTri32 : InstPTX<(outs RegF32:$d), (ins RndMode:$r, f32imm:$a),
+ "sqrt$r.f32\t$d, $a", []>;
+def FSQRTrr64 : InstPTX<(outs RegF64:$d), (ins RndMode:$r, RegF64:$a),
+ "sqrt$r.f64\t$d, $a", []>;
+def FSQRTri64 : InstPTX<(outs RegF64:$d), (ins RndMode:$r, f64imm:$a),
+ "sqrt$r.f64\t$d, $a", []>;
+
+// SIN
+def FSINrr32 : InstPTX<(outs RegF32:$d), (ins RndMode:$r, RegF32:$a),
+ "sin$r.f32\t$d, $a", []>;
+def FSINri32 : InstPTX<(outs RegF32:$d), (ins RndMode:$r, f32imm:$a),
+ "sin$r.f32\t$d, $a", []>;
+def FSINrr64 : InstPTX<(outs RegF64:$d), (ins RndMode:$r, RegF64:$a),
+ "sin$r.f64\t$d, $a", []>;
+def FSINri64 : InstPTX<(outs RegF64:$d), (ins RndMode:$r, f64imm:$a),
+ "sin$r.f64\t$d, $a", []>;
+
+// COS
+def FCOSrr32 : InstPTX<(outs RegF32:$d), (ins RndMode:$r, RegF32:$a),
+ "cos$r.f32\t$d, $a", []>;
+def FCOSri32 : InstPTX<(outs RegF32:$d), (ins RndMode:$r, f32imm:$a),
+ "cos$r.f32\t$d, $a", []>;
+def FCOSrr64 : InstPTX<(outs RegF64:$d), (ins RndMode:$r, RegF64:$a),
+ "cos$r.f64\t$d, $a", []>;
+def FCOSri64 : InstPTX<(outs RegF64:$d), (ins RndMode:$r, f64imm:$a),
+ "cos$r.f64\t$d, $a", []>;
-def FSIN64 : InstPTX<(outs RegF64:$d),
- (ins RegF64:$a),
- "sin.approx.f64\t$d, $a",
- [(set RegF64:$d, (fsin RegF64:$a))]>;
-def FCOS32 : InstPTX<(outs RegF32:$d),
- (ins RegF32:$a),
- "cos.approx.f32\t$d, $a",
- [(set RegF32:$d, (fcos RegF32:$a))]>;
-
-def FCOS64 : InstPTX<(outs RegF64:$d),
- (ins RegF64:$a),
- "cos.approx.f64\t$d, $a",
- [(set RegF64:$d, (fcos RegF64:$a))]>;
///===- Comparison and Selection Instructions -----------------------------===//
@@ -744,35 +583,35 @@ defm SETPGEs64 : PTX_SETP_I<RegI64, "s64", i64imm, SETGE, "ge">;
// Compare f32
-defm SETPEQf32 : PTX_SETP_FP<RegF32, "f32", SETUEQ, SETOEQ, "eq">;
-defm SETPNEf32 : PTX_SETP_FP<RegF32, "f32", SETUNE, SETONE, "ne">;
-defm SETPLTf32 : PTX_SETP_FP<RegF32, "f32", SETULT, SETOLT, "lt">;
-defm SETPLEf32 : PTX_SETP_FP<RegF32, "f32", SETULE, SETOLE, "le">;
-defm SETPGTf32 : PTX_SETP_FP<RegF32, "f32", SETUGT, SETOGT, "gt">;
-defm SETPGEf32 : PTX_SETP_FP<RegF32, "f32", SETUGE, SETOGE, "ge">;
+defm SETPEQf32 : PTX_SETP_FP<RegF32, "f32", f32imm, SETUEQ, SETOEQ, "eq">;
+defm SETPNEf32 : PTX_SETP_FP<RegF32, "f32", f32imm, SETUNE, SETONE, "ne">;
+defm SETPLTf32 : PTX_SETP_FP<RegF32, "f32", f32imm, SETULT, SETOLT, "lt">;
+defm SETPLEf32 : PTX_SETP_FP<RegF32, "f32", f32imm, SETULE, SETOLE, "le">;
+defm SETPGTf32 : PTX_SETP_FP<RegF32, "f32", f32imm, SETUGT, SETOGT, "gt">;
+defm SETPGEf32 : PTX_SETP_FP<RegF32, "f32", f32imm, SETUGE, SETOGE, "ge">;
// Compare f64
-defm SETPEQf64 : PTX_SETP_FP<RegF64, "f64", SETUEQ, SETOEQ, "eq">;
-defm SETPNEf64 : PTX_SETP_FP<RegF64, "f64", SETUNE, SETONE, "ne">;
-defm SETPLTf64 : PTX_SETP_FP<RegF64, "f64", SETULT, SETOLT, "lt">;
-defm SETPLEf64 : PTX_SETP_FP<RegF64, "f64", SETULE, SETOLE, "le">;
-defm SETPGTf64 : PTX_SETP_FP<RegF64, "f64", SETUGT, SETOGT, "gt">;
-defm SETPGEf64 : PTX_SETP_FP<RegF64, "f64", SETUGE, SETOGE, "ge">;
+defm SETPEQf64 : PTX_SETP_FP<RegF64, "f64", f64imm, SETUEQ, SETOEQ, "eq">;
+defm SETPNEf64 : PTX_SETP_FP<RegF64, "f64", f64imm, SETUNE, SETONE, "ne">;
+defm SETPLTf64 : PTX_SETP_FP<RegF64, "f64", f64imm, SETULT, SETOLT, "lt">;
+defm SETPLEf64 : PTX_SETP_FP<RegF64, "f64", f64imm, SETULE, SETOLE, "le">;
+defm SETPGTf64 : PTX_SETP_FP<RegF64, "f64", f64imm, SETUGT, SETOGT, "gt">;
+defm SETPGEf64 : PTX_SETP_FP<RegF64, "f64", f64imm, SETUGE, SETOGE, "ge">;
// .selp
-defm PTX_SELPu16 : PTX_SELP<RegI16, "u16">;
-defm PTX_SELPu32 : PTX_SELP<RegI32, "u32">;
-defm PTX_SELPu64 : PTX_SELP<RegI64, "u64">;
-defm PTX_SELPf32 : PTX_SELP<RegF32, "f32">;
-defm PTX_SELPf64 : PTX_SELP<RegF64, "f64">;
+defm SELPi16 : PTX_SELP<RegI16, "u16", i16imm, imm>;
+defm SELPi32 : PTX_SELP<RegI32, "u32", i32imm, imm>;
+defm SELPi64 : PTX_SELP<RegI64, "u64", i64imm, imm>;
+defm SELPf32 : PTX_SELP<RegF32, "f32", f32imm, fpimm>;
+defm SELPf64 : PTX_SELP<RegF64, "f64", f64imm, fpimm>;
///===- Logic and Shift Instructions --------------------------------------===//
-defm SHL : INT3ntnc<"shl.b", PTXshl>;
-defm SRL : INT3ntnc<"shr.u", PTXsrl>;
-defm SRA : INT3ntnc<"shr.s", PTXsra>;
+defm SHL : PTX_INT3ntnc<"shl.b", PTXshl>;
+defm SRL : PTX_INT3ntnc<"shr.u", PTXsrl>;
+defm SRA : PTX_INT3ntnc<"shr.s", PTXsra>;
defm AND : PTX_LOGIC<"and", and>;
defm OR : PTX_LOGIC<"or", or>;
@@ -780,6 +619,24 @@ defm XOR : PTX_LOGIC<"xor", xor>;
///===- Data Movement and Conversion Instructions -------------------------===//
+// any_extend
+// Implement the anyext instruction in terms of the PTX cvt instructions.
+//def : Pat<(i32 (anyext RegI16:$a)), (CVT_u32_u16 RegI16:$a)>;
+//def : Pat<(i64 (anyext RegI16:$a)), (CVT_u64_u16 RegI16:$a)>;
+//def : Pat<(i64 (anyext RegI32:$a)), (CVT_u64_u32 RegI32:$a)>;
+
+// bitconvert
+// These instructions implement the bit-wise conversion between integer and
+// floating-point types.
+def MOVi32f32
+ : InstPTX<(outs RegI32:$d), (ins RegF32:$a), "mov.b32\t$d, $a", []>;
+def MOVf32i32
+ : InstPTX<(outs RegF32:$d), (ins RegI32:$a), "mov.b32\t$d, $a", []>;
+def MOVi64f64
+ : InstPTX<(outs RegI64:$d), (ins RegF64:$a), "mov.b64\t$d, $a", []>;
+def MOVf64i64
+ : InstPTX<(outs RegF64:$d), (ins RegI64:$a), "mov.b64\t$d, $a", []>;
+
let neverHasSideEffects = 1 in {
def MOVPREDrr
: InstPTX<(outs RegPred:$d), (ins RegPred:$a), "mov.pred\t$d, $a", []>;
@@ -825,278 +682,332 @@ let isReMaterializable = 1, isAsCheapAsAMove = 1 in {
[(set RegI64:$d, (PTXcopyaddress tglobaladdr:$a))]>;
}
-// Loads
-defm LDg : PTX_LD_ALL<"ld.global", load_global>;
-defm LDc : PTX_LD_ALL<"ld.const", load_constant>;
-defm LDl : PTX_LD_ALL<"ld.local", load_local>;
-defm LDs : PTX_LD_ALL<"ld.shared", load_shared>;
+// PTX cvt instructions
+// Note: not all of these may actually be used; we just define all possible
+// patterns here (that make sense).
+// FIXME: Can we collapse this somehow into a multiclass def?
+
+// To i16
+def CVTu16u32
+ : InstPTX<(outs RegI16:$d), (ins RegI32:$a), "cvt.u16.u32\t$d, $a", []>;
+def CVTu16u64
+ : InstPTX<(outs RegI16:$d), (ins RegI64:$a), "cvt.u16.u64\t$d, $a", []>;
+def CVTu16f32
+ : InstPTX<(outs RegI16:$d), (ins RndMode:$r, RegF32:$a),
+ "cvt$r.u16.f32\t$d, $a", []>;
+def CVTs16f32
+ : InstPTX<(outs RegI16:$d), (ins RndMode:$r, RegF32:$a),
+ "cvt$r.s16.f32\t$d, $a", []>;
+def CVTu16f64
+ : InstPTX<(outs RegI16:$d), (ins RndMode:$r, RegF64:$a),
+ "cvt$r.u16.f64\t$d, $a", []>;
+def CVTs16f64
+ : InstPTX<(outs RegI16:$d), (ins RndMode:$r, RegF64:$a),
+ "cvt$r.s16.f64\t$d, $a", []>;
+
+// To i32
+def CVTu32u16
+ : InstPTX<(outs RegI32:$d), (ins RegI16:$a), "cvt.u32.u16\t$d, $a", []>;
+def CVTs32s16
+ : InstPTX<(outs RegI32:$d), (ins RegI16:$a), "cvt.s32.s16\t$d, $a", []>;
+def CVTu32u64
+ : InstPTX<(outs RegI32:$d), (ins RegI64:$a), "cvt.u32.u64\t$d, $a", []>;
+def CVTu32f32
+ : InstPTX<(outs RegI32:$d), (ins RndMode:$r, RegF32:$a),
+ "cvt$r.u32.f32\t$d, $a", []>;
+def CVTs32f32
+ : InstPTX<(outs RegI32:$d), (ins RndMode:$r, RegF32:$a),
+ "cvt$r.s32.f32\t$d, $a", []>;
+def CVTu32f64
+ : InstPTX<(outs RegI32:$d), (ins RndMode:$r, RegF64:$a),
+ "cvt$r.u32.f64\t$d, $a", []>;
+def CVTs32f64
+ : InstPTX<(outs RegI32:$d), (ins RndMode:$r, RegF64:$a),
+ "cvt$r.s32.f64\t$d, $a", []>;
+
+// To i64
+def CVTu64u16
+ : InstPTX<(outs RegI64:$d), (ins RegI16:$a), "cvt.u64.u16\t$d, $a", []>;
+def CVTs64s16
+ : InstPTX<(outs RegI64:$d), (ins RegI16:$a), "cvt.s64.s16\t$d, $a", []>;
+def CVTu64u32
+ : InstPTX<(outs RegI64:$d), (ins RegI32:$a), "cvt.u64.u32\t$d, $a", []>;
+def CVTs64s32
+ : InstPTX<(outs RegI64:$d), (ins RegI32:$a), "cvt.s64.s32\t$d, $a", []>;
+def CVTu64f32
+ : InstPTX<(outs RegI64:$d), (ins RndMode:$r, RegF32:$a),
+ "cvt$r.u64.f32\t$d, $a", []>;
+def CVTs64f32
+ : InstPTX<(outs RegI64:$d), (ins RndMode:$r, RegF32:$a),
+ "cvt$r.s64.f32\t$d, $a", []>;
+def CVTu64f64
+ : InstPTX<(outs RegI64:$d), (ins RndMode:$r, RegF64:$a),
+ "cvt$r.u64.f64\t$d, $a", []>;
+def CVTs64f64
+ : InstPTX<(outs RegI64:$d), (ins RndMode:$r, RegF64:$a),
+ "cvt$r.s64.f64\t$d, $a", []>;
+
+// To f32
+def CVTf32u16
+ : InstPTX<(outs RegF32:$d), (ins RndMode:$r, RegI16:$a),
+ "cvt$r.f32.u16\t$d, $a", []>;
+def CVTf32s16
+ : InstPTX<(outs RegF32:$d), (ins RndMode:$r, RegI16:$a),
+ "cvt$r.f32.s16\t$d, $a", []>;
+def CVTf32u32
+ : InstPTX<(outs RegF32:$d), (ins RndMode:$r, RegI32:$a),
+ "cvt$r.f32.u32\t$d, $a", []>;
+def CVTf32s32
+ : InstPTX<(outs RegF32:$d), (ins RndMode:$r, RegI32:$a),
+ "cvt$r.f32.s32\t$d, $a", []>;
+def CVTf32u64
+ : InstPTX<(outs RegF32:$d), (ins RndMode:$r, RegI64:$a),
+ "cvt$r.f32.u64\t$d, $a", []>;
+def CVTf32s64
+ : InstPTX<(outs RegF32:$d), (ins RndMode:$r, RegI64:$a),
+ "cvt$r.f32.s64\t$d, $a", []>;
+def CVTf32f64
+ : InstPTX<(outs RegF32:$d), (ins RndMode:$r, RegF64:$a),
+ "cvt$r.f32.f64\t$d, $a", []>;
+
+// To f64
+def CVTf64u16
+ : InstPTX<(outs RegF64:$d), (ins RndMode:$r, RegI16:$a),
+ "cvt$r.f64.u16\t$d, $a", []>;
+def CVTf64s16
+ : InstPTX<(outs RegF64:$d), (ins RndMode:$r, RegI16:$a),
+ "cvt$r.f64.s16\t$d, $a", []>;
+def CVTf64u32
+ : InstPTX<(outs RegF64:$d), (ins RndMode:$r, RegI32:$a),
+ "cvt$r.f64.u32\t$d, $a", []>;
+def CVTf64s32
+ : InstPTX<(outs RegF64:$d), (ins RndMode:$r, RegI32:$a),
+ "cvt$r.f64.s32\t$d, $a", []>;
+def CVTf64u64
+ : InstPTX<(outs RegF64:$d), (ins RndMode:$r, RegI64:$a),
+ "cvt$r.f64.u64\t$d, $a", []>;
+def CVTf64s64
+ : InstPTX<(outs RegF64:$d), (ins RndMode:$r, RegI64:$a),
+ "cvt$r.f64.s64\t$d, $a", []>;
+def CVTf64f32
+ : InstPTX<(outs RegF64:$d), (ins RegF32:$a), "cvt.f64.f32\t$d, $a", []>;
+
+///===- Control Flow Instructions -----------------------------------------===//
-// These instructions are used to load/store from the .param space for
-// device and kernel parameters
+let isBranch = 1, isTerminator = 1, isBarrier = 1 in {
+ def BRAd
+ : InstPTX<(outs), (ins brtarget:$d), "bra\t$d", [(br bb:$d)]>;
+}
-let hasSideEffects = 1 in {
- def LDpiPred : InstPTX<(outs RegPred:$d), (ins MEMpi:$a),
- "ld.param.pred\t$d, [$a]",
- [(set RegPred:$d, (PTXloadparam timm:$a))]>;
- def LDpiU16 : InstPTX<(outs RegI16:$d), (ins MEMpi:$a),
- "ld.param.u16\t$d, [$a]",
- [(set RegI16:$d, (PTXloadparam timm:$a))]>;
- def LDpiU32 : InstPTX<(outs RegI32:$d), (ins MEMpi:$a),
- "ld.param.u32\t$d, [$a]",
- [(set RegI32:$d, (PTXloadparam timm:$a))]>;
- def LDpiU64 : InstPTX<(outs RegI64:$d), (ins MEMpi:$a),
- "ld.param.u64\t$d, [$a]",
- [(set RegI64:$d, (PTXloadparam timm:$a))]>;
- def LDpiF32 : InstPTX<(outs RegF32:$d), (ins MEMpi:$a),
- "ld.param.f32\t$d, [$a]",
- [(set RegF32:$d, (PTXloadparam timm:$a))]>;
- def LDpiF64 : InstPTX<(outs RegF64:$d), (ins MEMpi:$a),
- "ld.param.f64\t$d, [$a]",
- [(set RegF64:$d, (PTXloadparam timm:$a))]>;
-
- def STpiPred : InstPTX<(outs), (ins MEMret:$d, RegPred:$a),
- "st.param.pred\t[$d], $a",
- [(PTXstoreparam timm:$d, RegPred:$a)]>;
- def STpiU16 : InstPTX<(outs), (ins MEMret:$d, RegI16:$a),
- "st.param.u16\t[$d], $a",
- [(PTXstoreparam timm:$d, RegI16:$a)]>;
- def STpiU32 : InstPTX<(outs), (ins MEMret:$d, RegI32:$a),
- "st.param.u32\t[$d], $a",
- [(PTXstoreparam timm:$d, RegI32:$a)]>;
- def STpiU64 : InstPTX<(outs), (ins MEMret:$d, RegI64:$a),
- "st.param.u64\t[$d], $a",
- [(PTXstoreparam timm:$d, RegI64:$a)]>;
- def STpiF32 : InstPTX<(outs), (ins MEMret:$d, RegF32:$a),
- "st.param.f32\t[$d], $a",
- [(PTXstoreparam timm:$d, RegF32:$a)]>;
- def STpiF64 : InstPTX<(outs), (ins MEMret:$d, RegF64:$a),
- "st.param.f64\t[$d], $a",
- [(PTXstoreparam timm:$d, RegF64:$a)]>;
+let isBranch = 1, isTerminator = 1 in {
+  // FIXME: The pattern part is blank because we do not yet know how to use
+  // the first operand of PredicateOperand (a RegPred register) here.
+ def BRAdp
+ : InstPTX<(outs), (ins brtarget:$d), "bra\t$d",
+ [/*(brcond pred:$_p, bb:$d)*/]>;
}
-// Stores
-defm STg : PTX_ST_ALL<"st.global", store_global>;
-defm STl : PTX_ST_ALL<"st.local", store_local>;
-defm STs : PTX_ST_ALL<"st.shared", store_shared>;
+let isReturn = 1, isTerminator = 1, isBarrier = 1 in {
+ def EXIT : InstPTX<(outs), (ins), "exit", [(PTXexit)]>;
+ def RET : InstPTX<(outs), (ins), "ret", [(PTXret)]>;
+}
-// defm STp : PTX_ST_ALL<"st.param", store_parameter>;
-// defm LDp : PTX_LD_ALL<"ld.param", load_parameter>;
-// TODO: Do something with st.param if/when it is needed.
+let hasSideEffects = 1 in {
+ def CALL : InstPTX<(outs), (ins), "call", [(PTXcall)]>;
+}
-// Conversion to pred
-// PTX does not directly support converting to a predicate type, so we fake it
-// by performing a greater-than test between the value and zero. This follows
-// the C convention that any non-zero value is equivalent to 'true'.
-def CVT_pred_u16
- : InstPTX<(outs RegPred:$d), (ins RegI16:$a), "setp.gt.u16\t$d, $a, 0",
- [(set RegPred:$d, (trunc RegI16:$a))]>;
+///===- Parameter Passing Pseudo-Instructions -----------------------------===//
+
+def READPARAMPRED : InstPTX<(outs RegPred:$a), (ins i32imm:$b),
+ "mov.pred\t$a, %param$b", []>;
+def READPARAMI16 : InstPTX<(outs RegI16:$a), (ins i32imm:$b),
+ "mov.b16\t$a, %param$b", []>;
+def READPARAMI32 : InstPTX<(outs RegI32:$a), (ins i32imm:$b),
+ "mov.b32\t$a, %param$b", []>;
+def READPARAMI64 : InstPTX<(outs RegI64:$a), (ins i32imm:$b),
+ "mov.b64\t$a, %param$b", []>;
+def READPARAMF32 : InstPTX<(outs RegF32:$a), (ins i32imm:$b),
+ "mov.f32\t$a, %param$b", []>;
+def READPARAMF64 : InstPTX<(outs RegF64:$a), (ins i32imm:$b),
+ "mov.f64\t$a, %param$b", []>;
+
+def WRITEPARAMPRED : InstPTX<(outs), (ins RegPred:$a), "//w", []>;
+def WRITEPARAMI16 : InstPTX<(outs), (ins RegI16:$a), "//w", []>;
+def WRITEPARAMI32 : InstPTX<(outs), (ins RegI32:$a), "//w", []>;
+def WRITEPARAMI64 : InstPTX<(outs), (ins RegI64:$a), "//w", []>;
+def WRITEPARAMF32 : InstPTX<(outs), (ins RegF32:$a), "//w", []>;
+def WRITEPARAMF64 : InstPTX<(outs), (ins RegF64:$a), "//w", []>;
-def CVT_pred_u32
- : InstPTX<(outs RegPred:$d), (ins RegI32:$a), "setp.gt.u32\t$d, $a, 0",
- [(set RegPred:$d, (trunc RegI32:$a))]>;
-def CVT_pred_u64
- : InstPTX<(outs RegPred:$d), (ins RegI64:$a), "setp.gt.u64\t$d, $a, 0",
- [(set RegPred:$d, (trunc RegI64:$a))]>;
+//===----------------------------------------------------------------------===//
+// Instruction Selection Patterns
+//===----------------------------------------------------------------------===//
-def CVT_pred_f32
- : InstPTX<(outs RegPred:$d), (ins RegF32:$a), "setp.gt.f32\t$d, $a, 0",
- [(set RegPred:$d, (fp_to_uint RegF32:$a))]>;
+// FADD
+def : Pat<(f32 (fadd RegF32:$a, RegF32:$b)),
+ (FADDrr32 RndDefault, RegF32:$a, RegF32:$b)>;
+def : Pat<(f32 (fadd RegF32:$a, fpimm:$b)),
+ (FADDri32 RndDefault, RegF32:$a, fpimm:$b)>;
+def : Pat<(f64 (fadd RegF64:$a, RegF64:$b)),
+ (FADDrr64 RndDefault, RegF64:$a, RegF64:$b)>;
+def : Pat<(f64 (fadd RegF64:$a, fpimm:$b)),
+ (FADDri64 RndDefault, RegF64:$a, fpimm:$b)>;
+
+// FSUB
+def : Pat<(f32 (fsub RegF32:$a, RegF32:$b)),
+ (FSUBrr32 RndDefault, RegF32:$a, RegF32:$b)>;
+def : Pat<(f32 (fsub RegF32:$a, fpimm:$b)),
+ (FSUBri32 RndDefault, RegF32:$a, fpimm:$b)>;
+def : Pat<(f64 (fsub RegF64:$a, RegF64:$b)),
+ (FSUBrr64 RndDefault, RegF64:$a, RegF64:$b)>;
+def : Pat<(f64 (fsub RegF64:$a, fpimm:$b)),
+ (FSUBri64 RndDefault, RegF64:$a, fpimm:$b)>;
+
+// FMUL
+def : Pat<(f32 (fmul RegF32:$a, RegF32:$b)),
+ (FMULrr32 RndDefault, RegF32:$a, RegF32:$b)>;
+def : Pat<(f32 (fmul RegF32:$a, fpimm:$b)),
+ (FMULri32 RndDefault, RegF32:$a, fpimm:$b)>;
+def : Pat<(f64 (fmul RegF64:$a, RegF64:$b)),
+ (FMULrr64 RndDefault, RegF64:$a, RegF64:$b)>;
+def : Pat<(f64 (fmul RegF64:$a, fpimm:$b)),
+ (FMULri64 RndDefault, RegF64:$a, fpimm:$b)>;
+
+// FDIV
+def : Pat<(f32 (fdiv RegF32:$a, RegF32:$b)),
+ (FDIVrr32 RndDefault, RegF32:$a, RegF32:$b)>;
+def : Pat<(f32 (fdiv RegF32:$a, fpimm:$b)),
+ (FDIVri32 RndDefault, RegF32:$a, fpimm:$b)>;
+def : Pat<(f64 (fdiv RegF64:$a, RegF64:$b)),
+ (FDIVrr64 RndDefault, RegF64:$a, RegF64:$b)>;
+def : Pat<(f64 (fdiv RegF64:$a, fpimm:$b)),
+ (FDIVri64 RndDefault, RegF64:$a, fpimm:$b)>;
+
+// FMUL+FADD
+def : Pat<(f32 (fadd (fmul RegF32:$a, RegF32:$b), RegF32:$c)),
+ (FMADrrr32 RndDefault, RegF32:$a, RegF32:$b, RegF32:$c)>;
+def : Pat<(f32 (fadd (fmul RegF32:$a, RegF32:$b), fpimm:$c)),
+ (FMADrri32 RndDefault, RegF32:$a, RegF32:$b, fpimm:$c)>;
+def : Pat<(f32 (fadd (fmul RegF32:$a, fpimm:$b), fpimm:$c)),
+          (FMADrri32 RndDefault, RegF32:$a, fpimm:$b, fpimm:$c)>;
+def : Pat<(f64 (fadd (fmul RegF64:$a, RegF64:$b), RegF64:$c)),
+ (FMADrrr64 RndDefault, RegF64:$a, RegF64:$b, RegF64:$c)>;
+def : Pat<(f64 (fadd (fmul RegF64:$a, RegF64:$b), fpimm:$c)),
+ (FMADrri64 RndDefault, RegF64:$a, RegF64:$b, fpimm:$c)>;
+def : Pat<(f64 (fadd (fmul RegF64:$a, fpimm:$b), fpimm:$c)),
+ (FMADrri64 RndDefault, RegF64:$a, fpimm:$b, fpimm:$c)>;
+
+// FNEG
+def : Pat<(f32 (fneg RegF32:$a)), (FNEGrr32 RndDefault, RegF32:$a)>;
+def : Pat<(f32 (fneg fpimm:$a)), (FNEGri32 RndDefault, fpimm:$a)>;
+def : Pat<(f64 (fneg RegF64:$a)), (FNEGrr64 RndDefault, RegF64:$a)>;
+def : Pat<(f64 (fneg fpimm:$a)), (FNEGri64 RndDefault, fpimm:$a)>;
+
+// FSQRT
+def : Pat<(f32 (fsqrt RegF32:$a)), (FSQRTrr32 RndDefault, RegF32:$a)>;
+def : Pat<(f32 (fsqrt fpimm:$a)), (FSQRTri32 RndDefault, fpimm:$a)>;
+def : Pat<(f64 (fsqrt RegF64:$a)), (FSQRTrr64 RndDefault, RegF64:$a)>;
+def : Pat<(f64 (fsqrt fpimm:$a)), (FSQRTri64 RndDefault, fpimm:$a)>;
+
+// FSIN
+def : Pat<(f32 (fsin RegF32:$a)), (FSINrr32 RndDefault, RegF32:$a)>;
+def : Pat<(f32 (fsin fpimm:$a)), (FSINri32 RndDefault, fpimm:$a)>;
+def : Pat<(f64 (fsin RegF64:$a)), (FSINrr64 RndDefault, RegF64:$a)>;
+def : Pat<(f64 (fsin fpimm:$a)), (FSINri64 RndDefault, fpimm:$a)>;
+
+// FCOS
+def : Pat<(f32 (fcos RegF32:$a)), (FCOSrr32 RndDefault, RegF32:$a)>;
+def : Pat<(f32 (fcos fpimm:$a)), (FCOSri32 RndDefault, fpimm:$a)>;
+def : Pat<(f64 (fcos RegF64:$a)), (FCOSrr64 RndDefault, RegF64:$a)>;
+def : Pat<(f64 (fcos fpimm:$a)), (FCOSri64 RndDefault, fpimm:$a)>;
+
+// Type conversion notes:
+// - PTX does not directly support converting a predicate to a value, so we
+// use a select instruction to select either 0 or 1 (integer or fp) based
+// on the truth value of the predicate.
+// - PTX does not directly support converting to a predicate type, so we fake it
+// by performing a greater-than test between the value and zero. This follows
+// the C convention that any non-zero value is equivalent to 'true'.
-def CVT_pred_f64
- : InstPTX<(outs RegPred:$d), (ins RegF64:$a), "setp.gt.f64\t$d, $a, 0",
- [(set RegPred:$d, (fp_to_uint RegF64:$a))]>;
+// Conversion to pred
+def : Pat<(i1 (trunc RegI16:$a)), (SETPGTu16ri RegI16:$a, 0)>;
+def : Pat<(i1 (trunc RegI32:$a)), (SETPGTu32ri RegI32:$a, 0)>;
+def : Pat<(i1 (trunc RegI64:$a)), (SETPGTu64ri RegI64:$a, 0)>;
+def : Pat<(i1 (fp_to_uint RegF32:$a)), (SETPGTu32ri (MOVi32f32 RegF32:$a), 0)>;
+def : Pat<(i1 (fp_to_uint RegF64:$a)), (SETPGTu64ri (MOVi64f64 RegF64:$a), 0)>;
// Conversion to u16
-// PTX does not directly support converting a predicate to a value, so we
-// use a select instruction to select either 0 or 1 (integer or fp) based
-// on the truth value of the predicate.
-def CVT_u16_preda
- : InstPTX<(outs RegI16:$d), (ins RegPred:$a), "selp.u16\t$d, 1, 0, $a",
- [(set RegI16:$d, (anyext RegPred:$a))]>;
-
-def CVT_u16_pred
- : InstPTX<(outs RegI16:$d), (ins RegPred:$a), "selp.u16\t$d, 1, 0, $a",
- [(set RegI16:$d, (zext RegPred:$a))]>;
-
-def CVT_u16_preds
- : InstPTX<(outs RegI16:$d), (ins RegPred:$a), "selp.u16\t$d, 1, 0, $a",
- [(set RegI16:$d, (sext RegPred:$a))]>;
-
-def CVT_u16_u32
- : InstPTX<(outs RegI16:$d), (ins RegI32:$a), "cvt.u16.u32\t$d, $a",
- [(set RegI16:$d, (trunc RegI32:$a))]>;
-
-def CVT_u16_u64
- : InstPTX<(outs RegI16:$d), (ins RegI64:$a), "cvt.u16.u64\t$d, $a",
- [(set RegI16:$d, (trunc RegI64:$a))]>;
-
-def CVT_u16_f32
- : InstPTX<(outs RegI16:$d), (ins RegF32:$a), "cvt.rzi.u16.f32\t$d, $a",
- [(set RegI16:$d, (fp_to_uint RegF32:$a))]>;
-
-def CVT_u16_f64
- : InstPTX<(outs RegI16:$d), (ins RegF64:$a), "cvt.rzi.u16.f64\t$d, $a",
- [(set RegI16:$d, (fp_to_uint RegF64:$a))]>;
+def : Pat<(i16 (anyext RegPred:$a)), (SELPi16ii RegPred:$a, 1, 0)>;
+def : Pat<(i16 (sext RegPred:$a)), (SELPi16ii RegPred:$a, 0xFFFF, 0)>;
+def : Pat<(i16 (zext RegPred:$a)), (SELPi16ii RegPred:$a, 1, 0)>;
+def : Pat<(i16 (trunc RegI32:$a)), (CVTu16u32 RegI32:$a)>;
+def : Pat<(i16 (trunc RegI64:$a)), (CVTu16u64 RegI64:$a)>;
+def : Pat<(i16 (fp_to_uint RegF32:$a)), (CVTu16f32 RndDefault, RegF32:$a)>;
+def : Pat<(i16 (fp_to_sint RegF32:$a)), (CVTs16f32 RndDefault, RegF32:$a)>;
+def : Pat<(i16 (fp_to_uint RegF64:$a)), (CVTu16f64 RndDefault, RegF64:$a)>;
+def : Pat<(i16 (fp_to_sint RegF64:$a)), (CVTs16f64 RndDefault, RegF64:$a)>;
// Conversion to u32
-
-def CVT_u32_pred
- : InstPTX<(outs RegI32:$d), (ins RegPred:$a), "selp.u32\t$d, 1, 0, $a",
- [(set RegI32:$d, (zext RegPred:$a))]>;
-
-def CVT_u32_b16
- : InstPTX<(outs RegI32:$d), (ins RegI16:$a), "cvt.u32.u16\t$d, $a",
- [(set RegI32:$d, (anyext RegI16:$a))]>;
-
-def CVT_u32_u16
- : InstPTX<(outs RegI32:$d), (ins RegI16:$a), "cvt.u32.u16\t$d, $a",
- [(set RegI32:$d, (zext RegI16:$a))]>;
-
-def CVT_u32_preds
- : InstPTX<(outs RegI32:$d), (ins RegPred:$a), "selp.u32\t$d, 1, 0, $a",
- [(set RegI32:$d, (sext RegPred:$a))]>;
-
-def CVT_u32_s16
- : InstPTX<(outs RegI32:$d), (ins RegI16:$a), "cvt.u32.s16\t$d, $a",
- [(set RegI32:$d, (sext RegI16:$a))]>;
-
-def CVT_u32_u64
- : InstPTX<(outs RegI32:$d), (ins RegI64:$a), "cvt.u32.u64\t$d, $a",
- [(set RegI32:$d, (trunc RegI64:$a))]>;
-
-def CVT_u32_f32
- : InstPTX<(outs RegI32:$d), (ins RegF32:$a), "cvt.rzi.u32.f32\t$d, $a",
- [(set RegI32:$d, (fp_to_uint RegF32:$a))]>;
-
-def CVT_u32_f64
- : InstPTX<(outs RegI32:$d), (ins RegF64:$a), "cvt.rzi.u32.f64\t$d, $a",
- [(set RegI32:$d, (fp_to_uint RegF64:$a))]>;
+def : Pat<(i32 (anyext RegPred:$a)), (SELPi32ii RegPred:$a, 1, 0)>;
+def : Pat<(i32 (sext RegPred:$a)), (SELPi32ii RegPred:$a, 0xFFFFFFFF, 0)>;
+def : Pat<(i32 (zext RegPred:$a)), (SELPi32ii RegPred:$a, 1, 0)>;
+def : Pat<(i32 (anyext RegI16:$a)), (CVTu32u16 RegI16:$a)>;
+def : Pat<(i32 (sext RegI16:$a)), (CVTs32s16 RegI16:$a)>;
+def : Pat<(i32 (zext RegI16:$a)), (CVTu32u16 RegI16:$a)>;
+def : Pat<(i32 (trunc RegI64:$a)), (CVTu32u64 RegI64:$a)>;
+def : Pat<(i32 (fp_to_uint RegF32:$a)), (CVTu32f32 RndDefault, RegF32:$a)>;
+def : Pat<(i32 (fp_to_sint RegF32:$a)), (CVTs32f32 RndDefault, RegF32:$a)>;
+def : Pat<(i32 (fp_to_uint RegF64:$a)), (CVTu32f64 RndDefault, RegF64:$a)>;
+def : Pat<(i32 (fp_to_sint RegF64:$a)), (CVTs32f64 RndDefault, RegF64:$a)>;
+def : Pat<(i32 (bitconvert RegF32:$a)), (MOVi32f32 RegF32:$a)>;
// Conversion to u64
-
-def CVT_u64_pred
- : InstPTX<(outs RegI64:$d), (ins RegPred:$a), "selp.u64\t$d, 1, 0, $a",
- [(set RegI64:$d, (zext RegPred:$a))]>;
-
-def CVT_u64_preds
- : InstPTX<(outs RegI64:$d), (ins RegPred:$a), "selp.u64\t$d, 1, 0, $a",
- [(set RegI64:$d, (sext RegPred:$a))]>;
-
-def CVT_u64_u16
- : InstPTX<(outs RegI64:$d), (ins RegI16:$a), "cvt.u64.u16\t$d, $a",
- [(set RegI64:$d, (zext RegI16:$a))]>;
-
-def CVT_u64_s16
- : InstPTX<(outs RegI64:$d), (ins RegI16:$a), "cvt.u64.s16\t$d, $a",
- [(set RegI64:$d, (sext RegI16:$a))]>;
-
-def CVT_u64_u32
- : InstPTX<(outs RegI64:$d), (ins RegI32:$a), "cvt.u64.u32\t$d, $a",
- [(set RegI64:$d, (zext RegI32:$a))]>;
-
-def CVT_u64_s32
- : InstPTX<(outs RegI64:$d), (ins RegI32:$a), "cvt.u64.s32\t$d, $a",
- [(set RegI64:$d, (sext RegI32:$a))]>;
-
-def CVT_u64_f32
- : InstPTX<(outs RegI64:$d), (ins RegF32:$a), "cvt.rzi.u64.f32\t$d, $a",
- [(set RegI64:$d, (fp_to_uint RegF32:$a))]>;
-
-def CVT_u64_f64
- : InstPTX<(outs RegI64:$d), (ins RegF64:$a), "cvt.rzi.u64.f64\t$d, $a",
- [(set RegI64:$d, (fp_to_uint RegF64:$a))]>;
+def : Pat<(i64 (anyext RegPred:$a)), (SELPi64ii RegPred:$a, 1, 0)>;
+def : Pat<(i64 (sext RegPred:$a)), (SELPi64ii RegPred:$a,
+ 0xFFFFFFFFFFFFFFFF, 0)>;
+def : Pat<(i64 (zext RegPred:$a)), (SELPi64ii RegPred:$a, 1, 0)>;
+def : Pat<(i64 (anyext RegI16:$a)), (CVTu64u16 RegI16:$a)>;
+def : Pat<(i64 (sext RegI16:$a)), (CVTs64s16 RegI16:$a)>;
+def : Pat<(i64 (zext RegI16:$a)), (CVTu64u16 RegI16:$a)>;
+def : Pat<(i64 (anyext RegI32:$a)), (CVTu64u32 RegI32:$a)>;
+def : Pat<(i64 (sext RegI32:$a)), (CVTs64s32 RegI32:$a)>;
+def : Pat<(i64 (zext RegI32:$a)), (CVTu64u32 RegI32:$a)>;
+def : Pat<(i64 (fp_to_uint RegF32:$a)), (CVTu64f32 RndDefault, RegF32:$a)>;
+def : Pat<(i64 (fp_to_sint RegF32:$a)), (CVTs64f32 RndDefault, RegF32:$a)>;
+def : Pat<(i64 (fp_to_uint RegF64:$a)), (CVTu64f64 RndDefault, RegF64:$a)>;
+def : Pat<(i64 (fp_to_sint RegF64:$a)), (CVTs64f64 RndDefault, RegF64:$a)>;
+def : Pat<(i64 (bitconvert RegF64:$a)), (MOVi64f64 RegF64:$a)>;
// Conversion to f32
-
-def CVT_f32_pred
- : InstPTX<(outs RegF32:$d), (ins RegPred:$a),
- "selp.f32\t$d, 0F3F800000, 0F00000000, $a", // 1.0
- [(set RegF32:$d, (uint_to_fp RegPred:$a))]>;
-
-def CVT_f32_u16
- : InstPTX<(outs RegF32:$d), (ins RegI16:$a), "cvt.rn.f32.u16\t$d, $a",
- [(set RegF32:$d, (uint_to_fp RegI16:$a))]>;
-
-def CVT_f32_u32
- : InstPTX<(outs RegF32:$d), (ins RegI32:$a), "cvt.rn.f32.u32\t$d, $a",
- [(set RegF32:$d, (uint_to_fp RegI32:$a))]>;
-
-def CVT_f32_u64
- : InstPTX<(outs RegF32:$d), (ins RegI64:$a), "cvt.rn.f32.u64\t$d, $a",
- [(set RegF32:$d, (uint_to_fp RegI64:$a))]>;
-
-def CVT_f32_f64
- : InstPTX<(outs RegF32:$d), (ins RegF64:$a), "cvt.rn.f32.f64\t$d, $a",
- [(set RegF32:$d, (fround RegF64:$a))]>;
+def : Pat<(f32 (uint_to_fp RegPred:$a)), (SELPf32rr RegPred:$a,
+ (MOVf32i32 0x3F800000), (MOVf32i32 0))>;
+def : Pat<(f32 (uint_to_fp RegI16:$a)), (CVTf32u16 RndDefault, RegI16:$a)>;
+def : Pat<(f32 (sint_to_fp RegI16:$a)), (CVTf32s16 RndDefault, RegI16:$a)>;
+def : Pat<(f32 (uint_to_fp RegI32:$a)), (CVTf32u32 RndDefault, RegI32:$a)>;
+def : Pat<(f32 (sint_to_fp RegI32:$a)), (CVTf32s32 RndDefault, RegI32:$a)>;
+def : Pat<(f32 (uint_to_fp RegI64:$a)), (CVTf32u64 RndDefault, RegI64:$a)>;
+def : Pat<(f32 (sint_to_fp RegI64:$a)), (CVTf32s64 RndDefault, RegI64:$a)>;
+def : Pat<(f32 (fround RegF64:$a)), (CVTf32f64 RndDefault, RegF64:$a)>;
+def : Pat<(f32 (bitconvert RegI32:$a)), (MOVf32i32 RegI32:$a)>;
// Conversion to f64
+def : Pat<(f64 (uint_to_fp RegPred:$a)), (SELPf64rr RegPred:$a,
+          (MOVf64i64 0x3FF0000000000000), (MOVf64i64 0))>; // 1.0 in f64 bits
+def : Pat<(f64 (uint_to_fp RegI16:$a)), (CVTf64u16 RndDefault, RegI16:$a)>;
+def : Pat<(f64 (sint_to_fp RegI16:$a)), (CVTf64s16 RndDefault, RegI16:$a)>;
+def : Pat<(f64 (uint_to_fp RegI32:$a)), (CVTf64u32 RndDefault, RegI32:$a)>;
+def : Pat<(f64 (sint_to_fp RegI32:$a)), (CVTf64s32 RndDefault, RegI32:$a)>;
+def : Pat<(f64 (uint_to_fp RegI64:$a)), (CVTf64u64 RndDefault, RegI64:$a)>;
+def : Pat<(f64 (sint_to_fp RegI64:$a)), (CVTf64s64 RndDefault, RegI64:$a)>;
+def : Pat<(f64 (fextend RegF32:$a)), (CVTf64f32 RegF32:$a)>;
+def : Pat<(f64 (bitconvert RegI64:$a)), (MOVf64i64 RegI64:$a)>;
-def CVT_f64_pred
- : InstPTX<(outs RegF64:$d), (ins RegPred:$a),
- "selp.f64\t$d, 0D3F80000000000000, 0D0000000000000000, $a", // 1.0
- [(set RegF64:$d, (uint_to_fp RegPred:$a))]>;
-
-def CVT_f64_u16
- : InstPTX<(outs RegF64:$d), (ins RegI16:$a), "cvt.rn.f64.u16\t$d, $a",
- [(set RegF64:$d, (uint_to_fp RegI16:$a))]>;
-
-def CVT_f64_u32
- : InstPTX<(outs RegF64:$d), (ins RegI32:$a), "cvt.rn.f64.u32\t$d, $a",
- [(set RegF64:$d, (uint_to_fp RegI32:$a))]>;
-
-def CVT_f64_u64
- : InstPTX<(outs RegF64:$d), (ins RegI64:$a), "cvt.rn.f64.u64\t$d, $a",
- [(set RegF64:$d, (uint_to_fp RegI64:$a))]>;
-
-def CVT_f64_f32
- : InstPTX<(outs RegF64:$d), (ins RegF32:$a), "cvt.f64.f32\t$d, $a",
- [(set RegF64:$d, (fextend RegF32:$a))]>;
-
-///===- Control Flow Instructions -----------------------------------------===//
-
-let isBranch = 1, isTerminator = 1, isBarrier = 1 in {
- def BRAd
- : InstPTX<(outs), (ins brtarget:$d), "bra\t$d", [(br bb:$d)]>;
-}
-
-let isBranch = 1, isTerminator = 1 in {
- // FIXME: The pattern part is blank because I cannot (or do not yet know
- // how to) use the first operand of PredicateOperand (a RegPred register) here
- def BRAdp
- : InstPTX<(outs), (ins brtarget:$d), "bra\t$d",
- [/*(brcond pred:$_p, bb:$d)*/]>;
-}
-
-let isReturn = 1, isTerminator = 1, isBarrier = 1 in {
- def EXIT : InstPTX<(outs), (ins), "exit", [(PTXexit)]>;
- def RET : InstPTX<(outs), (ins), "ret", [(PTXret)]>;
-}
-
-///===- Spill Instructions ------------------------------------------------===//
-// Special instructions used for stack spilling
-def STACKSTOREI16 : InstPTX<(outs), (ins i32imm:$d, RegI16:$a),
- "mov.u16\ts$d, $a", []>;
-def STACKSTOREI32 : InstPTX<(outs), (ins i32imm:$d, RegI32:$a),
- "mov.u32\ts$d, $a", []>;
-def STACKSTOREI64 : InstPTX<(outs), (ins i32imm:$d, RegI64:$a),
- "mov.u64\ts$d, $a", []>;
-def STACKSTOREF32 : InstPTX<(outs), (ins i32imm:$d, RegF32:$a),
- "mov.f32\ts$d, $a", []>;
-def STACKSTOREF64 : InstPTX<(outs), (ins i32imm:$d, RegF64:$a),
- "mov.f64\ts$d, $a", []>;
-
-def STACKLOADI16 : InstPTX<(outs), (ins RegI16:$d, i32imm:$a),
- "mov.u16\t$d, s$a", []>;
-def STACKLOADI32 : InstPTX<(outs), (ins RegI32:$d, i32imm:$a),
- "mov.u32\t$d, s$a", []>;
-def STACKLOADI64 : InstPTX<(outs), (ins RegI64:$d, i32imm:$a),
- "mov.u64\t$d, s$a", []>;
-def STACKLOADF32 : InstPTX<(outs), (ins RegF32:$d, i32imm:$a),
- "mov.f32\t$d, s$a", []>;
-def STACKLOADF64 : InstPTX<(outs), (ins RegF64:$d, i32imm:$a),
- "mov.f64\t$d, s$a", []>;
///===- Intrinsic Instructions --------------------------------------------===//
-
include "PTXIntrinsicInstrInfo.td"
+
+///===- Load/Store Instructions -------------------------------------------===//
+include "PTXInstrLoadStore.td"
+
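The bitconvert MOVs and the cvt definitions above model two different kinds of
conversion: mov.b32/mov.b64 copy bits between an integer and a floating-point
register unchanged, while cvt performs a rounding value conversion. A minimal
C++ sketch of that distinction, with illustrative helper names:

    #include <cstdint>
    #include <cstring>

    // What the MOVi32f32 pattern models: reinterpret the 32 bits of a float.
    static uint32_t bitsOfFloat(float F) {
      uint32_t Bits;
      std::memcpy(&Bits, &F, sizeof Bits); // raw bit copy, like mov.b32
      return Bits;
    }

    // What CVTu32f32 models instead: a value conversion that rounds.
    static uint32_t valueOfFloat(float F) {
      return static_cast<uint32_t>(F);     // like cvt<rnd>.u32.f32
    }

Accordingly, the instruction-selection patterns above pair bitconvert with the
MOV forms and fp_to_uint/uint_to_fp with the CVT forms.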
diff --git a/lib/Target/PTX/PTXInstrLoadStore.td b/lib/Target/PTX/PTXInstrLoadStore.td
new file mode 100644
index 0000000..9b4f56c
--- /dev/null
+++ b/lib/Target/PTX/PTXInstrLoadStore.td
@@ -0,0 +1,278 @@
+//===- PTXInstrLoadStore.td - PTX Load/Store Instruction Defs -*- tblgen-*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the PTX load/store instructions in TableGen format.
+//
+//===----------------------------------------------------------------------===//
+
+
+// Addressing Predicates
+// We have to differentiate between 32- and 64-bit pointer types
+def Use32BitAddresses : Predicate<"!getSubtarget().is64Bit()">;
+def Use64BitAddresses : Predicate<"getSubtarget().is64Bit()">;
+
+//===----------------------------------------------------------------------===//
+// Pattern Fragments for Loads/Stores
+//===----------------------------------------------------------------------===//
+
+def load_global : PatFrag<(ops node:$ptr), (load node:$ptr), [{
+ const Value *Src;
+ const PointerType *PT;
+ if ((Src = cast<LoadSDNode>(N)->getSrcValue()) &&
+ (PT = dyn_cast<PointerType>(Src->getType())))
+ return PT->getAddressSpace() == PTXStateSpace::Global;
+ return false;
+}]>;
+
+def load_constant : PatFrag<(ops node:$ptr), (load node:$ptr), [{
+ const Value *Src;
+ const PointerType *PT;
+ if ((Src = cast<LoadSDNode>(N)->getSrcValue()) &&
+ (PT = dyn_cast<PointerType>(Src->getType())))
+ return PT->getAddressSpace() == PTXStateSpace::Constant;
+ return false;
+}]>;
+
+def load_shared : PatFrag<(ops node:$ptr), (load node:$ptr), [{
+ const Value *Src;
+ const PointerType *PT;
+ if ((Src = cast<LoadSDNode>(N)->getSrcValue()) &&
+ (PT = dyn_cast<PointerType>(Src->getType())))
+ return PT->getAddressSpace() == PTXStateSpace::Shared;
+ return false;
+}]>;
+
+def store_global
+ : PatFrag<(ops node:$d, node:$ptr), (store node:$d, node:$ptr), [{
+ const Value *Src;
+ const PointerType *PT;
+ if ((Src = cast<StoreSDNode>(N)->getSrcValue()) &&
+ (PT = dyn_cast<PointerType>(Src->getType())))
+ return PT->getAddressSpace() == PTXStateSpace::Global;
+ return false;
+}]>;
+
+def store_shared
+ : PatFrag<(ops node:$d, node:$ptr), (store node:$d, node:$ptr), [{
+ const Value *Src;
+ const PointerType *PT;
+ if ((Src = cast<StoreSDNode>(N)->getSrcValue()) &&
+ (PT = dyn_cast<PointerType>(Src->getType())))
+ return PT->getAddressSpace() == PTXStateSpace::Shared;
+ return false;
+}]>;
+
+// Addressing modes.
+def ADDRrr32 : ComplexPattern<i32, 2, "SelectADDRrr", [], []>;
+def ADDRrr64 : ComplexPattern<i64, 2, "SelectADDRrr", [], []>;
+def ADDRri32 : ComplexPattern<i32, 2, "SelectADDRri", [], []>;
+def ADDRri64 : ComplexPattern<i64, 2, "SelectADDRri", [], []>;
+def ADDRii32 : ComplexPattern<i32, 2, "SelectADDRii", [], []>;
+def ADDRii64 : ComplexPattern<i64, 2, "SelectADDRii", [], []>;
+def ADDRlocal32 : ComplexPattern<i32, 2, "SelectADDRlocal", [], []>;
+def ADDRlocal64 : ComplexPattern<i64, 2, "SelectADDRlocal", [], []>;
+
+// Address operands
+def MEMri32 : Operand<i32> {
+ let PrintMethod = "printMemOperand";
+ let MIOperandInfo = (ops RegI32, i32imm);
+}
+def MEMri64 : Operand<i64> {
+ let PrintMethod = "printMemOperand";
+ let MIOperandInfo = (ops RegI64, i64imm);
+}
+def LOCALri32 : Operand<i32> {
+ let PrintMethod = "printMemOperand";
+ let MIOperandInfo = (ops i32imm, i32imm);
+}
+def LOCALri64 : Operand<i64> {
+ let PrintMethod = "printMemOperand";
+ let MIOperandInfo = (ops i64imm, i64imm);
+}
+def MEMii32 : Operand<i32> {
+ let PrintMethod = "printMemOperand";
+ let MIOperandInfo = (ops i32imm, i32imm);
+}
+def MEMii64 : Operand<i64> {
+ let PrintMethod = "printMemOperand";
+ let MIOperandInfo = (ops i64imm, i64imm);
+}
+// The operand here does not correspond to an actual address, so we
+// can use i32 in 64-bit address modes.
+def MEMpi : Operand<i32> {
+ let PrintMethod = "printParamOperand";
+ let MIOperandInfo = (ops i32imm);
+}
+def MEMret : Operand<i32> {
+ let PrintMethod = "printReturnOperand";
+ let MIOperandInfo = (ops i32imm);
+}
+
+
+// Load/store .param space
+def PTXloadparam
+ : SDNode<"PTXISD::LOAD_PARAM", SDTypeProfile<1, 1, [SDTCisPtrTy<1>]>,
+ [SDNPHasChain, SDNPOutGlue, SDNPOptInGlue]>;
+def PTXstoreparam
+ : SDNode<"PTXISD::STORE_PARAM", SDTypeProfile<0, 2, [SDTCisVT<0, i32>]>,
+ [SDNPHasChain, SDNPOutGlue, SDNPOptInGlue]>;
+
+def PTXreadparam
+ : SDNode<"PTXISD::READ_PARAM", SDTypeProfile<1, 1, [SDTCisVT<1, i32>]>,
+ [SDNPHasChain, SDNPOutGlue, SDNPOptInGlue]>;
+def PTXwriteparam
+ : SDNode<"PTXISD::WRITE_PARAM", SDTypeProfile<0, 1, []>,
+ [SDNPHasChain, SDNPOutGlue, SDNPOptInGlue]>;
+
+
+
+//===----------------------------------------------------------------------===//
+// Classes for loads/stores
+//===----------------------------------------------------------------------===//
+multiclass PTX_LD<string opstr, string typestr,
+ RegisterClass RC, PatFrag pat_load> {
+ def rr32 : InstPTX<(outs RC:$d),
+ (ins MEMri32:$a),
+ !strconcat(opstr, !strconcat(typestr, "\t$d, [$a]")),
+ [(set RC:$d, (pat_load ADDRrr32:$a))]>,
+ Requires<[Use32BitAddresses]>;
+ def rr64 : InstPTX<(outs RC:$d),
+ (ins MEMri64:$a),
+ !strconcat(opstr, !strconcat(typestr, "\t$d, [$a]")),
+ [(set RC:$d, (pat_load ADDRrr64:$a))]>,
+ Requires<[Use64BitAddresses]>;
+ def ri32 : InstPTX<(outs RC:$d),
+ (ins MEMri32:$a),
+ !strconcat(opstr, !strconcat(typestr, "\t$d, [$a]")),
+ [(set RC:$d, (pat_load ADDRri32:$a))]>,
+ Requires<[Use32BitAddresses]>;
+ def ri64 : InstPTX<(outs RC:$d),
+ (ins MEMri64:$a),
+ !strconcat(opstr, !strconcat(typestr, "\t$d, [$a]")),
+ [(set RC:$d, (pat_load ADDRri64:$a))]>,
+ Requires<[Use64BitAddresses]>;
+ def ii32 : InstPTX<(outs RC:$d),
+ (ins MEMii32:$a),
+ !strconcat(opstr, !strconcat(typestr, "\t$d, [$a]")),
+ [(set RC:$d, (pat_load ADDRii32:$a))]>,
+ Requires<[Use32BitAddresses]>;
+ def ii64 : InstPTX<(outs RC:$d),
+ (ins MEMii64:$a),
+ !strconcat(opstr, !strconcat(typestr, "\t$d, [$a]")),
+ [(set RC:$d, (pat_load ADDRii64:$a))]>,
+ Requires<[Use64BitAddresses]>;
+}
+
+multiclass PTX_ST<string opstr, string typestr, RegisterClass RC,
+ PatFrag pat_store> {
+ def rr32 : InstPTX<(outs),
+ (ins RC:$d, MEMri32:$a),
+ !strconcat(opstr, !strconcat(typestr, "\t[$a], $d")),
+ [(pat_store RC:$d, ADDRrr32:$a)]>,
+ Requires<[Use32BitAddresses]>;
+ def rr64 : InstPTX<(outs),
+ (ins RC:$d, MEMri64:$a),
+ !strconcat(opstr, !strconcat(typestr, "\t[$a], $d")),
+ [(pat_store RC:$d, ADDRrr64:$a)]>,
+ Requires<[Use64BitAddresses]>;
+ def ri32 : InstPTX<(outs),
+ (ins RC:$d, MEMri32:$a),
+ !strconcat(opstr, !strconcat(typestr, "\t[$a], $d")),
+ [(pat_store RC:$d, ADDRri32:$a)]>,
+ Requires<[Use32BitAddresses]>;
+ def ri64 : InstPTX<(outs),
+ (ins RC:$d, MEMri64:$a),
+ !strconcat(opstr, !strconcat(typestr, "\t[$a], $d")),
+ [(pat_store RC:$d, ADDRri64:$a)]>,
+ Requires<[Use64BitAddresses]>;
+ def ii32 : InstPTX<(outs),
+ (ins RC:$d, MEMii32:$a),
+ !strconcat(opstr, !strconcat(typestr, "\t[$a], $d")),
+ [(pat_store RC:$d, ADDRii32:$a)]>,
+ Requires<[Use32BitAddresses]>;
+ def ii64 : InstPTX<(outs),
+ (ins RC:$d, MEMii64:$a),
+ !strconcat(opstr, !strconcat(typestr, "\t[$a], $d")),
+ [(pat_store RC:$d, ADDRii64:$a)]>,
+ Requires<[Use64BitAddresses]>;
+}
+
+multiclass PTX_LOCAL_LD_ST<string typestr, RegisterClass RC> {
+ def LDri32 : InstPTX<(outs RC:$d), (ins LOCALri32:$a),
+ !strconcat("ld.local", !strconcat(typestr, "\t$d, [$a]")),
+ [(set RC:$d, (load_global ADDRlocal32:$a))]>;
+ def LDri64 : InstPTX<(outs RC:$d), (ins LOCALri64:$a),
+ !strconcat("ld.local", !strconcat(typestr, "\t$d, [$a]")),
+ [(set RC:$d, (load_global ADDRlocal64:$a))]>;
+ def STri32 : InstPTX<(outs), (ins RC:$d, LOCALri32:$a),
+ !strconcat("st.local", !strconcat(typestr, "\t[$a], $d")),
+ [(store_global RC:$d, ADDRlocal32:$a)]>;
+ def STri64 : InstPTX<(outs), (ins RC:$d, LOCALri64:$a),
+ !strconcat("st.local", !strconcat(typestr, "\t[$a], $d")),
+ [(store_global RC:$d, ADDRlocal64:$a)]>;
+}
+
+multiclass PTX_PARAM_LD_ST<string typestr, RegisterClass RC> {
+ let hasSideEffects = 1 in {
+ def LDpi : InstPTX<(outs RC:$d), (ins i32imm:$a),
+ !strconcat("ld.param", !strconcat(typestr, "\t$d, [$a]")),
+ [(set RC:$d, (PTXloadparam texternalsym:$a))]>;
+ def STpi : InstPTX<(outs), (ins i32imm:$d, RC:$a),
+ !strconcat("st.param", !strconcat(typestr, "\t[$d], $a")),
+ [(PTXstoreparam texternalsym:$d, RC:$a)]>;
+ }
+}
+
+multiclass PTX_LD_ALL<string opstr, PatFrag pat_load> {
+ defm u16 : PTX_LD<opstr, ".u16", RegI16, pat_load>;
+ defm u32 : PTX_LD<opstr, ".u32", RegI32, pat_load>;
+ defm u64 : PTX_LD<opstr, ".u64", RegI64, pat_load>;
+ defm f32 : PTX_LD<opstr, ".f32", RegF32, pat_load>;
+ defm f64 : PTX_LD<opstr, ".f64", RegF64, pat_load>;
+}
+
+multiclass PTX_ST_ALL<string opstr, PatFrag pat_store> {
+ defm u16 : PTX_ST<opstr, ".u16", RegI16, pat_store>;
+ defm u32 : PTX_ST<opstr, ".u32", RegI32, pat_store>;
+ defm u64 : PTX_ST<opstr, ".u64", RegI64, pat_store>;
+ defm f32 : PTX_ST<opstr, ".f32", RegF32, pat_store>;
+ defm f64 : PTX_ST<opstr, ".f64", RegF64, pat_store>;
+}
+
+
+
+//===----------------------------------------------------------------------===//
+// Instruction definitions for loads/stores
+//===----------------------------------------------------------------------===//
+
+// Global/shared stores
+defm STg : PTX_ST_ALL<"st.global", store_global>;
+defm STs : PTX_ST_ALL<"st.shared", store_shared>;
+
+// Global/shared/constant loads
+defm LDg : PTX_LD_ALL<"ld.global", load_global>;
+defm LDc : PTX_LD_ALL<"ld.const", load_constant>;
+defm LDs : PTX_LD_ALL<"ld.shared", load_shared>;
+
+// Param loads/stores
+defm PARAMPRED : PTX_PARAM_LD_ST<".pred", RegPred>;
+defm PARAMU16 : PTX_PARAM_LD_ST<".u16", RegI16>;
+defm PARAMU32 : PTX_PARAM_LD_ST<".u32", RegI32>;
+defm PARAMU64 : PTX_PARAM_LD_ST<".u64", RegI64>;
+defm PARAMF32 : PTX_PARAM_LD_ST<".f32", RegF32>;
+defm PARAMF64 : PTX_PARAM_LD_ST<".f64", RegF64>;
+
+// Local loads/stores
+defm LOCALPRED : PTX_LOCAL_LD_ST<".pred", RegPred>;
+defm LOCALU16 : PTX_LOCAL_LD_ST<".u16", RegI16>;
+defm LOCALU32 : PTX_LOCAL_LD_ST<".u32", RegI32>;
+defm LOCALU64 : PTX_LOCAL_LD_ST<".u64", RegI64>;
+defm LOCALF32 : PTX_LOCAL_LD_ST<".f32", RegF32>;
+defm LOCALF64 : PTX_LOCAL_LD_ST<".f64", RegF64>;
+
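Each of the load_*/store_* fragments above repeats the same check: recover the
IR value behind the memory node and dispatch on its pointer's address space.
Factored into a standalone predicate it looks like this (a sketch; the helper
name is illustrative, and the usual LLVM headers for SelectionDAG nodes and
casting are assumed):

    static bool loadIsFromAddressSpace(const LoadSDNode *N, unsigned AddrSpace) {
      if (const Value *Src = N->getSrcValue())
        if (const PointerType *PT = dyn_cast<PointerType>(Src->getType()))
          return PT->getAddressSpace() == AddrSpace;
      return false; // no IR pointer attached, or not a pointer type
    }

The PatFrag bodies inline exactly this logic with PTXStateSpace::Global,
PTXStateSpace::Constant, and PTXStateSpace::Shared as the address-space
constant.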
diff --git a/lib/Target/PTX/PTXIntrinsicInstrInfo.td b/lib/Target/PTX/PTXIntrinsicInstrInfo.td
index 8d97909..9de1cb6 100644
--- a/lib/Target/PTX/PTXIntrinsicInstrInfo.td
+++ b/lib/Target/PTX/PTXIntrinsicInstrInfo.td
@@ -25,37 +25,63 @@ class PTX_READ_SPECIAL_REGISTER_R32<string regname, Intrinsic intop>
// TODO Add read vector-version of special registers
-//def PTX_READ_TID_R64 : PTX_READ_SPECIAL_REGISTER_R64<"tid", int_ptx_read_tid_r64>;
-def PTX_READ_TID_X : PTX_READ_SPECIAL_REGISTER_R32<"tid.x", int_ptx_read_tid_x>;
-def PTX_READ_TID_Y : PTX_READ_SPECIAL_REGISTER_R32<"tid.y", int_ptx_read_tid_y>;
-def PTX_READ_TID_Z : PTX_READ_SPECIAL_REGISTER_R32<"tid.z", int_ptx_read_tid_z>;
-def PTX_READ_TID_W : PTX_READ_SPECIAL_REGISTER_R32<"tid.w", int_ptx_read_tid_w>;
+//def PTX_READ_TID_R64 : PTX_READ_SPECIAL_REGISTER_R64<"tid",
+// int_ptx_read_tid_r64>;
+def PTX_READ_TID_X : PTX_READ_SPECIAL_REGISTER_R32<"tid.x",
+ int_ptx_read_tid_x>;
+def PTX_READ_TID_Y : PTX_READ_SPECIAL_REGISTER_R32<"tid.y",
+ int_ptx_read_tid_y>;
+def PTX_READ_TID_Z : PTX_READ_SPECIAL_REGISTER_R32<"tid.z",
+ int_ptx_read_tid_z>;
+def PTX_READ_TID_W : PTX_READ_SPECIAL_REGISTER_R32<"tid.w",
+ int_ptx_read_tid_w>;
-//def PTX_READ_NTID_R64 : PTX_READ_SPECIAL_REGISTER_R64<"ntid", int_ptx_read_ntid_r64>;
-def PTX_READ_NTID_X : PTX_READ_SPECIAL_REGISTER_R32<"ntid.x", int_ptx_read_ntid_x>;
-def PTX_READ_NTID_Y : PTX_READ_SPECIAL_REGISTER_R32<"ntid.y", int_ptx_read_ntid_y>;
-def PTX_READ_NTID_Z : PTX_READ_SPECIAL_REGISTER_R32<"ntid.z", int_ptx_read_ntid_z>;
-def PTX_READ_NTID_W : PTX_READ_SPECIAL_REGISTER_R32<"ntid.w", int_ptx_read_ntid_w>;
+//def PTX_READ_NTID_R64 : PTX_READ_SPECIAL_REGISTER_R64<"ntid",
+// int_ptx_read_ntid_r64>;
+def PTX_READ_NTID_X : PTX_READ_SPECIAL_REGISTER_R32<"ntid.x",
+ int_ptx_read_ntid_x>;
+def PTX_READ_NTID_Y : PTX_READ_SPECIAL_REGISTER_R32<"ntid.y",
+ int_ptx_read_ntid_y>;
+def PTX_READ_NTID_Z : PTX_READ_SPECIAL_REGISTER_R32<"ntid.z",
+ int_ptx_read_ntid_z>;
+def PTX_READ_NTID_W : PTX_READ_SPECIAL_REGISTER_R32<"ntid.w",
+ int_ptx_read_ntid_w>;
-def PTX_READ_LANEID : PTX_READ_SPECIAL_REGISTER_R32<"laneid", int_ptx_read_laneid>;
-def PTX_READ_WARPID : PTX_READ_SPECIAL_REGISTER_R32<"warpid", int_ptx_read_warpid>;
-def PTX_READ_NWARPID : PTX_READ_SPECIAL_REGISTER_R32<"nwarpid", int_ptx_read_nwarpid>;
+def PTX_READ_LANEID : PTX_READ_SPECIAL_REGISTER_R32<"laneid",
+ int_ptx_read_laneid>;
+def PTX_READ_WARPID : PTX_READ_SPECIAL_REGISTER_R32<"warpid",
+ int_ptx_read_warpid>;
+def PTX_READ_NWARPID : PTX_READ_SPECIAL_REGISTER_R32<"nwarpid",
+ int_ptx_read_nwarpid>;
-//def PTX_READ_CTAID_R64 : PTX_READ_SPECIAL_REGISTER_R64<"ctaid", int_ptx_read_ctaid_r64>;
-def PTX_READ_CTAID_X : PTX_READ_SPECIAL_REGISTER_R32<"ctaid.x", int_ptx_read_ctaid_x>;
-def PTX_READ_CTAID_Y : PTX_READ_SPECIAL_REGISTER_R32<"ctaid.y", int_ptx_read_ctaid_y>;
-def PTX_READ_CTAID_Z : PTX_READ_SPECIAL_REGISTER_R32<"ctaid.z", int_ptx_read_ctaid_z>;
-def PTX_READ_CTAID_W : PTX_READ_SPECIAL_REGISTER_R32<"ctaid.w", int_ptx_read_ctaid_w>;
+//def PTX_READ_CTAID_R64 :
+//PTX_READ_SPECIAL_REGISTER_R64<"ctaid", int_ptx_read_ctaid_r64>;
+def PTX_READ_CTAID_X : PTX_READ_SPECIAL_REGISTER_R32<"ctaid.x",
+ int_ptx_read_ctaid_x>;
+def PTX_READ_CTAID_Y : PTX_READ_SPECIAL_REGISTER_R32<"ctaid.y",
+ int_ptx_read_ctaid_y>;
+def PTX_READ_CTAID_Z : PTX_READ_SPECIAL_REGISTER_R32<"ctaid.z",
+ int_ptx_read_ctaid_z>;
+def PTX_READ_CTAID_W : PTX_READ_SPECIAL_REGISTER_R32<"ctaid.w",
+ int_ptx_read_ctaid_w>;
-//def PTX_READ_NCTAID_R64 : PTX_READ_SPECIAL_REGISTER_R64<"nctaid", int_ptx_read_nctaid_r64>;
-def PTX_READ_NCTAID_X : PTX_READ_SPECIAL_REGISTER_R32<"nctaid.x", int_ptx_read_nctaid_x>;
-def PTX_READ_NCTAID_Y : PTX_READ_SPECIAL_REGISTER_R32<"nctaid.y", int_ptx_read_nctaid_y>;
-def PTX_READ_NCTAID_Z : PTX_READ_SPECIAL_REGISTER_R32<"nctaid.z", int_ptx_read_nctaid_z>;
-def PTX_READ_NCTAID_W : PTX_READ_SPECIAL_REGISTER_R32<"nctaid.w", int_ptx_read_nctaid_w>;
+//def PTX_READ_NCTAID_R64 :
+//PTX_READ_SPECIAL_REGISTER_R64<"nctaid", int_ptx_read_nctaid_r64>;
+def PTX_READ_NCTAID_X : PTX_READ_SPECIAL_REGISTER_R32<"nctaid.x",
+ int_ptx_read_nctaid_x>;
+def PTX_READ_NCTAID_Y : PTX_READ_SPECIAL_REGISTER_R32<"nctaid.y",
+ int_ptx_read_nctaid_y>;
+def PTX_READ_NCTAID_Z : PTX_READ_SPECIAL_REGISTER_R32<"nctaid.z",
+ int_ptx_read_nctaid_z>;
+def PTX_READ_NCTAID_W : PTX_READ_SPECIAL_REGISTER_R32<"nctaid.w",
+ int_ptx_read_nctaid_w>;
-def PTX_READ_SMID : PTX_READ_SPECIAL_REGISTER_R32<"smid", int_ptx_read_smid>;
-def PTX_READ_NSMID : PTX_READ_SPECIAL_REGISTER_R32<"nsmid", int_ptx_read_nsmid>;
-def PTX_READ_GRIDID : PTX_READ_SPECIAL_REGISTER_R32<"gridid", int_ptx_read_gridid>;
+def PTX_READ_SMID : PTX_READ_SPECIAL_REGISTER_R32<"smid",
+ int_ptx_read_smid>;
+def PTX_READ_NSMID : PTX_READ_SPECIAL_REGISTER_R32<"nsmid",
+ int_ptx_read_nsmid>;
+def PTX_READ_GRIDID : PTX_READ_SPECIAL_REGISTER_R32<"gridid",
+ int_ptx_read_gridid>;
def PTX_READ_LANEMASK_EQ
: PTX_READ_SPECIAL_REGISTER_R32<"lanemask_eq", int_ptx_read_lanemask_eq>;
diff --git a/lib/Target/PTX/PTXMCAsmStreamer.cpp b/lib/Target/PTX/PTXMCAsmStreamer.cpp
index b13a3da..468ce93 100644
--- a/lib/Target/PTX/PTXMCAsmStreamer.cpp
+++ b/lib/Target/PTX/PTXMCAsmStreamer.cpp
@@ -100,7 +100,7 @@ public:
/// @{
virtual void ChangeSection(const MCSection *Section);
- virtual void InitSections() {}
+ virtual void InitSections() { /* PTX does not use sections */ }
virtual void EmitLabel(MCSymbol *Symbol);
@@ -132,7 +132,9 @@ public:
///
/// @param Symbol - The common symbol to emit.
/// @param Size - The size of the common symbol.
- virtual void EmitLocalCommonSymbol(MCSymbol *Symbol, uint64_t Size);
+ /// @param ByteAlignment - The alignment of the common symbol in bytes.
+ virtual void EmitLocalCommonSymbol(MCSymbol *Symbol, uint64_t Size,
+ unsigned ByteAlignment);
virtual void EmitZerofill(const MCSection *Section, MCSymbol *Symbol = 0,
unsigned Size = 0, unsigned ByteAlignment = 0);
@@ -233,7 +235,7 @@ void PTXMCAsmStreamer::ChangeSection(const MCSection *Section) {
void PTXMCAsmStreamer::EmitLabel(MCSymbol *Symbol) {
assert(Symbol->isUndefined() && "Cannot define a symbol twice!");
assert(!Symbol->isVariable() && "Cannot emit a variable symbol!");
- //assert(getCurrentSection() && "Cannot emit before setting section!");
+ assert(getCurrentSection() && "Cannot emit before setting section!");
OS << *Symbol << MAI.getLabelSuffix();
EmitEOL();
@@ -283,7 +285,8 @@ void PTXMCAsmStreamer::EmitELFSize(MCSymbol *Symbol, const MCExpr *Value) {}
void PTXMCAsmStreamer::EmitCommonSymbol(MCSymbol *Symbol, uint64_t Size,
unsigned ByteAlignment) {}
-void PTXMCAsmStreamer::EmitLocalCommonSymbol(MCSymbol *Symbol, uint64_t Size) {}
+void PTXMCAsmStreamer::EmitLocalCommonSymbol(MCSymbol *Symbol, uint64_t Size,
+ unsigned ByteAlignment) {}
void PTXMCAsmStreamer::EmitZerofill(const MCSection *Section, MCSymbol *Symbol,
unsigned Size, unsigned ByteAlignment) {}
@@ -510,7 +513,7 @@ void PTXMCAsmStreamer::EmitInstruction(const MCInst &Inst) {
// If we have an AsmPrinter, use that to print, otherwise print the MCInst.
if (InstPrinter)
- InstPrinter->printInst(&Inst, OS);
+ InstPrinter->printInst(&Inst, OS, "");
else
Inst.print(OS, &MAI);
EmitEOL();
@@ -533,7 +536,7 @@ namespace llvm {
formatted_raw_ostream &OS,
bool isVerboseAsm, bool useLoc, bool useCFI,
MCInstPrinter *IP,
- MCCodeEmitter *CE, TargetAsmBackend *TAB,
+ MCCodeEmitter *CE, MCAsmBackend *MAB,
bool ShowInst) {
return new PTXMCAsmStreamer(Context, OS, isVerboseAsm, useLoc,
IP, CE, ShowInst);
diff --git a/lib/Target/PTX/PTXMCInstLower.cpp b/lib/Target/PTX/PTXMCInstLower.cpp
new file mode 100644
index 0000000..142e639
--- /dev/null
+++ b/lib/Target/PTX/PTXMCInstLower.cpp
@@ -0,0 +1,32 @@
+//===-- PTXMCInstLower.cpp - Convert PTX MachineInstr to an MCInst --------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains code to lower PTX MachineInstrs to their corresponding
+// MCInst records.
+//
+//===----------------------------------------------------------------------===//
+
+#include "PTX.h"
+#include "PTXAsmPrinter.h"
+#include "llvm/Constants.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/Target/Mangler.h"
+
+void llvm::LowerPTXMachineInstrToMCInst(const MachineInstr *MI, MCInst &OutMI,
+ PTXAsmPrinter &AP) {
+ OutMI.setOpcode(MI->getOpcode());
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+    const MachineOperand &MO = MI->getOperand(i);
+    OutMI.addOperand(AP.lowerOperand(MO));
+ }
+}
+
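A natural consumer of this helper is the asm printer's per-instruction hook; a
sketch of the expected call site (the exact body of
PTXAsmPrinter::EmitInstruction is not part of this patch, so this is an
assumption about its shape):

    void PTXAsmPrinter::EmitInstruction(const MachineInstr *MI) {
      MCInst TmpInst;
      LowerPTXMachineInstrToMCInst(MI, TmpInst, *this);
      OutStreamer.EmitInstruction(TmpInst); // print via the MC layer
    }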
diff --git a/lib/Target/PTX/PTXMFInfoExtract.cpp b/lib/Target/PTX/PTXMFInfoExtract.cpp
index 6fe9e6c..b33a273 100644
--- a/lib/Target/PTX/PTXMFInfoExtract.cpp
+++ b/lib/Target/PTX/PTXMFInfoExtract.cpp
@@ -52,36 +52,12 @@ bool PTXMFInfoExtract::runOnMachineFunction(MachineFunction &MF) {
PTXMachineFunctionInfo *MFI = MF.getInfo<PTXMachineFunctionInfo>();
MachineRegisterInfo &MRI = MF.getRegInfo();
- DEBUG(dbgs() << "******** PTX FUNCTION LOCAL VAR REG DEF ********\n");
-
- DEBUG(dbgs()
- << "PTX::NoRegister == " << PTX::NoRegister << "\n"
- << "PTX::NUM_TARGET_REGS == " << PTX::NUM_TARGET_REGS << "\n");
-
- DEBUG(for (unsigned reg = PTX::NoRegister + 1;
- reg < PTX::NUM_TARGET_REGS; ++reg)
- if (MRI.isPhysRegUsed(reg))
- dbgs() << "Used Reg: " << reg << "\n";);
-
- // FIXME: This is a slow linear scanning
- for (unsigned reg = PTX::NoRegister + 1; reg < PTX::NUM_TARGET_REGS; ++reg)
- if (MRI.isPhysRegUsed(reg) &&
- !MFI->isRetReg(reg) &&
- (MFI->isKernel() || !MFI->isArgReg(reg)))
- MFI->addLocalVarReg(reg);
-
- // Notify MachineFunctionInfo that I've done adding local var reg
- MFI->doneAddLocalVar();
-
- DEBUG(for (PTXMachineFunctionInfo::reg_iterator
- i = MFI->argRegBegin(), e = MFI->argRegEnd();
- i != e; ++i)
- dbgs() << "Arg Reg: " << *i << "\n";);
-
- DEBUG(for (PTXMachineFunctionInfo::reg_iterator
- i = MFI->localVarRegBegin(), e = MFI->localVarRegEnd();
- i != e; ++i)
- dbgs() << "Local Var Reg: " << *i << "\n";);
+ // Generate list of all virtual registers used in this function
+ for (unsigned i = 0; i < MRI.getNumVirtRegs(); ++i) {
+ unsigned Reg = TargetRegisterInfo::index2VirtReg(i);
+ const TargetRegisterClass *TRC = MRI.getRegClass(Reg);
+ MFI->addVirtualRegister(TRC, Reg);
+ }
return false;
}
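The rewritten pass iterates virtual registers by dense index. Virtual register
numbers live in a reserved range apart from the physical registers, so
TargetRegisterInfo provides static helpers to map between the index space and
the register-number space:

    #include "llvm/Target/TargetRegisterInfo.h"
    using namespace llvm;

    static void demoVirtRegIndexing() {
      // Round-trip between a dense index and a virtual register number.
      unsigned Reg = TargetRegisterInfo::index2VirtReg(0);   // first virtual reg
      unsigned Idx = TargetRegisterInfo::virtReg2Index(Reg); // == 0
      (void)Idx;
    }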
diff --git a/lib/Target/PTX/PTXMachineFunctionInfo.h b/lib/Target/PTX/PTXMachineFunctionInfo.h
index 9d65f5b..3b985f7 100644
--- a/lib/Target/PTX/PTXMachineFunctionInfo.h
+++ b/lib/Target/PTX/PTXMachineFunctionInfo.h
@@ -15,75 +15,148 @@
#define PTX_MACHINE_FUNCTION_INFO_H
#include "PTX.h"
+#include "PTXParamManager.h"
+#include "PTXRegisterInfo.h"
+#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/StringExtras.h"
#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
namespace llvm {
+
/// PTXMachineFunctionInfo - This class is derived from MachineFunction and
/// contains private PTX target-specific information for each MachineFunction.
///
class PTXMachineFunctionInfo : public MachineFunctionInfo {
private:
- bool is_kernel;
- std::vector<unsigned> reg_arg, reg_local_var;
- std::vector<unsigned> reg_ret;
- bool _isDoneAddArg;
+ bool IsKernel;
+ DenseSet<unsigned> RegArgs;
+ DenseSet<unsigned> RegRets;
+
+ typedef std::vector<unsigned> RegisterList;
+ typedef DenseMap<const TargetRegisterClass*, RegisterList> RegisterMap;
+ typedef DenseMap<unsigned, std::string> RegisterNameMap;
+ typedef DenseMap<int, std::string> FrameMap;
+
+ RegisterMap UsedRegs;
+ RegisterNameMap RegNames;
+ FrameMap FrameSymbols;
+
+ PTXParamManager ParamManager;
public:
+ typedef DenseSet<unsigned>::const_iterator reg_iterator;
+
PTXMachineFunctionInfo(MachineFunction &MF)
- : is_kernel(false), reg_ret(PTX::NoRegister), _isDoneAddArg(false) {
- reg_arg.reserve(8);
- reg_local_var.reserve(32);
+ : IsKernel(false) {
+ UsedRegs[PTX::RegPredRegisterClass] = RegisterList();
+ UsedRegs[PTX::RegI16RegisterClass] = RegisterList();
+ UsedRegs[PTX::RegI32RegisterClass] = RegisterList();
+ UsedRegs[PTX::RegI64RegisterClass] = RegisterList();
+ UsedRegs[PTX::RegF32RegisterClass] = RegisterList();
+ UsedRegs[PTX::RegF64RegisterClass] = RegisterList();
}
- void setKernel(bool _is_kernel=true) { is_kernel = _is_kernel; }
-
- void addArgReg(unsigned reg) { reg_arg.push_back(reg); }
- void addLocalVarReg(unsigned reg) { reg_local_var.push_back(reg); }
- void addRetReg(unsigned reg) {
- if (!isRetReg(reg)) {
- reg_ret.push_back(reg);
+ /// getParamManager - Returns the PTXParamManager instance for this function.
+ PTXParamManager& getParamManager() { return ParamManager; }
+ const PTXParamManager& getParamManager() const { return ParamManager; }
+
+ /// setKernel/isKernel - Gets/sets a flag that indicates if this function is
+ /// a PTX kernel function.
+ void setKernel(bool _IsKernel=true) { IsKernel = _IsKernel; }
+ bool isKernel() const { return IsKernel; }
+
+ /// argreg_begin/argreg_end - Returns iterators to the set of registers
+ /// containing function arguments.
+ reg_iterator argreg_begin() const { return RegArgs.begin(); }
+ reg_iterator argreg_end() const { return RegArgs.end(); }
+
+ /// retreg_begin/retreg_end - Returns iterators to the set of registers
+ /// containing the function return values.
+ reg_iterator retreg_begin() const { return RegRets.begin(); }
+ reg_iterator retreg_end() const { return RegRets.end(); }
+
+ /// addRetReg - Adds a register to the set of return-value registers.
+ void addRetReg(unsigned Reg) {
+ if (!RegRets.count(Reg)) {
+ RegRets.insert(Reg);
+ std::string name;
+ name = "%ret";
+ name += utostr(RegRets.size() - 1);
+ RegNames[Reg] = name;
}
}
- void doneAddArg(void) {
- _isDoneAddArg = true;
+ /// addArgReg - Adds a register to the set of function argument registers.
+ void addArgReg(unsigned Reg) {
+ RegArgs.insert(Reg);
+ std::string name;
+ name = "%param";
+ name += utostr(RegArgs.size() - 1);
+ RegNames[Reg] = name;
}
- void doneAddLocalVar(void) {}
-
- bool isKernel() const { return is_kernel; }
- typedef std::vector<unsigned>::const_iterator reg_iterator;
- typedef std::vector<unsigned>::const_reverse_iterator reg_reverse_iterator;
- typedef std::vector<unsigned>::const_iterator ret_iterator;
-
- bool argRegEmpty() const { return reg_arg.empty(); }
- int getNumArg() const { return reg_arg.size(); }
- reg_iterator argRegBegin() const { return reg_arg.begin(); }
- reg_iterator argRegEnd() const { return reg_arg.end(); }
- reg_reverse_iterator argRegReverseBegin() const { return reg_arg.rbegin(); }
- reg_reverse_iterator argRegReverseEnd() const { return reg_arg.rend(); }
-
- bool localVarRegEmpty() const { return reg_local_var.empty(); }
- reg_iterator localVarRegBegin() const { return reg_local_var.begin(); }
- reg_iterator localVarRegEnd() const { return reg_local_var.end(); }
-
- bool retRegEmpty() const { return reg_ret.empty(); }
- int getNumRet() const { return reg_ret.size(); }
- ret_iterator retRegBegin() const { return reg_ret.begin(); }
- ret_iterator retRegEnd() const { return reg_ret.end(); }
+ /// addVirtualRegister - Adds a virtual register to the set of all used
+ /// registers in the function.
+ void addVirtualRegister(const TargetRegisterClass *TRC, unsigned Reg) {
+ std::string name;
+
+ // Do not count registers that are argument/return registers.
+ if (!RegRets.count(Reg) && !RegArgs.count(Reg)) {
+ UsedRegs[TRC].push_back(Reg);
+ if (TRC == PTX::RegPredRegisterClass)
+ name = "%p";
+ else if (TRC == PTX::RegI16RegisterClass)
+ name = "%rh";
+ else if (TRC == PTX::RegI32RegisterClass)
+ name = "%r";
+ else if (TRC == PTX::RegI64RegisterClass)
+ name = "%rd";
+ else if (TRC == PTX::RegF32RegisterClass)
+ name = "%f";
+ else if (TRC == PTX::RegF64RegisterClass)
+ name = "%fd";
+ else
+ llvm_unreachable("Invalid register class");
+
+ name += utostr(UsedRegs[TRC].size() - 1);
+ RegNames[Reg] = name;
+ }
+ }
- bool isArgReg(unsigned reg) const {
- return std::find(reg_arg.begin(), reg_arg.end(), reg) != reg_arg.end();
+ /// getRegisterName - Returns the name of the specified virtual register. This
+ /// name is used during PTX emission.
+ const char *getRegisterName(unsigned Reg) const {
+ if (RegNames.count(Reg))
+ return RegNames.find(Reg)->second.c_str();
+ else if (Reg == PTX::NoRegister)
+ return "%noreg";
+ else
+ llvm_unreachable("Register not in register name map");
}
- bool isRetReg(unsigned reg) const {
- return std::find(reg_ret.begin(), reg_ret.end(), reg) != reg_ret.end();
+ /// getNumRegistersForClass - Returns the number of virtual registers that are
+ /// used for the specified register class.
+ unsigned getNumRegistersForClass(const TargetRegisterClass *TRC) const {
+ return UsedRegs.lookup(TRC).size();
}
- bool isLocalVarReg(unsigned reg) const {
- return std::find(reg_local_var.begin(), reg_local_var.end(), reg)
- != reg_local_var.end();
+ /// getFrameSymbol - Returns the symbol name for the given FrameIndex.
+ const char* getFrameSymbol(int FrameIndex) {
+ if (FrameSymbols.count(FrameIndex)) {
+ return FrameSymbols.lookup(FrameIndex).c_str();
+ } else {
+ std::string Name = "__local";
+ Name += utostr(FrameIndex);
+ // The whole point of caching this name is to ensure the pointer we pass
+ // to any getExternalSymbol() calls will remain valid for the lifetime of
+ // the back-end instance. This is to work around an issue in SelectionDAG
+ // where symbol names are expected to be life-long strings.
+ FrameSymbols[FrameIndex] = Name;
+ return FrameSymbols[FrameIndex].c_str();
+ }
}
}; // class PTXMachineFunctionInfo
} // namespace llvm
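addVirtualRegister hard-codes the PTX register-naming convention: each class
gets a prefix plus a running counter. The mapping, restated as a standalone
helper (the name is illustrative; the logic is identical to the code above):

    static const char *regClassPrefix(const TargetRegisterClass *TRC) {
      if (TRC == PTX::RegPredRegisterClass) return "%p";  // predicate
      if (TRC == PTX::RegI16RegisterClass)  return "%rh"; // 16-bit integer
      if (TRC == PTX::RegI32RegisterClass)  return "%r";  // 32-bit integer
      if (TRC == PTX::RegI64RegisterClass)  return "%rd"; // 64-bit integer
      if (TRC == PTX::RegF32RegisterClass)  return "%f";  // 32-bit float
      if (TRC == PTX::RegF64RegisterClass)  return "%fd"; // 64-bit float
      llvm_unreachable("Invalid register class");
    }

So the third RegI32 virtual register added, for example, is named %r2.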
diff --git a/lib/Target/PTX/PTXParamManager.cpp b/lib/Target/PTX/PTXParamManager.cpp
new file mode 100644
index 0000000..7753787
--- /dev/null
+++ b/lib/Target/PTX/PTXParamManager.cpp
@@ -0,0 +1,73 @@
+//===- PTXParamManager.cpp - Manager for .param variables -------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the PTXParamManager class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "PTX.h"
+#include "PTXParamManager.h"
+#include "llvm/ADT/StringExtras.h"
+
+using namespace llvm;
+
+PTXParamManager::PTXParamManager() {
+}
+
+unsigned PTXParamManager::addArgumentParam(unsigned Size) {
+ PTXParam Param;
+ Param.Type = PTX_PARAM_TYPE_ARGUMENT;
+ Param.Size = Size;
+
+ std::string Name;
+ Name = "__param_";
+ Name += utostr(ArgumentParams.size()+1);
+ Param.Name = Name;
+
+ unsigned Index = AllParams.size();
+ AllParams[Index] = Param;
+ ArgumentParams.push_back(Index);
+
+ return Index;
+}
+
+unsigned PTXParamManager::addReturnParam(unsigned Size) {
+ PTXParam Param;
+ Param.Type = PTX_PARAM_TYPE_RETURN;
+ Param.Size = Size;
+
+ std::string Name;
+ Name = "__ret_";
+ Name += utostr(ReturnParams.size()+1);
+ Param.Name = Name;
+
+ unsigned Index = AllParams.size();
+ AllParams[Index] = Param;
+ ReturnParams.push_back(Index);
+
+ return Index;
+}
+
+unsigned PTXParamManager::addLocalParam(unsigned Size) {
+ PTXParam Param;
+ Param.Type = PTX_PARAM_TYPE_LOCAL;
+ Param.Size = Size;
+
+ std::string Name;
+ Name = "__localparam_";
+ Name += utostr(LocalParams.size()+1);
+ Param.Name = Name;
+
+ unsigned Index = AllParams.size();
+ AllParams[Index] = Param;
+ LocalParams.push_back(Index);
+
+ return Index;
+}
+
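Taken together, the three entry points register a function's .param variables
and hand back stable indices. A usage sketch for a kernel with one 32-bit
argument and a 32-bit return value (the assertions restate the naming and size
rules in the code above):

    #include <cassert>
    #include "PTXParamManager.h"
    using namespace llvm;

    static void demoParams() {
      PTXParamManager PM;
      unsigned Arg = PM.addArgumentParam(32); // named "__param_1"
      unsigned Ret = PM.addReturnParam(32);   // named "__ret_1"
      assert(PM.getParamName(Arg) == "__param_1");
      assert(PM.getParamSize(Ret) == 32);     // sizes are tracked in bits
    }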
diff --git a/lib/Target/PTX/PTXParamManager.h b/lib/Target/PTX/PTXParamManager.h
new file mode 100644
index 0000000..9fd2de5
--- /dev/null
+++ b/lib/Target/PTX/PTXParamManager.h
@@ -0,0 +1,86 @@
+//===- PTXParamManager.h - Manager for .param variables ----------*- C++ -*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the PTXParamManager class, which manages all defined .param
+// variables for a particular function.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef PTX_PARAM_MANAGER_H
+#define PTX_PARAM_MANAGER_H
+
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallVector.h"
+
+namespace llvm {
+
+/// PTXParamManager - This class manages all .param variables defined for a
+/// particular function.
+class PTXParamManager {
+private:
+
+ /// PTXParamType - Type of a .param variable
+ enum PTXParamType {
+ PTX_PARAM_TYPE_ARGUMENT,
+ PTX_PARAM_TYPE_RETURN,
+ PTX_PARAM_TYPE_LOCAL
+ };
+
+ /// PTXParam - Definition of a PTX .param variable
+ struct PTXParam {
+ PTXParamType Type;
+ unsigned Size;
+ std::string Name;
+ };
+
+ DenseMap<unsigned, PTXParam> AllParams;
+ SmallVector<unsigned, 4> ArgumentParams;
+ SmallVector<unsigned, 4> ReturnParams;
+ SmallVector<unsigned, 4> LocalParams;
+
+public:
+
+ typedef SmallVector<unsigned, 4>::const_iterator param_iterator;
+
+ PTXParamManager();
+
+ param_iterator arg_begin() const { return ArgumentParams.begin(); }
+ param_iterator arg_end() const { return ArgumentParams.end(); }
+ param_iterator ret_begin() const { return ReturnParams.begin(); }
+ param_iterator ret_end() const { return ReturnParams.end(); }
+ param_iterator local_begin() const { return LocalParams.begin(); }
+ param_iterator local_end() const { return LocalParams.end(); }
+
+  /// addArgumentParam - Adds a new .param used as an argument and returns
+  /// its index.
+  unsigned addArgumentParam(unsigned Size);
+
+  /// addReturnParam - Adds a new .param used as a return value and returns
+  /// its index.
+  unsigned addReturnParam(unsigned Size);
+
+  /// addLocalParam - Adds a new .param used as a local .param variable and
+  /// returns its index.
+  unsigned addLocalParam(unsigned Size);
+
+ /// getParamName - Returns the name of the parameter as a string.
+ const std::string &getParamName(unsigned Param) const {
+ assert(AllParams.count(Param) == 1 && "Param has not been defined!");
+ return AllParams.find(Param)->second.Name;
+ }
+
+ /// getParamSize - Returns the size of the parameter in bits.
+ unsigned getParamSize(unsigned Param) const {
+ assert(AllParams.count(Param) == 1 && "Param has not been defined!");
+ return AllParams.find(Param)->second.Size;
+ }
+
+};
+
+}
+
+#endif
+
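As a rough usage sketch of the interface above: each addXParam() call returns a stable index into AllParams, and the iterators walk those indices in declaration order. The sizes and the dumpParams() helper below are made up for illustration; the sketch assumes the surrounding LLVM headers are available.

    #include "PTXParamManager.h"
    #include "llvm/Support/raw_ostream.h"
    using namespace llvm;

    void dumpParams() {
      PTXParamManager Params;
      Params.addArgumentParam(32);   // becomes "__param_1"
      Params.addArgumentParam(64);   // becomes "__param_2"
      Params.addReturnParam(32);     // becomes "__ret_1"

      // Prints __param_1 and __param_2 with their sizes.
      for (PTXParamManager::param_iterator I = Params.arg_begin(),
           E = Params.arg_end(); I != E; ++I)
        errs() << Params.getParamName(*I) << ": "
               << Params.getParamSize(*I) << "\n";
    }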
diff --git a/lib/Target/PTX/PTXRegAlloc.cpp b/lib/Target/PTX/PTXRegAlloc.cpp
new file mode 100644
index 0000000..2d2d5c3
--- /dev/null
+++ b/lib/Target/PTX/PTXRegAlloc.cpp
@@ -0,0 +1,58 @@
+//===-- PTXRegAlloc.cpp - PTX Register Allocator --------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains a register allocator for PTX code.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "ptx-reg-alloc"
+
+#include "PTX.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/RegAllocRegistry.h"
+
+using namespace llvm;
+
+namespace {
+ // Special register allocator for PTX.
+ class PTXRegAlloc : public MachineFunctionPass {
+ public:
+ static char ID;
+ PTXRegAlloc() : MachineFunctionPass(ID) {
+ initializePHIEliminationPass(*PassRegistry::getPassRegistry());
+ initializeTwoAddressInstructionPassPass(*PassRegistry::getPassRegistry());
+ }
+
+ virtual const char* getPassName() const {
+ return "PTX Register Allocator";
+ }
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesCFG();
+ AU.addRequiredID(PHIEliminationID);
+ AU.addRequiredID(TwoAddressInstructionPassID);
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
+ virtual bool runOnMachineFunction(MachineFunction &MF) {
+ // We do not actually do anything (at least not yet).
+ return false;
+ }
+ };
+
+ char PTXRegAlloc::ID = 0;
+
+ static RegisterRegAlloc
+ ptxRegAlloc("ptx", "PTX register allocator", createPTXRegisterAllocator);
+}
+
+FunctionPass *llvm::createPTXRegisterAllocator() {
+ return new PTXRegAlloc();
+}
+
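The RegisterRegAlloc object above uses LLVM's static-constructor registration idiom: constructing a file-scope global registers the factory under a name ("ptx" here) so the allocator can later be selected by that name, e.g. through llc's -regalloc option. A minimal sketch of the underlying idea with simplified stand-in types (not LLVM's actual implementation):

    #include <map>
    #include <string>

    class FunctionPassStub {};                       // stand-in for FunctionPass
    typedef FunctionPassStub *(*RegAllocFactory)();

    static std::map<std::string, RegAllocFactory> &allocRegistry() {
      static std::map<std::string, RegAllocFactory> Registry;
      return Registry;                               // constructed on first use
    }

    struct RegisterAllocSketch {
      RegisterAllocSketch(const char *Name, RegAllocFactory F) {
        allocRegistry()[Name] = F;                   // runs before main()
      }
    };

    FunctionPassStub *createStubAllocator() { return new FunctionPassStub(); }

    // File-scope object: registration happens as a side effect of construction.
    static RegisterAllocSketch X("ptx", createStubAllocator);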
diff --git a/lib/Target/PTX/PTXRegisterInfo.cpp b/lib/Target/PTX/PTXRegisterInfo.cpp
index cb56ea9..c806266 100644
--- a/lib/Target/PTX/PTXRegisterInfo.cpp
+++ b/lib/Target/PTX/PTXRegisterInfo.cpp
@@ -14,6 +14,9 @@
#include "PTX.h"
#include "PTXRegisterInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
@@ -23,15 +26,23 @@
using namespace llvm;
PTXRegisterInfo::PTXRegisterInfo(PTXTargetMachine &TM,
- const TargetInstrInfo &TII)
- : PTXGenRegisterInfo() {
+ const TargetInstrInfo &tii)
+ // PTX does not have a return address register.
+ : PTXGenRegisterInfo(0), TII(tii) {
}
void PTXRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
int SPAdj,
RegScavenger *RS) const {
unsigned Index;
- MachineInstr& MI = *II;
+ MachineInstr &MI = *II;
+ //MachineBasicBlock &MBB = *MI.getParent();
+ //DebugLoc dl = MI.getDebugLoc();
+ //MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
+
+ //unsigned Reg = MRI.createVirtualRegister(PTX::RegF32RegisterClass);
+
+ llvm_unreachable("FrameIndex should have been previously eliminated!");
Index = 0;
while (!MI.getOperand(Index).isFI()) {
@@ -46,6 +57,18 @@ void PTXRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
DEBUG(dbgs() << "- SPAdj: " << SPAdj << "\n");
DEBUG(dbgs() << "- FrameIndex: " << FrameIndex << "\n");
+ //MachineInstr* MI2 = BuildMI(MBB, II, dl, TII.get(PTX::LOAD_LOCAL_F32))
+ //.addReg(Reg, RegState::Define).addImm(FrameIndex);
+ //if (MI2->findFirstPredOperandIdx() == -1) {
+ // MI2->addOperand(MachineOperand::CreateReg(PTX::NoRegister, /*IsDef=*/false));
+ // MI2->addOperand(MachineOperand::CreateImm(PTX::PRED_NORMAL));
+ //}
+ //MI2->dump();
+
+ //MachineOperand ESOp = MachineOperand::CreateES("__local__");
+
  // By this point, the frame index reflects stack slot re-use assignments.
+ //MI.getOperand(Index).ChangeToRegister(Reg, false);
MI.getOperand(Index).ChangeToImmediate(FrameIndex);
+ //MI.getOperand(Index) = ESOp;
}
diff --git a/lib/Target/PTX/PTXRegisterInfo.h b/lib/Target/PTX/PTXRegisterInfo.h
index 0b63cb6..55fafe4 100644
--- a/lib/Target/PTX/PTXRegisterInfo.h
+++ b/lib/Target/PTX/PTXRegisterInfo.h
@@ -25,8 +25,12 @@ class PTXTargetMachine;
class MachineFunction;
struct PTXRegisterInfo : public PTXGenRegisterInfo {
+private:
+ const TargetInstrInfo &TII;
+
+public:
PTXRegisterInfo(PTXTargetMachine &TM,
- const TargetInstrInfo &TII);
+ const TargetInstrInfo &tii);
virtual const unsigned
*getCalleeSavedRegs(const MachineFunction *MF = 0) const {
@@ -47,18 +51,6 @@ struct PTXRegisterInfo : public PTXGenRegisterInfo {
llvm_unreachable("PTX does not have a frame register");
return 0;
}
-
- virtual unsigned getRARegister() const {
- llvm_unreachable("PTX does not have a return address register");
- return 0;
- }
-
- virtual int getDwarfRegNum(unsigned RegNum, bool isEH) const {
- return PTXGenRegisterInfo::getDwarfRegNumFull(RegNum, 0);
- }
- virtual int getLLVMRegNum(unsigned RegNum, bool isEH) const {
- return PTXGenRegisterInfo::getLLVMRegNumFull(RegNum, 0);
- }
}; // struct PTXRegisterInfo
} // namespace llvm
diff --git a/lib/Target/PTX/PTXRegisterInfo.td b/lib/Target/PTX/PTXRegisterInfo.td
index 1313d24..6ed6d3f 100644
--- a/lib/Target/PTX/PTXRegisterInfo.td
+++ b/lib/Target/PTX/PTXRegisterInfo.td
@@ -20,536 +20,18 @@ class PTXReg<string n> : Register<n> {
// Registers
//===----------------------------------------------------------------------===//
-///===- Predicate Registers -----------------------------------------------===//
-
-def P0 : PTXReg<"p0">;
-def P1 : PTXReg<"p1">;
-def P2 : PTXReg<"p2">;
-def P3 : PTXReg<"p3">;
-def P4 : PTXReg<"p4">;
-def P5 : PTXReg<"p5">;
-def P6 : PTXReg<"p6">;
-def P7 : PTXReg<"p7">;
-def P8 : PTXReg<"p8">;
-def P9 : PTXReg<"p9">;
-def P10 : PTXReg<"p10">;
-def P11 : PTXReg<"p11">;
-def P12 : PTXReg<"p12">;
-def P13 : PTXReg<"p13">;
-def P14 : PTXReg<"p14">;
-def P15 : PTXReg<"p15">;
-def P16 : PTXReg<"p16">;
-def P17 : PTXReg<"p17">;
-def P18 : PTXReg<"p18">;
-def P19 : PTXReg<"p19">;
-def P20 : PTXReg<"p20">;
-def P21 : PTXReg<"p21">;
-def P22 : PTXReg<"p22">;
-def P23 : PTXReg<"p23">;
-def P24 : PTXReg<"p24">;
-def P25 : PTXReg<"p25">;
-def P26 : PTXReg<"p26">;
-def P27 : PTXReg<"p27">;
-def P28 : PTXReg<"p28">;
-def P29 : PTXReg<"p29">;
-def P30 : PTXReg<"p30">;
-def P31 : PTXReg<"p31">;
-def P32 : PTXReg<"p32">;
-def P33 : PTXReg<"p33">;
-def P34 : PTXReg<"p34">;
-def P35 : PTXReg<"p35">;
-def P36 : PTXReg<"p36">;
-def P37 : PTXReg<"p37">;
-def P38 : PTXReg<"p38">;
-def P39 : PTXReg<"p39">;
-def P40 : PTXReg<"p40">;
-def P41 : PTXReg<"p41">;
-def P42 : PTXReg<"p42">;
-def P43 : PTXReg<"p43">;
-def P44 : PTXReg<"p44">;
-def P45 : PTXReg<"p45">;
-def P46 : PTXReg<"p46">;
-def P47 : PTXReg<"p47">;
-def P48 : PTXReg<"p48">;
-def P49 : PTXReg<"p49">;
-def P50 : PTXReg<"p50">;
-def P51 : PTXReg<"p51">;
-def P52 : PTXReg<"p52">;
-def P53 : PTXReg<"p53">;
-def P54 : PTXReg<"p54">;
-def P55 : PTXReg<"p55">;
-def P56 : PTXReg<"p56">;
-def P57 : PTXReg<"p57">;
-def P58 : PTXReg<"p58">;
-def P59 : PTXReg<"p59">;
-def P60 : PTXReg<"p60">;
-def P61 : PTXReg<"p61">;
-def P62 : PTXReg<"p62">;
-def P63 : PTXReg<"p63">;
-def P64 : PTXReg<"p64">;
-def P65 : PTXReg<"p65">;
-def P66 : PTXReg<"p66">;
-def P67 : PTXReg<"p67">;
-def P68 : PTXReg<"p68">;
-def P69 : PTXReg<"p69">;
-def P70 : PTXReg<"p70">;
-def P71 : PTXReg<"p71">;
-def P72 : PTXReg<"p72">;
-def P73 : PTXReg<"p73">;
-def P74 : PTXReg<"p74">;
-def P75 : PTXReg<"p75">;
-def P76 : PTXReg<"p76">;
-def P77 : PTXReg<"p77">;
-def P78 : PTXReg<"p78">;
-def P79 : PTXReg<"p79">;
-def P80 : PTXReg<"p80">;
-def P81 : PTXReg<"p81">;
-def P82 : PTXReg<"p82">;
-def P83 : PTXReg<"p83">;
-def P84 : PTXReg<"p84">;
-def P85 : PTXReg<"p85">;
-def P86 : PTXReg<"p86">;
-def P87 : PTXReg<"p87">;
-def P88 : PTXReg<"p88">;
-def P89 : PTXReg<"p89">;
-def P90 : PTXReg<"p90">;
-def P91 : PTXReg<"p91">;
-def P92 : PTXReg<"p92">;
-def P93 : PTXReg<"p93">;
-def P94 : PTXReg<"p94">;
-def P95 : PTXReg<"p95">;
-def P96 : PTXReg<"p96">;
-def P97 : PTXReg<"p97">;
-def P98 : PTXReg<"p98">;
-def P99 : PTXReg<"p99">;
-def P100 : PTXReg<"p100">;
-def P101 : PTXReg<"p101">;
-def P102 : PTXReg<"p102">;
-def P103 : PTXReg<"p103">;
-def P104 : PTXReg<"p104">;
-def P105 : PTXReg<"p105">;
-def P106 : PTXReg<"p106">;
-def P107 : PTXReg<"p107">;
-def P108 : PTXReg<"p108">;
-def P109 : PTXReg<"p109">;
-def P110 : PTXReg<"p110">;
-def P111 : PTXReg<"p111">;
-def P112 : PTXReg<"p112">;
-def P113 : PTXReg<"p113">;
-def P114 : PTXReg<"p114">;
-def P115 : PTXReg<"p115">;
-def P116 : PTXReg<"p116">;
-def P117 : PTXReg<"p117">;
-def P118 : PTXReg<"p118">;
-def P119 : PTXReg<"p119">;
-def P120 : PTXReg<"p120">;
-def P121 : PTXReg<"p121">;
-def P122 : PTXReg<"p122">;
-def P123 : PTXReg<"p123">;
-def P124 : PTXReg<"p124">;
-def P125 : PTXReg<"p125">;
-def P126 : PTXReg<"p126">;
-def P127 : PTXReg<"p127">;
-
-///===- 16-Bit Registers --------------------------------------------------===//
-
-def RH0 : PTXReg<"rh0">;
-def RH1 : PTXReg<"rh1">;
-def RH2 : PTXReg<"rh2">;
-def RH3 : PTXReg<"rh3">;
-def RH4 : PTXReg<"rh4">;
-def RH5 : PTXReg<"rh5">;
-def RH6 : PTXReg<"rh6">;
-def RH7 : PTXReg<"rh7">;
-def RH8 : PTXReg<"rh8">;
-def RH9 : PTXReg<"rh9">;
-def RH10 : PTXReg<"rh10">;
-def RH11 : PTXReg<"rh11">;
-def RH12 : PTXReg<"rh12">;
-def RH13 : PTXReg<"rh13">;
-def RH14 : PTXReg<"rh14">;
-def RH15 : PTXReg<"rh15">;
-def RH16 : PTXReg<"rh16">;
-def RH17 : PTXReg<"rh17">;
-def RH18 : PTXReg<"rh18">;
-def RH19 : PTXReg<"rh19">;
-def RH20 : PTXReg<"rh20">;
-def RH21 : PTXReg<"rh21">;
-def RH22 : PTXReg<"rh22">;
-def RH23 : PTXReg<"rh23">;
-def RH24 : PTXReg<"rh24">;
-def RH25 : PTXReg<"rh25">;
-def RH26 : PTXReg<"rh26">;
-def RH27 : PTXReg<"rh27">;
-def RH28 : PTXReg<"rh28">;
-def RH29 : PTXReg<"rh29">;
-def RH30 : PTXReg<"rh30">;
-def RH31 : PTXReg<"rh31">;
-def RH32 : PTXReg<"rh32">;
-def RH33 : PTXReg<"rh33">;
-def RH34 : PTXReg<"rh34">;
-def RH35 : PTXReg<"rh35">;
-def RH36 : PTXReg<"rh36">;
-def RH37 : PTXReg<"rh37">;
-def RH38 : PTXReg<"rh38">;
-def RH39 : PTXReg<"rh39">;
-def RH40 : PTXReg<"rh40">;
-def RH41 : PTXReg<"rh41">;
-def RH42 : PTXReg<"rh42">;
-def RH43 : PTXReg<"rh43">;
-def RH44 : PTXReg<"rh44">;
-def RH45 : PTXReg<"rh45">;
-def RH46 : PTXReg<"rh46">;
-def RH47 : PTXReg<"rh47">;
-def RH48 : PTXReg<"rh48">;
-def RH49 : PTXReg<"rh49">;
-def RH50 : PTXReg<"rh50">;
-def RH51 : PTXReg<"rh51">;
-def RH52 : PTXReg<"rh52">;
-def RH53 : PTXReg<"rh53">;
-def RH54 : PTXReg<"rh54">;
-def RH55 : PTXReg<"rh55">;
-def RH56 : PTXReg<"rh56">;
-def RH57 : PTXReg<"rh57">;
-def RH58 : PTXReg<"rh58">;
-def RH59 : PTXReg<"rh59">;
-def RH60 : PTXReg<"rh60">;
-def RH61 : PTXReg<"rh61">;
-def RH62 : PTXReg<"rh62">;
-def RH63 : PTXReg<"rh63">;
-def RH64 : PTXReg<"rh64">;
-def RH65 : PTXReg<"rh65">;
-def RH66 : PTXReg<"rh66">;
-def RH67 : PTXReg<"rh67">;
-def RH68 : PTXReg<"rh68">;
-def RH69 : PTXReg<"rh69">;
-def RH70 : PTXReg<"rh70">;
-def RH71 : PTXReg<"rh71">;
-def RH72 : PTXReg<"rh72">;
-def RH73 : PTXReg<"rh73">;
-def RH74 : PTXReg<"rh74">;
-def RH75 : PTXReg<"rh75">;
-def RH76 : PTXReg<"rh76">;
-def RH77 : PTXReg<"rh77">;
-def RH78 : PTXReg<"rh78">;
-def RH79 : PTXReg<"rh79">;
-def RH80 : PTXReg<"rh80">;
-def RH81 : PTXReg<"rh81">;
-def RH82 : PTXReg<"rh82">;
-def RH83 : PTXReg<"rh83">;
-def RH84 : PTXReg<"rh84">;
-def RH85 : PTXReg<"rh85">;
-def RH86 : PTXReg<"rh86">;
-def RH87 : PTXReg<"rh87">;
-def RH88 : PTXReg<"rh88">;
-def RH89 : PTXReg<"rh89">;
-def RH90 : PTXReg<"rh90">;
-def RH91 : PTXReg<"rh91">;
-def RH92 : PTXReg<"rh92">;
-def RH93 : PTXReg<"rh93">;
-def RH94 : PTXReg<"rh94">;
-def RH95 : PTXReg<"rh95">;
-def RH96 : PTXReg<"rh96">;
-def RH97 : PTXReg<"rh97">;
-def RH98 : PTXReg<"rh98">;
-def RH99 : PTXReg<"rh99">;
-def RH100 : PTXReg<"rh100">;
-def RH101 : PTXReg<"rh101">;
-def RH102 : PTXReg<"rh102">;
-def RH103 : PTXReg<"rh103">;
-def RH104 : PTXReg<"rh104">;
-def RH105 : PTXReg<"rh105">;
-def RH106 : PTXReg<"rh106">;
-def RH107 : PTXReg<"rh107">;
-def RH108 : PTXReg<"rh108">;
-def RH109 : PTXReg<"rh109">;
-def RH110 : PTXReg<"rh110">;
-def RH111 : PTXReg<"rh111">;
-def RH112 : PTXReg<"rh112">;
-def RH113 : PTXReg<"rh113">;
-def RH114 : PTXReg<"rh114">;
-def RH115 : PTXReg<"rh115">;
-def RH116 : PTXReg<"rh116">;
-def RH117 : PTXReg<"rh117">;
-def RH118 : PTXReg<"rh118">;
-def RH119 : PTXReg<"rh119">;
-def RH120 : PTXReg<"rh120">;
-def RH121 : PTXReg<"rh121">;
-def RH122 : PTXReg<"rh122">;
-def RH123 : PTXReg<"rh123">;
-def RH124 : PTXReg<"rh124">;
-def RH125 : PTXReg<"rh125">;
-def RH126 : PTXReg<"rh126">;
-def RH127 : PTXReg<"rh127">;
-
-///===- 32-Bit Registers --------------------------------------------------===//
-
-def R0 : PTXReg<"r0">;
-def R1 : PTXReg<"r1">;
-def R2 : PTXReg<"r2">;
-def R3 : PTXReg<"r3">;
-def R4 : PTXReg<"r4">;
-def R5 : PTXReg<"r5">;
-def R6 : PTXReg<"r6">;
-def R7 : PTXReg<"r7">;
-def R8 : PTXReg<"r8">;
-def R9 : PTXReg<"r9">;
-def R10 : PTXReg<"r10">;
-def R11 : PTXReg<"r11">;
-def R12 : PTXReg<"r12">;
-def R13 : PTXReg<"r13">;
-def R14 : PTXReg<"r14">;
-def R15 : PTXReg<"r15">;
-def R16 : PTXReg<"r16">;
-def R17 : PTXReg<"r17">;
-def R18 : PTXReg<"r18">;
-def R19 : PTXReg<"r19">;
-def R20 : PTXReg<"r20">;
-def R21 : PTXReg<"r21">;
-def R22 : PTXReg<"r22">;
-def R23 : PTXReg<"r23">;
-def R24 : PTXReg<"r24">;
-def R25 : PTXReg<"r25">;
-def R26 : PTXReg<"r26">;
-def R27 : PTXReg<"r27">;
-def R28 : PTXReg<"r28">;
-def R29 : PTXReg<"r29">;
-def R30 : PTXReg<"r30">;
-def R31 : PTXReg<"r31">;
-def R32 : PTXReg<"r32">;
-def R33 : PTXReg<"r33">;
-def R34 : PTXReg<"r34">;
-def R35 : PTXReg<"r35">;
-def R36 : PTXReg<"r36">;
-def R37 : PTXReg<"r37">;
-def R38 : PTXReg<"r38">;
-def R39 : PTXReg<"r39">;
-def R40 : PTXReg<"r40">;
-def R41 : PTXReg<"r41">;
-def R42 : PTXReg<"r42">;
-def R43 : PTXReg<"r43">;
-def R44 : PTXReg<"r44">;
-def R45 : PTXReg<"r45">;
-def R46 : PTXReg<"r46">;
-def R47 : PTXReg<"r47">;
-def R48 : PTXReg<"r48">;
-def R49 : PTXReg<"r49">;
-def R50 : PTXReg<"r50">;
-def R51 : PTXReg<"r51">;
-def R52 : PTXReg<"r52">;
-def R53 : PTXReg<"r53">;
-def R54 : PTXReg<"r54">;
-def R55 : PTXReg<"r55">;
-def R56 : PTXReg<"r56">;
-def R57 : PTXReg<"r57">;
-def R58 : PTXReg<"r58">;
-def R59 : PTXReg<"r59">;
-def R60 : PTXReg<"r60">;
-def R61 : PTXReg<"r61">;
-def R62 : PTXReg<"r62">;
-def R63 : PTXReg<"r63">;
-def R64 : PTXReg<"r64">;
-def R65 : PTXReg<"r65">;
-def R66 : PTXReg<"r66">;
-def R67 : PTXReg<"r67">;
-def R68 : PTXReg<"r68">;
-def R69 : PTXReg<"r69">;
-def R70 : PTXReg<"r70">;
-def R71 : PTXReg<"r71">;
-def R72 : PTXReg<"r72">;
-def R73 : PTXReg<"r73">;
-def R74 : PTXReg<"r74">;
-def R75 : PTXReg<"r75">;
-def R76 : PTXReg<"r76">;
-def R77 : PTXReg<"r77">;
-def R78 : PTXReg<"r78">;
-def R79 : PTXReg<"r79">;
-def R80 : PTXReg<"r80">;
-def R81 : PTXReg<"r81">;
-def R82 : PTXReg<"r82">;
-def R83 : PTXReg<"r83">;
-def R84 : PTXReg<"r84">;
-def R85 : PTXReg<"r85">;
-def R86 : PTXReg<"r86">;
-def R87 : PTXReg<"r87">;
-def R88 : PTXReg<"r88">;
-def R89 : PTXReg<"r89">;
-def R90 : PTXReg<"r90">;
-def R91 : PTXReg<"r91">;
-def R92 : PTXReg<"r92">;
-def R93 : PTXReg<"r93">;
-def R94 : PTXReg<"r94">;
-def R95 : PTXReg<"r95">;
-def R96 : PTXReg<"r96">;
-def R97 : PTXReg<"r97">;
-def R98 : PTXReg<"r98">;
-def R99 : PTXReg<"r99">;
-def R100 : PTXReg<"r100">;
-def R101 : PTXReg<"r101">;
-def R102 : PTXReg<"r102">;
-def R103 : PTXReg<"r103">;
-def R104 : PTXReg<"r104">;
-def R105 : PTXReg<"r105">;
-def R106 : PTXReg<"r106">;
-def R107 : PTXReg<"r107">;
-def R108 : PTXReg<"r108">;
-def R109 : PTXReg<"r109">;
-def R110 : PTXReg<"r110">;
-def R111 : PTXReg<"r111">;
-def R112 : PTXReg<"r112">;
-def R113 : PTXReg<"r113">;
-def R114 : PTXReg<"r114">;
-def R115 : PTXReg<"r115">;
-def R116 : PTXReg<"r116">;
-def R117 : PTXReg<"r117">;
-def R118 : PTXReg<"r118">;
-def R119 : PTXReg<"r119">;
-def R120 : PTXReg<"r120">;
-def R121 : PTXReg<"r121">;
-def R122 : PTXReg<"r122">;
-def R123 : PTXReg<"r123">;
-def R124 : PTXReg<"r124">;
-def R125 : PTXReg<"r125">;
-def R126 : PTXReg<"r126">;
-def R127 : PTXReg<"r127">;
-
-///===- 64-Bit Registers --------------------------------------------------===//
-
-def RD0 : PTXReg<"rd0">;
-def RD1 : PTXReg<"rd1">;
-def RD2 : PTXReg<"rd2">;
-def RD3 : PTXReg<"rd3">;
-def RD4 : PTXReg<"rd4">;
-def RD5 : PTXReg<"rd5">;
-def RD6 : PTXReg<"rd6">;
-def RD7 : PTXReg<"rd7">;
-def RD8 : PTXReg<"rd8">;
-def RD9 : PTXReg<"rd9">;
-def RD10 : PTXReg<"rd10">;
-def RD11 : PTXReg<"rd11">;
-def RD12 : PTXReg<"rd12">;
-def RD13 : PTXReg<"rd13">;
-def RD14 : PTXReg<"rd14">;
-def RD15 : PTXReg<"rd15">;
-def RD16 : PTXReg<"rd16">;
-def RD17 : PTXReg<"rd17">;
-def RD18 : PTXReg<"rd18">;
-def RD19 : PTXReg<"rd19">;
-def RD20 : PTXReg<"rd20">;
-def RD21 : PTXReg<"rd21">;
-def RD22 : PTXReg<"rd22">;
-def RD23 : PTXReg<"rd23">;
-def RD24 : PTXReg<"rd24">;
-def RD25 : PTXReg<"rd25">;
-def RD26 : PTXReg<"rd26">;
-def RD27 : PTXReg<"rd27">;
-def RD28 : PTXReg<"rd28">;
-def RD29 : PTXReg<"rd29">;
-def RD30 : PTXReg<"rd30">;
-def RD31 : PTXReg<"rd31">;
-def RD32 : PTXReg<"rd32">;
-def RD33 : PTXReg<"rd33">;
-def RD34 : PTXReg<"rd34">;
-def RD35 : PTXReg<"rd35">;
-def RD36 : PTXReg<"rd36">;
-def RD37 : PTXReg<"rd37">;
-def RD38 : PTXReg<"rd38">;
-def RD39 : PTXReg<"rd39">;
-def RD40 : PTXReg<"rd40">;
-def RD41 : PTXReg<"rd41">;
-def RD42 : PTXReg<"rd42">;
-def RD43 : PTXReg<"rd43">;
-def RD44 : PTXReg<"rd44">;
-def RD45 : PTXReg<"rd45">;
-def RD46 : PTXReg<"rd46">;
-def RD47 : PTXReg<"rd47">;
-def RD48 : PTXReg<"rd48">;
-def RD49 : PTXReg<"rd49">;
-def RD50 : PTXReg<"rd50">;
-def RD51 : PTXReg<"rd51">;
-def RD52 : PTXReg<"rd52">;
-def RD53 : PTXReg<"rd53">;
-def RD54 : PTXReg<"rd54">;
-def RD55 : PTXReg<"rd55">;
-def RD56 : PTXReg<"rd56">;
-def RD57 : PTXReg<"rd57">;
-def RD58 : PTXReg<"rd58">;
-def RD59 : PTXReg<"rd59">;
-def RD60 : PTXReg<"rd60">;
-def RD61 : PTXReg<"rd61">;
-def RD62 : PTXReg<"rd62">;
-def RD63 : PTXReg<"rd63">;
-def RD64 : PTXReg<"rd64">;
-def RD65 : PTXReg<"rd65">;
-def RD66 : PTXReg<"rd66">;
-def RD67 : PTXReg<"rd67">;
-def RD68 : PTXReg<"rd68">;
-def RD69 : PTXReg<"rd69">;
-def RD70 : PTXReg<"rd70">;
-def RD71 : PTXReg<"rd71">;
-def RD72 : PTXReg<"rd72">;
-def RD73 : PTXReg<"rd73">;
-def RD74 : PTXReg<"rd74">;
-def RD75 : PTXReg<"rd75">;
-def RD76 : PTXReg<"rd76">;
-def RD77 : PTXReg<"rd77">;
-def RD78 : PTXReg<"rd78">;
-def RD79 : PTXReg<"rd79">;
-def RD80 : PTXReg<"rd80">;
-def RD81 : PTXReg<"rd81">;
-def RD82 : PTXReg<"rd82">;
-def RD83 : PTXReg<"rd83">;
-def RD84 : PTXReg<"rd84">;
-def RD85 : PTXReg<"rd85">;
-def RD86 : PTXReg<"rd86">;
-def RD87 : PTXReg<"rd87">;
-def RD88 : PTXReg<"rd88">;
-def RD89 : PTXReg<"rd89">;
-def RD90 : PTXReg<"rd90">;
-def RD91 : PTXReg<"rd91">;
-def RD92 : PTXReg<"rd92">;
-def RD93 : PTXReg<"rd93">;
-def RD94 : PTXReg<"rd94">;
-def RD95 : PTXReg<"rd95">;
-def RD96 : PTXReg<"rd96">;
-def RD97 : PTXReg<"rd97">;
-def RD98 : PTXReg<"rd98">;
-def RD99 : PTXReg<"rd99">;
-def RD100 : PTXReg<"rd100">;
-def RD101 : PTXReg<"rd101">;
-def RD102 : PTXReg<"rd102">;
-def RD103 : PTXReg<"rd103">;
-def RD104 : PTXReg<"rd104">;
-def RD105 : PTXReg<"rd105">;
-def RD106 : PTXReg<"rd106">;
-def RD107 : PTXReg<"rd107">;
-def RD108 : PTXReg<"rd108">;
-def RD109 : PTXReg<"rd109">;
-def RD110 : PTXReg<"rd110">;
-def RD111 : PTXReg<"rd111">;
-def RD112 : PTXReg<"rd112">;
-def RD113 : PTXReg<"rd113">;
-def RD114 : PTXReg<"rd114">;
-def RD115 : PTXReg<"rd115">;
-def RD116 : PTXReg<"rd116">;
-def RD117 : PTXReg<"rd117">;
-def RD118 : PTXReg<"rd118">;
-def RD119 : PTXReg<"rd119">;
-def RD120 : PTXReg<"rd120">;
-def RD121 : PTXReg<"rd121">;
-def RD122 : PTXReg<"rd122">;
-def RD123 : PTXReg<"rd123">;
-def RD124 : PTXReg<"rd124">;
-def RD125 : PTXReg<"rd125">;
-def RD126 : PTXReg<"rd126">;
-def RD127 : PTXReg<"rd127">;
+// The generated register info code emits warnings for empty register classes
+// (e.g. zero-length arrays), so we define a dummy register here just to
+// prevent those warnings.
+def DUMMY_REG : PTXReg<"R0">;
//===----------------------------------------------------------------------===//
// Register classes
//===----------------------------------------------------------------------===//
-def RegPred : RegisterClass<"PTX", [i1], 8, (sequence "P%u", 0, 127)>;
-def RegI16 : RegisterClass<"PTX", [i16], 16, (sequence "RH%u", 0, 127)>;
-def RegI32 : RegisterClass<"PTX", [i32], 32, (sequence "R%u", 0, 127)>;
-def RegI64 : RegisterClass<"PTX", [i64], 64, (sequence "RD%u", 0, 127)>;
-def RegF32 : RegisterClass<"PTX", [f32], 32, (sequence "R%u", 0, 127)>;
-def RegF64 : RegisterClass<"PTX", [f64], 64, (sequence "RD%u", 0, 127)>;
+def RegPred : RegisterClass<"PTX", [i1], 8, (add DUMMY_REG)>;
+def RegI16 : RegisterClass<"PTX", [i16], 16, (add DUMMY_REG)>;
+def RegI32 : RegisterClass<"PTX", [i32], 32, (add DUMMY_REG)>;
+def RegI64 : RegisterClass<"PTX", [i64], 64, (add DUMMY_REG)>;
+def RegF32 : RegisterClass<"PTX", [f32], 32, (add DUMMY_REG)>;
+def RegF64 : RegisterClass<"PTX", [f64], 64, (add DUMMY_REG)>;
+
diff --git a/lib/Target/PTX/PTXSelectionDAGInfo.cpp b/lib/Target/PTX/PTXSelectionDAGInfo.cpp
new file mode 100644
index 0000000..50ef14a
--- /dev/null
+++ b/lib/Target/PTX/PTXSelectionDAGInfo.cpp
@@ -0,0 +1,149 @@
+//===-- PTXSelectionDAGInfo.cpp - PTX SelectionDAG Info -------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the PTXSelectionDAGInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "ptx-selectiondag-info"
+#include "PTXTargetMachine.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+using namespace llvm;
+
+PTXSelectionDAGInfo::PTXSelectionDAGInfo(const TargetMachine &TM)
+ : TargetSelectionDAGInfo(TM),
+ Subtarget(&TM.getSubtarget<PTXSubtarget>()) {
+}
+
+PTXSelectionDAGInfo::~PTXSelectionDAGInfo() {
+}
+
+SDValue
+PTXSelectionDAGInfo::EmitTargetCodeForMemcpy(SelectionDAG &DAG, DebugLoc dl,
+ SDValue Chain,
+ SDValue Dst, SDValue Src,
+ SDValue Size, unsigned Align,
+ bool isVolatile, bool AlwaysInline,
+ MachinePointerInfo DstPtrInfo,
+ MachinePointerInfo SrcPtrInfo) const {
+ // Do repeated 4-byte loads and stores. To be improved.
+ // This requires 4-byte alignment.
+ if ((Align & 3) != 0)
+ return SDValue();
+ // This requires the copy size to be a constant, preferably
+ // within a subtarget-specific limit.
+ ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size);
+ if (!ConstantSize)
+ return SDValue();
+ uint64_t SizeVal = ConstantSize->getZExtValue();
+ // Always inline memcpys. In PTX, we do not have a C library that provides
+ // a memcpy function.
+ //if (!AlwaysInline)
+ // return SDValue();
+
+ unsigned BytesLeft = SizeVal & 3;
+ unsigned NumMemOps = SizeVal >> 2;
+ unsigned EmittedNumMemOps = 0;
+ EVT VT = MVT::i32;
+ unsigned VTSize = 4;
+ unsigned i = 0;
+ const unsigned MAX_LOADS_IN_LDM = 6;
+ SDValue TFOps[MAX_LOADS_IN_LDM];
+ SDValue Loads[MAX_LOADS_IN_LDM];
+ uint64_t SrcOff = 0, DstOff = 0;
+ EVT PointerType = Subtarget->is64Bit() ? MVT::i64 : MVT::i32;
+
+  // Emit up to MAX_LOADS_IN_LDM loads, then a TokenFactor barrier, then the
+  // same number of stores. (This structure is inherited from the ARM
+  // implementation, where the loads and stores later combine into ldm/stm.)
+ while (EmittedNumMemOps < NumMemOps) {
+ for (i = 0;
+ i < MAX_LOADS_IN_LDM && EmittedNumMemOps + i < NumMemOps; ++i) {
+ Loads[i] = DAG.getLoad(VT, dl, Chain,
+ DAG.getNode(ISD::ADD, dl, PointerType, Src,
+ DAG.getConstant(SrcOff, PointerType)),
+ SrcPtrInfo.getWithOffset(SrcOff), isVolatile,
+ false, 0);
+ TFOps[i] = Loads[i].getValue(1);
+ SrcOff += VTSize;
+ }
+ Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &TFOps[0], i);
+
+ for (i = 0;
+ i < MAX_LOADS_IN_LDM && EmittedNumMemOps + i < NumMemOps; ++i) {
+ TFOps[i] = DAG.getStore(Chain, dl, Loads[i],
+ DAG.getNode(ISD::ADD, dl, PointerType, Dst,
+ DAG.getConstant(DstOff, PointerType)),
+ DstPtrInfo.getWithOffset(DstOff),
+ isVolatile, false, 0);
+ DstOff += VTSize;
+ }
+ Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &TFOps[0], i);
+
+ EmittedNumMemOps += i;
+ }
+
+ if (BytesLeft == 0)
+ return Chain;
+
+ // Issue loads / stores for the trailing (1 - 3) bytes.
+ unsigned BytesLeftSave = BytesLeft;
+ i = 0;
+ while (BytesLeft) {
+ if (BytesLeft >= 2) {
+ VT = MVT::i16;
+ VTSize = 2;
+ } else {
+ VT = MVT::i8;
+ VTSize = 1;
+ }
+
+ Loads[i] = DAG.getLoad(VT, dl, Chain,
+ DAG.getNode(ISD::ADD, dl, PointerType, Src,
+ DAG.getConstant(SrcOff, PointerType)),
+ SrcPtrInfo.getWithOffset(SrcOff), false, false, 0);
+ TFOps[i] = Loads[i].getValue(1);
+ ++i;
+ SrcOff += VTSize;
+ BytesLeft -= VTSize;
+ }
+ Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &TFOps[0], i);
+
+ i = 0;
+ BytesLeft = BytesLeftSave;
+ while (BytesLeft) {
+ if (BytesLeft >= 2) {
+ VT = MVT::i16;
+ VTSize = 2;
+ } else {
+ VT = MVT::i8;
+ VTSize = 1;
+ }
+
+ TFOps[i] = DAG.getStore(Chain, dl, Loads[i],
+ DAG.getNode(ISD::ADD, dl, PointerType, Dst,
+ DAG.getConstant(DstOff, PointerType)),
+ DstPtrInfo.getWithOffset(DstOff), false, false, 0);
+ ++i;
+ DstOff += VTSize;
+ BytesLeft -= VTSize;
+ }
+ return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &TFOps[0], i);
+}
+
+SDValue PTXSelectionDAGInfo::
+EmitTargetCodeForMemset(SelectionDAG &DAG, DebugLoc dl,
+ SDValue Chain, SDValue Dst,
+ SDValue Src, SDValue Size,
+ unsigned Align, bool isVolatile,
+ MachinePointerInfo DstPtrInfo) const {
+ llvm_unreachable("memset lowering not implemented for PTX yet");
+}
+
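To make the splitting arithmetic in EmitTargetCodeForMemcpy concrete, here is a standalone sketch (plain C++, no LLVM dependencies, sample size chosen for illustration) of how an 11-byte, 4-byte-aligned copy is decomposed: two full i32 load/store pairs, then an i16 and an i8 for the trailing 3 bytes.

    #include <cstdio>

    int main() {
      unsigned SizeVal   = 11;           // hypothetical constant copy size
      unsigned NumMemOps = SizeVal >> 2; // 2 full 4-byte load/store pairs
      unsigned BytesLeft = SizeVal & 3;  // 3 trailing bytes

      std::printf("4-byte ops: %u\n", NumMemOps);
      while (BytesLeft) {
        unsigned VTSize = (BytesLeft >= 2) ? 2 : 1; // i16 first, then i8
        std::printf("trailing op: %u byte(s)\n", VTSize);
        BytesLeft -= VTSize;
      }
      return 0;
    }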
diff --git a/lib/Target/PTX/PTXSelectionDAGInfo.h b/lib/Target/PTX/PTXSelectionDAGInfo.h
new file mode 100644
index 0000000..e0c7167
--- /dev/null
+++ b/lib/Target/PTX/PTXSelectionDAGInfo.h
@@ -0,0 +1,53 @@
+//===-- PTXSelectionDAGInfo.h - PTX SelectionDAG Info -----------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the PTX subclass for TargetSelectionDAGInfo.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef PTXSELECTIONDAGINFO_H
+#define PTXSELECTIONDAGINFO_H
+
+#include "llvm/Target/TargetSelectionDAGInfo.h"
+
+namespace llvm {
+
+/// PTXSelectionDAGInfo - TargetSelectionDAGInfo sub-class for the PTX target.
+/// At the moment, this is mostly just a copy of ARMSelectionDAGInfo.
+class PTXSelectionDAGInfo : public TargetSelectionDAGInfo {
+ /// Subtarget - Keep a pointer to the PTXSubtarget around so that we can
+ /// make the right decision when generating code for different targets.
+ const PTXSubtarget *Subtarget;
+
+public:
+ explicit PTXSelectionDAGInfo(const TargetMachine &TM);
+ ~PTXSelectionDAGInfo();
+
+ virtual
+ SDValue EmitTargetCodeForMemcpy(SelectionDAG &DAG, DebugLoc dl,
+ SDValue Chain,
+ SDValue Dst, SDValue Src,
+ SDValue Size, unsigned Align,
+ bool isVolatile, bool AlwaysInline,
+ MachinePointerInfo DstPtrInfo,
+ MachinePointerInfo SrcPtrInfo) const;
+
+ virtual
+ SDValue EmitTargetCodeForMemset(SelectionDAG &DAG, DebugLoc dl,
+ SDValue Chain,
+ SDValue Op1, SDValue Op2,
+ SDValue Op3, unsigned Align,
+ bool isVolatile,
+ MachinePointerInfo DstPtrInfo) const;
+};
+
+}
+
+#endif
+
diff --git a/lib/Target/PTX/PTXSubtarget.cpp b/lib/Target/PTX/PTXSubtarget.cpp
index 8ec646e..1eb57d2 100644
--- a/lib/Target/PTX/PTXSubtarget.cpp
+++ b/lib/Target/PTX/PTXSubtarget.cpp
@@ -14,7 +14,7 @@
#include "PTXSubtarget.h"
#include "PTX.h"
#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Target/TargetRegistry.h"
+#include "llvm/Support/TargetRegistry.h"
#define GET_SUBTARGETINFO_TARGET_DESC
#define GET_SUBTARGETINFO_CTOR
diff --git a/lib/Target/PTX/PTXSubtarget.h b/lib/Target/PTX/PTXSubtarget.h
index 0921f1f..b946d7c 100644
--- a/lib/Target/PTX/PTXSubtarget.h
+++ b/lib/Target/PTX/PTXSubtarget.h
@@ -114,7 +114,16 @@ class StringRef;
(PTXTarget >= PTX_COMPUTE_2_0 && PTXTarget < PTX_LAST_COMPUTE);
}
- void ParseSubtargetFeatures(StringRef CPU, StringRef FS);
+ bool callsAreHandled() const {
+ return (PTXTarget >= PTX_SM_2_0 && PTXTarget < PTX_LAST_SM) ||
+ (PTXTarget >= PTX_COMPUTE_2_0 && PTXTarget < PTX_LAST_COMPUTE);
+ }
+
+ bool emitPtrAttribute() const {
+ return PTXVersion >= PTX_VERSION_2_2;
+ }
+
+ void ParseSubtargetFeatures(StringRef CPU, StringRef FS);
}; // class PTXSubtarget
} // namespace llvm
diff --git a/lib/Target/PTX/PTXTargetMachine.cpp b/lib/Target/PTX/PTXTargetMachine.cpp
index ab926e0..449a3d9 100644
--- a/lib/Target/PTX/PTXTargetMachine.cpp
+++ b/lib/Target/PTX/PTXTargetMachine.cpp
@@ -14,8 +14,32 @@
#include "PTX.h"
#include "PTXTargetMachine.h"
#include "llvm/PassManager.h"
-#include "llvm/Target/TargetRegistry.h"
+#include "llvm/Analysis/Passes.h"
+#include "llvm/Analysis/Verifier.h"
+#include "llvm/Assembly/PrintModulePass.h"
+#include "llvm/ADT/OwningPtr.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/CodeGen/MachineFunctionAnalysis.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetLoweringObjectFile.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Support/Debug.h"
+
using namespace llvm;
@@ -25,7 +49,7 @@ namespace llvm {
bool useCFI,
MCInstPrinter *InstPrint,
MCCodeEmitter *CE,
- TargetAsmBackend *TAB,
+ MCAsmBackend *MAB,
bool ShowInst);
}
@@ -43,34 +67,47 @@ namespace {
"e-p:32:32-i64:32:32-f64:32:32-v128:32:128-v64:32:64-n32:64";
const char* DataLayout64 =
"e-p:64:64-i64:32:32-f64:32:32-v128:32:128-v64:32:64-n32:64";
+
+ // Copied from LLVMTargetMachine.cpp
+ void printNoVerify(PassManagerBase &PM, const char *Banner) {
+ if (PrintMachineCode)
+ PM.add(createMachineFunctionPrinterPass(dbgs(), Banner));
+ }
+
+ void printAndVerify(PassManagerBase &PM,
+ const char *Banner) {
+ if (PrintMachineCode)
+ PM.add(createMachineFunctionPrinterPass(dbgs(), Banner));
+
+ //if (VerifyMachineCode)
+ // PM.add(createMachineVerifierPass(Banner));
+ }
}
// DataLayout and FrameLowering are filled with dummy data
PTXTargetMachine::PTXTargetMachine(const Target &T,
- const std::string &TT,
- const std::string &CPU,
- const std::string &FS,
+ StringRef TT, StringRef CPU, StringRef FS,
+ Reloc::Model RM, CodeModel::Model CM,
bool is64Bit)
- : LLVMTargetMachine(T, TT, CPU, FS),
+ : LLVMTargetMachine(T, TT, CPU, FS, RM, CM),
DataLayout(is64Bit ? DataLayout64 : DataLayout32),
Subtarget(TT, CPU, FS, is64Bit),
FrameLowering(Subtarget),
InstrInfo(*this),
+ TSInfo(*this),
TLInfo(*this) {
}
-PTX32TargetMachine::PTX32TargetMachine(const Target &T,
- const std::string& TT,
- const std::string& CPU,
- const std::string& FS)
- : PTXTargetMachine(T, TT, CPU, FS, false) {
+PTX32TargetMachine::PTX32TargetMachine(const Target &T, StringRef TT,
+ StringRef CPU, StringRef FS,
+ Reloc::Model RM, CodeModel::Model CM)
+ : PTXTargetMachine(T, TT, CPU, FS, RM, CM, false) {
}
-PTX64TargetMachine::PTX64TargetMachine(const Target &T,
- const std::string& TT,
- const std::string& CPU,
- const std::string& FS)
- : PTXTargetMachine(T, TT, CPU, FS, true) {
+PTX64TargetMachine::PTX64TargetMachine(const Target &T, StringRef TT,
+ StringRef CPU, StringRef FS,
+ Reloc::Model RM, CodeModel::Model CM)
+ : PTXTargetMachine(T, TT, CPU, FS, RM, CM, true) {
}
bool PTXTargetMachine::addInstSelector(PassManagerBase &PM,
@@ -82,6 +119,255 @@ bool PTXTargetMachine::addInstSelector(PassManagerBase &PM,
bool PTXTargetMachine::addPostRegAlloc(PassManagerBase &PM,
CodeGenOpt::Level OptLevel) {
  // PTXMFInfoExtract must run after register allocation!
+ //PM.add(createPTXMFInfoExtract(*this, OptLevel));
+ return false;
+}
+
+bool PTXTargetMachine::addPassesToEmitFile(PassManagerBase &PM,
+ formatted_raw_ostream &Out,
+ CodeGenFileType FileType,
+ CodeGenOpt::Level OptLevel,
+ bool DisableVerify) {
+ // This is mostly based on LLVMTargetMachine::addPassesToEmitFile
+
+ // Add common CodeGen passes.
+ MCContext *Context = 0;
+ if (addCommonCodeGenPasses(PM, OptLevel, DisableVerify, Context))
+ return true;
+ assert(Context != 0 && "Failed to get MCContext");
+
+ if (hasMCSaveTempLabels())
+ Context->setAllowTemporaryLabels(false);
+
+ const MCAsmInfo &MAI = *getMCAsmInfo();
+ const MCSubtargetInfo &STI = getSubtarget<MCSubtargetInfo>();
+ OwningPtr<MCStreamer> AsmStreamer;
+
+ switch (FileType) {
+ default: return true;
+ case CGFT_AssemblyFile: {
+ MCInstPrinter *InstPrinter =
+ getTarget().createMCInstPrinter(MAI.getAssemblerDialect(), MAI, STI);
+
+ // Create a code emitter if asked to show the encoding.
+ MCCodeEmitter *MCE = 0;
+ MCAsmBackend *MAB = 0;
+
+ MCStreamer *S = getTarget().createAsmStreamer(*Context, Out,
+ true, /* verbose asm */
+ hasMCUseLoc(),
+ hasMCUseCFI(),
+ InstPrinter,
+ MCE, MAB,
+ false /* show MC encoding */);
+ AsmStreamer.reset(S);
+ break;
+ }
+ case CGFT_ObjectFile: {
+ llvm_unreachable("Object file emission is not supported with PTX");
+ }
+ case CGFT_Null:
+    // The Null output is intended for performance analysis and testing, not
+    // for real users.
+ AsmStreamer.reset(createNullStreamer(*Context));
+ break;
+ }
+
+ // MC Logging
+ //AsmStreamer.reset(createLoggingStreamer(AsmStreamer.take(), errs()));
+
+ // Create the AsmPrinter, which takes ownership of AsmStreamer if successful.
+ FunctionPass *Printer = getTarget().createAsmPrinter(*this, *AsmStreamer);
+ if (Printer == 0)
+ return true;
+
+ // If successful, createAsmPrinter took ownership of AsmStreamer.
+ AsmStreamer.take();
+
+ PM.add(Printer);
+
+ PM.add(createGCInfoDeleter());
+ return false;
+}
+
+bool PTXTargetMachine::addCommonCodeGenPasses(PassManagerBase &PM,
+ CodeGenOpt::Level OptLevel,
+ bool DisableVerify,
+ MCContext *&OutContext) {
+ // Add standard LLVM codegen passes.
+ // This is derived from LLVMTargetMachine::addCommonCodeGenPasses, with some
+ // modifications for the PTX target.
+
+ // Standard LLVM-Level Passes.
+
+ // Basic AliasAnalysis support.
+ // Add TypeBasedAliasAnalysis before BasicAliasAnalysis so that
+ // BasicAliasAnalysis wins if they disagree. This is intended to help
+ // support "obvious" type-punning idioms.
+ PM.add(createTypeBasedAliasAnalysisPass());
+ PM.add(createBasicAliasAnalysisPass());
+
+ // Before running any passes, run the verifier to determine if the input
+ // coming from the front-end and/or optimizer is valid.
+ if (!DisableVerify)
+ PM.add(createVerifierPass());
+
+ // Run loop strength reduction before anything else.
+ if (OptLevel != CodeGenOpt::None) {
+ PM.add(createLoopStrengthReducePass(getTargetLowering()));
+ //PM.add(createPrintFunctionPass("\n\n*** Code after LSR ***\n", &dbgs()));
+ }
+
+ PM.add(createGCLoweringPass());
+
+ // Make sure that no unreachable blocks are instruction selected.
+ PM.add(createUnreachableBlockEliminationPass());
+
+ PM.add(createLowerInvokePass(getTargetLowering()));
+ // The lower invoke pass may create unreachable code. Remove it.
+ PM.add(createUnreachableBlockEliminationPass());
+
+ if (OptLevel != CodeGenOpt::None)
+ PM.add(createCodeGenPreparePass(getTargetLowering()));
+
+ PM.add(createStackProtectorPass(getTargetLowering()));
+
+ addPreISel(PM, OptLevel);
+
+ //PM.add(createPrintFunctionPass("\n\n"
+ // "*** Final LLVM Code input to ISel ***\n",
+ // &dbgs()));
+
+ // All passes which modify the LLVM IR are now complete; run the verifier
+ // to ensure that the IR is valid.
+ if (!DisableVerify)
+ PM.add(createVerifierPass());
+
+ // Standard Lower-Level Passes.
+
+ // Install a MachineModuleInfo class, which is an immutable pass that holds
+ // all the per-module stuff we're generating, including MCContext.
+ MachineModuleInfo *MMI = new MachineModuleInfo(*getMCAsmInfo(),
+ *getRegisterInfo(),
+ &getTargetLowering()->getObjFileLowering());
+ PM.add(MMI);
+ OutContext = &MMI->getContext(); // Return the MCContext specifically by-ref.
+
+ // Set up a MachineFunction for the rest of CodeGen to work on.
+ PM.add(new MachineFunctionAnalysis(*this, OptLevel));
+
+ // Ask the target for an isel.
+ if (addInstSelector(PM, OptLevel))
+ return true;
+
+ // Print the instruction selected machine code...
+ printAndVerify(PM, "After Instruction Selection");
+
+ // Expand pseudo-instructions emitted by ISel.
+ PM.add(createExpandISelPseudosPass());
+
+ // Pre-ra tail duplication.
+ if (OptLevel != CodeGenOpt::None) {
+ PM.add(createTailDuplicatePass(true));
+ printAndVerify(PM, "After Pre-RegAlloc TailDuplicate");
+ }
+
+ // Optimize PHIs before DCE: removing dead PHI cycles may make more
+ // instructions dead.
+ if (OptLevel != CodeGenOpt::None)
+ PM.add(createOptimizePHIsPass());
+
+ // If the target requests it, assign local variables to stack slots relative
+ // to one another and simplify frame index references where possible.
+ PM.add(createLocalStackSlotAllocationPass());
+
+ if (OptLevel != CodeGenOpt::None) {
+ // With optimization, dead code should already be eliminated. However
+ // there is one known exception: lowered code for arguments that are only
+ // used by tail calls, where the tail calls reuse the incoming stack
+ // arguments directly (see t11 in test/CodeGen/X86/sibcall.ll).
+ PM.add(createDeadMachineInstructionElimPass());
+ printAndVerify(PM, "After codegen DCE pass");
+
+ PM.add(createMachineLICMPass());
+ PM.add(createMachineCSEPass());
+ PM.add(createMachineSinkingPass());
+ printAndVerify(PM, "After Machine LICM, CSE and Sinking passes");
+
+ PM.add(createPeepholeOptimizerPass());
+ printAndVerify(PM, "After codegen peephole optimization pass");
+ }
+
+ // Run pre-ra passes.
+ if (addPreRegAlloc(PM, OptLevel))
+ printAndVerify(PM, "After PreRegAlloc passes");
+
+ // Perform register allocation.
+ PM.add(createPTXRegisterAllocator());
+ printAndVerify(PM, "After Register Allocation");
+
+ // Perform stack slot coloring and post-ra machine LICM.
+ if (OptLevel != CodeGenOpt::None) {
+ // FIXME: Re-enable coloring with register when it's capable of adding
+ // kill markers.
+ PM.add(createStackSlotColoringPass(false));
+
+ // FIXME: Post-RA LICM has asserts that fire on virtual registers.
+ // Run post-ra machine LICM to hoist reloads / remats.
+ //if (!DisablePostRAMachineLICM)
+ // PM.add(createMachineLICMPass(false));
+
+ printAndVerify(PM, "After StackSlotColoring and postra Machine LICM");
+ }
+
+ // Run post-ra passes.
+ if (addPostRegAlloc(PM, OptLevel))
+ printAndVerify(PM, "After PostRegAlloc passes");
+
+ PM.add(createExpandPostRAPseudosPass());
+ printAndVerify(PM, "After ExpandPostRAPseudos");
+
+ // Insert prolog/epilog code. Eliminate abstract frame index references...
+ PM.add(createPrologEpilogCodeInserter());
+ printAndVerify(PM, "After PrologEpilogCodeInserter");
+
+ // Run pre-sched2 passes.
+ if (addPreSched2(PM, OptLevel))
+ printAndVerify(PM, "After PreSched2 passes");
+
+ // Second pass scheduler.
+ if (OptLevel != CodeGenOpt::None) {
+ PM.add(createPostRAScheduler(OptLevel));
+ printAndVerify(PM, "After PostRAScheduler");
+ }
+
+ // Branch folding must be run after regalloc and prolog/epilog insertion.
+ if (OptLevel != CodeGenOpt::None) {
+ PM.add(createBranchFoldingPass(getEnableTailMergeDefault()));
+ printNoVerify(PM, "After BranchFolding");
+ }
+
+ // Tail duplication.
+ if (OptLevel != CodeGenOpt::None) {
+ PM.add(createTailDuplicatePass(false));
+ printNoVerify(PM, "After TailDuplicate");
+ }
+
+ PM.add(createGCMachineCodeAnalysisPass());
+
+ //if (PrintGCInfo)
+ // PM.add(createGCInfoPrinter(dbgs()));
+
+ if (OptLevel != CodeGenOpt::None) {
+ PM.add(createCodePlacementOptPass());
+ printNoVerify(PM, "After CodePlacementOpt");
+ }
+
+ if (addPreEmitPass(PM, OptLevel))
+ printNoVerify(PM, "After PreEmit passes");
+
PM.add(createPTXMFInfoExtract(*this, OptLevel));
+ PM.add(createPTXFPRoundingModePass(*this, OptLevel));
+
return false;
}
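Since PTXTargetMachine now takes relocation and code model arguments, a front-end would normally reach it through the generic TargetRegistry factory rather than constructing it directly. A rough driver sketch, assuming the LLVM 3.0-era createTargetMachine() signature; the "ptx32--" triple and "generic" CPU strings are illustrative assumptions, not values taken from this patch:

    #include "llvm/Support/TargetRegistry.h"
    #include "llvm/Target/TargetMachine.h"
    #include <string>

    llvm::TargetMachine *createPTX32TargetMachine() {
      // Assumes the PTX target has been initialized and linked in.
      std::string Error;
      const llvm::Target *T =
          llvm::TargetRegistry::lookupTarget("ptx32--", Error);
      if (!T)
        return 0; // target not available; Error holds the reason
      return T->createTargetMachine("ptx32--", "generic", "",
                                    llvm::Reloc::Default,
                                    llvm::CodeModel::Default);
    }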
diff --git a/lib/Target/PTX/PTXTargetMachine.h b/lib/Target/PTX/PTXTargetMachine.h
index ae42153..5b7c82b 100644
--- a/lib/Target/PTX/PTXTargetMachine.h
+++ b/lib/Target/PTX/PTXTargetMachine.h
@@ -17,6 +17,7 @@
#include "PTXISelLowering.h"
#include "PTXInstrInfo.h"
#include "PTXFrameLowering.h"
+#include "PTXSelectionDAGInfo.h"
#include "PTXSubtarget.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Target/TargetFrameLowering.h"
@@ -25,15 +26,17 @@
namespace llvm {
class PTXTargetMachine : public LLVMTargetMachine {
private:
- const TargetData DataLayout;
- PTXSubtarget Subtarget; // has to be initialized before FrameLowering
- PTXFrameLowering FrameLowering;
- PTXInstrInfo InstrInfo;
- PTXTargetLowering TLInfo;
+ const TargetData DataLayout;
+ PTXSubtarget Subtarget; // has to be initialized before FrameLowering
+ PTXFrameLowering FrameLowering;
+ PTXInstrInfo InstrInfo;
+ PTXSelectionDAGInfo TSInfo;
+ PTXTargetLowering TLInfo;
public:
- PTXTargetMachine(const Target &T, const std::string &TT,
- const std::string &CPU, const std::string &FS,
+ PTXTargetMachine(const Target &T, StringRef TT,
+ StringRef CPU, StringRef FS,
+ Reloc::Model RM, CodeModel::Model CM,
bool is64Bit);
virtual const TargetData *getTargetData() const { return &DataLayout; }
@@ -49,27 +52,62 @@ class PTXTargetMachine : public LLVMTargetMachine {
virtual const PTXTargetLowering *getTargetLowering() const {
return &TLInfo; }
+ virtual const PTXSelectionDAGInfo* getSelectionDAGInfo() const {
+ return &TSInfo;
+ }
+
virtual const PTXSubtarget *getSubtargetImpl() const { return &Subtarget; }
virtual bool addInstSelector(PassManagerBase &PM,
CodeGenOpt::Level OptLevel);
virtual bool addPostRegAlloc(PassManagerBase &PM,
CodeGenOpt::Level OptLevel);
+
+ // We override this method to supply our own set of codegen passes.
+ virtual bool addPassesToEmitFile(PassManagerBase &,
+ formatted_raw_ostream &,
+ CodeGenFileType,
+ CodeGenOpt::Level,
+ bool = true);
+
+ // Emission of machine code through JITCodeEmitter is not supported.
+ virtual bool addPassesToEmitMachineCode(PassManagerBase &,
+ JITCodeEmitter &,
+ CodeGenOpt::Level,
+ bool = true) {
+ return true;
+ }
+
+ // Emission of machine code through MCJIT is not supported.
+ virtual bool addPassesToEmitMC(PassManagerBase &,
+ MCContext *&,
+ raw_ostream &,
+ CodeGenOpt::Level,
+ bool = true) {
+ return true;
+ }
+
+ private:
+
+ bool addCommonCodeGenPasses(PassManagerBase &, CodeGenOpt::Level,
+ bool DisableVerify, MCContext *&OutCtx);
}; // class PTXTargetMachine
class PTX32TargetMachine : public PTXTargetMachine {
public:
- PTX32TargetMachine(const Target &T, const std::string &TT,
- const std::string& CPU, const std::string& FS);
+ PTX32TargetMachine(const Target &T, StringRef TT,
+ StringRef CPU, StringRef FS,
+ Reloc::Model RM, CodeModel::Model CM);
}; // class PTX32TargetMachine
class PTX64TargetMachine : public PTXTargetMachine {
public:
- PTX64TargetMachine(const Target &T, const std::string &TT,
- const std::string& CPU, const std::string& FS);
+ PTX64TargetMachine(const Target &T, StringRef TT,
+ StringRef CPU, StringRef FS,
+ Reloc::Model RM, CodeModel::Model CM);
}; // class PTX64TargetMachine
} // namespace llvm
diff --git a/lib/Target/PTX/TargetInfo/CMakeLists.txt b/lib/Target/PTX/TargetInfo/CMakeLists.txt
index 4b09cf5..2366e45 100644
--- a/lib/Target/PTX/TargetInfo/CMakeLists.txt
+++ b/lib/Target/PTX/TargetInfo/CMakeLists.txt
@@ -4,4 +4,10 @@ add_llvm_library(LLVMPTXInfo
PTXTargetInfo.cpp
)
-add_dependencies(LLVMPTXInfo PTXCodeGenTable_gen)
+add_llvm_library_dependencies(LLVMPTXInfo
+ LLVMMC
+ LLVMSupport
+ LLVMTarget
+ )
+
+add_dependencies(LLVMPTXInfo PTXCommonTableGen)
diff --git a/lib/Target/PTX/TargetInfo/PTXTargetInfo.cpp b/lib/Target/PTX/TargetInfo/PTXTargetInfo.cpp
index 9df6c75..09a2735 100644
--- a/lib/Target/PTX/TargetInfo/PTXTargetInfo.cpp
+++ b/lib/Target/PTX/TargetInfo/PTXTargetInfo.cpp
@@ -9,7 +9,7 @@
#include "PTX.h"
#include "llvm/Module.h"
-#include "llvm/Target/TargetRegistry.h"
+#include "llvm/Support/TargetRegistry.h"
using namespace llvm;
diff --git a/lib/Target/PTX/generate-register-td.py b/lib/Target/PTX/generate-register-td.py
deleted file mode 100755
index 1528690..0000000
--- a/lib/Target/PTX/generate-register-td.py
+++ /dev/null
@@ -1,163 +0,0 @@
-#!/usr/bin/env python
-##===- generate-register-td.py --------------------------------*-python-*--===##
-##
-## The LLVM Compiler Infrastructure
-##
-## This file is distributed under the University of Illinois Open Source
-## License. See LICENSE.TXT for details.
-##
-##===----------------------------------------------------------------------===##
-##
-## This file describes the PTX register file generator.
-##
-##===----------------------------------------------------------------------===##
-
-from sys import argv, exit, stdout
-
-
-if len(argv) != 5:
- print('Usage: generate-register-td.py <num_preds> <num_16> <num_32> <num_64>')
- exit(1)
-
-try:
- num_pred = int(argv[1])
- num_16bit = int(argv[2])
- num_32bit = int(argv[3])
- num_64bit = int(argv[4])
-except:
- print('ERROR: Invalid integer parameter')
- exit(1)
-
-## Print the register definition file
-td_file = open('PTXRegisterInfo.td', 'w')
-
-td_file.write('''
-//===- PTXRegisterInfo.td - PTX Register defs ----------------*- tblgen -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-//===----------------------------------------------------------------------===//
-// Declarations that describe the PTX register file
-//===----------------------------------------------------------------------===//
-
-class PTXReg<string n> : Register<n> {
- let Namespace = "PTX";
-}
-
-//===----------------------------------------------------------------------===//
-// Registers
-//===----------------------------------------------------------------------===//
-''')
-
-
-# Print predicate registers
-td_file.write('\n///===- Predicate Registers -----------------------------------------------===//\n\n')
-for r in range(0, num_pred):
- td_file.write('def P%d : PTXReg<"p%d">;\n' % (r, r))
-
-# Print 16-bit registers
-td_file.write('\n///===- 16-Bit Registers --------------------------------------------------===//\n\n')
-for r in range(0, num_16bit):
- td_file.write('def RH%d : PTXReg<"rh%d">;\n' % (r, r))
-
-# Print 32-bit registers
-td_file.write('\n///===- 32-Bit Registers --------------------------------------------------===//\n\n')
-for r in range(0, num_32bit):
- td_file.write('def R%d : PTXReg<"r%d">;\n' % (r, r))
-
-# Print 64-bit registers
-td_file.write('\n///===- 64-Bit Registers --------------------------------------------------===//\n\n')
-for r in range(0, num_64bit):
- td_file.write('def RD%d : PTXReg<"rd%d">;\n' % (r, r))
-
-
-td_file.write('''
-//===----------------------------------------------------------------------===//
-// Register classes
-//===----------------------------------------------------------------------===//
-''')
-
-
-# Print register classes
-
-td_file.write('def RegPred : RegisterClass<"PTX", [i1], 8, (sequence "P%%u", 0, %d)>;\n' % (num_pred-1))
-td_file.write('def RegI16 : RegisterClass<"PTX", [i16], 16, (sequence "RH%%u", 0, %d)>;\n' % (num_16bit-1))
-td_file.write('def RegI32 : RegisterClass<"PTX", [i32], 32, (sequence "R%%u", 0, %d)>;\n' % (num_32bit-1))
-td_file.write('def RegI64 : RegisterClass<"PTX", [i64], 64, (sequence "RD%%u", 0, %d)>;\n' % (num_64bit-1))
-td_file.write('def RegF32 : RegisterClass<"PTX", [f32], 32, (sequence "R%%u", 0, %d)>;\n' % (num_32bit-1))
-td_file.write('def RegF64 : RegisterClass<"PTX", [f64], 64, (sequence "RD%%u", 0, %d)>;\n' % (num_64bit-1))
-
-
-td_file.close()
-
-## Now write the PTXCallingConv.td file
-td_file = open('PTXCallingConv.td', 'w')
-
-# Reserve 10% of the available registers for return values, and the other 90%
-# for parameters
-num_ret_pred = int(0.1 * num_pred)
-num_ret_16bit = int(0.1 * num_16bit)
-num_ret_32bit = int(0.1 * num_32bit)
-num_ret_64bit = int(0.1 * num_64bit)
-num_param_pred = num_pred - num_ret_pred
-num_param_16bit = num_16bit - num_ret_16bit
-num_param_32bit = num_32bit - num_ret_32bit
-num_param_64bit = num_64bit - num_ret_64bit
-
-param_regs_pred = [('P%d' % (i+num_ret_pred)) for i in range(0, num_param_pred)]
-ret_regs_pred = ['P%d' % i for i in range(0, num_ret_pred)]
-param_regs_16bit = [('RH%d' % (i+num_ret_16bit)) for i in range(0, num_param_16bit)]
-ret_regs_16bit = ['RH%d' % i for i in range(0, num_ret_16bit)]
-param_regs_32bit = [('R%d' % (i+num_ret_32bit)) for i in range(0, num_param_32bit)]
-ret_regs_32bit = ['R%d' % i for i in range(0, num_ret_32bit)]
-param_regs_64bit = [('RD%d' % (i+num_ret_64bit)) for i in range(0, num_param_64bit)]
-ret_regs_64bit = ['RD%d' % i for i in range(0, num_ret_64bit)]
-
-param_list_pred = reduce(lambda x, y: '%s, %s' % (x, y), param_regs_pred)
-ret_list_pred = reduce(lambda x, y: '%s, %s' % (x, y), ret_regs_pred)
-param_list_16bit = reduce(lambda x, y: '%s, %s' % (x, y), param_regs_16bit)
-ret_list_16bit = reduce(lambda x, y: '%s, %s' % (x, y), ret_regs_16bit)
-param_list_32bit = reduce(lambda x, y: '%s, %s' % (x, y), param_regs_32bit)
-ret_list_32bit = reduce(lambda x, y: '%s, %s' % (x, y), ret_regs_32bit)
-param_list_64bit = reduce(lambda x, y: '%s, %s' % (x, y), param_regs_64bit)
-ret_list_64bit = reduce(lambda x, y: '%s, %s' % (x, y), ret_regs_64bit)
-
-td_file.write('''
-//===--- PTXCallingConv.td - Calling Conventions -----------*- tablegen -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This describes the calling conventions for the PTX architecture.
-//
-//===----------------------------------------------------------------------===//
-
-// PTX Formal Parameter Calling Convention
-def CC_PTX : CallingConv<[
- CCIfType<[i1], CCAssignToReg<[%s]>>,
- CCIfType<[i16], CCAssignToReg<[%s]>>,
- CCIfType<[i32,f32], CCAssignToReg<[%s]>>,
- CCIfType<[i64,f64], CCAssignToReg<[%s]>>
-]>;
-
-// PTX Return Value Calling Convention
-def RetCC_PTX : CallingConv<[
- CCIfType<[i1], CCAssignToReg<[%s]>>,
- CCIfType<[i16], CCAssignToReg<[%s]>>,
- CCIfType<[i32,f32], CCAssignToReg<[%s]>>,
- CCIfType<[i64,f64], CCAssignToReg<[%s]>>
-]>;
-''' % (param_list_pred, param_list_16bit, param_list_32bit, param_list_64bit,
- ret_list_pred, ret_list_16bit, ret_list_32bit, ret_list_64bit))
-
-
-td_file.close()
diff --git a/lib/Target/PowerPC/CMakeLists.txt b/lib/Target/PowerPC/CMakeLists.txt
index d1dda37..73b4aba 100644
--- a/lib/Target/PowerPC/CMakeLists.txt
+++ b/lib/Target/PowerPC/CMakeLists.txt
@@ -1,16 +1,16 @@
set(LLVM_TARGET_DEFINITIONS PPC.td)
-tablegen(PPCGenAsmWriter.inc -gen-asm-writer)
-tablegen(PPCGenCodeEmitter.inc -gen-emitter)
-tablegen(PPCGenMCCodeEmitter.inc -gen-emitter -mc-emitter)
-tablegen(PPCGenRegisterInfo.inc -gen-register-info)
-tablegen(PPCGenInstrInfo.inc -gen-instr-info)
-tablegen(PPCGenDAGISel.inc -gen-dag-isel)
-tablegen(PPCGenCallingConv.inc -gen-callingconv)
-tablegen(PPCGenSubtargetInfo.inc -gen-subtarget)
+llvm_tablegen(PPCGenAsmWriter.inc -gen-asm-writer)
+llvm_tablegen(PPCGenCodeEmitter.inc -gen-emitter)
+llvm_tablegen(PPCGenMCCodeEmitter.inc -gen-emitter -mc-emitter)
+llvm_tablegen(PPCGenRegisterInfo.inc -gen-register-info)
+llvm_tablegen(PPCGenInstrInfo.inc -gen-instr-info)
+llvm_tablegen(PPCGenDAGISel.inc -gen-dag-isel)
+llvm_tablegen(PPCGenCallingConv.inc -gen-callingconv)
+llvm_tablegen(PPCGenSubtargetInfo.inc -gen-subtarget)
+add_public_tablegen_target(PowerPCCommonTableGen)
add_llvm_target(PowerPCCodeGen
- PPCAsmBackend.cpp
PPCAsmPrinter.cpp
PPCBranchSelector.cpp
PPCCodeEmitter.cpp
@@ -20,15 +20,27 @@ add_llvm_target(PowerPCCodeGen
PPCISelLowering.cpp
PPCFrameLowering.cpp
PPCJITInfo.cpp
- PPCMCCodeEmitter.cpp
PPCMCInstLower.cpp
- PPCPredicates.cpp
PPCRegisterInfo.cpp
PPCSubtarget.cpp
PPCTargetMachine.cpp
PPCSelectionDAGInfo.cpp
)
+add_llvm_library_dependencies(LLVMPowerPCCodeGen
+ LLVMAnalysis
+ LLVMAsmPrinter
+ LLVMCodeGen
+ LLVMCore
+ LLVMMC
+ LLVMPowerPCAsmPrinter
+ LLVMPowerPCDesc
+ LLVMPowerPCInfo
+ LLVMSelectionDAG
+ LLVMSupport
+ LLVMTarget
+ )
+
add_subdirectory(InstPrinter)
add_subdirectory(TargetInfo)
add_subdirectory(MCTargetDesc)
diff --git a/lib/Target/PowerPC/InstPrinter/CMakeLists.txt b/lib/Target/PowerPC/InstPrinter/CMakeLists.txt
index 389ea77..1d857e2 100644
--- a/lib/Target/PowerPC/InstPrinter/CMakeLists.txt
+++ b/lib/Target/PowerPC/InstPrinter/CMakeLists.txt
@@ -3,4 +3,10 @@ include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/
add_llvm_library(LLVMPowerPCAsmPrinter
PPCInstPrinter.cpp
)
-add_dependencies(LLVMPowerPCAsmPrinter PowerPCCodeGenTable_gen)
+
+add_llvm_library_dependencies(LLVMPowerPCAsmPrinter
+ LLVMMC
+ LLVMSupport
+ )
+
+add_dependencies(LLVMPowerPCAsmPrinter PowerPCCommonTableGen)
diff --git a/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp b/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp
index 1a9bd76..b6a0835 100644
--- a/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp
+++ b/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp
@@ -13,7 +13,8 @@
#define DEBUG_TYPE "asm-printer"
#include "PPCInstPrinter.h"
-#include "PPCPredicates.h"
+#include "MCTargetDesc/PPCBaseInfo.h"
+#include "MCTargetDesc/PPCPredicates.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
#include "llvm/Support/raw_ostream.h"
@@ -30,7 +31,8 @@ void PPCInstPrinter::printRegName(raw_ostream &OS, unsigned RegNo) const {
OS << getRegisterName(RegNo);
}
-void PPCInstPrinter::printInst(const MCInst *MI, raw_ostream &O) {
+void PPCInstPrinter::printInst(const MCInst *MI, raw_ostream &O,
+ StringRef Annot) {
// Check for slwi/srwi mnemonics.
if (MI->getOpcode() == PPC::RLWINM) {
unsigned char SH = MI->getOperand(2).getImm();
@@ -49,6 +51,8 @@ void PPCInstPrinter::printInst(const MCInst *MI, raw_ostream &O) {
O << ", ";
printOperand(MI, 1, O);
O << ", " << (unsigned int)SH;
+
+ printAnnotation(O, Annot);
return;
}
}
@@ -59,6 +63,7 @@ void PPCInstPrinter::printInst(const MCInst *MI, raw_ostream &O) {
printOperand(MI, 0, O);
O << ", ";
printOperand(MI, 1, O);
+ printAnnotation(O, Annot);
return;
}
@@ -72,11 +77,13 @@ void PPCInstPrinter::printInst(const MCInst *MI, raw_ostream &O) {
O << ", ";
printOperand(MI, 1, O);
O << ", " << (unsigned int)SH;
+ printAnnotation(O, Annot);
return;
}
}
printInstruction(MI, O);
+ printAnnotation(O, Annot);
}
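
Note on the hunks above: printInst changes because the LLVM 3.0 MCInstPrinter interface grew a third parameter. The streamer can attach an annotation (typically a debug comment) to each instruction, and the printer is responsible for emitting it, which is why every early return in the slwi/srwi/mr special cases gains a printAnnotation call. A minimal sketch of the contract (MyInstPrinter and tryPrintAlias are hypothetical stand-ins, not from this patch):

    // Every path that finishes printing an instruction must also emit the
    // annotation, or comments attached by the streamer are silently dropped.
    void MyInstPrinter::printInst(const MCInst *MI, raw_ostream &O,
                                  StringRef Annot) {
      if (tryPrintAlias(MI, O)) {   // hypothetical mnemonic-alias fast path
        printAnnotation(O, Annot);
        return;
      }
      printInstruction(MI, O);      // TableGen-generated printer
      printAnnotation(O, Annot);
    }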
diff --git a/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h b/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h
index d022a44..4ed4b76 100644
--- a/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h
+++ b/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h
@@ -32,7 +32,7 @@ public:
}
virtual void printRegName(raw_ostream &OS, unsigned RegNo) const;
- virtual void printInst(const MCInst *MI, raw_ostream &O);
+ virtual void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot);
virtual StringRef getOpcodeName(unsigned Opcode) const;
static const char *getInstructionName(unsigned Opcode);
diff --git a/lib/Target/PowerPC/MCTargetDesc/CMakeLists.txt b/lib/Target/PowerPC/MCTargetDesc/CMakeLists.txt
index a1b8166..c4041db 100644
--- a/lib/Target/PowerPC/MCTargetDesc/CMakeLists.txt
+++ b/lib/Target/PowerPC/MCTargetDesc/CMakeLists.txt
@@ -1,4 +1,16 @@
add_llvm_library(LLVMPowerPCDesc
+ PPCAsmBackend.cpp
PPCMCTargetDesc.cpp
PPCMCAsmInfo.cpp
+ PPCMCCodeEmitter.cpp
+ PPCPredicates.cpp
)
+
+add_llvm_library_dependencies(LLVMPowerPCDesc
+ LLVMMC
+ LLVMPowerPCAsmPrinter
+ LLVMPowerPCInfo
+ LLVMSupport
+ )
+
+add_dependencies(LLVMPowerPCDesc PowerPCCommonTableGen)
diff --git a/lib/Target/PowerPC/PPCAsmBackend.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp
index 4b8cbb7..9f2fd6d 100644
--- a/lib/Target/PowerPC/PPCAsmBackend.cpp
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp
@@ -7,17 +7,43 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/Target/TargetAsmBackend.h"
-#include "PPC.h"
-#include "PPCFixupKinds.h"
+#include "llvm/MC/MCAsmBackend.h"
+#include "MCTargetDesc/PPCMCTargetDesc.h"
+#include "MCTargetDesc/PPCFixupKinds.h"
+#include "llvm/MC/MCELFObjectWriter.h"
#include "llvm/MC/MCMachObjectWriter.h"
#include "llvm/MC/MCSectionMachO.h"
#include "llvm/MC/MCObjectWriter.h"
#include "llvm/MC/MCValue.h"
#include "llvm/Object/MachOFormat.h"
-#include "llvm/Target/TargetRegistry.h"
+#include "llvm/Support/ELF.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/TargetRegistry.h"
using namespace llvm;
+static unsigned adjustFixupValue(unsigned Kind, uint64_t Value) {
+ switch (Kind) {
+ default:
+ llvm_unreachable("Unknown fixup kind!");
+ case FK_Data_1:
+ case FK_Data_2:
+ case FK_Data_4:
+ return Value;
+ case PPC::fixup_ppc_brcond14:
+ return Value & 0x3ffc;
+ case PPC::fixup_ppc_br24:
+ return Value & 0x3fffffc;
+#if 0
+ case PPC::fixup_ppc_hi16:
+ return (Value >> 16) & 0xffff;
+#endif
+ case PPC::fixup_ppc_ha16:
+ return ((Value >> 16) + ((Value & 0x8000) ? 1 : 0)) & 0xffff;
+ case PPC::fixup_ppc_lo16:
+ return Value & 0xffff;
+ }
+}
+
namespace {
class PPCMachObjectWriter : public MCMachObjectTargetWriter {
public:
@@ -31,10 +57,17 @@ public:
MCValue Target, uint64_t &FixedValue) {}
};
-class PPCAsmBackend : public TargetAsmBackend {
+class PPCELFObjectWriter : public MCELFObjectTargetWriter {
+public:
+ PPCELFObjectWriter(bool Is64Bit, Triple::OSType OSType, uint16_t EMachine,
+ bool HasRelocationAddend, bool isLittleEndian)
+ : MCELFObjectTargetWriter(Is64Bit, OSType, EMachine, HasRelocationAddend) {}
+};
+
+class PPCAsmBackend : public MCAsmBackend {
const Target &TheTarget;
public:
- PPCAsmBackend(const Target &T) : TargetAsmBackend(), TheTarget(T) {}
+ PPCAsmBackend(const Target &T) : MCAsmBackend(), TheTarget(T) {}
unsigned getNumFixupKinds() const { return PPC::NumTargetFixupKinds; }
@@ -49,7 +82,7 @@ public:
};
if (Kind < FirstTargetFixupKind)
- return TargetAsmBackend::getFixupKindInfo(Kind);
+ return MCAsmBackend::getFixupKindInfo(Kind);
assert(unsigned(Kind - FirstTargetFixupKind) < getNumFixupKinds() &&
"Invalid kind!");
@@ -109,15 +142,50 @@ namespace {
return false;
}
};
+
+ class ELFPPCAsmBackend : public PPCAsmBackend {
+ Triple::OSType OSType;
+ public:
+ ELFPPCAsmBackend(const Target &T, Triple::OSType OSType) :
+ PPCAsmBackend(T), OSType(OSType) { }
+
+ void ApplyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize,
+ uint64_t Value) const {
+ Value = adjustFixupValue(Fixup.getKind(), Value);
+ if (!Value) return; // Doesn't change encoding.
+
+ unsigned Offset = Fixup.getOffset();
+
+ // For each byte of the fragment that the fixup touches, mask in the bits from
+ // the fixup value. The Value has been "split up" into the appropriate
+ // bitfields above.
+ for (unsigned i = 0; i != 4; ++i)
+ Data[Offset + i] |= uint8_t((Value >> ((4 - i - 1)*8)) & 0xff);
+ }
+
+ MCObjectWriter *createObjectWriter(raw_ostream &OS) const {
+ bool is64 = getPointerSize() == 8;
+ return createELFObjectWriter(new PPCELFObjectWriter(
+ /*Is64Bit=*/is64,
+ OSType,
+ is64 ? ELF::EM_PPC64 : ELF::EM_PPC,
+ /*addend*/ true, /*isLittleEndian*/ false),
+ OS, /*IsLittleEndian=*/false);
+ }
+
+ virtual bool doesSectionRequireSymbols(const MCSection &Section) const {
+ return false;
+ }
+ };
+
} // end anonymous namespace
-TargetAsmBackend *llvm::createPPCAsmBackend(const Target &T,
- const std::string &TT) {
+MCAsmBackend *llvm::createPPCAsmBackend(const Target &T, StringRef TT) {
if (Triple(TT).isOSDarwin())
return new DarwinPPCAsmBackend(T);
- return 0;
+ return new ELFPPCAsmBackend(T, Triple(TT).getOS());
}
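
The interesting arithmetic above is the fixup_ppc_ha16 case: consumers of the low half (addi and load/store displacements) sign-extend it, so the high half must be rounded up whenever bit 15 of the value is set, or the pair would recombine 0x10000 too low. A standalone worked check (the example value is mine, not from the patch):

    #include <cassert>
    #include <cstdint>

    // ha16/lo16 must satisfy (ha16 << 16) + (int16_t)lo16 == Value, because
    // the instruction consuming lo16 sign-extends it.
    int main() {
      uint32_t Value = 0x12348000;
      uint32_t lo16 = Value & 0xffff;                               // 0x8000
      uint32_t ha16 =
          ((Value >> 16) + ((Value & 0x8000) ? 1 : 0)) & 0xffff;    // 0x1235
      assert((int32_t)(ha16 << 16) + (int16_t)lo16 == (int32_t)Value);
      return 0;
    }

The ApplyFixup loop in the new ELF backend then ORs the adjusted value into the instruction word most-significant byte first, consistent with the /*isLittleEndian*/ false it passes to the object writer.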
diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCBaseInfo.h b/lib/Target/PowerPC/MCTargetDesc/PPCBaseInfo.h
new file mode 100644
index 0000000..369bbdc
--- /dev/null
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCBaseInfo.h
@@ -0,0 +1,70 @@
+//===-- PPCBaseInfo.h - Top level definitions for PPC -------- --*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains small standalone helper functions and enum definitions for
+// the PPC target useful for the compiler back-end and the MC libraries.
+// As such, it deliberately does not include references to LLVM core
+// code gen types, passes, etc..
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef PPCBASEINFO_H
+#define PPCBASEINFO_H
+
+#include "PPCMCTargetDesc.h"
+#include "llvm/Support/ErrorHandling.h"
+
+namespace llvm {
+
+/// getPPCRegisterNumbering - Given the enum value for some register, e.g.
+/// PPC::F14, return the number that it corresponds to (e.g. 14).
+inline static unsigned getPPCRegisterNumbering(unsigned RegEnum) {
+ using namespace PPC;
+ switch (RegEnum) {
+ case 0: return 0;
+ case R0 : case X0 : case F0 : case V0 : case CR0: case CR0LT: return 0;
+ case R1 : case X1 : case F1 : case V1 : case CR1: case CR0GT: return 1;
+ case R2 : case X2 : case F2 : case V2 : case CR2: case CR0EQ: return 2;
+ case R3 : case X3 : case F3 : case V3 : case CR3: case CR0UN: return 3;
+ case R4 : case X4 : case F4 : case V4 : case CR4: case CR1LT: return 4;
+ case R5 : case X5 : case F5 : case V5 : case CR5: case CR1GT: return 5;
+ case R6 : case X6 : case F6 : case V6 : case CR6: case CR1EQ: return 6;
+ case R7 : case X7 : case F7 : case V7 : case CR7: case CR1UN: return 7;
+ case R8 : case X8 : case F8 : case V8 : case CR2LT: return 8;
+ case R9 : case X9 : case F9 : case V9 : case CR2GT: return 9;
+ case R10: case X10: case F10: case V10: case CR2EQ: return 10;
+ case R11: case X11: case F11: case V11: case CR2UN: return 11;
+ case R12: case X12: case F12: case V12: case CR3LT: return 12;
+ case R13: case X13: case F13: case V13: case CR3GT: return 13;
+ case R14: case X14: case F14: case V14: case CR3EQ: return 14;
+ case R15: case X15: case F15: case V15: case CR3UN: return 15;
+ case R16: case X16: case F16: case V16: case CR4LT: return 16;
+ case R17: case X17: case F17: case V17: case CR4GT: return 17;
+ case R18: case X18: case F18: case V18: case CR4EQ: return 18;
+ case R19: case X19: case F19: case V19: case CR4UN: return 19;
+ case R20: case X20: case F20: case V20: case CR5LT: return 20;
+ case R21: case X21: case F21: case V21: case CR5GT: return 21;
+ case R22: case X22: case F22: case V22: case CR5EQ: return 22;
+ case R23: case X23: case F23: case V23: case CR5UN: return 23;
+ case R24: case X24: case F24: case V24: case CR6LT: return 24;
+ case R25: case X25: case F25: case V25: case CR6GT: return 25;
+ case R26: case X26: case F26: case V26: case CR6EQ: return 26;
+ case R27: case X27: case F27: case V27: case CR6UN: return 27;
+ case R28: case X28: case F28: case V28: case CR7LT: return 28;
+ case R29: case X29: case F29: case V29: case CR7GT: return 29;
+ case R30: case X30: case F30: case V30: case CR7EQ: return 30;
+ case R31: case X31: case F31: case V31: case CR7UN: return 31;
+ default:
+ llvm_unreachable("Unhandled reg in PPCRegisterInfo::getRegisterNumbering!");
+ }
+}
+
+} // end namespace llvm;
+
+#endif
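
getPPCRegisterNumbering is the old PPCRegisterInfo::getRegisterNumbering (deleted later in this patch) hoisted into a header that both the MC layer and CodeGen can include; the MCTargetDesc split forbids PPCMCCodeEmitter from depending on CodeGen-layer headers such as PPCRegisterInfo.h. Usage is unchanged, e.g.:

    // Enum value -> hardware register number, callable from MC code with no
    // CodeGen dependency. Values follow the table above.
    unsigned N = getPPCRegisterNumbering(PPC::F14);   // 14
    unsigned M = getPPCRegisterNumbering(PPC::CR7EQ); // 30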
diff --git a/lib/Target/PowerPC/PPCFixupKinds.h b/lib/Target/PowerPC/MCTargetDesc/PPCFixupKinds.h
index b3c889e..b3c889e 100644
--- a/lib/Target/PowerPC/PPCFixupKinds.h
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCFixupKinds.h
diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp
index b6dca83..e9424d8 100644
--- a/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp
@@ -31,6 +31,10 @@ PPCMCAsmInfoDarwin::PPCMCAsmInfoDarwin(bool is64Bit) {
}
PPCLinuxMCAsmInfo::PPCLinuxMCAsmInfo(bool is64Bit) {
+ if (is64Bit)
+ PointerSize = 8;
+ IsLittleEndian = false;
+
// ".comm align is in bytes but .align is pow-2."
AlignmentIsInBytes = false;
@@ -56,7 +60,7 @@ PPCLinuxMCAsmInfo::PPCLinuxMCAsmInfo(bool is64Bit) {
ZeroDirective = "\t.space\t";
Data64bitsDirective = is64Bit ? "\t.quad\t" : 0;
- HasLCOMMDirective = true;
+ LCOMMDirectiveType = LCOMM::NoAlignment;
AssemblerDialect = 0; // Old-Style mnemonics.
}
diff --git a/lib/Target/PowerPC/PPCMCCodeEmitter.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp
index cf73d86..262f97c3 100644
--- a/lib/Target/PowerPC/PPCMCCodeEmitter.cpp
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp
@@ -12,9 +12,8 @@
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "mccodeemitter"
-#include "PPC.h"
-#include "PPCRegisterInfo.h"
-#include "PPCFixupKinds.h"
+#include "MCTargetDesc/PPCBaseInfo.h"
+#include "MCTargetDesc/PPCFixupKinds.h"
#include "llvm/MC/MCCodeEmitter.h"
#include "llvm/MC/MCInst.h"
#include "llvm/ADT/Statistic.h"
@@ -170,7 +169,7 @@ get_crbitm_encoding(const MCInst &MI, unsigned OpNo,
const MCOperand &MO = MI.getOperand(OpNo);
assert((MI.getOpcode() == PPC::MTCRF || MI.getOpcode() == PPC::MFOCRF) &&
(MO.getReg() >= PPC::CR0 && MO.getReg() <= PPC::CR7));
- return 0x80 >> PPCRegisterInfo::getRegisterNumbering(MO.getReg());
+ return 0x80 >> getPPCRegisterNumbering(MO.getReg());
}
@@ -182,7 +181,7 @@ getMachineOpValue(const MCInst &MI, const MCOperand &MO,
// The GPR operand should come through here though.
assert((MI.getOpcode() != PPC::MTCRF && MI.getOpcode() != PPC::MFOCRF) ||
MO.getReg() < PPC::CR0 || MO.getReg() > PPC::CR7);
- return PPCRegisterInfo::getRegisterNumbering(MO.getReg());
+ return getPPCRegisterNumbering(MO.getReg());
}
assert(MO.isImm() &&
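
Both hunks in this file only retarget the helper call; the encodings themselves are unchanged. For mtcrf/mfocrf the CR field operand is encoded one-hot in the 8-bit FXM field with CR0 at the most significant position, hence the 0x80 >> n:

    // FXM one-hot encoding: CR0 -> 0x80, CR3 -> 0x10, CR7 -> 0x01.
    unsigned crFieldMask(unsigned N) { return 0x80u >> N; }  // N = 0..7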
diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp
index 02b887f..d5c8a9e 100644
--- a/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp
@@ -13,10 +13,14 @@
#include "PPCMCTargetDesc.h"
#include "PPCMCAsmInfo.h"
+#include "InstPrinter/PPCInstPrinter.h"
+#include "llvm/MC/MachineLocation.h"
+#include "llvm/MC/MCCodeGenInfo.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSubtargetInfo.h"
-#include "llvm/Target/TargetRegistry.h"
+#include "llvm/Support/TargetRegistry.h"
#define GET_INSTRINFO_MC_DESC
#include "PPCGenInstrInfo.inc"
@@ -35,11 +39,16 @@ static MCInstrInfo *createPPCMCInstrInfo() {
return X;
}
-extern "C" void LLVMInitializePowerPCMCInstrInfo() {
- TargetRegistry::RegisterMCInstrInfo(ThePPC32Target, createPPCMCInstrInfo);
- TargetRegistry::RegisterMCInstrInfo(ThePPC64Target, createPPCMCInstrInfo);
-}
+static MCRegisterInfo *createPPCMCRegisterInfo(StringRef TT) {
+ Triple TheTriple(TT);
+ bool isPPC64 = (TheTriple.getArch() == Triple::ppc64);
+ unsigned Flavour = isPPC64 ? 0 : 1;
+ unsigned RA = isPPC64 ? PPC::LR8 : PPC::LR;
+ MCRegisterInfo *X = new MCRegisterInfo();
+ InitPPCMCRegisterInfo(X, RA, Flavour, Flavour);
+ return X;
+}
static MCSubtargetInfo *createPPCMCSubtargetInfo(StringRef TT, StringRef CPU,
StringRef FS) {
@@ -48,23 +57,95 @@ static MCSubtargetInfo *createPPCMCSubtargetInfo(StringRef TT, StringRef CPU,
return X;
}
-extern "C" void LLVMInitializePowerPCMCSubtargetInfo() {
+static MCAsmInfo *createPPCMCAsmInfo(const Target &T, StringRef TT) {
+ Triple TheTriple(TT);
+ bool isPPC64 = TheTriple.getArch() == Triple::ppc64;
+
+ MCAsmInfo *MAI;
+ if (TheTriple.isOSDarwin())
+ MAI = new PPCMCAsmInfoDarwin(isPPC64);
+ else
+ MAI = new PPCLinuxMCAsmInfo(isPPC64);
+
+ // Initial state of the frame pointer is R1.
+ MachineLocation Dst(MachineLocation::VirtualFP);
+ MachineLocation Src(PPC::R1, 0);
+ MAI->addInitialFrameState(0, Dst, Src);
+
+ return MAI;
+}
+
+static MCCodeGenInfo *createPPCMCCodeGenInfo(StringRef TT, Reloc::Model RM,
+ CodeModel::Model CM) {
+ MCCodeGenInfo *X = new MCCodeGenInfo();
+
+ if (RM == Reloc::Default) {
+ Triple T(TT);
+ if (T.isOSDarwin())
+ RM = Reloc::DynamicNoPIC;
+ else
+ RM = Reloc::Static;
+ }
+ X->InitMCCodeGenInfo(RM, CM);
+ return X;
+}
+
+// This is duplicated code. Refactor this.
+static MCStreamer *createMCStreamer(const Target &T, StringRef TT,
+ MCContext &Ctx, MCAsmBackend &MAB,
+ raw_ostream &OS,
+ MCCodeEmitter *Emitter,
+ bool RelaxAll,
+ bool NoExecStack) {
+ if (Triple(TT).isOSDarwin())
+ return createMachOStreamer(Ctx, MAB, OS, Emitter, RelaxAll);
+
+ return createELFStreamer(Ctx, MAB, OS, Emitter, RelaxAll, NoExecStack);
+}
+
+static MCInstPrinter *createPPCMCInstPrinter(const Target &T,
+ unsigned SyntaxVariant,
+ const MCAsmInfo &MAI,
+ const MCSubtargetInfo &STI) {
+ return new PPCInstPrinter(MAI, SyntaxVariant);
+}
+
+extern "C" void LLVMInitializePowerPCTargetMC() {
+ // Register the MC asm info.
+ RegisterMCAsmInfoFn C(ThePPC32Target, createPPCMCAsmInfo);
+ RegisterMCAsmInfoFn D(ThePPC64Target, createPPCMCAsmInfo);
+
+ // Register the MC codegen info.
+ TargetRegistry::RegisterMCCodeGenInfo(ThePPC32Target, createPPCMCCodeGenInfo);
+ TargetRegistry::RegisterMCCodeGenInfo(ThePPC64Target, createPPCMCCodeGenInfo);
+
+ // Register the MC instruction info.
+ TargetRegistry::RegisterMCInstrInfo(ThePPC32Target, createPPCMCInstrInfo);
+ TargetRegistry::RegisterMCInstrInfo(ThePPC64Target, createPPCMCInstrInfo);
+
+ // Register the MC register info.
+ TargetRegistry::RegisterMCRegInfo(ThePPC32Target, createPPCMCRegisterInfo);
+ TargetRegistry::RegisterMCRegInfo(ThePPC64Target, createPPCMCRegisterInfo);
+
+ // Register the MC subtarget info.
TargetRegistry::RegisterMCSubtargetInfo(ThePPC32Target,
createPPCMCSubtargetInfo);
TargetRegistry::RegisterMCSubtargetInfo(ThePPC64Target,
createPPCMCSubtargetInfo);
-}
-static MCAsmInfo *createMCAsmInfo(const Target &T, StringRef TT) {
- Triple TheTriple(TT);
- bool isPPC64 = TheTriple.getArch() == Triple::ppc64;
- if (TheTriple.isOSDarwin())
- return new PPCMCAsmInfoDarwin(isPPC64);
- return new PPCLinuxMCAsmInfo(isPPC64);
+ // Register the MC Code Emitter
+ TargetRegistry::RegisterMCCodeEmitter(ThePPC32Target, createPPCMCCodeEmitter);
+ TargetRegistry::RegisterMCCodeEmitter(ThePPC64Target, createPPCMCCodeEmitter);
-}
+ // Register the asm backend.
+ TargetRegistry::RegisterMCAsmBackend(ThePPC32Target, createPPCAsmBackend);
+ TargetRegistry::RegisterMCAsmBackend(ThePPC64Target, createPPCAsmBackend);
+
+ // Register the object streamer.
+ TargetRegistry::RegisterMCObjectStreamer(ThePPC32Target, createMCStreamer);
+ TargetRegistry::RegisterMCObjectStreamer(ThePPC64Target, createMCStreamer);
-extern "C" void LLVMInitializePowerPCMCAsmInfo() {
- RegisterMCAsmInfoFn C(ThePPC32Target, createMCAsmInfo);
- RegisterMCAsmInfoFn D(ThePPC64Target, createMCAsmInfo);
+ // Register the MCInstPrinter.
+ TargetRegistry::RegisterMCInstPrinter(ThePPC32Target, createPPCMCInstPrinter);
+ TargetRegistry::RegisterMCInstPrinter(ThePPC64Target, createPPCMCInstPrinter);
}
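
This hunk folds the three old per-component initializers (MCInstrInfo, MCSubtargetInfo, MCAsmInfo) plus the registrations previously scattered across PPCTargetMachine.cpp and PPCAsmPrinter.cpp into the single LLVMInitializePowerPCTargetMC entry point the 3.0 registry expects. The shape, sketched for a hypothetical target Foo (all createFoo* names are placeholders):

    extern "C" void LLVMInitializeFooTargetMC() {
      RegisterMCAsmInfoFn X(TheFooTarget, createFooMCAsmInfo);
      TargetRegistry::RegisterMCCodeGenInfo(TheFooTarget, createFooMCCodeGenInfo);
      TargetRegistry::RegisterMCInstrInfo(TheFooTarget, createFooMCInstrInfo);
      TargetRegistry::RegisterMCRegInfo(TheFooTarget, createFooMCRegisterInfo);
      TargetRegistry::RegisterMCSubtargetInfo(TheFooTarget,
                                              createFooMCSubtargetInfo);
      TargetRegistry::RegisterMCCodeEmitter(TheFooTarget, createFooMCCodeEmitter);
      TargetRegistry::RegisterMCAsmBackend(TheFooTarget, createFooAsmBackend);
      TargetRegistry::RegisterMCObjectStreamer(TheFooTarget, createFooMCStreamer);
      TargetRegistry::RegisterMCInstPrinter(TheFooTarget, createFooMCInstPrinter);
    }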
diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h b/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h
index cee2350..e5bf2a9 100644
--- a/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h
@@ -15,6 +15,10 @@
#define PPCMCTARGETDESC_H
namespace llvm {
+class MCAsmBackend;
+class MCCodeEmitter;
+class MCContext;
+class MCInstrInfo;
class MCSubtargetInfo;
class Target;
class StringRef;
@@ -22,6 +26,12 @@ class StringRef;
extern Target ThePPC32Target;
extern Target ThePPC64Target;
+MCCodeEmitter *createPPCMCCodeEmitter(const MCInstrInfo &MCII,
+ const MCSubtargetInfo &STI,
+ MCContext &Ctx);
+
+MCAsmBackend *createPPCAsmBackend(const Target &T, StringRef TT);
+
} // End llvm namespace
// Defines symbolic names for PowerPC registers. This defines a mapping from
diff --git a/lib/Target/PowerPC/PPCPredicates.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.cpp
index 12bb0a1..12bb0a1 100644
--- a/lib/Target/PowerPC/PPCPredicates.cpp
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.cpp
diff --git a/lib/Target/PowerPC/PPCPredicates.h b/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.h
index b2c8315..f872e86 100644
--- a/lib/Target/PowerPC/PPCPredicates.h
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.h
@@ -14,8 +14,6 @@
#ifndef LLVM_TARGET_POWERPC_PPCPREDICATES_H
#define LLVM_TARGET_POWERPC_PPCPREDICATES_H
-#include "PPC.h"
-
namespace llvm {
namespace PPC {
/// Predicate - These are "(BI << 5) | BO" for various predicates.
diff --git a/lib/Target/PowerPC/PPC.h b/lib/Target/PowerPC/PPC.h
index 7191dd1..5dc1863 100644
--- a/lib/Target/PowerPC/PPC.h
+++ b/lib/Target/PowerPC/PPC.h
@@ -15,6 +15,7 @@
#ifndef LLVM_TARGET_POWERPC_H
#define LLVM_TARGET_POWERPC_H
+#include "MCTargetDesc/PPCBaseInfo.h"
#include "MCTargetDesc/PPCMCTargetDesc.h"
#include <string>
@@ -30,22 +31,12 @@ namespace llvm {
class MachineInstr;
class AsmPrinter;
class MCInst;
- class MCCodeEmitter;
- class MCContext;
- class MCInstrInfo;
- class MCSubtargetInfo;
class TargetMachine;
- class TargetAsmBackend;
FunctionPass *createPPCBranchSelectionPass();
FunctionPass *createPPCISelDag(PPCTargetMachine &TM);
FunctionPass *createPPCJITCodeEmitterPass(PPCTargetMachine &TM,
JITCodeEmitter &MCE);
- MCCodeEmitter *createPPCMCCodeEmitter(const MCInstrInfo &MCII,
- const MCSubtargetInfo &STI,
- MCContext &Ctx);
- TargetAsmBackend *createPPCAsmBackend(const Target &, const std::string &);
-
void LowerPPCMachineInstrToMCInst(const MachineInstr *MI, MCInst &OutMI,
AsmPrinter &AP, bool isDarwin);
diff --git a/lib/Target/PowerPC/PPC.td b/lib/Target/PowerPC/PPC.td
index aabf494..2d5d302 100644
--- a/lib/Target/PowerPC/PPC.td
+++ b/lib/Target/PowerPC/PPC.td
@@ -43,9 +43,9 @@ def FeatureAltivec : SubtargetFeature<"altivec","HasAltivec", "true",
def FeatureGPUL : SubtargetFeature<"gpul","IsGigaProcessor", "true",
"Enable GPUL instructions">;
def FeatureFSqrt : SubtargetFeature<"fsqrt","HasFSQRT", "true",
- "Enable the fsqrt instruction">;
+ "Enable the fsqrt instruction">;
def FeatureSTFIWX : SubtargetFeature<"stfiwx","HasSTFIWX", "true",
- "Enable the stfiwx instruction">;
+ "Enable the stfiwx instruction">;
//===----------------------------------------------------------------------===//
// Register File Description
diff --git a/lib/Target/PowerPC/PPCAsmPrinter.cpp b/lib/Target/PowerPC/PPCAsmPrinter.cpp
index 9de2200..9528459 100644
--- a/lib/Target/PowerPC/PPCAsmPrinter.cpp
+++ b/lib/Target/PowerPC/PPCAsmPrinter.cpp
@@ -18,9 +18,9 @@
#define DEBUG_TYPE "asmprinter"
#include "PPC.h"
-#include "PPCPredicates.h"
#include "PPCTargetMachine.h"
#include "PPCSubtarget.h"
+#include "MCTargetDesc/PPCPredicates.h"
#include "llvm/Analysis/DebugInfo.h"
#include "llvm/Constants.h"
#include "llvm/DerivedTypes.h"
@@ -43,11 +43,11 @@
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetOptions.h"
-#include "llvm/Target/TargetRegistry.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringSet.h"
@@ -679,18 +679,8 @@ static AsmPrinter *createPPCAsmPrinterPass(TargetMachine &tm,
return new PPCLinuxAsmPrinter(tm, Streamer);
}
-static MCInstPrinter *createPPCMCInstPrinter(const Target &T,
- unsigned SyntaxVariant,
- const MCAsmInfo &MAI) {
- return new PPCInstPrinter(MAI, SyntaxVariant);
-}
-
-
// Force static initialization.
extern "C" void LLVMInitializePowerPCAsmPrinter() {
TargetRegistry::RegisterAsmPrinter(ThePPC32Target, createPPCAsmPrinterPass);
TargetRegistry::RegisterAsmPrinter(ThePPC64Target, createPPCAsmPrinterPass);
-
- TargetRegistry::RegisterMCInstPrinter(ThePPC32Target, createPPCMCInstPrinter);
- TargetRegistry::RegisterMCInstPrinter(ThePPC64Target, createPPCMCInstPrinter);
}
diff --git a/lib/Target/PowerPC/PPCBranchSelector.cpp b/lib/Target/PowerPC/PPCBranchSelector.cpp
index e161d23..475edf3 100644
--- a/lib/Target/PowerPC/PPCBranchSelector.cpp
+++ b/lib/Target/PowerPC/PPCBranchSelector.cpp
@@ -19,7 +19,7 @@
#include "PPC.h"
#include "PPCInstrBuilder.h"
#include "PPCInstrInfo.h"
-#include "PPCPredicates.h"
+#include "MCTargetDesc/PPCPredicates.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/ADT/Statistic.h"
diff --git a/lib/Target/PowerPC/PPCCodeEmitter.cpp b/lib/Target/PowerPC/PPCCodeEmitter.cpp
index 42232a0..4a1f182 100644
--- a/lib/Target/PowerPC/PPCCodeEmitter.cpp
+++ b/lib/Target/PowerPC/PPCCodeEmitter.cpp
@@ -140,7 +140,7 @@ unsigned PPCCodeEmitter::get_crbitm_encoding(const MachineInstr &MI,
const MachineOperand &MO = MI.getOperand(OpNo);
assert((MI.getOpcode() == PPC::MTCRF || MI.getOpcode() == PPC::MFOCRF) &&
(MO.getReg() >= PPC::CR0 && MO.getReg() <= PPC::CR7));
- return 0x80 >> PPCRegisterInfo::getRegisterNumbering(MO.getReg());
+ return 0x80 >> getPPCRegisterNumbering(MO.getReg());
}
MachineRelocation PPCCodeEmitter::GetRelocation(const MachineOperand &MO,
@@ -250,7 +250,7 @@ unsigned PPCCodeEmitter::getMachineOpValue(const MachineInstr &MI,
// The GPR operand should come through here though.
assert((MI.getOpcode() != PPC::MTCRF && MI.getOpcode() != PPC::MFOCRF) ||
MO.getReg() < PPC::CR0 || MO.getReg() > PPC::CR7);
- return PPCRegisterInfo::getRegisterNumbering(MO.getReg());
+ return getPPCRegisterNumbering(MO.getReg());
}
assert(MO.isImm() &&
diff --git a/lib/Target/PowerPC/PPCFrameLowering.cpp b/lib/Target/PowerPC/PPCFrameLowering.cpp
index 375e000..7dead10 100644
--- a/lib/Target/PowerPC/PPCFrameLowering.cpp
+++ b/lib/Target/PowerPC/PPCFrameLowering.cpp
@@ -109,14 +109,14 @@ static void HandleVRSaveUpdate(MachineInstr *MI, const TargetInstrInfo &TII) {
for (MachineRegisterInfo::livein_iterator
I = MF->getRegInfo().livein_begin(),
E = MF->getRegInfo().livein_end(); I != E; ++I) {
- unsigned RegNo = PPCRegisterInfo::getRegisterNumbering(I->first);
+ unsigned RegNo = getPPCRegisterNumbering(I->first);
if (VRRegNo[RegNo] == I->first) // If this really is a vector reg.
UsedRegMask &= ~(1 << (31-RegNo)); // Doesn't need to be marked.
}
for (MachineRegisterInfo::liveout_iterator
I = MF->getRegInfo().liveout_begin(),
E = MF->getRegInfo().liveout_end(); I != E; ++I) {
- unsigned RegNo = PPCRegisterInfo::getRegisterNumbering(*I);
+ unsigned RegNo = getPPCRegisterNumbering(*I);
if (VRRegNo[RegNo] == *I) // If this really is a vector reg.
UsedRegMask &= ~(1 << (31-RegNo)); // Doesn't need to be marked.
}
@@ -712,13 +712,6 @@ void PPCFrameLowering::emitEpilogue(MachineFunction &MF,
}
}
-void PPCFrameLowering::getInitialFrameState(std::vector<MachineMove> &Moves) const {
- // Initial state of the frame pointer is R1.
- MachineLocation Dst(MachineLocation::VirtualFP);
- MachineLocation Src(PPC::R1, 0);
- Moves.push_back(MachineMove(0, Dst, Src));
-}
-
static bool spillsCR(const MachineFunction &MF) {
const PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
return FuncInfo->isCRSpilled();
@@ -885,7 +878,7 @@ void PPCFrameLowering::processFunctionBeforeFrameFinalized(MachineFunction &MF)
FFI->setObjectOffset(FI, LowerBound + FFI->getObjectOffset(FI));
}
- LowerBound -= (31 - PPCRegisterInfo::getRegisterNumbering(MinFPR) + 1) * 8;
+ LowerBound -= (31 - getPPCRegisterNumbering(MinFPR) + 1) * 8;
}
// Check whether the frame pointer register is allocated. If so, make sure it
@@ -919,8 +912,8 @@ void PPCFrameLowering::processFunctionBeforeFrameFinalized(MachineFunction &MF)
}
unsigned MinReg =
- std::min<unsigned>(PPCRegisterInfo::getRegisterNumbering(MinGPR),
- PPCRegisterInfo::getRegisterNumbering(MinG8R));
+ std::min<unsigned>(getPPCRegisterNumbering(MinGPR),
+ getPPCRegisterNumbering(MinG8R));
if (Subtarget.isPPC64()) {
LowerBound -= (31 - MinReg + 1) * 8;
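
The two HandleVRSaveUpdate hunks are pure renames; the mask arithmetic stays as is. VRSAVE uses PowerPC bit numbering (bit 0 is the most significant bit), with bit n tracking vector register Vn, so marking Vn live or dead manipulates C-level bit (31 - n):

    #include <cassert>
    #include <cstdint>

    // V0 corresponds to the MSB of the 32-bit VRSAVE mask.
    int main() {
      uint32_t UsedRegMask = 0xffffffffu;
      unsigned RegNo = 0;                    // e.g. getPPCRegisterNumbering(PPC::V0)
      UsedRegMask &= ~(1u << (31 - RegNo));  // clear V0's bit
      assert(UsedRegMask == 0x7fffffffu);
      return 0;
    }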
diff --git a/lib/Target/PowerPC/PPCFrameLowering.h b/lib/Target/PowerPC/PPCFrameLowering.h
index 0c18de1..20faa71 100644
--- a/lib/Target/PowerPC/PPCFrameLowering.h
+++ b/lib/Target/PowerPC/PPCFrameLowering.h
@@ -40,7 +40,6 @@ public:
bool hasFP(const MachineFunction &MF) const;
bool needsFP(const MachineFunction &MF) const;
- void getInitialFrameState(std::vector<MachineMove> &Moves) const;
void processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
RegScavenger *RS = NULL) const;
diff --git a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
index 2176c02..6f204cc 100644
--- a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
+++ b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
@@ -14,8 +14,8 @@
#define DEBUG_TYPE "ppc-codegen"
#include "PPC.h"
-#include "PPCPredicates.h"
#include "PPCTargetMachine.h"
+#include "MCTargetDesc/PPCPredicates.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionAnalysis.h"
diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp
index 9741a39..d6b8a9e 100644
--- a/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -14,8 +14,8 @@
#include "PPCISelLowering.h"
#include "PPCMachineFunctionInfo.h"
#include "PPCPerfectShuffle.h"
-#include "PPCPredicates.h"
#include "PPCTargetMachine.h"
+#include "MCTargetDesc/PPCPredicates.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/VectorExtras.h"
#include "llvm/CodeGen/CallingConvLower.h"
@@ -211,7 +211,8 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
setOperationAction(ISD::TRAP, MVT::Other, Legal);
// TRAMPOLINE is custom lowered.
- setOperationAction(ISD::TRAMPOLINE, MVT::Other, Custom);
+ setOperationAction(ISD::INIT_TRAMPOLINE, MVT::Other, Custom);
+ setOperationAction(ISD::ADJUST_TRAMPOLINE, MVT::Other, Custom);
// VASTART needs to be custom lowered to use the VarArgsFrameIndex
setOperationAction(ISD::VASTART , MVT::Other, Custom);
@@ -365,7 +366,11 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
setOperationAction(ISD::BUILD_VECTOR, MVT::v4f32, Custom);
}
+ setOperationAction(ISD::ATOMIC_LOAD, MVT::i32, Expand);
+ setOperationAction(ISD::ATOMIC_STORE, MVT::i32, Expand);
+
setBooleanContents(ZeroOrOneBooleanContent);
+ setBooleanVectorContents(ZeroOrOneBooleanContent); // FIXME: Is this correct?
if (TM.getSubtarget<PPCSubtarget>().isPPC64()) {
setStackPointerRegisterToSaveRestore(PPC::X1);
@@ -401,12 +406,14 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
if (PPCSubTarget.isDarwin())
setPrefFunctionAlignment(4);
+ setInsertFencesForAtomic(true);
+
computeRegisterProperties();
}
/// getByValTypeAlignment - Return the desired alignment for ByVal aggregate
/// function arguments in the caller parameter area.
-unsigned PPCTargetLowering::getByValTypeAlignment(const Type *Ty) const {
+unsigned PPCTargetLowering::getByValTypeAlignment(Type *Ty) const {
const TargetMachine &TM = getTargetMachine();
// Darwin passes everything on 4 byte boundary.
if (TM.getSubtarget<PPCSubtarget>().isDarwin())
@@ -463,7 +470,7 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
}
}
-MVT::SimpleValueType PPCTargetLowering::getSetCCResultType(EVT VT) const {
+EVT PPCTargetLowering::getSetCCResultType(EVT VT) const {
return MVT::i32;
}
@@ -1368,8 +1375,13 @@ SDValue PPCTargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG,
return DAG.getLoad(VT, dl, InChain, Result, MachinePointerInfo(), false, false, 0);
}
-SDValue PPCTargetLowering::LowerTRAMPOLINE(SDValue Op,
- SelectionDAG &DAG) const {
+SDValue PPCTargetLowering::LowerADJUST_TRAMPOLINE(SDValue Op,
+ SelectionDAG &DAG) const {
+ return Op.getOperand(0);
+}
+
+SDValue PPCTargetLowering::LowerINIT_TRAMPOLINE(SDValue Op,
+ SelectionDAG &DAG) const {
SDValue Chain = Op.getOperand(0);
SDValue Trmp = Op.getOperand(1); // trampoline
SDValue FPtr = Op.getOperand(2); // nested function
@@ -1378,7 +1390,7 @@ SDValue PPCTargetLowering::LowerTRAMPOLINE(SDValue Op,
EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
bool isPPC64 = (PtrVT == MVT::i64);
- const Type *IntPtrTy =
+ Type *IntPtrTy =
DAG.getTargetLoweringInfo().getTargetData()->getIntPtrType(
*DAG.getContext());
@@ -1398,16 +1410,13 @@ SDValue PPCTargetLowering::LowerTRAMPOLINE(SDValue Op,
// Lower to a call to __trampoline_setup(Trmp, TrampSize, FPtr, ctx_reg)
std::pair<SDValue, SDValue> CallResult =
- LowerCallTo(Chain, Op.getValueType().getTypeForEVT(*DAG.getContext()),
+ LowerCallTo(Chain, Type::getVoidTy(*DAG.getContext()),
false, false, false, false, 0, CallingConv::C, false,
/*isReturnValueUsed=*/true,
DAG.getExternalSymbol("__trampoline_setup", PtrVT),
Args, DAG, dl);
- SDValue Ops[] =
- { CallResult.first, CallResult.second };
-
- return DAG.getMergeValues(Ops, 2, dl);
+ return CallResult.second;
}
SDValue PPCTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG,
@@ -2550,7 +2559,7 @@ unsigned PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag,
if (!DAG.getTarget().getSubtarget<PPCSubtarget>().isJITCodeModel()) {
unsigned OpFlags = 0;
if (DAG.getTarget().getRelocationModel() != Reloc::Static &&
- (!PPCSubTarget.getTargetTriple().isMacOSX() ||
+ (PPCSubTarget.getTargetTriple().isMacOSX() &&
PPCSubTarget.getTargetTriple().isMacOSXVersionLT(10, 5)) &&
(G->getGlobal()->isDeclaration() ||
G->getGlobal()->isWeakForLinker())) {
@@ -2574,7 +2583,7 @@ unsigned PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag,
unsigned char OpFlags = 0;
if (DAG.getTarget().getRelocationModel() != Reloc::Static &&
- (!PPCSubTarget.getTargetTriple().isMacOSX() ||
+ (PPCSubTarget.getTargetTriple().isMacOSX() &&
PPCSubTarget.getTargetTriple().isMacOSXVersionLT(10, 5))) {
// PC-relative references to external symbols should go through $stub,
// unless we're building with the leopard linker or later, which
@@ -2941,6 +2950,7 @@ PPCTargetLowering::LowerCall_SVR4(SDValue Chain, SDValue Callee,
SmallVector<TailCallArgumentInfo, 8> TailCallArguments;
SmallVector<SDValue, 8> MemOpChains;
+ bool seenFloatArg = false;
// Walk the register/memloc assignments, inserting copies/loads.
for (unsigned i = 0, j = 0, e = ArgLocs.size();
i != e;
@@ -2985,6 +2995,7 @@ PPCTargetLowering::LowerCall_SVR4(SDValue Chain, SDValue Callee,
}
if (VA.isRegLoc()) {
+ seenFloatArg |= VA.getLocVT().isFloatingPoint();
// Put argument in a physical register.
RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
} else {
@@ -3011,9 +3022,11 @@ PPCTargetLowering::LowerCall_SVR4(SDValue Chain, SDValue Callee,
Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
&MemOpChains[0], MemOpChains.size());
- // Set CR6 to true if this is a vararg call.
+ // Set CR6 to true if this is a vararg call with floating args passed in
+ // registers.
if (isVarArg) {
- SDValue SetCR(DAG.getMachineNode(PPC::CRSET, dl, MVT::i32), 0);
+ SDValue SetCR(DAG.getMachineNode(seenFloatArg ? PPC::CRSET : PPC::CRUNSET,
+ dl, MVT::i32), 0);
RegsToPass.push_back(std::make_pair(unsigned(PPC::CR1EQ), SetCR));
}
@@ -3403,6 +3416,17 @@ PPCTargetLowering::LowerCall_Darwin(SDValue Chain, SDValue Callee,
Ins, InVals);
}
+bool
+PPCTargetLowering::CanLowerReturn(CallingConv::ID CallConv,
+ MachineFunction &MF, bool isVarArg,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ LLVMContext &Context) const {
+ SmallVector<CCValAssign, 16> RVLocs;
+ CCState CCInfo(CallConv, isVarArg, MF, getTargetMachine(),
+ RVLocs, Context);
+ return CCInfo.CheckReturn(Outs, RetCC_PPC);
+}
+
SDValue
PPCTargetLowering::LowerReturn(SDValue Chain,
CallingConv::ID CallConv, bool isVarArg,
@@ -4490,7 +4514,8 @@ SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::GlobalTLSAddress: llvm_unreachable("TLS not implemented for PPC");
case ISD::JumpTable: return LowerJumpTable(Op, DAG);
case ISD::SETCC: return LowerSETCC(Op, DAG);
- case ISD::TRAMPOLINE: return LowerTRAMPOLINE(Op, DAG);
+ case ISD::INIT_TRAMPOLINE: return LowerINIT_TRAMPOLINE(Op, DAG);
+ case ISD::ADJUST_TRAMPOLINE: return LowerADJUST_TRAMPOLINE(Op, DAG);
case ISD::VASTART:
return LowerVASTART(Op, DAG, PPCSubTarget);
@@ -5504,7 +5529,7 @@ PPCTargetLowering::getSingleConstraintMatchWeight(
// but allow it at the lowest weight.
if (CallOperandVal == NULL)
return CW_Default;
- const Type *type = CallOperandVal->getType();
+ Type *type = CallOperandVal->getType();
// Look at the constraint type.
switch (*constraint) {
default:
@@ -5634,7 +5659,7 @@ void PPCTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
// isLegalAddressingMode - Return true if the addressing mode represented
// by AM is legal for this target, for a load/store of the specified type.
bool PPCTargetLowering::isLegalAddressingMode(const AddrMode &AM,
- const Type *Ty) const {
+ Type *Ty) const {
// FIXME: PPC does not allow r+i addressing modes for vectors!
// PPC allows a sign-extended 16-bit immediate field.
@@ -5670,7 +5695,7 @@ bool PPCTargetLowering::isLegalAddressingMode(const AddrMode &AM,
/// isLegalAddressImmediate - Return true if the integer value can be used
/// as the offset of the target addressing mode for load / store of the
/// given type.
-bool PPCTargetLowering::isLegalAddressImmediate(int64_t V,const Type *Ty) const{
+bool PPCTargetLowering::isLegalAddressImmediate(int64_t V,Type *Ty) const{
// PPC allows a sign-extended 16-bit immediate field.
return (V > -(1 << 16) && V < (1 << 16)-1);
}
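
Several independent changes land in this file. First, ISD::TRAMPOLINE is split into INIT_TRAMPOLINE (runs __trampoline_setup and produces only a chain) and ADJUST_TRAMPOLINE (yields the callable address; on PPC that is the trampoline block itself, so the lowering simply returns its operand). Second, LowerCall_SVR4 now sets CR bit 6 honestly: CR1EQ is bit 6 of the condition register, which the SVR4 ABI uses to tell a vararg callee whether floating-point arguments arrived in FPRs and its FPR save area is live; the old code executed CRSET unconditionally. Third, the new CanLowerReturn override lets common code demote return values that RetCC_PPC cannot assign to registers into indirect sret-style returns instead of failing in LowerReturn. The Darwin hunks also tighten the $stub condition so that only pre-10.5 Mac OS X triples take the stub path; previously any non-MacOSX triple matched. A condensed, illustrative fragment of the CR6 logic (not compilable on its own):

    // seenFloatArg accumulates over the argument-assignment loop; a vararg
    // call then materializes CR1EQ (= CR bit 6) as 1 or 0 accordingly.
    seenFloatArg |= VA.getLocVT().isFloatingPoint();   // per register arg
    // ... after the loop, for vararg calls:
    SDValue SetCR(DAG.getMachineNode(seenFloatArg ? PPC::CRSET : PPC::CRUNSET,
                                     dl, MVT::i32), 0);
    RegsToPass.push_back(std::make_pair(unsigned(PPC::CR1EQ), SetCR));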
diff --git a/lib/Target/PowerPC/PPCISelLowering.h b/lib/Target/PowerPC/PPCISelLowering.h
index 986b4e7..430e45e 100644
--- a/lib/Target/PowerPC/PPCISelLowering.h
+++ b/lib/Target/PowerPC/PPCISelLowering.h
@@ -246,7 +246,7 @@ namespace llvm {
virtual MVT getShiftAmountTy(EVT LHSTy) const { return MVT::i32; }
/// getSetCCResultType - Return the ISD::SETCC ValueType
- virtual MVT::SimpleValueType getSetCCResultType(EVT VT) const;
+ virtual EVT getSetCCResultType(EVT VT) const;
/// getPreIndexedAddressParts - returns true by value, base pointer and
/// offset pointer and addressing mode by reference if the node's address
@@ -323,7 +323,7 @@ namespace llvm {
/// getByValTypeAlignment - Return the desired alignment for ByVal aggregate
/// function arguments in the caller parameter area. This is the actual
/// alignment, not its logarithm.
- unsigned getByValTypeAlignment(const Type *Ty) const;
+ unsigned getByValTypeAlignment(Type *Ty) const;
/// LowerAsmOperandForConstraint - Lower the specified operand into the Ops
/// vector. If it is invalid, don't add anything to Ops.
@@ -334,12 +334,12 @@ namespace llvm {
/// isLegalAddressingMode - Return true if the addressing mode represented
/// by AM is legal for this target, for a load/store of the specified type.
- virtual bool isLegalAddressingMode(const AddrMode &AM, const Type *Ty)const;
+ virtual bool isLegalAddressingMode(const AddrMode &AM, Type *Ty)const;
/// isLegalAddressImmediate - Return true if the integer value can be used
/// as the offset of the target addressing mode for load / store of the
/// given type.
- virtual bool isLegalAddressImmediate(int64_t V, const Type *Ty) const;
+ virtual bool isLegalAddressImmediate(int64_t V, Type *Ty) const;
/// isLegalAddressImmediate - Return true if the GlobalValue can be used as
/// the offset of the target addressing mode.
@@ -390,7 +390,8 @@ namespace llvm {
SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerTRAMPOLINE(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerINIT_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerADJUST_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG,
const PPCSubtarget &Subtarget) const;
SDValue LowerVAARG(SDValue Op, SelectionDAG &DAG,
@@ -444,6 +445,12 @@ namespace llvm {
DebugLoc dl, SelectionDAG &DAG,
SmallVectorImpl<SDValue> &InVals) const;
+ virtual bool
+ CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF,
+ bool isVarArg,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ LLVMContext &Context) const;
+
virtual SDValue
LowerReturn(SDValue Chain,
CallingConv::ID CallConv, bool isVarArg,
diff --git a/lib/Target/PowerPC/PPCInstrInfo.cpp b/lib/Target/PowerPC/PPCInstrInfo.cpp
index 143444f..2bc109c 100644
--- a/lib/Target/PowerPC/PPCInstrInfo.cpp
+++ b/lib/Target/PowerPC/PPCInstrInfo.cpp
@@ -15,18 +15,18 @@
#include "PPC.h"
#include "PPCInstrBuilder.h"
#include "PPCMachineFunctionInfo.h"
-#include "PPCPredicates.h"
#include "PPCTargetMachine.h"
#include "PPCHazardRecognizers.h"
+#include "MCTargetDesc/PPCPredicates.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/PseudoSourceValue.h"
#include "llvm/MC/MCAsmInfo.h"
-#include "llvm/Target/TargetRegistry.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/ADT/STLExtras.h"
@@ -334,7 +334,7 @@ PPCInstrInfo::StoreRegToStackSlot(MachineFunction &MF,
const TargetRegisterClass *RC,
SmallVectorImpl<MachineInstr*> &NewMIs) const{
DebugLoc DL;
- if (RC == PPC::GPRCRegisterClass) {
+ if (PPC::GPRCRegisterClass->hasSubClassEq(RC)) {
if (SrcReg != PPC::LR) {
NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::STW))
.addReg(SrcReg,
@@ -350,7 +350,7 @@ PPCInstrInfo::StoreRegToStackSlot(MachineFunction &MF,
getKillRegState(isKill)),
FrameIdx));
}
- } else if (RC == PPC::G8RCRegisterClass) {
+ } else if (PPC::G8RCRegisterClass->hasSubClassEq(RC)) {
if (SrcReg != PPC::LR8) {
NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::STD))
.addReg(SrcReg,
@@ -366,17 +366,17 @@ PPCInstrInfo::StoreRegToStackSlot(MachineFunction &MF,
getKillRegState(isKill)),
FrameIdx));
}
- } else if (RC == PPC::F8RCRegisterClass) {
+ } else if (PPC::F8RCRegisterClass->hasSubClassEq(RC)) {
NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::STFD))
.addReg(SrcReg,
getKillRegState(isKill)),
FrameIdx));
- } else if (RC == PPC::F4RCRegisterClass) {
+ } else if (PPC::F4RCRegisterClass->hasSubClassEq(RC)) {
NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::STFS))
.addReg(SrcReg,
getKillRegState(isKill)),
FrameIdx));
- } else if (RC == PPC::CRRCRegisterClass) {
+ } else if (PPC::CRRCRegisterClass->hasSubClassEq(RC)) {
if ((EnablePPC32RS && !TM.getSubtargetImpl()->isPPC64()) ||
(EnablePPC64RS && TM.getSubtargetImpl()->isPPC64())) {
// FIXME (64-bit): Enable
@@ -402,7 +402,7 @@ PPCInstrInfo::StoreRegToStackSlot(MachineFunction &MF,
// If the saved register wasn't CR0, shift the bits left so that they are
// in CR0's slot.
if (SrcReg != PPC::CR0) {
- unsigned ShiftBits = PPCRegisterInfo::getRegisterNumbering(SrcReg)*4;
+ unsigned ShiftBits = getPPCRegisterNumbering(SrcReg)*4;
// rlwinm scratch, scratch, ShiftBits, 0, 31.
NewMIs.push_back(BuildMI(MF, DL, get(PPC::RLWINM), ScratchReg)
.addReg(ScratchReg).addImm(ShiftBits)
@@ -414,7 +414,7 @@ PPCInstrInfo::StoreRegToStackSlot(MachineFunction &MF,
getKillRegState(isKill)),
FrameIdx));
}
- } else if (RC == PPC::CRBITRCRegisterClass) {
+ } else if (PPC::CRBITRCRegisterClass->hasSubClassEq(RC)) {
// FIXME: We use CRi here because there is no mtcrf on a bit. Since the
// backend currently only uses CR1EQ as an individual bit, this should
// not cause any bug. If we need other uses of CR bits, the following
@@ -448,7 +448,7 @@ PPCInstrInfo::StoreRegToStackSlot(MachineFunction &MF,
return StoreRegToStackSlot(MF, Reg, isKill, FrameIdx,
PPC::CRRCRegisterClass, NewMIs);
- } else if (RC == PPC::VRRCRegisterClass) {
+ } else if (PPC::VRRCRegisterClass->hasSubClassEq(RC)) {
// We don't have indexed addressing for vector loads. Emit:
// R0 = ADDI FI#
// STVX VAL, 0, R0
@@ -499,7 +499,7 @@ PPCInstrInfo::LoadRegFromStackSlot(MachineFunction &MF, DebugLoc DL,
unsigned DestReg, int FrameIdx,
const TargetRegisterClass *RC,
SmallVectorImpl<MachineInstr*> &NewMIs)const{
- if (RC == PPC::GPRCRegisterClass) {
+ if (PPC::GPRCRegisterClass->hasSubClassEq(RC)) {
if (DestReg != PPC::LR) {
NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::LWZ),
DestReg), FrameIdx));
@@ -508,7 +508,7 @@ PPCInstrInfo::LoadRegFromStackSlot(MachineFunction &MF, DebugLoc DL,
PPC::R11), FrameIdx));
NewMIs.push_back(BuildMI(MF, DL, get(PPC::MTLR)).addReg(PPC::R11));
}
- } else if (RC == PPC::G8RCRegisterClass) {
+ } else if (PPC::G8RCRegisterClass->hasSubClassEq(RC)) {
if (DestReg != PPC::LR8) {
NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::LD), DestReg),
FrameIdx));
@@ -517,13 +517,13 @@ PPCInstrInfo::LoadRegFromStackSlot(MachineFunction &MF, DebugLoc DL,
PPC::R11), FrameIdx));
NewMIs.push_back(BuildMI(MF, DL, get(PPC::MTLR8)).addReg(PPC::R11));
}
- } else if (RC == PPC::F8RCRegisterClass) {
+ } else if (PPC::F8RCRegisterClass->hasSubClassEq(RC)) {
NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::LFD), DestReg),
FrameIdx));
- } else if (RC == PPC::F4RCRegisterClass) {
+ } else if (PPC::F4RCRegisterClass->hasSubClassEq(RC)) {
NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::LFS), DestReg),
FrameIdx));
- } else if (RC == PPC::CRRCRegisterClass) {
+ } else if (PPC::CRRCRegisterClass->hasSubClassEq(RC)) {
  // FIXME: We need a scratch reg here. The trouble with using R0 is that

// it's possible for the stack frame to be so big the save location is
// out of range of immediate offsets, necessitating another register.
@@ -537,7 +537,7 @@ PPCInstrInfo::LoadRegFromStackSlot(MachineFunction &MF, DebugLoc DL,
// If the reloaded register isn't CR0, shift the bits right so that they are
// in the right CR's slot.
if (DestReg != PPC::CR0) {
- unsigned ShiftBits = PPCRegisterInfo::getRegisterNumbering(DestReg)*4;
+ unsigned ShiftBits = getPPCRegisterNumbering(DestReg)*4;
// rlwinm r11, r11, 32-ShiftBits, 0, 31.
NewMIs.push_back(BuildMI(MF, DL, get(PPC::RLWINM), ScratchReg)
.addReg(ScratchReg).addImm(32-ShiftBits).addImm(0)
@@ -546,7 +546,7 @@ PPCInstrInfo::LoadRegFromStackSlot(MachineFunction &MF, DebugLoc DL,
NewMIs.push_back(BuildMI(MF, DL, get(PPC::MTCRF), DestReg)
.addReg(ScratchReg));
- } else if (RC == PPC::CRBITRCRegisterClass) {
+ } else if (PPC::CRBITRCRegisterClass->hasSubClassEq(RC)) {
unsigned Reg = 0;
if (DestReg == PPC::CR0LT || DestReg == PPC::CR0GT ||
@@ -577,7 +577,7 @@ PPCInstrInfo::LoadRegFromStackSlot(MachineFunction &MF, DebugLoc DL,
return LoadRegFromStackSlot(MF, DL, Reg, FrameIdx,
PPC::CRRCRegisterClass, NewMIs);
- } else if (RC == PPC::VRRCRegisterClass) {
+ } else if (PPC::VRRCRegisterClass->hasSubClassEq(RC)) {
// We don't have indexed addressing for vector loads. Emit:
// R0 = ADDI FI#
// Dest = LVX 0, R0
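
The RC == X comparisons throughout this file break once register classes grow sub-classes: a virtual register constrained to a sub-class of GPRC would compare unequal and fall through every branch. hasSubClassEq(RC) accepts the class itself or any sub-class, so the spill/reload code keeps working. Sketch (the sub-class scenario is hypothetical, for illustration):

    // Pointer equality misses sub-classes: if RC is some sub-class of GPRC,
    // `RC == PPC::GPRCRegisterClass` is false, yet a GPR spill is still the
    // right thing to emit.
    if (PPC::GPRCRegisterClass->hasSubClassEq(RC)) {
      // emit the STW/STD spill sequence as above
    }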
diff --git a/lib/Target/PowerPC/PPCInstrInfo.td b/lib/Target/PowerPC/PPCInstrInfo.td
index 773578c..f248b5b 100644
--- a/lib/Target/PowerPC/PPCInstrInfo.td
+++ b/lib/Target/PowerPC/PPCInstrInfo.td
@@ -1053,6 +1053,10 @@ def CRSET : XLForm_1_ext<19, 289, (outs CRBITRC:$dst), (ins),
"creqv $dst, $dst, $dst", BrCR,
[]>;
+def CRUNSET: XLForm_1_ext<19, 193, (outs CRBITRC:$dst), (ins),
+ "crxor $dst, $dst, $dst", BrCR,
+ []>;
+
// XFX-Form instructions. Instructions that deal with SPRs.
//
let Uses = [CTR] in {
@@ -1472,5 +1476,7 @@ def : Pat<(membarrier (i32 imm /*ll*/),
(i32 imm /*device*/)),
(SYNC)>;
+def : Pat<(atomic_fence (imm), (imm)), (SYNC)>;
+
include "PPCInstrAltivec.td"
include "PPCInstr64Bit.td"
diff --git a/lib/Target/PowerPC/PPCRegisterInfo.cpp b/lib/Target/PowerPC/PPCRegisterInfo.cpp
index 9c2428b..2e90b7a 100644
--- a/lib/Target/PowerPC/PPCRegisterInfo.cpp
+++ b/lib/Target/PowerPC/PPCRegisterInfo.cpp
@@ -28,7 +28,6 @@
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
-#include "llvm/CodeGen/MachineLocation.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RegisterScavenging.h"
#include "llvm/Target/TargetFrameLowering.h"
@@ -68,52 +67,12 @@ PPCRegisterInfo::requiresRegisterScavenging(const MachineFunction &) const {
(EnablePPC64RS && Subtarget.isPPC64()));
}
-/// getRegisterNumbering - Given the enum value for some register, e.g.
-/// PPC::F14, return the number that it corresponds to (e.g. 14).
-unsigned PPCRegisterInfo::getRegisterNumbering(unsigned RegEnum) {
- using namespace PPC;
- switch (RegEnum) {
- case 0: return 0;
- case R0 : case X0 : case F0 : case V0 : case CR0: case CR0LT: return 0;
- case R1 : case X1 : case F1 : case V1 : case CR1: case CR0GT: return 1;
- case R2 : case X2 : case F2 : case V2 : case CR2: case CR0EQ: return 2;
- case R3 : case X3 : case F3 : case V3 : case CR3: case CR0UN: return 3;
- case R4 : case X4 : case F4 : case V4 : case CR4: case CR1LT: return 4;
- case R5 : case X5 : case F5 : case V5 : case CR5: case CR1GT: return 5;
- case R6 : case X6 : case F6 : case V6 : case CR6: case CR1EQ: return 6;
- case R7 : case X7 : case F7 : case V7 : case CR7: case CR1UN: return 7;
- case R8 : case X8 : case F8 : case V8 : case CR2LT: return 8;
- case R9 : case X9 : case F9 : case V9 : case CR2GT: return 9;
- case R10: case X10: case F10: case V10: case CR2EQ: return 10;
- case R11: case X11: case F11: case V11: case CR2UN: return 11;
- case R12: case X12: case F12: case V12: case CR3LT: return 12;
- case R13: case X13: case F13: case V13: case CR3GT: return 13;
- case R14: case X14: case F14: case V14: case CR3EQ: return 14;
- case R15: case X15: case F15: case V15: case CR3UN: return 15;
- case R16: case X16: case F16: case V16: case CR4LT: return 16;
- case R17: case X17: case F17: case V17: case CR4GT: return 17;
- case R18: case X18: case F18: case V18: case CR4EQ: return 18;
- case R19: case X19: case F19: case V19: case CR4UN: return 19;
- case R20: case X20: case F20: case V20: case CR5LT: return 20;
- case R21: case X21: case F21: case V21: case CR5GT: return 21;
- case R22: case X22: case F22: case V22: case CR5EQ: return 22;
- case R23: case X23: case F23: case V23: case CR5UN: return 23;
- case R24: case X24: case F24: case V24: case CR6LT: return 24;
- case R25: case X25: case F25: case V25: case CR6GT: return 25;
- case R26: case X26: case F26: case V26: case CR6EQ: return 26;
- case R27: case X27: case F27: case V27: case CR6UN: return 27;
- case R28: case X28: case F28: case V28: case CR7LT: return 28;
- case R29: case X29: case F29: case V29: case CR7GT: return 29;
- case R30: case X30: case F30: case V30: case CR7EQ: return 30;
- case R31: case X31: case F31: case V31: case CR7UN: return 31;
- default:
- llvm_unreachable("Unhandled reg in PPCRegisterInfo::getRegisterNumbering!");
- }
-}
-
PPCRegisterInfo::PPCRegisterInfo(const PPCSubtarget &ST,
const TargetInstrInfo &tii)
- : PPCGenRegisterInfo(), Subtarget(ST), TII(tii) {
+ : PPCGenRegisterInfo(ST.isPPC64() ? PPC::LR8 : PPC::LR,
+ ST.isPPC64() ? 0 : 1,
+ ST.isPPC64() ? 0 : 1),
+ Subtarget(ST), TII(tii) {
ImmToIdxMap[PPC::LD] = PPC::LDX; ImmToIdxMap[PPC::STD] = PPC::STDX;
ImmToIdxMap[PPC::LBZ] = PPC::LBZX; ImmToIdxMap[PPC::STB] = PPC::STBX;
ImmToIdxMap[PPC::LHZ] = PPC::LHZX; ImmToIdxMap[PPC::LHA] = PPC::LHAX;
@@ -519,7 +478,7 @@ void PPCRegisterInfo::lowerCRSpilling(MachineBasicBlock::iterator II,
// rlwinm rA, rA, ShiftBits, 0, 31.
BuildMI(MBB, II, dl, TII.get(PPC::RLWINM), Reg)
.addReg(Reg, RegState::Kill)
- .addImm(PPCRegisterInfo::getRegisterNumbering(SrcReg) * 4)
+ .addImm(getPPCRegisterNumbering(SrcReg) * 4)
.addImm(0)
.addImm(31);
@@ -668,10 +627,6 @@ PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
MI.getOperand(OperandBase + 1).ChangeToRegister(SReg, false);
}
-unsigned PPCRegisterInfo::getRARegister() const {
- return !Subtarget.isPPC64() ? PPC::LR : PPC::LR8;
-}
-
unsigned PPCRegisterInfo::getFrameRegister(const MachineFunction &MF) const {
const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
@@ -688,27 +643,3 @@ unsigned PPCRegisterInfo::getEHExceptionRegister() const {
unsigned PPCRegisterInfo::getEHHandlerRegister() const {
return !Subtarget.isPPC64() ? PPC::R4 : PPC::X4;
}
-
-/// DWARFFlavour - Flavour of dwarf regnumbers
-///
-namespace DWARFFlavour {
- enum {
- PPC64 = 0, PPC32 = 1
- };
-}
-
-int PPCRegisterInfo::getDwarfRegNum(unsigned RegNum, bool isEH) const {
- // FIXME: Most probably dwarf numbers differs for Linux and Darwin
- unsigned Flavour = Subtarget.isPPC64() ?
- DWARFFlavour::PPC64 : DWARFFlavour::PPC32;
-
- return PPCGenRegisterInfo::getDwarfRegNumFull(RegNum, Flavour);
-}
-
-int PPCRegisterInfo::getLLVMRegNum(unsigned RegNum, bool isEH) const {
- // FIXME: Most probably dwarf numbers differs for Linux and Darwin
- unsigned Flavour = Subtarget.isPPC64() ?
- DWARFFlavour::PPC64 : DWARFFlavour::PPC32;
-
- return PPCGenRegisterInfo::getLLVMRegNumFull(RegNum, Flavour);
-}
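
Two of the deletions here are relocations, not removals: getRegisterNumbering now lives in MCTargetDesc/PPCBaseInfo.h (added earlier in this patch), and the return-address register plus the DWARF/EH register-number flavours move into the TableGen-generated PPCGenRegisterInfo base, which is why getRARegister, getDwarfRegNum and getLLVMRegNum disappear. Matching the InitPPCMCRegisterInfo call in PPCMCTargetDesc.cpp above, the constructor arguments encode exactly the deleted DWARFFlavour enum:

    // Same mapping the removed code used: DWARFFlavour::PPC64 = 0,
    // DWARFFlavour::PPC32 = 1; the first argument is the RA register.
    unsigned RA    = ST.isPPC64() ? PPC::LR8 : PPC::LR;
    unsigned Dwarf = ST.isPPC64() ? 0 : 1;
    unsigned EH    = ST.isPPC64() ? 0 : 1;
    // PPCGenRegisterInfo(RA, Dwarf, EH)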
diff --git a/lib/Target/PowerPC/PPCRegisterInfo.h b/lib/Target/PowerPC/PPCRegisterInfo.h
index 33fe5eb..1cc7213 100644
--- a/lib/Target/PowerPC/PPCRegisterInfo.h
+++ b/lib/Target/PowerPC/PPCRegisterInfo.h
@@ -33,10 +33,6 @@ class PPCRegisterInfo : public PPCGenRegisterInfo {
public:
PPCRegisterInfo(const PPCSubtarget &SubTarget, const TargetInstrInfo &tii);
- /// getRegisterNumbering - Given the enum value for some register, e.g.
- /// PPC::F14, return the number that it corresponds to (e.g. 14).
- static unsigned getRegisterNumbering(unsigned RegEnum);
-
/// getPointerRegClass - Return the register class to use to hold pointers.
/// This is used for addressing modes.
virtual const TargetRegisterClass *getPointerRegClass(unsigned Kind=0) const;
@@ -62,15 +58,11 @@ public:
int SPAdj, RegScavenger *RS = NULL) const;
// Debug information queries.
- unsigned getRARegister() const;
unsigned getFrameRegister(const MachineFunction &MF) const;
// Exception handling queries.
unsigned getEHExceptionRegister() const;
unsigned getEHHandlerRegister() const;
-
- int getDwarfRegNum(unsigned RegNum, bool isEH) const;
- int getLLVMRegNum(unsigned RegNum, bool isEH) const;
};
} // end namespace llvm
diff --git a/lib/Target/PowerPC/PPCSubtarget.cpp b/lib/Target/PowerPC/PPCSubtarget.cpp
index 5ea9b0f..cf194de 100644
--- a/lib/Target/PowerPC/PPCSubtarget.cpp
+++ b/lib/Target/PowerPC/PPCSubtarget.cpp
@@ -15,7 +15,7 @@
#include "PPC.h"
#include "llvm/GlobalValue.h"
#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetRegistry.h"
+#include "llvm/Support/TargetRegistry.h"
#include <cstdlib>
#define GET_SUBTARGETINFO_TARGET_DESC
diff --git a/lib/Target/PowerPC/PPCTargetMachine.cpp b/lib/Target/PowerPC/PPCTargetMachine.cpp
index e0ea5ad..f5744b8 100644
--- a/lib/Target/PowerPC/PPCTargetMachine.cpp
+++ b/lib/Target/PowerPC/PPCTargetMachine.cpp
@@ -16,76 +16,43 @@
#include "llvm/PassManager.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/Target/TargetOptions.h"
-#include "llvm/Target/TargetRegistry.h"
#include "llvm/Support/FormattedStream.h"
+#include "llvm/Support/TargetRegistry.h"
using namespace llvm;
-// This is duplicated code. Refactor this.
-static MCStreamer *createMCStreamer(const Target &T, const std::string &TT,
- MCContext &Ctx, TargetAsmBackend &TAB,
- raw_ostream &OS,
- MCCodeEmitter *Emitter,
- bool RelaxAll,
- bool NoExecStack) {
- if (Triple(TT).isOSDarwin())
- return createMachOStreamer(Ctx, TAB, OS, Emitter, RelaxAll);
-
- return NULL;
-}
-
extern "C" void LLVMInitializePowerPCTarget() {
// Register the targets
RegisterTargetMachine<PPC32TargetMachine> A(ThePPC32Target);
RegisterTargetMachine<PPC64TargetMachine> B(ThePPC64Target);
-
- // Register the MC Code Emitter
- TargetRegistry::RegisterCodeEmitter(ThePPC32Target, createPPCMCCodeEmitter);
- TargetRegistry::RegisterCodeEmitter(ThePPC64Target, createPPCMCCodeEmitter);
-
-
- // Register the asm backend.
- TargetRegistry::RegisterAsmBackend(ThePPC32Target, createPPCAsmBackend);
- TargetRegistry::RegisterAsmBackend(ThePPC64Target, createPPCAsmBackend);
-
- // Register the object streamer.
- TargetRegistry::RegisterObjectStreamer(ThePPC32Target, createMCStreamer);
- TargetRegistry::RegisterObjectStreamer(ThePPC64Target, createMCStreamer);
}
-
-PPCTargetMachine::PPCTargetMachine(const Target &T, const std::string &TT,
- const std::string &CPU,
- const std::string &FS, bool is64Bit)
- : LLVMTargetMachine(T, TT, CPU, FS),
+PPCTargetMachine::PPCTargetMachine(const Target &T, StringRef TT,
+ StringRef CPU, StringRef FS,
+ Reloc::Model RM, CodeModel::Model CM,
+ bool is64Bit)
+ : LLVMTargetMachine(T, TT, CPU, FS, RM, CM),
Subtarget(TT, CPU, FS, is64Bit),
DataLayout(Subtarget.getTargetDataString()), InstrInfo(*this),
FrameLowering(Subtarget), JITInfo(*this, is64Bit),
TLInfo(*this), TSInfo(*this),
InstrItins(Subtarget.getInstrItineraryData()) {
-
- if (getRelocationModel() == Reloc::Default) {
- if (Subtarget.isDarwin())
- setRelocationModel(Reloc::DynamicNoPIC);
- else
- setRelocationModel(Reloc::Static);
- }
}
/// Override this for PowerPC. Tail merging happily breaks up instruction issue
/// groups, which typically degrades performance.
bool PPCTargetMachine::getEnableTailMergeDefault() const { return false; }
-PPC32TargetMachine::PPC32TargetMachine(const Target &T, const std::string &TT,
- const std::string &CPU,
- const std::string &FS)
- : PPCTargetMachine(T, TT, CPU, FS, false) {
+PPC32TargetMachine::PPC32TargetMachine(const Target &T, StringRef TT,
+ StringRef CPU, StringRef FS,
+ Reloc::Model RM, CodeModel::Model CM)
+ : PPCTargetMachine(T, TT, CPU, FS, RM, CM, false) {
}
-PPC64TargetMachine::PPC64TargetMachine(const Target &T, const std::string &TT,
- const std::string &CPU,
- const std::string &FS)
- : PPCTargetMachine(T, TT, CPU, FS, true) {
+PPC64TargetMachine::PPC64TargetMachine(const Target &T, StringRef TT,
+ StringRef CPU, StringRef FS,
+ Reloc::Model RM, CodeModel::Model CM)
+ : PPCTargetMachine(T, TT, CPU, FS, RM, CM, true) {
}
@@ -110,19 +77,11 @@ bool PPCTargetMachine::addPreEmitPass(PassManagerBase &PM,
bool PPCTargetMachine::addCodeEmitter(PassManagerBase &PM,
CodeGenOpt::Level OptLevel,
JITCodeEmitter &JCE) {
- // The JIT should use the static relocation model in ppc32 mode, PIC in ppc64.
// FIXME: This should be moved to TargetJITInfo!!
- if (Subtarget.isPPC64()) {
- // We use PIC codegen in ppc64 mode, because otherwise we'd have to use many
- // instructions to materialize arbitrary global variable + function +
- // constant pool addresses.
- setRelocationModel(Reloc::PIC_);
+ if (Subtarget.isPPC64())
// Temporary workaround for the inability of PPC64 JIT to handle jump
// tables.
DisableJumpTables = true;
- } else {
- setRelocationModel(Reloc::Static);
- }
// Inform the subtarget that we are in JIT mode. FIXME: does this break macho
// writing?
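
With relocation and code models now passed into the constructor and stored in MCCodeGenInfo, the setRelocationModel() mutators are gone: the Darwin-vs-ELF defaulting that used to live in this constructor moved to createPPCMCCodeGenInfo() earlier in the patch, and the JIT can no longer flip the model after the fact. Callers choose the model at creation time; a sketch, assuming the 3.0-era createTargetMachine signature:

    // Reloc::Default is resolved per-triple by createPPCMCCodeGenInfo()
    // rather than patched after construction.
    TargetMachine *TM =
        TheTarget->createTargetMachine(TripleName, "generic", "",
                                       Reloc::Default, CodeModel::Default);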
diff --git a/lib/Target/PowerPC/PPCTargetMachine.h b/lib/Target/PowerPC/PPCTargetMachine.h
index baf07e3..d06f084 100644
--- a/lib/Target/PowerPC/PPCTargetMachine.h
+++ b/lib/Target/PowerPC/PPCTargetMachine.h
@@ -40,9 +40,9 @@ class PPCTargetMachine : public LLVMTargetMachine {
InstrItineraryData InstrItins;
public:
- PPCTargetMachine(const Target &T, const std::string &TT,
- const std::string &CPU, const std::string &FS,
- bool is64Bit);
+ PPCTargetMachine(const Target &T, StringRef TT,
+ StringRef CPU, StringRef FS,
+ Reloc::Model RM, CodeModel::Model CM, bool is64Bit);
virtual const PPCInstrInfo *getInstrInfo() const { return &InstrInfo; }
virtual const PPCFrameLowering *getFrameLowering() const {
@@ -77,16 +77,18 @@ public:
///
class PPC32TargetMachine : public PPCTargetMachine {
public:
- PPC32TargetMachine(const Target &T, const std::string &TT,
- const std::string &CPU, const std::string &FS);
+ PPC32TargetMachine(const Target &T, StringRef TT,
+ StringRef CPU, StringRef FS,
+ Reloc::Model RM, CodeModel::Model CM);
};
/// PPC64TargetMachine - PowerPC 64-bit target machine.
///
class PPC64TargetMachine : public PPCTargetMachine {
public:
- PPC64TargetMachine(const Target &T, const std::string &TT,
- const std::string &CPU, const std::string &FS);
+ PPC64TargetMachine(const Target &T, StringRef TT,
+ StringRef CPU, StringRef FS,
+ Reloc::Model RM, CodeModel::Model CM);
};
} // end namespace llvm
diff --git a/lib/Target/PowerPC/TargetInfo/CMakeLists.txt b/lib/Target/PowerPC/TargetInfo/CMakeLists.txt
index 058d599..f63111f 100644
--- a/lib/Target/PowerPC/TargetInfo/CMakeLists.txt
+++ b/lib/Target/PowerPC/TargetInfo/CMakeLists.txt
@@ -4,4 +4,10 @@ add_llvm_library(LLVMPowerPCInfo
PowerPCTargetInfo.cpp
)
-add_dependencies(LLVMPowerPCInfo PowerPCCodeGenTable_gen)
+add_llvm_library_dependencies(LLVMPowerPCInfo
+ LLVMMC
+ LLVMSupport
+ LLVMTarget
+ )
+
+add_dependencies(LLVMPowerPCInfo PowerPCCommonTableGen)
diff --git a/lib/Target/PowerPC/TargetInfo/PowerPCTargetInfo.cpp b/lib/Target/PowerPC/TargetInfo/PowerPCTargetInfo.cpp
index ad607d0..5dc8568 100644
--- a/lib/Target/PowerPC/TargetInfo/PowerPCTargetInfo.cpp
+++ b/lib/Target/PowerPC/TargetInfo/PowerPCTargetInfo.cpp
@@ -9,7 +9,7 @@
#include "PPC.h"
#include "llvm/Module.h"
-#include "llvm/Target/TargetRegistry.h"
+#include "llvm/Support/TargetRegistry.h"
using namespace llvm;
Target llvm::ThePPC32Target, llvm::ThePPC64Target;
diff --git a/lib/Target/README.txt b/lib/Target/README.txt
index 4cc9534..1f69ffb 100644
--- a/lib/Target/README.txt
+++ b/lib/Target/README.txt
@@ -2348,3 +2348,13 @@ which can do this in a single operation (instruction or libcall). It is
probably best to do this in the code generator.
//===---------------------------------------------------------------------===//
+
+unsigned foo(unsigned x, unsigned y) { return (x & y) == 0 || x == 0; }
+should fold to (x & y) == 0.
+
+//===---------------------------------------------------------------------===//
+
+unsigned foo(unsigned x, unsigned y) { return x > y && x != 0; }
+should fold to x > y.
+
+//===---------------------------------------------------------------------===//
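
Both proposed folds are easy to sanity-check by brute force over a small bit width; a minimal standalone checker (hypothetical, not part of the tree) could look like:

  #include <cstdio>

  // Exhaustively verify the two proposed folds over all 8-bit values.
  // If either rewrite ever disagrees with the original, report it.
  int main() {
    for (unsigned x = 0; x < 256; ++x)
      for (unsigned y = 0; y < 256; ++y) {
        bool orig1 = ((x & y) == 0) || (x == 0);  // candidate: (x & y) == 0
        bool fold1 = ((x & y) == 0);
        bool orig2 = (x > y) && (x != 0);         // candidate: x > y
        bool fold2 = (x > y);
        if (orig1 != fold1 || orig2 != fold2) {
          std::printf("counterexample: x=%u y=%u\n", x, y);
          return 1;
        }
      }
    std::printf("both folds hold for all 8-bit inputs\n");
    return 0;
  }
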
diff --git a/lib/Target/Sparc/CMakeLists.txt b/lib/Target/Sparc/CMakeLists.txt
index c77ded4..5b87849 100644
--- a/lib/Target/Sparc/CMakeLists.txt
+++ b/lib/Target/Sparc/CMakeLists.txt
@@ -1,11 +1,12 @@
set(LLVM_TARGET_DEFINITIONS Sparc.td)
-tablegen(SparcGenRegisterInfo.inc -gen-register-info)
-tablegen(SparcGenInstrInfo.inc -gen-instr-info)
-tablegen(SparcGenAsmWriter.inc -gen-asm-writer)
-tablegen(SparcGenDAGISel.inc -gen-dag-isel)
-tablegen(SparcGenSubtargetInfo.inc -gen-subtarget)
-tablegen(SparcGenCallingConv.inc -gen-callingconv)
+llvm_tablegen(SparcGenRegisterInfo.inc -gen-register-info)
+llvm_tablegen(SparcGenInstrInfo.inc -gen-instr-info)
+llvm_tablegen(SparcGenAsmWriter.inc -gen-asm-writer)
+llvm_tablegen(SparcGenDAGISel.inc -gen-dag-isel)
+llvm_tablegen(SparcGenSubtargetInfo.inc -gen-subtarget)
+llvm_tablegen(SparcGenCallingConv.inc -gen-callingconv)
+add_public_tablegen_target(SparcCommonTableGen)
add_llvm_target(SparcCodeGen
DelaySlotFiller.cpp
@@ -21,5 +22,17 @@ add_llvm_target(SparcCodeGen
SparcSelectionDAGInfo.cpp
)
+add_llvm_library_dependencies(LLVMSparcCodeGen
+ LLVMAsmPrinter
+ LLVMCodeGen
+ LLVMCore
+ LLVMMC
+ LLVMSelectionDAG
+ LLVMSparcDesc
+ LLVMSparcInfo
+ LLVMSupport
+ LLVMTarget
+ )
+
add_subdirectory(TargetInfo)
add_subdirectory(MCTargetDesc)
diff --git a/lib/Target/Sparc/MCTargetDesc/CMakeLists.txt b/lib/Target/Sparc/MCTargetDesc/CMakeLists.txt
index 1e8c029..d3bdf0b 100644
--- a/lib/Target/Sparc/MCTargetDesc/CMakeLists.txt
+++ b/lib/Target/Sparc/MCTargetDesc/CMakeLists.txt
@@ -2,3 +2,11 @@ add_llvm_library(LLVMSparcDesc
SparcMCTargetDesc.cpp
SparcMCAsmInfo.cpp
)
+
+add_llvm_library_dependencies(LLVMSparcDesc
+ LLVMMC
+ LLVMSparcInfo
+ LLVMSupport
+ )
+
+add_dependencies(LLVMSparcDesc SparcCommonTableGen)
diff --git a/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.cpp b/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.cpp
index cb92a2b..cb2a7df 100644
--- a/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.cpp
+++ b/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.cpp
@@ -13,10 +13,11 @@
#include "SparcMCTargetDesc.h"
#include "SparcMCAsmInfo.h"
+#include "llvm/MC/MCCodeGenInfo.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSubtargetInfo.h"
-#include "llvm/Target/TargetRegistry.h"
+#include "llvm/Support/TargetRegistry.h"
#define GET_INSTRINFO_MC_DESC
#include "SparcGenInstrInfo.inc"
@@ -35,8 +36,10 @@ static MCInstrInfo *createSparcMCInstrInfo() {
return X;
}
-extern "C" void LLVMInitializeSparcMCInstrInfo() {
- TargetRegistry::RegisterMCInstrInfo(TheSparcTarget, createSparcMCInstrInfo);
+static MCRegisterInfo *createSparcMCRegisterInfo(StringRef TT) {
+ MCRegisterInfo *X = new MCRegisterInfo();
+ InitSparcMCRegisterInfo(X, SP::I7);
+ return X;
}
static MCSubtargetInfo *createSparcMCSubtargetInfo(StringRef TT, StringRef CPU,
@@ -46,12 +49,31 @@ static MCSubtargetInfo *createSparcMCSubtargetInfo(StringRef TT, StringRef CPU,
return X;
}
-extern "C" void LLVMInitializeSparcMCSubtargetInfo() {
- TargetRegistry::RegisterMCSubtargetInfo(TheSparcTarget,
- createSparcMCSubtargetInfo);
+static MCCodeGenInfo *createSparcMCCodeGenInfo(StringRef TT, Reloc::Model RM,
+ CodeModel::Model CM) {
+ MCCodeGenInfo *X = new MCCodeGenInfo();
+ X->InitMCCodeGenInfo(RM, CM);
+ return X;
}
-extern "C" void LLVMInitializeSparcMCAsmInfo() {
+extern "C" void LLVMInitializeSparcTargetMC() {
+ // Register the MC asm info.
RegisterMCAsmInfo<SparcELFMCAsmInfo> X(TheSparcTarget);
RegisterMCAsmInfo<SparcELFMCAsmInfo> Y(TheSparcV9Target);
+
+ // Register the MC codegen info.
+ TargetRegistry::RegisterMCCodeGenInfo(TheSparcTarget,
+ createSparcMCCodeGenInfo);
+ TargetRegistry::RegisterMCCodeGenInfo(TheSparcV9Target,
+ createSparcMCCodeGenInfo);
+
+ // Register the MC instruction info.
+ TargetRegistry::RegisterMCInstrInfo(TheSparcTarget, createSparcMCInstrInfo);
+
+ // Register the MC register info.
+ TargetRegistry::RegisterMCRegInfo(TheSparcTarget, createSparcMCRegisterInfo);
+
+ // Register the MC subtarget info.
+ TargetRegistry::RegisterMCSubtargetInfo(TheSparcTarget,
+ createSparcMCSubtargetInfo);
}
diff --git a/lib/Target/Sparc/SparcAsmPrinter.cpp b/lib/Target/Sparc/SparcAsmPrinter.cpp
index edde842..345e1bc 100644
--- a/lib/Target/Sparc/SparcAsmPrinter.cpp
+++ b/lib/Target/Sparc/SparcAsmPrinter.cpp
@@ -22,9 +22,9 @@
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/Target/Mangler.h"
-#include "llvm/Target/TargetRegistry.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/StringExtras.h"
+#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
diff --git a/lib/Target/Sparc/SparcISelLowering.cpp b/lib/Target/Sparc/SparcISelLowering.cpp
index 6f30d3f..d70b163 100644
--- a/lib/Target/Sparc/SparcISelLowering.cpp
+++ b/lib/Target/Sparc/SparcISelLowering.cpp
@@ -631,8 +631,8 @@ SparcTargetLowering::getSRetArgSize(SelectionDAG &DAG, SDValue Callee) const
assert(CalleeFn->hasStructRetAttr() &&
"Callee does not have the StructRet attribute.");
- const PointerType *Ty = cast<PointerType>(CalleeFn->arg_begin()->getType());
- const Type *ElementTy = Ty->getElementType();
+ PointerType *Ty = cast<PointerType>(CalleeFn->arg_begin()->getType());
+ Type *ElementTy = Ty->getElementType();
return getTargetData()->getTypeAllocSize(ElementTy);
}
@@ -748,8 +748,10 @@ SparcTargetLowering::SparcTargetLowering(TargetMachine &TM)
setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
setOperationAction(ISD::SELECT_CC, MVT::f64, Custom);
- // SPARC has no intrinsics for these particular operations.
+ // FIXME: There are instructions available for ATOMIC_FENCE
+ // on SparcV8 and later.
setOperationAction(ISD::MEMBARRIER, MVT::Other, Expand);
+ setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Expand);
setOperationAction(ISD::FSIN , MVT::f64, Expand);
setOperationAction(ISD::FCOS , MVT::f64, Expand);
diff --git a/lib/Target/Sparc/SparcInstrInfo.cpp b/lib/Target/Sparc/SparcInstrInfo.cpp
index 4e3ddf8..7a6bf50 100644
--- a/lib/Target/Sparc/SparcInstrInfo.cpp
+++ b/lib/Target/Sparc/SparcInstrInfo.cpp
@@ -17,8 +17,8 @@
#include "SparcSubtarget.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/Target/TargetRegistry.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/TargetRegistry.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
diff --git a/lib/Target/Sparc/SparcRegisterInfo.cpp b/lib/Target/Sparc/SparcRegisterInfo.cpp
index 0acdd2c..8c16251 100644
--- a/lib/Target/Sparc/SparcRegisterInfo.cpp
+++ b/lib/Target/Sparc/SparcRegisterInfo.cpp
@@ -17,7 +17,6 @@
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
-#include "llvm/CodeGen/MachineLocation.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Type.h"
@@ -31,7 +30,7 @@ using namespace llvm;
SparcRegisterInfo::SparcRegisterInfo(SparcSubtarget &st,
const TargetInstrInfo &tii)
- : SparcGenRegisterInfo(), Subtarget(st), TII(tii) {
+ : SparcGenRegisterInfo(SP::I7), Subtarget(st), TII(tii) {
}
const unsigned* SparcRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF)
@@ -113,10 +112,6 @@ SparcRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
void SparcRegisterInfo::
processFunctionBeforeFrameFinalized(MachineFunction &MF) const {}
-unsigned SparcRegisterInfo::getRARegister() const {
- return SP::I7;
-}
-
unsigned SparcRegisterInfo::getFrameRegister(const MachineFunction &MF) const {
return SP::I6;
}
@@ -130,11 +125,3 @@ unsigned SparcRegisterInfo::getEHHandlerRegister() const {
llvm_unreachable("What is the exception handler register");
return 0;
}
-
-int SparcRegisterInfo::getDwarfRegNum(unsigned RegNum, bool isEH) const {
- return SparcGenRegisterInfo::getDwarfRegNumFull(RegNum, 0);
-}
-
-int SparcRegisterInfo::getLLVMRegNum(unsigned DwarfRegNo, bool isEH) const {
- return SparcGenRegisterInfo::getLLVMRegNumFull(DwarfRegNo,0);
-}
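
The same refactoring recurs across targets in this import: the return-address register moves out of a virtual getRARegister() hook and into the TableGen'd base-class constructor (SparcGenRegisterInfo(SP::I7) above, SystemZGenRegisterInfo(0) further down). A self-contained sketch of the shape, with stand-ins in place of the generated code:

  #include <cstdio>

  // Stand-ins for the TableGen'd base class and register enum; the real
  // names (SparcGenRegisterInfo, SP::I7) come from generated code.
  namespace SP { enum { I7 = 31 }; }

  struct GenRegisterInfo {
    explicit GenRegisterInfo(unsigned RA) : RAReg(RA) {}
    unsigned getRARegister() const { return RAReg; }  // no virtual hook needed
  private:
    unsigned RAReg;
  };

  struct SparcLikeRegisterInfo : GenRegisterInfo {
    SparcLikeRegisterInfo() : GenRegisterInfo(SP::I7) {}  // RA baked in here
  };

  int main() {
    std::printf("RA register id: %u\n",
                SparcLikeRegisterInfo().getRARegister());
    return 0;
  }
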
diff --git a/lib/Target/Sparc/SparcRegisterInfo.h b/lib/Target/Sparc/SparcRegisterInfo.h
index ec9e63a..f845667 100644
--- a/lib/Target/Sparc/SparcRegisterInfo.h
+++ b/lib/Target/Sparc/SparcRegisterInfo.h
@@ -46,15 +46,11 @@ struct SparcRegisterInfo : public SparcGenRegisterInfo {
void processFunctionBeforeFrameFinalized(MachineFunction &MF) const;
// Debug information queries.
- unsigned getRARegister() const;
unsigned getFrameRegister(const MachineFunction &MF) const;
// Exception handling queries.
unsigned getEHExceptionRegister() const;
unsigned getEHHandlerRegister() const;
-
- int getDwarfRegNum(unsigned RegNum, bool isEH) const;
- int getLLVMRegNum(unsigned RegNum, bool isEH) const;
};
} // end namespace llvm
diff --git a/lib/Target/Sparc/SparcSubtarget.cpp b/lib/Target/Sparc/SparcSubtarget.cpp
index de647e8..6c501cf 100644
--- a/lib/Target/Sparc/SparcSubtarget.cpp
+++ b/lib/Target/Sparc/SparcSubtarget.cpp
@@ -13,7 +13,7 @@
#include "SparcSubtarget.h"
#include "Sparc.h"
-#include "llvm/Target/TargetRegistry.h"
+#include "llvm/Support/TargetRegistry.h"
#define GET_SUBTARGETINFO_TARGET_DESC
#define GET_SUBTARGETINFO_CTOR
diff --git a/lib/Target/Sparc/SparcTargetMachine.cpp b/lib/Target/Sparc/SparcTargetMachine.cpp
index cbe6d87..3d7b4a4 100644
--- a/lib/Target/Sparc/SparcTargetMachine.cpp
+++ b/lib/Target/Sparc/SparcTargetMachine.cpp
@@ -13,7 +13,7 @@
#include "Sparc.h"
#include "SparcTargetMachine.h"
#include "llvm/PassManager.h"
-#include "llvm/Target/TargetRegistry.h"
+#include "llvm/Support/TargetRegistry.h"
using namespace llvm;
extern "C" void LLVMInitializeSparcTarget() {
@@ -24,10 +24,11 @@ extern "C" void LLVMInitializeSparcTarget() {
/// SparcTargetMachine ctor - Create an ILP32 architecture model
///
-SparcTargetMachine::SparcTargetMachine(const Target &T, const std::string &TT,
- const std::string &CPU,
- const std::string &FS, bool is64bit)
- : LLVMTargetMachine(T, TT, CPU, FS),
+SparcTargetMachine::SparcTargetMachine(const Target &T, StringRef TT,
+ StringRef CPU, StringRef FS,
+ Reloc::Model RM, CodeModel::Model CM,
+ bool is64bit)
+ : LLVMTargetMachine(T, TT, CPU, FS, RM, CM),
Subtarget(TT, CPU, FS, is64bit),
DataLayout(Subtarget.getDataLayout()),
TLInfo(*this), TSInfo(*this), InstrInfo(Subtarget),
@@ -51,15 +52,15 @@ bool SparcTargetMachine::addPreEmitPass(PassManagerBase &PM,
}
SparcV8TargetMachine::SparcV8TargetMachine(const Target &T,
- const std::string &TT,
- const std::string &CPU,
- const std::string &FS)
- : SparcTargetMachine(T, TT, CPU, FS, false) {
+ StringRef TT, StringRef CPU,
+ StringRef FS, Reloc::Model RM,
+ CodeModel::Model CM)
+ : SparcTargetMachine(T, TT, CPU, FS, RM, CM, false) {
}
SparcV9TargetMachine::SparcV9TargetMachine(const Target &T,
- const std::string &TT,
- const std::string &CPU,
- const std::string &FS)
- : SparcTargetMachine(T, TT, CPU, FS, true) {
+ StringRef TT, StringRef CPU,
+ StringRef FS, Reloc::Model RM,
+ CodeModel::Model CM)
+ : SparcTargetMachine(T, TT, CPU, FS, RM, CM, true) {
}
diff --git a/lib/Target/Sparc/SparcTargetMachine.h b/lib/Target/Sparc/SparcTargetMachine.h
index 799fc49..3c907dd 100644
--- a/lib/Target/Sparc/SparcTargetMachine.h
+++ b/lib/Target/Sparc/SparcTargetMachine.h
@@ -33,9 +33,9 @@ class SparcTargetMachine : public LLVMTargetMachine {
SparcInstrInfo InstrInfo;
SparcFrameLowering FrameLowering;
public:
- SparcTargetMachine(const Target &T, const std::string &TT,
- const std::string &CPU, const std::string &FS,
- bool is64bit);
+ SparcTargetMachine(const Target &T, StringRef TT,
+ StringRef CPU, StringRef FS,
+ Reloc::Model RM, CodeModel::Model CM, bool is64bit);
virtual const SparcInstrInfo *getInstrInfo() const { return &InstrInfo; }
virtual const TargetFrameLowering *getFrameLowering() const {
@@ -62,16 +62,18 @@ public:
///
class SparcV8TargetMachine : public SparcTargetMachine {
public:
- SparcV8TargetMachine(const Target &T, const std::string &TT,
- const std::string &CPU, const std::string &FS);
+ SparcV8TargetMachine(const Target &T, StringRef TT,
+ StringRef CPU, StringRef FS,
+ Reloc::Model RM, CodeModel::Model CM);
};
/// SparcV9TargetMachine - Sparc 64-bit target machine
///
class SparcV9TargetMachine : public SparcTargetMachine {
public:
- SparcV9TargetMachine(const Target &T, const std::string &TT,
- const std::string &CPU, const std::string &FS);
+ SparcV9TargetMachine(const Target &T, StringRef TT,
+ StringRef CPU, StringRef FS,
+ Reloc::Model RM, CodeModel::Model CM);
};
} // end namespace llvm
diff --git a/lib/Target/Sparc/TargetInfo/CMakeLists.txt b/lib/Target/Sparc/TargetInfo/CMakeLists.txt
index 870b56a..a076023 100644
--- a/lib/Target/Sparc/TargetInfo/CMakeLists.txt
+++ b/lib/Target/Sparc/TargetInfo/CMakeLists.txt
@@ -4,4 +4,10 @@ add_llvm_library(LLVMSparcInfo
SparcTargetInfo.cpp
)
-add_dependencies(LLVMSparcInfo SparcCodeGenTable_gen)
+add_llvm_library_dependencies(LLVMSparcInfo
+ LLVMMC
+ LLVMSupport
+ LLVMTarget
+ )
+
+add_dependencies(LLVMSparcInfo SparcCommonTableGen)
diff --git a/lib/Target/Sparc/TargetInfo/SparcTargetInfo.cpp b/lib/Target/Sparc/TargetInfo/SparcTargetInfo.cpp
index 5c06f07..c9d5b7b 100644
--- a/lib/Target/Sparc/TargetInfo/SparcTargetInfo.cpp
+++ b/lib/Target/Sparc/TargetInfo/SparcTargetInfo.cpp
@@ -9,7 +9,7 @@
#include "Sparc.h"
#include "llvm/Module.h"
-#include "llvm/Target/TargetRegistry.h"
+#include "llvm/Support/TargetRegistry.h"
using namespace llvm;
Target llvm::TheSparcTarget;
diff --git a/lib/Target/SystemZ/CMakeLists.txt b/lib/Target/SystemZ/CMakeLists.txt
index f4bdbd8..7c09c0e 100644
--- a/lib/Target/SystemZ/CMakeLists.txt
+++ b/lib/Target/SystemZ/CMakeLists.txt
@@ -1,11 +1,12 @@
set(LLVM_TARGET_DEFINITIONS SystemZ.td)
-tablegen(SystemZGenRegisterInfo.inc -gen-register-info)
-tablegen(SystemZGenInstrInfo.inc -gen-instr-info)
-tablegen(SystemZGenAsmWriter.inc -gen-asm-writer)
-tablegen(SystemZGenDAGISel.inc -gen-dag-isel)
-tablegen(SystemZGenCallingConv.inc -gen-callingconv)
-tablegen(SystemZGenSubtargetInfo.inc -gen-subtarget)
+llvm_tablegen(SystemZGenRegisterInfo.inc -gen-register-info)
+llvm_tablegen(SystemZGenInstrInfo.inc -gen-instr-info)
+llvm_tablegen(SystemZGenAsmWriter.inc -gen-asm-writer)
+llvm_tablegen(SystemZGenDAGISel.inc -gen-dag-isel)
+llvm_tablegen(SystemZGenCallingConv.inc -gen-callingconv)
+llvm_tablegen(SystemZGenSubtargetInfo.inc -gen-subtarget)
+add_public_tablegen_target(SystemZCommonTableGen)
add_llvm_target(SystemZCodeGen
SystemZAsmPrinter.cpp
@@ -19,5 +20,17 @@ add_llvm_target(SystemZCodeGen
SystemZSelectionDAGInfo.cpp
)
+add_llvm_library_dependencies(LLVMSystemZCodeGen
+ LLVMAsmPrinter
+ LLVMCodeGen
+ LLVMCore
+ LLVMMC
+ LLVMSelectionDAG
+ LLVMSupport
+ LLVMSystemZDesc
+ LLVMSystemZInfo
+ LLVMTarget
+ )
+
add_subdirectory(TargetInfo)
add_subdirectory(MCTargetDesc)
diff --git a/lib/Target/SystemZ/MCTargetDesc/CMakeLists.txt b/lib/Target/SystemZ/MCTargetDesc/CMakeLists.txt
index 2ac9016..822df09 100644
--- a/lib/Target/SystemZ/MCTargetDesc/CMakeLists.txt
+++ b/lib/Target/SystemZ/MCTargetDesc/CMakeLists.txt
@@ -3,5 +3,12 @@ add_llvm_library(LLVMSystemZDesc
SystemZMCAsmInfo.cpp
)
+add_llvm_library_dependencies(LLVMSystemZDesc
+ LLVMMC
+ LLVMSystemZInfo
+ )
+
+add_dependencies(LLVMSystemZDesc SystemZCommonTableGen)
+
# Hack: we need to include 'main' target directory to grab private headers
include_directories(${CMAKE_CURRENT_SOURCE_DIR}/.. ${CMAKE_CURRENT_BINARY_DIR}/..)
diff --git a/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.cpp b/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.cpp
index 5a826a6..23fb1e0 100644
--- a/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.cpp
+++ b/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.cpp
@@ -13,10 +13,11 @@
#include "SystemZMCTargetDesc.h"
#include "SystemZMCAsmInfo.h"
+#include "llvm/MC/MCCodeGenInfo.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSubtargetInfo.h"
-#include "llvm/Target/TargetRegistry.h"
+#include "llvm/Support/TargetRegistry.h"
#define GET_INSTRINFO_MC_DESC
#include "SystemZGenInstrInfo.inc"
@@ -35,9 +36,10 @@ static MCInstrInfo *createSystemZMCInstrInfo() {
return X;
}
-extern "C" void LLVMInitializeSystemZMCInstrInfo() {
- TargetRegistry::RegisterMCInstrInfo(TheSystemZTarget,
- createSystemZMCInstrInfo);
+static MCRegisterInfo *createSystemZMCRegisterInfo(StringRef TT) {
+ MCRegisterInfo *X = new MCRegisterInfo();
+ InitSystemZMCRegisterInfo(X, 0);
+ return X;
}
static MCSubtargetInfo *createSystemZMCSubtargetInfo(StringRef TT,
@@ -48,11 +50,32 @@ static MCSubtargetInfo *createSystemZMCSubtargetInfo(StringRef TT,
return X;
}
-extern "C" void LLVMInitializeSystemZMCSubtargetInfo() {
- TargetRegistry::RegisterMCSubtargetInfo(TheSystemZTarget,
- createSystemZMCSubtargetInfo);
+static MCCodeGenInfo *createSystemZMCCodeGenInfo(StringRef TT, Reloc::Model RM,
+ CodeModel::Model CM) {
+ MCCodeGenInfo *X = new MCCodeGenInfo();
+ if (RM == Reloc::Default)
+ RM = Reloc::Static;
+ X->InitMCCodeGenInfo(RM, CM);
+ return X;
}
-extern "C" void LLVMInitializeSystemZMCAsmInfo() {
+extern "C" void LLVMInitializeSystemZTargetMC() {
+ // Register the MC asm info.
RegisterMCAsmInfo<SystemZMCAsmInfo> X(TheSystemZTarget);
+
+ // Register the MC codegen info.
+ TargetRegistry::RegisterMCCodeGenInfo(TheSystemZTarget,
+ createSystemZMCCodeGenInfo);
+
+ // Register the MC instruction info.
+ TargetRegistry::RegisterMCInstrInfo(TheSystemZTarget,
+ createSystemZMCInstrInfo);
+
+ // Register the MC register info.
+ TargetRegistry::RegisterMCRegInfo(TheSystemZTarget,
+ createSystemZMCRegisterInfo);
+
+ // Register the MC subtarget info.
+ TargetRegistry::RegisterMCSubtargetInfo(TheSystemZTarget,
+ createSystemZMCSubtargetInfo);
}
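
Worth noting in the hunk above: resolving Reloc::Default now happens when the MCCodeGenInfo is built, rather than in the TargetMachine constructor (the SystemZTargetMachine hunk further down deletes the old setRelocationModel call). The resolution itself is just a guarded default, sketched here with a stand-in enum:

  #include <cstdio>

  // Stand-ins for LLVM's Reloc::Model values.
  enum RelocModel { Default, Static, PIC_, DynamicNoPIC };

  // Mirrors createSystemZMCCodeGenInfo's defaulting: the "no preference"
  // value is mapped to Static before the code-gen info is initialized.
  RelocModel resolveSystemZReloc(RelocModel RM) {
    if (RM == Default)
      RM = Static;
    return RM;
  }

  int main() {
    std::printf("resolved model: %d\n", resolveSystemZReloc(Default)); // 1 == Static
    return 0;
  }
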
diff --git a/lib/Target/SystemZ/SystemZAsmPrinter.cpp b/lib/Target/SystemZ/SystemZAsmPrinter.cpp
index fd4d8b7..43dcdfc 100644
--- a/lib/Target/SystemZ/SystemZAsmPrinter.cpp
+++ b/lib/Target/SystemZ/SystemZAsmPrinter.cpp
@@ -28,10 +28,8 @@
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/Target/Mangler.h"
-#include "llvm/Target/TargetData.h"
-#include "llvm/Target/TargetLoweringObjectFile.h"
-#include "llvm/Target/TargetRegistry.h"
#include "llvm/ADT/SmallString.h"
+#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
diff --git a/lib/Target/SystemZ/SystemZISelLowering.cpp b/lib/Target/SystemZ/SystemZISelLowering.cpp
index 871c297..48ca99f 100644
--- a/lib/Target/SystemZ/SystemZISelLowering.cpp
+++ b/lib/Target/SystemZ/SystemZISelLowering.cpp
@@ -81,6 +81,7 @@ SystemZTargetLowering::SystemZTargetLowering(SystemZTargetMachine &tm) :
setSchedulingPreference(Sched::RegPressure);
setBooleanContents(ZeroOrOneBooleanContent);
+ setBooleanVectorContents(ZeroOrOneBooleanContent); // FIXME: Is this correct?
setOperationAction(ISD::BR_JT, MVT::Other, Expand);
setOperationAction(ISD::BRCOND, MVT::Other, Expand);
diff --git a/lib/Target/SystemZ/SystemZInstrInfo.cpp b/lib/Target/SystemZ/SystemZInstrInfo.cpp
index 99e2730..5f3dd80 100644
--- a/lib/Target/SystemZ/SystemZInstrInfo.cpp
+++ b/lib/Target/SystemZ/SystemZInstrInfo.cpp
@@ -21,8 +21,8 @@
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/PseudoSourceValue.h"
-#include "llvm/Target/TargetRegistry.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/TargetRegistry.h"
#define GET_INSTRINFO_CTOR
#include "SystemZGenInstrInfo.inc"
diff --git a/lib/Target/SystemZ/SystemZInstrInfo.td b/lib/Target/SystemZ/SystemZInstrInfo.td
index 11a39fc..580d65b 100644
--- a/lib/Target/SystemZ/SystemZInstrInfo.td
+++ b/lib/Target/SystemZ/SystemZInstrInfo.td
@@ -478,7 +478,7 @@ def MOV64rmm : RSYI<0x04EB,
"lmg\t{$from, $to, $dst}",
[]>;
-let isReMaterializable = 1, isAsCheapAsAMove = 1,
+let isReMaterializable = 1, neverHasSideEffects = 1, isAsCheapAsAMove = 1,
Constraints = "$src = $dst" in {
def MOV64Pr0_even : Pseudo<(outs GR64P:$dst), (ins GR64P:$src),
"lhi\t${dst:subreg_even}, 0",
diff --git a/lib/Target/SystemZ/SystemZRegisterInfo.cpp b/lib/Target/SystemZ/SystemZRegisterInfo.cpp
index 59692e8..b1050d4 100644
--- a/lib/Target/SystemZ/SystemZRegisterInfo.cpp
+++ b/lib/Target/SystemZ/SystemZRegisterInfo.cpp
@@ -33,7 +33,7 @@ using namespace llvm;
SystemZRegisterInfo::SystemZRegisterInfo(SystemZTargetMachine &tm,
const SystemZInstrInfo &tii)
- : SystemZGenRegisterInfo(), TM(tm), TII(tii) {
+ : SystemZGenRegisterInfo(0), TM(tm), TII(tii) {
}
const unsigned*
@@ -126,11 +126,6 @@ SystemZRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
MI.getOperand(i+1).ChangeToImmediate(Offset);
}
-unsigned SystemZRegisterInfo::getRARegister() const {
- assert(0 && "What is the return address register");
- return 0;
-}
-
unsigned
SystemZRegisterInfo::getFrameRegister(const MachineFunction &MF) const {
assert(0 && "What is the frame register");
@@ -146,13 +141,3 @@ unsigned SystemZRegisterInfo::getEHHandlerRegister() const {
assert(0 && "What is the exception handler register");
return 0;
}
-
-int SystemZRegisterInfo::getDwarfRegNum(unsigned RegNum, bool isEH) const {
- assert(0 && "What is the dwarf register number");
- return -1;
-}
-
-int SystemZRegisterInfo::getLLVMRegNum(unsigned DwarfRegNo, bool isEH) const {
- assert(0 && "What is the dwarf register number");
- return -1;
-}
diff --git a/lib/Target/SystemZ/SystemZRegisterInfo.h b/lib/Target/SystemZ/SystemZRegisterInfo.h
index 2e262e1..03935b2 100644
--- a/lib/Target/SystemZ/SystemZRegisterInfo.h
+++ b/lib/Target/SystemZ/SystemZRegisterInfo.h
@@ -48,15 +48,11 @@ struct SystemZRegisterInfo : public SystemZGenRegisterInfo {
int SPAdj, RegScavenger *RS = NULL) const;
// Debug information queries.
- unsigned getRARegister() const;
unsigned getFrameRegister(const MachineFunction &MF) const;
// Exception handling queries.
unsigned getEHExceptionRegister() const;
unsigned getEHHandlerRegister() const;
-
- int getDwarfRegNum(unsigned RegNum, bool isEH) const;
- int getLLVMRegNum(unsigned RegNum, bool isEH) const;
};
} // end namespace llvm
diff --git a/lib/Target/SystemZ/SystemZSubtarget.cpp b/lib/Target/SystemZ/SystemZSubtarget.cpp
index b3ed066..0845510 100644
--- a/lib/Target/SystemZ/SystemZSubtarget.cpp
+++ b/lib/Target/SystemZ/SystemZSubtarget.cpp
@@ -15,7 +15,7 @@
#include "SystemZ.h"
#include "llvm/GlobalValue.h"
#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetRegistry.h"
+#include "llvm/Support/TargetRegistry.h"
#define GET_SUBTARGETINFO_TARGET_DESC
#define GET_SUBTARGETINFO_CTOR
diff --git a/lib/Target/SystemZ/SystemZTargetMachine.cpp b/lib/Target/SystemZ/SystemZTargetMachine.cpp
index 48298cc..e390f06 100644
--- a/lib/Target/SystemZ/SystemZTargetMachine.cpp
+++ b/lib/Target/SystemZ/SystemZTargetMachine.cpp
@@ -10,7 +10,7 @@
#include "SystemZTargetMachine.h"
#include "SystemZ.h"
#include "llvm/PassManager.h"
-#include "llvm/Target/TargetRegistry.h"
+#include "llvm/Support/TargetRegistry.h"
using namespace llvm;
extern "C" void LLVMInitializeSystemZTarget() {
@@ -21,18 +21,15 @@ extern "C" void LLVMInitializeSystemZTarget() {
/// SystemZTargetMachine ctor - Create an ILP64 architecture model
///
SystemZTargetMachine::SystemZTargetMachine(const Target &T,
- const std::string &TT,
- const std::string &CPU,
- const std::string &FS)
- : LLVMTargetMachine(T, TT, CPU, FS),
+ StringRef TT, StringRef CPU,
+ StringRef FS, Reloc::Model RM,
+ CodeModel::Model CM)
+ : LLVMTargetMachine(T, TT, CPU, FS, RM, CM),
Subtarget(TT, CPU, FS),
DataLayout("E-p:64:64:64-i8:8:16-i16:16:16-i32:32:32-i64:64:64-f32:32:32"
"-f64:64:64-f128:128:128-a0:16:16-n32:64"),
InstrInfo(*this), TLInfo(*this), TSInfo(*this),
FrameLowering(Subtarget) {
-
- if (getRelocationModel() == Reloc::Default)
- setRelocationModel(Reloc::Static);
}
bool SystemZTargetMachine::addInstSelector(PassManagerBase &PM,
diff --git a/lib/Target/SystemZ/SystemZTargetMachine.h b/lib/Target/SystemZ/SystemZTargetMachine.h
index e40b556..43dce4b 100644
--- a/lib/Target/SystemZ/SystemZTargetMachine.h
+++ b/lib/Target/SystemZ/SystemZTargetMachine.h
@@ -37,8 +37,9 @@ class SystemZTargetMachine : public LLVMTargetMachine {
SystemZSelectionDAGInfo TSInfo;
SystemZFrameLowering FrameLowering;
public:
- SystemZTargetMachine(const Target &T, const std::string &TT,
- const std::string &CPU, const std::string &FS);
+ SystemZTargetMachine(const Target &T, StringRef TT,
+ StringRef CPU, StringRef FS,
+ Reloc::Model RM, CodeModel::Model CM);
virtual const TargetFrameLowering *getFrameLowering() const {
return &FrameLowering;
diff --git a/lib/Target/SystemZ/TargetInfo/CMakeLists.txt b/lib/Target/SystemZ/TargetInfo/CMakeLists.txt
index 743d8d3..3180708 100644
--- a/lib/Target/SystemZ/TargetInfo/CMakeLists.txt
+++ b/lib/Target/SystemZ/TargetInfo/CMakeLists.txt
@@ -4,4 +4,10 @@ add_llvm_library(LLVMSystemZInfo
SystemZTargetInfo.cpp
)
-add_dependencies(LLVMSystemZInfo SystemZCodeGenTable_gen)
+add_llvm_library_dependencies(LLVMSystemZInfo
+ LLVMMC
+ LLVMSupport
+ LLVMTarget
+ )
+
+add_dependencies(LLVMSystemZInfo SystemZCommonTableGen)
diff --git a/lib/Target/SystemZ/TargetInfo/SystemZTargetInfo.cpp b/lib/Target/SystemZ/TargetInfo/SystemZTargetInfo.cpp
index 8272b11..da99282 100644
--- a/lib/Target/SystemZ/TargetInfo/SystemZTargetInfo.cpp
+++ b/lib/Target/SystemZ/TargetInfo/SystemZTargetInfo.cpp
@@ -9,7 +9,7 @@
#include "SystemZ.h"
#include "llvm/Module.h"
-#include "llvm/Target/TargetRegistry.h"
+#include "llvm/Support/TargetRegistry.h"
using namespace llvm;
Target llvm::TheSystemZTarget;
diff --git a/lib/Target/Target.cpp b/lib/Target/Target.cpp
index a42ce54..a2b83bc 100644
--- a/lib/Target/Target.cpp
+++ b/lib/Target/Target.cpp
@@ -17,6 +17,7 @@
#include "llvm/InitializePasses.h"
#include "llvm/PassManager.h"
#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetLibraryInfo.h"
#include "llvm/LLVMContext.h"
#include <cstring>
@@ -39,6 +40,11 @@ void LLVMAddTargetData(LLVMTargetDataRef TD, LLVMPassManagerRef PM) {
unwrap(PM)->add(new TargetData(*unwrap(TD)));
}
+void LLVMAddTargetLibraryInfo(LLVMTargetLibraryInfoRef TLI,
+ LLVMPassManagerRef PM) {
+ unwrap(PM)->add(new TargetLibraryInfo(*unwrap(TLI)));
+}
+
char *LLVMCopyStringRepOfTargetData(LLVMTargetDataRef TD) {
std::string StringRep = unwrap(TD)->getStringRepresentation();
return strdup(StringRep.c_str());
@@ -87,13 +93,13 @@ unsigned LLVMPreferredAlignmentOfGlobal(LLVMTargetDataRef TD,
unsigned LLVMElementAtOffset(LLVMTargetDataRef TD, LLVMTypeRef StructTy,
unsigned long long Offset) {
- const StructType *STy = unwrap<StructType>(StructTy);
+ StructType *STy = unwrap<StructType>(StructTy);
return unwrap(TD)->getStructLayout(STy)->getElementContainingOffset(Offset);
}
unsigned long long LLVMOffsetOfElement(LLVMTargetDataRef TD, LLVMTypeRef StructTy,
unsigned Element) {
- const StructType *STy = unwrap<StructType>(StructTy);
+ StructType *STy = unwrap<StructType>(StructTy);
return unwrap(TD)->getStructLayout(STy)->getElementOffset(Element);
}
diff --git a/lib/Target/TargetAsmInfo.cpp b/lib/Target/TargetAsmInfo.cpp
deleted file mode 100644
index a97b0e8..0000000
--- a/lib/Target/TargetAsmInfo.cpp
+++ /dev/null
@@ -1,23 +0,0 @@
-//===-- llvm/Target/TargetAsmInfo.cpp - Target Assembly Info --------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/Target/TargetAsmInfo.h"
-#include "llvm/Target/TargetData.h"
-#include "llvm/Target/TargetLowering.h"
-#include "llvm/Target/TargetLoweringObjectFile.h"
-#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetRegisterInfo.h"
-using namespace llvm;
-
-TargetAsmInfo::TargetAsmInfo(const TargetMachine &TM) {
- TLOF = &TM.getTargetLowering()->getObjFileLowering();
- TFI = TM.getFrameLowering();
- TRI = TM.getRegisterInfo();
- TFI->getInitialFrameState(InitialFrameState);
-}
diff --git a/lib/Target/TargetData.cpp b/lib/Target/TargetData.cpp
index 17d022a..bd6a6b6 100644
--- a/lib/Target/TargetData.cpp
+++ b/lib/Target/TargetData.cpp
@@ -41,7 +41,7 @@ char TargetData::ID = 0;
// Support for StructLayout
//===----------------------------------------------------------------------===//
-StructLayout::StructLayout(const StructType *ST, const TargetData &TD) {
+StructLayout::StructLayout(StructType *ST, const TargetData &TD) {
assert(!ST->isOpaque() && "Cannot get layout of opaque structs");
StructAlignment = 0;
StructSize = 0;
@@ -49,7 +49,7 @@ StructLayout::StructLayout(const StructType *ST, const TargetData &TD) {
// Loop over each of the elements, placing them in memory.
for (unsigned i = 0, e = NumElements; i != e; ++i) {
- const Type *Ty = ST->getElementType(i);
+ Type *Ty = ST->getElementType(i);
unsigned TyAlign = ST->isPacked() ? 1 : TD.getABITypeAlignment(Ty);
// Add padding if necessary to align the data element properly.
@@ -139,6 +139,7 @@ void TargetData::init(StringRef Desc) {
PointerMemSize = 8;
PointerABIAlign = 8;
PointerPrefAlign = PointerABIAlign;
+ StackNaturalAlign = 0;
// Default alignments
setAlignment(INTEGER_ALIGN, 1, 1, 1); // i1
@@ -218,7 +219,12 @@ void TargetData::init(StringRef Desc) {
Token = Split.second;
} while (!Specifier.empty() || !Token.empty());
break;
-
+ case 'S': // Stack natural alignment.
+ StackNaturalAlign = getInt(Specifier.substr(1));
+ StackNaturalAlign /= 8;
+ // FIXME: Should we really be truncating these alignments and
+ // sizes silently?
+ break;
default:
break;
}
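
As the new case shows, the 'S' token carries the stack's natural alignment in bits and is stored in bytes, so a (hypothetical) layout fragment "S128" yields StackNaturalAlign == 16. A standalone re-derivation of just that field:

  #include <cstdio>
  #include <cstdlib>

  // Parse an "S<bits>" data-layout token into a byte alignment, following
  // the bits-to-bytes convention above. Returns 0 for any other token.
  unsigned parseStackNaturalAlign(const char *Spec) {
    if (Spec[0] != 'S')
      return 0;
    unsigned Bits = std::strtoul(Spec + 1, nullptr, 10);
    return Bits / 8;  // note: truncates, exactly as the FIXME worries
  }

  int main() {
    std::printf("S128 -> %u bytes\n", parseStackNaturalAlign("S128"));  // 16
    return 0;
  }
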
@@ -261,7 +267,7 @@ TargetData::setAlignment(AlignTypeEnum align_type, unsigned abi_align,
/// preferred if ABIInfo = false) the target wants for the specified datatype.
unsigned TargetData::getAlignmentInfo(AlignTypeEnum AlignType,
uint32_t BitWidth, bool ABIInfo,
- const Type *Ty) const {
+ Type *Ty) const {
// Check to see if we have an exact match and remember the best match we see.
int BestMatchIdx = -1;
int LargestInt = -1;
@@ -315,7 +321,7 @@ unsigned TargetData::getAlignmentInfo(AlignTypeEnum AlignType,
namespace {
class StructLayoutMap {
- typedef DenseMap<const StructType*, StructLayout*> LayoutInfoTy;
+ typedef DenseMap<StructType*, StructLayout*> LayoutInfoTy;
LayoutInfoTy LayoutInfo;
public:
@@ -329,7 +335,7 @@ public:
}
}
- StructLayout *&operator[](const StructType *STy) {
+ StructLayout *&operator[](StructType *STy) {
return LayoutInfo[STy];
}
@@ -343,7 +349,7 @@ TargetData::~TargetData() {
delete static_cast<StructLayoutMap*>(LayoutMap);
}
-const StructLayout *TargetData::getStructLayout(const StructType *Ty) const {
+const StructLayout *TargetData::getStructLayout(StructType *Ty) const {
if (!LayoutMap)
LayoutMap = new StructLayoutMap();
@@ -372,7 +378,9 @@ std::string TargetData::getStringRepresentation() const {
OS << (LittleEndian ? "e" : "E")
<< "-p:" << PointerMemSize*8 << ':' << PointerABIAlign*8
- << ':' << PointerPrefAlign*8;
+ << ':' << PointerPrefAlign*8
+ << "-S" << StackNaturalAlign*8;
+
for (unsigned i = 0, e = Alignments.size(); i != e; ++i) {
const TargetAlignElem &AI = Alignments[i];
OS << '-' << (char)AI.AlignType << AI.TypeBitWidth << ':'
@@ -389,14 +397,14 @@ std::string TargetData::getStringRepresentation() const {
}
-uint64_t TargetData::getTypeSizeInBits(const Type *Ty) const {
+uint64_t TargetData::getTypeSizeInBits(Type *Ty) const {
assert(Ty->isSized() && "Cannot getTypeInfo() on a type that is unsized!");
switch (Ty->getTypeID()) {
case Type::LabelTyID:
case Type::PointerTyID:
return getPointerSizeInBits();
case Type::ArrayTyID: {
- const ArrayType *ATy = cast<ArrayType>(Ty);
+ ArrayType *ATy = cast<ArrayType>(Ty);
return getTypeAllocSizeInBits(ATy->getElementType())*ATy->getNumElements();
}
case Type::StructTyID:
@@ -435,7 +443,7 @@ uint64_t TargetData::getTypeSizeInBits(const Type *Ty) const {
Get the ABI (\a abi_or_pref == true) or preferred alignment (\a abi_or_pref
== false) for the requested type \a Ty.
*/
-unsigned TargetData::getAlignment(const Type *Ty, bool abi_or_pref) const {
+unsigned TargetData::getAlignment(Type *Ty, bool abi_or_pref) const {
int AlignType = -1;
assert(Ty->isSized() && "Cannot getTypeInfo() on a type that is unsized!");
@@ -485,7 +493,7 @@ unsigned TargetData::getAlignment(const Type *Ty, bool abi_or_pref) const {
abi_or_pref, Ty);
}
-unsigned TargetData::getABITypeAlignment(const Type *Ty) const {
+unsigned TargetData::getABITypeAlignment(Type *Ty) const {
return getAlignment(Ty, true);
}
@@ -496,7 +504,7 @@ unsigned TargetData::getABIIntegerTypeAlignment(unsigned BitWidth) const {
}
-unsigned TargetData::getCallFrameTypeAlignment(const Type *Ty) const {
+unsigned TargetData::getCallFrameTypeAlignment(Type *Ty) const {
for (unsigned i = 0, e = Alignments.size(); i != e; ++i)
if (Alignments[i].AlignType == STACK_ALIGN)
return Alignments[i].ABIAlign;
@@ -504,11 +512,11 @@ unsigned TargetData::getCallFrameTypeAlignment(const Type *Ty) const {
return getABITypeAlignment(Ty);
}
-unsigned TargetData::getPrefTypeAlignment(const Type *Ty) const {
+unsigned TargetData::getPrefTypeAlignment(Type *Ty) const {
return getAlignment(Ty, false);
}
-unsigned TargetData::getPreferredTypeAlignmentShift(const Type *Ty) const {
+unsigned TargetData::getPreferredTypeAlignmentShift(Type *Ty) const {
unsigned Align = getPrefTypeAlignment(Ty);
assert(!(Align & (Align-1)) && "Alignment is not a power of two!");
return Log2_32(Align);
@@ -521,16 +529,17 @@ IntegerType *TargetData::getIntPtrType(LLVMContext &C) const {
}
-uint64_t TargetData::getIndexedOffset(const Type *ptrTy, Value* const* Indices,
- unsigned NumIndices) const {
- const Type *Ty = ptrTy;
+uint64_t TargetData::getIndexedOffset(Type *ptrTy,
+ ArrayRef<Value *> Indices) const {
+ Type *Ty = ptrTy;
assert(Ty->isPointerTy() && "Illegal argument for getIndexedOffset()");
uint64_t Result = 0;
generic_gep_type_iterator<Value* const*>
- TI = gep_type_begin(ptrTy, Indices, Indices+NumIndices);
- for (unsigned CurIDX = 0; CurIDX != NumIndices; ++CurIDX, ++TI) {
- if (const StructType *STy = dyn_cast<StructType>(*TI)) {
+ TI = gep_type_begin(ptrTy, Indices);
+ for (unsigned CurIDX = 0, EndIDX = Indices.size(); CurIDX != EndIDX;
+ ++CurIDX, ++TI) {
+ if (StructType *STy = dyn_cast<StructType>(*TI)) {
assert(Indices[CurIDX]->getType() ==
Type::getInt32Ty(ptrTy->getContext()) &&
"Illegal struct idx");
@@ -561,7 +570,7 @@ uint64_t TargetData::getIndexedOffset(const Type *ptrTy, Value* const* Indices,
/// global. This includes an explicitly requested alignment (if the global
/// has one).
unsigned TargetData::getPreferredAlignment(const GlobalVariable *GV) const {
- const Type *ElemType = GV->getType()->getElementType();
+ Type *ElemType = GV->getType()->getElementType();
unsigned Alignment = getPrefTypeAlignment(ElemType);
unsigned GVAlignment = GV->getAlignment();
if (GVAlignment >= Alignment) {
diff --git a/lib/Target/TargetFrameLowering.cpp b/lib/Target/TargetFrameLowering.cpp
index 19fd581..122f869 100644
--- a/lib/Target/TargetFrameLowering.cpp
+++ b/lib/Target/TargetFrameLowering.cpp
@@ -23,14 +23,6 @@ using namespace llvm;
TargetFrameLowering::~TargetFrameLowering() {
}
-/// getInitialFrameState - Returns a list of machine moves that are assumed
-/// on entry to a function.
-void
-TargetFrameLowering::getInitialFrameState(std::vector<MachineMove> &Moves)
- const {
- // Default is to do nothing.
-}
-
/// getFrameIndexOffset - Returns the displacement from the frame register to
/// the stack frame of the specified index. This is the default implementation
/// which is overridden for some targets.
diff --git a/lib/Target/TargetLoweringObjectFile.cpp b/lib/Target/TargetLoweringObjectFile.cpp
index 703431b..56b7b69 100644
--- a/lib/Target/TargetLoweringObjectFile.cpp
+++ b/lib/Target/TargetLoweringObjectFile.cpp
@@ -35,34 +35,16 @@ using namespace llvm;
// Generic Code
//===----------------------------------------------------------------------===//
-TargetLoweringObjectFile::TargetLoweringObjectFile() :
- Ctx(0),
- TextSection(0),
- DataSection(0),
- BSSSection(0),
- ReadOnlySection(0),
- StaticCtorSection(0),
- StaticDtorSection(0),
- LSDASection(0),
- CompactUnwindSection(0),
- DwarfAbbrevSection(0),
- DwarfInfoSection(0),
- DwarfLineSection(0),
- DwarfFrameSection(0),
- DwarfPubNamesSection(0),
- DwarfPubTypesSection(0),
- DwarfDebugInlineSection(0),
- DwarfStrSection(0),
- DwarfLocSection(0),
- DwarfARangesSection(0),
- DwarfRangesSection(0),
- DwarfMacroInfoSection(0),
- TLSExtraDataSection(0),
- CommDirectiveSupportsAlignment(true),
- SupportsWeakOmittedEHFrame(true),
- IsFunctionEHFrameSymbolPrivate(true) {
+/// Initialize - this method must be called before any actual lowering is
+/// done. This specifies the current context for codegen, and gives the
+/// lowering implementations a chance to set up their default sections.
+void TargetLoweringObjectFile::Initialize(MCContext &ctx,
+ const TargetMachine &TM) {
+ Ctx = &ctx;
+ InitMCObjectFileInfo(TM.getTargetTriple(),
+ TM.getRelocationModel(), TM.getCodeModel(), *Ctx);
}
-
+
TargetLoweringObjectFile::~TargetLoweringObjectFile() {
}
@@ -93,7 +75,7 @@ static bool isSuitableForBSS(const GlobalVariable *GV) {
/// known to have a type that is an array of 1/2/4 byte elements) ends with a
/// nul value and contains no other nuls in it.
static bool IsNullTerminatedString(const Constant *C) {
- const ArrayType *ATy = cast<ArrayType>(C->getType());
+ ArrayType *ATy = cast<ArrayType>(C->getType());
// First check: do we have a constant array of i8 terminated with zero
if (const ConstantArray *CVA = dyn_cast<ConstantArray>(C)) {
@@ -188,8 +170,8 @@ SectionKind TargetLoweringObjectFile::getKindForGlobal(const GlobalValue *GV,
// If initializer is a null-terminated string, put it in a "cstring"
// section of the right width.
- if (const ArrayType *ATy = dyn_cast<ArrayType>(C->getType())) {
- if (const IntegerType *ITy =
+ if (ArrayType *ATy = dyn_cast<ArrayType>(C->getType())) {
+ if (IntegerType *ITy =
dyn_cast<IntegerType>(ATy->getElementType())) {
if ((ITy->getBitWidth() == 8 || ITy->getBitWidth() == 16 ||
ITy->getBitWidth() == 32) &&
@@ -341,20 +323,3 @@ getExprForDwarfReference(const MCSymbol *Sym, unsigned Encoding,
}
}
}
-
-unsigned TargetLoweringObjectFile::getPersonalityEncoding() const {
- return dwarf::DW_EH_PE_absptr;
-}
-
-unsigned TargetLoweringObjectFile::getLSDAEncoding() const {
- return dwarf::DW_EH_PE_absptr;
-}
-
-unsigned TargetLoweringObjectFile::getFDEEncoding(bool CFI) const {
- return dwarf::DW_EH_PE_absptr;
-}
-
-unsigned TargetLoweringObjectFile::getTTypeEncoding() const {
- return dwarf::DW_EH_PE_absptr;
-}
-
diff --git a/lib/Target/TargetMachine.cpp b/lib/Target/TargetMachine.cpp
index 74a1f4e..fe8a7ce 100644
--- a/lib/Target/TargetMachine.cpp
+++ b/lib/Target/TargetMachine.cpp
@@ -40,8 +40,6 @@ namespace llvm {
bool JITExceptionHandling;
bool JITEmitDebugInfo;
bool JITEmitDebugInfoToDisk;
- Reloc::Model RelocationModel;
- CodeModel::Model CMModel;
bool GuaranteedTailCallOpt;
unsigned StackAlignmentOverride;
bool RealignStack;
@@ -49,6 +47,7 @@ namespace llvm {
bool StrongPHIElim;
bool HasDivModLibcall;
bool AsmVerbosityDefault(false);
+ bool EnableSegmentedStacks;
}
static cl::opt<bool, true>
@@ -143,38 +142,6 @@ EmitJitDebugInfoToDisk("jit-emit-debug-to-disk",
cl::location(JITEmitDebugInfoToDisk),
cl::init(false));
-static cl::opt<llvm::Reloc::Model, true>
-DefRelocationModel("relocation-model",
- cl::desc("Choose relocation model"),
- cl::location(RelocationModel),
- cl::init(Reloc::Default),
- cl::values(
- clEnumValN(Reloc::Default, "default",
- "Target default relocation model"),
- clEnumValN(Reloc::Static, "static",
- "Non-relocatable code"),
- clEnumValN(Reloc::PIC_, "pic",
- "Fully relocatable, position independent code"),
- clEnumValN(Reloc::DynamicNoPIC, "dynamic-no-pic",
- "Relocatable external references, non-relocatable code"),
- clEnumValEnd));
-static cl::opt<llvm::CodeModel::Model, true>
-DefCodeModel("code-model",
- cl::desc("Choose code model"),
- cl::location(CMModel),
- cl::init(CodeModel::Default),
- cl::values(
- clEnumValN(CodeModel::Default, "default",
- "Target default code model"),
- clEnumValN(CodeModel::Small, "small",
- "Small code model"),
- clEnumValN(CodeModel::Kernel, "kernel",
- "Kernel code model"),
- clEnumValN(CodeModel::Medium, "medium",
- "Medium code model"),
- clEnumValN(CodeModel::Large, "large",
- "Large code model"),
- clEnumValEnd));
static cl::opt<bool, true>
EnableGuaranteedTailCallOpt("tailcallopt",
cl::desc("Turn fastcc calls into tail calls by (potentially) changing ABI."),
@@ -212,13 +179,20 @@ static cl::opt<bool>
FunctionSections("ffunction-sections",
cl::desc("Emit functions into separate sections"),
cl::init(false));
+static cl::opt<bool, true>
+SegmentedStacks("segmented-stacks",
+ cl::desc("Use segmented stacks if possible."),
+ cl::location(EnableSegmentedStacks),
+ cl::init(false));
+
//---------------------------------------------------------------------------
// TargetMachine Class
//
TargetMachine::TargetMachine(const Target &T,
StringRef TT, StringRef CPU, StringRef FS)
- : TheTarget(T), TargetTriple(TT), TargetCPU(CPU), TargetFS(FS), AsmInfo(0),
+ : TheTarget(T), TargetTriple(TT), TargetCPU(CPU), TargetFS(FS),
+ CodeGenInfo(0), AsmInfo(0),
MCRelaxAll(false),
MCNoExecStack(false),
MCSaveTempLabels(false),
@@ -231,29 +205,24 @@ TargetMachine::TargetMachine(const Target &T,
}
TargetMachine::~TargetMachine() {
+ delete CodeGenInfo;
delete AsmInfo;
}
/// getRelocationModel - Returns the code generation relocation model. The
/// choices are static, PIC, dynamic-no-pic, and target default.
-Reloc::Model TargetMachine::getRelocationModel() {
- return RelocationModel;
-}
-
-/// setRelocationModel - Sets the code generation relocation model.
-void TargetMachine::setRelocationModel(Reloc::Model Model) {
- RelocationModel = Model;
+Reloc::Model TargetMachine::getRelocationModel() const {
+ if (!CodeGenInfo)
+ return Reloc::Default;
+ return CodeGenInfo->getRelocationModel();
}
/// getCodeModel - Returns the code model. The choices are small, kernel,
/// medium, large, and target default.
-CodeModel::Model TargetMachine::getCodeModel() {
- return CMModel;
-}
-
-/// setCodeModel - Sets the code model.
-void TargetMachine::setCodeModel(CodeModel::Model Model) {
- CMModel = Model;
+CodeModel::Model TargetMachine::getCodeModel() const {
+ if (!CodeGenInfo)
+ return CodeModel::Default;
+ return CodeGenInfo->getCodeModel();
}
bool TargetMachine::getAsmVerbosityDefault() {
diff --git a/lib/Target/TargetRegisterInfo.cpp b/lib/Target/TargetRegisterInfo.cpp
index 90a8f8d..67239b8 100644
--- a/lib/Target/TargetRegisterInfo.cpp
+++ b/lib/Target/TargetRegisterInfo.cpp
@@ -98,44 +98,25 @@ BitVector TargetRegisterInfo::getAllocatableSet(const MachineFunction &MF,
}
const TargetRegisterClass *
-llvm::getCommonSubClass(const TargetRegisterClass *A,
- const TargetRegisterClass *B) {
- // First take care of the trivial cases
+TargetRegisterInfo::getCommonSubClass(const TargetRegisterClass *A,
+ const TargetRegisterClass *B) const {
+ // First take care of the trivial cases.
if (A == B)
return A;
if (!A || !B)
return 0;
- // If B is a subclass of A, it will be handled in the loop below
- if (B->hasSubClass(A))
- return A;
+ // Register classes are ordered topologically, so the largest common
+ // sub-class is the common sub-class with the smallest ID.
+ const unsigned *SubA = A->getSubClassMask();
+ const unsigned *SubB = B->getSubClassMask();
- const TargetRegisterClass *Best = 0;
- for (TargetRegisterClass::sc_iterator I = A->subclasses_begin();
- const TargetRegisterClass *X = *I; ++I) {
- if (X == B)
- return B; // B is a subclass of A
-
- // X must be a common subclass of A and B
- if (!B->hasSubClass(X))
- continue;
-
- // A superclass is definitely better.
- if (!Best || Best->hasSuperClass(X)) {
- Best = X;
- continue;
- }
-
- // A subclass is definitely worse
- if (Best->hasSubClass(X))
- continue;
-
- // Best and *I have no super/sub class relation - pick the larger class, or
- // the smaller spill size.
- int nb = std::distance(Best->begin(), Best->end());
- int ni = std::distance(X->begin(), X->end());
- if (ni>nb || (ni==nb && X->getSize() < Best->getSize()))
- Best = X;
- }
- return Best;
+ // We could start the search from max(A.ID, B.ID), but we are only going to
+ // execute 2-3 iterations anyway.
+ for (unsigned Base = 0, BaseE = getNumRegClasses(); Base < BaseE; Base += 32)
+ if (unsigned Common = *SubA++ & *SubB++)
+ return getRegClass(Base + CountTrailingZeros_32(Common));
+
+ // No common sub-class exists.
+ return NULL;
}
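
The replacement exploits the topological ordering noted in the comment: intersect the two sub-class bit masks one 32-bit word at a time, and the lowest set bit of the first non-zero intersection is the smallest class ID, i.e. the largest common sub-class. The same scan over plain arrays (illustrative names, not the LLVM API; __builtin_ctz assumes GCC/Clang):

  #include <cstdint>
  #include <cstdio>

  // Return the smallest index set in both masks, or -1 if they are disjoint.
  // Each 32-bit word covers 32 consecutive IDs, mirroring the loop above.
  int firstCommonID(const uint32_t *A, const uint32_t *B, unsigned NumIDs) {
    for (unsigned Base = 0; Base < NumIDs; Base += 32, ++A, ++B)
      if (uint32_t Common = *A & *B)
        return static_cast<int>(Base) + __builtin_ctz(Common);
    return -1;
  }

  int main() {
    uint32_t A[] = {0xCu}, B[] = {0x6u};  // bits {2,3} vs. {1,2}: share bit 2
    std::printf("common ID: %d\n", firstCommonID(A, B, 32));  // prints 2
    return 0;
  }
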
diff --git a/lib/Target/X86/AsmParser/CMakeLists.txt b/lib/Target/X86/AsmParser/CMakeLists.txt
index 40dbdd7..94aca7a 100644
--- a/lib/Target/X86/AsmParser/CMakeLists.txt
+++ b/lib/Target/X86/AsmParser/CMakeLists.txt
@@ -4,4 +4,13 @@ add_llvm_library(LLVMX86AsmParser
X86AsmLexer.cpp
X86AsmParser.cpp
)
-add_dependencies(LLVMX86AsmParser X86CodeGenTable_gen)
+
+add_llvm_library_dependencies(LLVMX86AsmParser
+ LLVMMC
+ LLVMMCParser
+ LLVMSupport
+ LLVMX86Desc
+ LLVMX86Info
+ )
+
+add_dependencies(LLVMX86AsmParser X86CommonTableGen)
diff --git a/lib/Target/X86/AsmParser/X86AsmLexer.cpp b/lib/Target/X86/AsmParser/X86AsmLexer.cpp
index ec73087..1eaccff 100644
--- a/lib/Target/X86/AsmParser/X86AsmLexer.cpp
+++ b/lib/Target/X86/AsmParser/X86AsmLexer.cpp
@@ -7,20 +7,20 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/StringExtras.h"
-#include "llvm/Target/TargetAsmLexer.h"
-#include "llvm/Target/TargetRegistry.h"
+#include "MCTargetDesc/X86BaseInfo.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCParser/MCAsmLexer.h"
#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
-#include "X86.h"
+#include "llvm/MC/MCTargetAsmLexer.h"
+#include "llvm/Support/TargetRegistry.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringExtras.h"
using namespace llvm;
namespace {
-class X86AsmLexer : public TargetAsmLexer {
+class X86AsmLexer : public MCTargetAsmLexer {
const MCAsmInfo &AsmInfo;
bool tentativeIsValid;
@@ -60,8 +60,8 @@ protected:
}
}
public:
- X86AsmLexer(const Target &T, const MCAsmInfo &MAI)
- : TargetAsmLexer(T), AsmInfo(MAI), tentativeIsValid(false) {
+ X86AsmLexer(const Target &T, const MCRegisterInfo &MRI, const MCAsmInfo &MAI)
+ : MCTargetAsmLexer(T), AsmInfo(MAI), tentativeIsValid(false) {
}
};
@@ -160,6 +160,6 @@ AsmToken X86AsmLexer::LexTokenIntel() {
}
extern "C" void LLVMInitializeX86AsmLexer() {
- RegisterAsmLexer<X86AsmLexer> X(TheX86_32Target);
- RegisterAsmLexer<X86AsmLexer> Y(TheX86_64Target);
+ RegisterMCAsmLexer<X86AsmLexer> X(TheX86_32Target);
+ RegisterMCAsmLexer<X86AsmLexer> Y(TheX86_64Target);
}
diff --git a/lib/Target/X86/AsmParser/X86AsmParser.cpp b/lib/Target/X86/AsmParser/X86AsmParser.cpp
index d45dd35..cb4f15f 100644
--- a/lib/Target/X86/AsmParser/X86AsmParser.cpp
+++ b/lib/Target/X86/AsmParser/X86AsmParser.cpp
@@ -7,14 +7,12 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/Target/TargetAsmParser.h"
-#include "X86.h"
-#include "X86Subtarget.h"
-#include "llvm/Target/TargetRegistry.h"
-#include "llvm/Target/TargetAsmParser.h"
+#include "MCTargetDesc/X86BaseInfo.h"
+#include "llvm/MC/MCTargetAsmParser.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/MCParser/MCAsmLexer.h"
#include "llvm/MC/MCParser/MCAsmParser.h"
@@ -26,6 +24,7 @@
#include "llvm/ADT/StringSwitch.h"
#include "llvm/ADT/Twine.h"
#include "llvm/Support/SourceMgr.h"
+#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
@@ -33,7 +32,7 @@ using namespace llvm;
namespace {
struct X86Operand;
-class X86ATTAsmParser : public TargetAsmParser {
+class X86ATTAsmParser : public MCTargetAsmParser {
MCSubtargetInfo &STI;
MCAsmParser &Parser;
@@ -48,6 +47,7 @@ private:
X86Operand *ParseMemOperand(unsigned SegReg, SMLoc StartLoc);
bool ParseDirectiveWord(unsigned Size, SMLoc L);
+ bool ParseDirectiveCode(StringRef IDVal, SMLoc L);
bool MatchAndEmitInstruction(SMLoc IDLoc,
SmallVectorImpl<MCParsedAsmOperand*> &Operands,
@@ -65,6 +65,10 @@ private:
// FIXME: Can tablegen auto-generate this?
return (STI.getFeatureBits() & X86::Mode64Bit) != 0;
}
+ void SwitchMode() {
+ unsigned FB = ComputeAvailableFeatures(STI.ToggleFeature(X86::Mode64Bit));
+ setAvailableFeatures(FB);
+ }
/// @name Auto-generated Matcher Functions
/// {
@@ -76,7 +80,7 @@ private:
public:
X86ATTAsmParser(MCSubtargetInfo &sti, MCAsmParser &parser)
- : TargetAsmParser(), STI(sti), Parser(parser) {
+ : MCTargetAsmParser(), STI(sti), Parser(parser) {
// Initialize the set of available features.
setAvailableFeatures(ComputeAvailableFeatures(STI.getFeatureBits()));
@@ -223,6 +227,21 @@ struct X86Operand : public MCParsedAsmOperand {
(0x00000000FFFFFF80ULL <= Value && Value <= 0x00000000FFFFFFFFULL)||
(0xFFFFFFFFFFFFFF80ULL <= Value && Value <= 0xFFFFFFFFFFFFFFFFULL));
}
+ bool isImmZExtu32u8() const {
+ if (!isImm())
+ return false;
+
+ // If this isn't a constant expr, just assume it fits and let relaxation
+ // handle it.
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ if (!CE)
+ return true;
+
+ // Otherwise, check the value is in a range that makes sense for this
+ // extension.
+ uint64_t Value = CE->getValue();
+ return (Value <= 0x00000000000000FFULL);
+ }
bool isImmSExti64i8() const {
if (!isImm())
return false;
@@ -382,19 +401,25 @@ bool X86ATTAsmParser::ParseRegister(unsigned &RegNo,
if (Tok.isNot(AsmToken::Identifier))
return Error(Tok.getLoc(), "invalid register name");
- // FIXME: Validate register for the current architecture; we have to do
- // validation later, so maybe there is no need for this here.
RegNo = MatchRegisterName(Tok.getString());
// If the match failed, try the register name as lowercase.
if (RegNo == 0)
RegNo = MatchRegisterName(LowercaseString(Tok.getString()));
- // FIXME: This should be done using Requires<In32BitMode> and
- // Requires<In64BitMode> so "eiz" usage in 64-bit instructions
- // can be also checked.
- if (RegNo == X86::RIZ && !is64BitMode())
- return Error(Tok.getLoc(), "riz register in 64-bit mode only");
+ if (!is64BitMode()) {
+ // FIXME: This should be done using Requires<In32BitMode> and
+ // Requires<In64BitMode> so "eiz" usage in 64-bit instructions can also be
+ // checked.
+ // FIXME: Check AH, CH, DH, BH cannot be used in an instruction requiring a
+ // REX prefix.
+ if (RegNo == X86::RIZ ||
+ X86MCRegisterClasses[X86::GR64RegClassID].contains(RegNo) ||
+ X86II::isX86_64NonExtLowByteReg(RegNo) ||
+ X86II::isX86_64ExtendedReg(RegNo))
+ return Error(Tok.getLoc(), "register %"
+ + Tok.getString() + " is only available in 64-bit mode");
+ }
// Parse "%st" as "%st(0)" and "%st(1)", which is multiple tokens.
if (RegNo == 0 && (Tok.getString() == "st" || Tok.getString() == "ST")) {
@@ -472,7 +497,7 @@ X86Operand *X86ATTAsmParser::ParseOperand() {
SMLoc Start, End;
if (ParseRegister(RegNo, Start, End)) return 0;
if (RegNo == X86::EIZ || RegNo == X86::RIZ) {
- Error(Start, "eiz and riz can only be used as index registers");
+ Error(Start, "%eiz and %riz can only be used as index registers");
return 0;
}
@@ -956,6 +981,7 @@ MatchAndEmitInstruction(SMLoc IDLoc,
// First, try a direct match.
switch (MatchInstructionImpl(Operands, Inst, OrigErrorInfo)) {
+ default: break;
case Match_Success:
Out.EmitInstruction(Inst);
return false;
@@ -994,7 +1020,7 @@ MatchAndEmitInstruction(SMLoc IDLoc,
// Check for the various suffix matches.
Tmp[Base.size()] = Suffixes[0];
unsigned ErrorInfoIgnore;
- MatchResultTy Match1, Match2, Match3, Match4;
+ unsigned Match1, Match2, Match3, Match4;
Match1 = MatchInstructionImpl(Operands, Inst, ErrorInfoIgnore);
Tmp[Base.size()] = Suffixes[1];
@@ -1096,6 +1122,8 @@ bool X86ATTAsmParser::ParseDirective(AsmToken DirectiveID) {
StringRef IDVal = DirectiveID.getIdentifier();
if (IDVal == ".word")
return ParseDirectiveWord(2, DirectiveID.getLoc());
+ else if (IDVal.startswith(".code"))
+ return ParseDirectiveCode(IDVal, DirectiveID.getLoc());
return true;
}
@@ -1124,15 +1152,35 @@ bool X86ATTAsmParser::ParseDirectiveWord(unsigned Size, SMLoc L) {
return false;
}
+/// ParseDirectiveCode
+/// ::= .code32 | .code64
+bool X86ATTAsmParser::ParseDirectiveCode(StringRef IDVal, SMLoc L) {
+ if (IDVal == ".code32") {
+ Parser.Lex();
+ if (is64BitMode()) {
+ SwitchMode();
+ getParser().getStreamer().EmitAssemblerFlag(MCAF_Code32);
+ }
+ } else if (IDVal == ".code64") {
+ Parser.Lex();
+ if (!is64BitMode()) {
+ SwitchMode();
+ getParser().getStreamer().EmitAssemblerFlag(MCAF_Code64);
+ }
+ } else {
+ return Error(L, "unexpected directive " + IDVal);
+ }
+ return false;
+}
extern "C" void LLVMInitializeX86AsmLexer();
// Force static initialization.
extern "C" void LLVMInitializeX86AsmParser() {
- RegisterAsmParser<X86ATTAsmParser> X(TheX86_32Target);
- RegisterAsmParser<X86ATTAsmParser> Y(TheX86_64Target);
+ RegisterMCAsmParser<X86ATTAsmParser> X(TheX86_32Target);
+ RegisterMCAsmParser<X86ATTAsmParser> Y(TheX86_64Target);
LLVMInitializeX86AsmLexer();
}
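
SwitchMode() above flips the Mode64Bit feature bit and recomputes the available-feature mask, which is what lets one parser honor .code32/.code64 mid-file. The flip-and-recompute idea in miniature (bit position and state handling are illustrative):

  #include <cstdint>
  #include <cstdio>

  enum : uint64_t { Mode64Bit = 1ull << 0 };  // illustrative bit position

  struct ParserState {
    uint64_t FeatureBits = Mode64Bit;         // start in 64-bit mode
    bool is64BitMode() const { return (FeatureBits & Mode64Bit) != 0; }
    void SwitchMode() { FeatureBits ^= Mode64Bit; }  // toggle, then re-derive
  };

  int main() {
    ParserState P;
    // ".code32" encountered while in 64-bit mode:
    if (P.is64BitMode())
      P.SwitchMode();
    std::printf("64-bit mode now: %s\n", P.is64BitMode() ? "yes" : "no");
    return 0;
  }
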
diff --git a/lib/Target/X86/CMakeLists.txt b/lib/Target/X86/CMakeLists.txt
index b112f9f..351e767 100644
--- a/lib/Target/X86/CMakeLists.txt
+++ b/lib/Target/X86/CMakeLists.txt
@@ -1,20 +1,19 @@
set(LLVM_TARGET_DEFINITIONS X86.td)
-tablegen(X86GenRegisterInfo.inc -gen-register-info)
-tablegen(X86GenDisassemblerTables.inc -gen-disassembler)
-tablegen(X86GenInstrInfo.inc -gen-instr-info)
-tablegen(X86GenAsmWriter.inc -gen-asm-writer)
-tablegen(X86GenAsmWriter1.inc -gen-asm-writer -asmwriternum=1)
-tablegen(X86GenAsmMatcher.inc -gen-asm-matcher)
-tablegen(X86GenDAGISel.inc -gen-dag-isel)
-tablegen(X86GenFastISel.inc -gen-fast-isel)
-tablegen(X86GenCallingConv.inc -gen-callingconv)
-tablegen(X86GenSubtargetInfo.inc -gen-subtarget)
-tablegen(X86GenEDInfo.inc -gen-enhanced-disassembly-info)
+llvm_tablegen(X86GenRegisterInfo.inc -gen-register-info)
+llvm_tablegen(X86GenDisassemblerTables.inc -gen-disassembler)
+llvm_tablegen(X86GenInstrInfo.inc -gen-instr-info)
+llvm_tablegen(X86GenAsmWriter.inc -gen-asm-writer)
+llvm_tablegen(X86GenAsmWriter1.inc -gen-asm-writer -asmwriternum=1)
+llvm_tablegen(X86GenAsmMatcher.inc -gen-asm-matcher)
+llvm_tablegen(X86GenDAGISel.inc -gen-dag-isel)
+llvm_tablegen(X86GenFastISel.inc -gen-fast-isel)
+llvm_tablegen(X86GenCallingConv.inc -gen-callingconv)
+llvm_tablegen(X86GenSubtargetInfo.inc -gen-subtarget)
+llvm_tablegen(X86GenEDInfo.inc -gen-enhanced-disassembly-info)
+add_public_tablegen_target(X86CommonTableGen)
set(sources
- SSEDomainFix.cpp
- X86AsmBackend.cpp
X86AsmPrinter.cpp
X86COFFMachineModuleInfo.cpp
X86CodeEmitter.cpp
@@ -26,14 +25,13 @@ set(sources
X86ISelLowering.cpp
X86InstrInfo.cpp
X86JITInfo.cpp
- X86MachObjectWriter.cpp
- X86MCCodeEmitter.cpp
X86MCInstLower.cpp
X86RegisterInfo.cpp
X86SelectionDAGInfo.cpp
X86Subtarget.cpp
X86TargetMachine.cpp
X86TargetObjectFile.cpp
+ X86VZeroUpper.cpp
)
if( CMAKE_CL_64 )
@@ -53,6 +51,19 @@ endif()
add_llvm_target(X86CodeGen ${sources})
+add_llvm_library_dependencies(LLVMX86CodeGen
+ LLVMAnalysis
+ LLVMAsmPrinter
+ LLVMCodeGen
+ LLVMCore
+ LLVMMC
+ LLVMSelectionDAG
+ LLVMSupport
+ LLVMTarget
+ LLVMX86AsmPrinter
+ LLVMX86Desc
+ )
+
add_subdirectory(AsmParser)
add_subdirectory(Disassembler)
add_subdirectory(InstPrinter)
diff --git a/lib/Target/X86/Disassembler/CMakeLists.txt b/lib/Target/X86/Disassembler/CMakeLists.txt
index 972a0d9..4f570d5 100644
--- a/lib/Target/X86/Disassembler/CMakeLists.txt
+++ b/lib/Target/X86/Disassembler/CMakeLists.txt
@@ -4,6 +4,13 @@ add_llvm_library(LLVMX86Disassembler
X86Disassembler.cpp
X86DisassemblerDecoder.c
)
+
+add_llvm_library_dependencies(LLVMX86Disassembler
+ LLVMMC
+ LLVMSupport
+ LLVMX86Info
+ )
+
# workaround for hanging compilation on MSVC9 and 10
if( MSVC_VERSION EQUAL 1400 OR MSVC_VERSION EQUAL 1500 OR MSVC_VERSION EQUAL 1600 )
set_property(
@@ -11,4 +18,5 @@ set_property(
PROPERTY COMPILE_FLAGS "/Od"
)
endif()
-add_dependencies(LLVMX86Disassembler X86CodeGenTable_gen)
+
+add_dependencies(LLVMX86Disassembler X86CommonTableGen)
diff --git a/lib/Target/X86/Disassembler/X86Disassembler.cpp b/lib/Target/X86/Disassembler/X86Disassembler.cpp
index 4a0d2ec..3aacb20 100644
--- a/lib/Target/X86/Disassembler/X86Disassembler.cpp
+++ b/lib/Target/X86/Disassembler/X86Disassembler.cpp
@@ -21,13 +21,16 @@
#include "llvm/MC/MCDisassembler.h"
#include "llvm/MC/MCDisassembler.h"
#include "llvm/MC/MCInst.h"
-#include "llvm/Target/TargetRegistry.h"
+#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/MemoryObject.h"
+#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/raw_ostream.h"
#define GET_REGINFO_ENUM
#include "X86GenRegisterInfo.inc"
+#define GET_INSTRINFO_ENUM
+#include "X86GenInstrInfo.inc"
#include "X86GenEDInfo.inc"
using namespace llvm;
@@ -64,8 +67,8 @@ extern Target TheX86_32Target, TheX86_64Target;
static bool translateInstruction(MCInst &target,
InternalInstruction &source);
-X86GenericDisassembler::X86GenericDisassembler(DisassemblerMode mode) :
- MCDisassembler(),
+X86GenericDisassembler::X86GenericDisassembler(const MCSubtargetInfo &STI, DisassemblerMode mode) :
+ MCDisassembler(STI),
fMode(mode) {
}
@@ -106,28 +109,34 @@ static void logger(void* arg, const char* log) {
// Public interface for the disassembler
//
-bool X86GenericDisassembler::getInstruction(MCInst &instr,
- uint64_t &size,
- const MemoryObject &region,
- uint64_t address,
- raw_ostream &vStream) const {
+MCDisassembler::DecodeStatus
+X86GenericDisassembler::getInstruction(MCInst &instr,
+ uint64_t &size,
+ const MemoryObject &region,
+ uint64_t address,
+ raw_ostream &vStream,
+ raw_ostream &cStream) const {
InternalInstruction internalInstr;
+
+ dlog_t loggerFn = logger;
+ if (&vStream == &nulls())
+ loggerFn = 0; // Disable logging completely if it's going to nulls().
int ret = decodeInstruction(&internalInstr,
regionReader,
(void*)&region,
- logger,
+ loggerFn,
(void*)&vStream,
address,
fMode);
if (ret) {
size = internalInstr.readerCursor - address;
- return false;
+ return Fail;
}
else {
size = internalInstr.length;
- return !translateInstruction(instr, internalInstr);
+ return (!translateInstruction(instr, internalInstr)) ? Success : Fail;
}
}
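Callers of the new interface branch on an explicit decode status rather than a bool; a minimal consumer sketch under the same Success/Fail convention (the decoder below is a stub stand-in, not the real class):

    #include <cstdint>
    #include <cstdio>

    enum DecodeStatus { Fail = 0, Success = 1 };

    // Stub stand-in for X86GenericDisassembler::getInstruction().
    static DecodeStatus getInstructionStub(uint64_t &Size) {
      Size = 1;                          // pretend one byte was consumed
      return Success;
    }

    void disassembleOne(uint64_t Addr) {
      uint64_t Size = 0;
      if (getInstructionStub(Size) == Success)
        std::printf("decoded %llu byte(s) at %#llx\n",
                    (unsigned long long)Size, (unsigned long long)Addr);
      else
        // On Fail, Size still reports how far the reader cursor advanced.
        std::printf("decode failure at %#llx\n", (unsigned long long)Addr);
    }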
@@ -183,8 +192,46 @@ static void translateImmediate(MCInst &mcInst, uint64_t immediate,
break;
}
}
+ // By default sign-extend all X86 immediates based on their encoding.
+ else if (type == TYPE_IMM8 || type == TYPE_IMM16 || type == TYPE_IMM32 ||
+ type == TYPE_IMM64) {
+ uint32_t Opcode = mcInst.getOpcode();
+ switch (operand.encoding) {
+ default:
+ break;
+ case ENCODING_IB:
+ // Special case those X86 instructions that use the imm8 as a set of
+ // bits, bit count, etc. and are not sign-extended.
+ if (Opcode != X86::BLENDPSrri && Opcode != X86::BLENDPDrri &&
+ Opcode != X86::PBLENDWrri && Opcode != X86::MPSADBWrri &&
+ Opcode != X86::DPPSrri && Opcode != X86::DPPDrri &&
+ Opcode != X86::INSERTPSrr && Opcode != X86::VBLENDPSYrri &&
+ Opcode != X86::VBLENDPSYrmi && Opcode != X86::VBLENDPDYrri &&
+ Opcode != X86::VBLENDPDYrmi && Opcode != X86::VPBLENDWrri &&
+ Opcode != X86::VMPSADBWrri && Opcode != X86::VDPPSYrri &&
+ Opcode != X86::VDPPSYrmi && Opcode != X86::VDPPDrri &&
+ Opcode != X86::VINSERTPSrr)
+ type = TYPE_MOFFS8;
+ break;
+ case ENCODING_IW:
+ type = TYPE_MOFFS16;
+ break;
+ case ENCODING_ID:
+ type = TYPE_MOFFS32;
+ break;
+ case ENCODING_IO:
+ type = TYPE_MOFFS64;
+ break;
+ }
+ }
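The net effect of retyping IB/IW/ID/IO immediates as TYPE_MOFFS8/16/32/64 is plain sign-extension from the encoded width; a self-contained sketch of that widening:

    // Sketch: sign-extend an immediate from its encoded width.
    #include <cstdint>

    int64_t signExtend(uint64_t Imm, unsigned EncodedBytes) {
      unsigned Bits = EncodedBytes * 8;
      if (Bits >= 64)
        return (int64_t)Imm;
      uint64_t SignBit = 1ULL << (Bits - 1);
      return (int64_t)((Imm ^ SignBit) - SignBit); // standard XOR/subtract trick
    }

    // signExtend(0x80, 1) == -128, matching the `if (immediate & 0x80)`
    // widening in the TYPE_MOFFS8/TYPE_REL8 case below.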
switch (type) {
+ case TYPE_XMM128:
+ mcInst.addOperand(MCOperand::CreateReg(X86::XMM0 + (immediate >> 4)));
+ return;
+ case TYPE_XMM256:
+ mcInst.addOperand(MCOperand::CreateReg(X86::YMM0 + (immediate >> 4)));
+ return;
case TYPE_MOFFS8:
case TYPE_REL8:
if(immediate & 0x80)
@@ -543,12 +590,12 @@ static bool translateInstruction(MCInst &mcInst,
return false;
}
-static MCDisassembler *createX86_32Disassembler(const Target &T) {
- return new X86Disassembler::X86_32Disassembler;
+static MCDisassembler *createX86_32Disassembler(const Target &T, const MCSubtargetInfo &STI) {
+ return new X86Disassembler::X86_32Disassembler(STI);
}
-static MCDisassembler *createX86_64Disassembler(const Target &T) {
- return new X86Disassembler::X86_64Disassembler;
+static MCDisassembler *createX86_64Disassembler(const Target &T, const MCSubtargetInfo &STI) {
+ return new X86Disassembler::X86_64Disassembler(STI);
}
extern "C" void LLVMInitializeX86Disassembler() {
diff --git a/lib/Target/X86/Disassembler/X86Disassembler.h b/lib/Target/X86/Disassembler/X86Disassembler.h
index 550cf9d..6ac9a0f 100644
--- a/lib/Target/X86/Disassembler/X86Disassembler.h
+++ b/lib/Target/X86/Disassembler/X86Disassembler.h
@@ -92,6 +92,7 @@ struct InternalInstruction;
namespace llvm {
class MCInst;
+class MCSubtargetInfo;
class MemoryObject;
class raw_ostream;
@@ -107,16 +108,17 @@ protected:
/// Constructor - Initializes the disassembler.
///
/// @param mode - The X86 architecture mode to decode for.
- X86GenericDisassembler(DisassemblerMode mode);
+ X86GenericDisassembler(const MCSubtargetInfo &STI, DisassemblerMode mode);
public:
~X86GenericDisassembler();
/// getInstruction - See MCDisassembler.
- bool getInstruction(MCInst &instr,
- uint64_t &size,
- const MemoryObject &region,
- uint64_t address,
- raw_ostream &vStream) const;
+ DecodeStatus getInstruction(MCInst &instr,
+ uint64_t &size,
+ const MemoryObject &region,
+ uint64_t address,
+ raw_ostream &vStream,
+ raw_ostream &cStream) const;
/// getEDInfo - See MCDisassembler.
EDInstInfo *getEDInfo() const;
@@ -127,24 +129,24 @@ private:
/// X86_16Disassembler - 16-bit X86 disassembler.
class X86_16Disassembler : public X86GenericDisassembler {
public:
- X86_16Disassembler() :
- X86GenericDisassembler(MODE_16BIT) {
+ X86_16Disassembler(const MCSubtargetInfo &STI) :
+ X86GenericDisassembler(STI, MODE_16BIT) {
}
};
/// X86_32Disassembler - 32-bit X86 disassembler.
class X86_32Disassembler : public X86GenericDisassembler {
public:
- X86_32Disassembler() :
- X86GenericDisassembler(MODE_32BIT) {
+ X86_32Disassembler(const MCSubtargetInfo &STI) :
+ X86GenericDisassembler(STI, MODE_32BIT) {
}
};
/// X86_64Disassembler - 64-bit X86 disassembler.
class X86_64Disassembler : public X86GenericDisassembler {
public:
- X86_64Disassembler() :
- X86GenericDisassembler(MODE_64BIT) {
+ X86_64Disassembler(const MCSubtargetInfo &STI) :
+ X86GenericDisassembler(STI, MODE_64BIT) {
}
};
diff --git a/lib/Target/X86/Disassembler/X86DisassemblerDecoder.c b/lib/Target/X86/Disassembler/X86DisassemblerDecoder.c
index de1610b..f9b0fe5 100644
--- a/lib/Target/X86/Disassembler/X86DisassemblerDecoder.c
+++ b/lib/Target/X86/Disassembler/X86DisassemblerDecoder.c
@@ -58,8 +58,8 @@ static InstructionContext contextForAttrs(uint8_t attrMask) {
* @return - TRUE if the ModR/M byte is required, FALSE otherwise.
*/
static int modRMRequired(OpcodeType type,
- InstructionContext insnContext,
- uint8_t opcode) {
+ InstructionContext insnContext,
+ uint8_t opcode) {
const struct ContextDecision* decision = 0;
switch (type) {
@@ -391,7 +391,7 @@ static int readPrefixes(struct InternalInstruction* insn) {
return -1;
}
- if (insn->mode == MODE_64BIT || byte1 & 0x8) {
+ if (insn->mode == MODE_64BIT || (byte1 & 0xc0) == 0xc0) {
insn->vexSize = 3;
insn->necessaryPrefixLocation = insn->readerCursor - 1;
}
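The tightened test reflects how C4/C5 are overloaded: outside 64-bit mode they are the LES/LDS opcodes, whose ModR/M byte can never have mod == 0b11, so a following byte with both top bits set can only be a VEX payload. A hedged sketch of the classifier:

    // Sketch: decide whether a C4/C5 byte starts a VEX prefix.
    #include <cstdint>

    bool isVEXPrefix(uint8_t Opcode, uint8_t NextByte, bool Is64BitMode) {
      if (Opcode != 0xc4 && Opcode != 0xc5)
        return false;
      // 64-bit mode: LES/LDS are invalid, so C4/C5 are always VEX.
      // 16/32-bit mode: require the top two bits of the next byte, which
      // would be an illegal LES/LDS ModR/M -- i.e. (byte1 & 0xc0) == 0xc0.
      return Is64BitMode || (NextByte & 0xc0) == 0xc0;
    }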
@@ -406,12 +406,14 @@ static int readPrefixes(struct InternalInstruction* insn) {
consumeByte(insn, &insn->vexPrefix[2]);
/* We simulate the REX prefix for simplicity's sake */
-
- insn->rexPrefix = 0x40
- | (wFromVEX3of3(insn->vexPrefix[2]) << 3)
- | (rFromVEX2of3(insn->vexPrefix[1]) << 2)
- | (xFromVEX2of3(insn->vexPrefix[1]) << 1)
- | (bFromVEX2of3(insn->vexPrefix[1]) << 0);
+
+ if (insn->mode == MODE_64BIT) {
+ insn->rexPrefix = 0x40
+ | (wFromVEX3of3(insn->vexPrefix[2]) << 3)
+ | (rFromVEX2of3(insn->vexPrefix[1]) << 2)
+ | (xFromVEX2of3(insn->vexPrefix[1]) << 1)
+ | (bFromVEX2of3(insn->vexPrefix[1]) << 0);
+ }
switch (ppFromVEX3of3(insn->vexPrefix[2]))
{
@@ -433,7 +435,7 @@ static int readPrefixes(struct InternalInstruction* insn) {
return -1;
}
- if (insn->mode == MODE_64BIT || byte1 & 0x8) {
+ if (insn->mode == MODE_64BIT || (byte1 & 0xc0) == 0xc0) {
insn->vexSize = 2;
}
else {
@@ -444,8 +446,10 @@ static int readPrefixes(struct InternalInstruction* insn) {
insn->vexPrefix[0] = byte;
consumeByte(insn, &insn->vexPrefix[1]);
- insn->rexPrefix = 0x40
- | (rFromVEX2of2(insn->vexPrefix[1]) << 2);
+ if (insn->mode == MODE_64BIT) {
+ insn->rexPrefix = 0x40
+ | (rFromVEX2of2(insn->vexPrefix[1]) << 2);
+ }
switch (ppFromVEX2of2(insn->vexPrefix[1]))
{
@@ -700,34 +704,6 @@ static BOOL is16BitEquvalent(const char* orig, const char* equiv) {
}
/*
- * is64BitEquivalent - Determines whether two instruction names refer to
- * equivalent instructions but one is 64-bit whereas the other is not.
- *
- * @param orig - The instruction that is not 64-bit
- * @param equiv - The instruction that is 64-bit
- */
-static BOOL is64BitEquivalent(const char* orig, const char* equiv) {
- off_t i;
-
- for (i = 0;; i++) {
- if (orig[i] == '\0' && equiv[i] == '\0')
- return TRUE;
- if (orig[i] == '\0' || equiv[i] == '\0')
- return FALSE;
- if (orig[i] != equiv[i]) {
- if ((orig[i] == 'W' || orig[i] == 'L') && equiv[i] == 'Q')
- continue;
- if ((orig[i] == '1' || orig[i] == '3') && equiv[i] == '6')
- continue;
- if ((orig[i] == '6' || orig[i] == '2') && equiv[i] == '4')
- continue;
- return FALSE;
- }
- }
-}
-
-
-/*
* getID - Determines the ID of an instruction, consuming the ModR/M byte as
* appropriate for extended and escape opcodes. Determines the attributes and
* context for the instruction before doing so.
@@ -763,8 +739,6 @@ static int getID(struct InternalInstruction* insn) {
break;
}
- if (wFromVEX3of3(insn->vexPrefix[2]))
- attrMask |= ATTR_REXW;
if (lFromVEX3of3(insn->vexPrefix[2]))
attrMask |= ATTR_VEXL;
}
@@ -789,63 +763,55 @@ static int getID(struct InternalInstruction* insn) {
}
}
else {
- if (insn->rexPrefix & 0x08)
- attrMask |= ATTR_REXW;
-
if (isPrefixAtLocation(insn, 0x66, insn->necessaryPrefixLocation))
attrMask |= ATTR_OPSIZE;
else if (isPrefixAtLocation(insn, 0xf3, insn->necessaryPrefixLocation))
attrMask |= ATTR_XS;
else if (isPrefixAtLocation(insn, 0xf2, insn->necessaryPrefixLocation))
attrMask |= ATTR_XD;
-
}
+ if (insn->rexPrefix & 0x08)
+ attrMask |= ATTR_REXW;
+
if (getIDWithAttrMask(&instructionID, insn, attrMask))
return -1;
/* The following clauses compensate for limitations of the tables. */
- if ((attrMask & ATTR_XD) && (attrMask & ATTR_REXW)) {
+ if ((attrMask & ATTR_VEXL) && (attrMask & ATTR_REXW)) {
/*
- * Although for SSE instructions it is usually necessary to treat REX.W+F2
- * as F2 for decode (in the absence of a 64BIT_REXW_XD category) there is
- * an occasional instruction where F2 is incidental and REX.W is the more
- * significant. If the decoded instruction is 32-bit and adding REX.W
- * instead of F2 changes a 32 to a 64, we adopt the new encoding.
+     * Some VEX instructions ignore the L-bit but use the W-bit. Normally the
+     * L-bit has precedence, since there are no entries with both the L-bit
+     * and the W-bit in the tables. So if the L-bit isn't significant, we
+     * should use the W-bit instead.
*/
-
+
const struct InstructionSpecifier *spec;
- uint16_t instructionIDWithREXw;
- const struct InstructionSpecifier *specWithREXw;
-
+ uint16_t instructionIDWithWBit;
+ const struct InstructionSpecifier *specWithWBit;
+
spec = specifierForUID(instructionID);
-
- if (getIDWithAttrMask(&instructionIDWithREXw,
+
+ if (getIDWithAttrMask(&instructionIDWithWBit,
insn,
- attrMask & (~ATTR_XD))) {
- /*
- * Decoding with REX.w would yield nothing; give up and return original
- * decode.
- */
-
+ (attrMask & (~ATTR_VEXL)) | ATTR_REXW)) {
insn->instructionID = instructionID;
insn->spec = spec;
return 0;
}
-
- specWithREXw = specifierForUID(instructionIDWithREXw);
-
- if (is64BitEquivalent(spec->name, specWithREXw->name)) {
- insn->instructionID = instructionIDWithREXw;
- insn->spec = specWithREXw;
+
+ specWithWBit = specifierForUID(instructionIDWithWBit);
+
+ if (instructionID != instructionIDWithWBit) {
+ insn->instructionID = instructionIDWithWBit;
+ insn->spec = specWithWBit;
} else {
insn->instructionID = instructionID;
insn->spec = spec;
}
return 0;
}
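Put differently, the decoder retries the table lookup with the L-bit dropped and the W-bit added, and prefers that answer whenever it names a different instruction. A hedged sketch of the fallback pattern, with the table query reduced to a stub and illustrative bit assignments:

    #include <cstdint>

    const unsigned ATTR_VEXL = 1u << 0;  // illustrative values only
    const unsigned ATTR_REXW = 1u << 1;

    // Stub stand-in for getIDWithAttrMask(); returns nonzero on failure.
    static int lookupID(uint16_t *ID, unsigned AttrMask) {
      *ID = (uint16_t)AttrMask;          // fake: distinct masks, distinct IDs
      return 0;
    }

    uint16_t resolveVEXLvsW(unsigned AttrMask, uint16_t IDWithL) {
      uint16_t IDWithW;
      if (lookupID(&IDWithW, (AttrMask & ~ATTR_VEXL) | ATTR_REXW))
        return IDWithL;                  // W-bit lookup failed: keep L-bit ID
      return IDWithW != IDWithL ? IDWithW : IDWithL;
    }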
-
+
if (insn->prefixPresent[0x66] && !(attrMask & ATTR_OPSIZE)) {
/*
* The instruction tables make no distinction between instructions that
@@ -885,6 +851,43 @@ static int getID(struct InternalInstruction* insn) {
}
return 0;
}
+
+ if (insn->opcodeType == ONEBYTE && insn->opcode == 0x90 &&
+ insn->rexPrefix & 0x01) {
+ /*
+   * NOOP shouldn't decode as NOOP if REX.B is set. Instead
+   * it should decode as XCHG %r8d, %eax.
+ */
+
+ const struct InstructionSpecifier *spec;
+ uint16_t instructionIDWithNewOpcode;
+ const struct InstructionSpecifier *specWithNewOpcode;
+
+ spec = specifierForUID(instructionID);
+
+ /* Borrow opcode from one of the other XCHGar opcodes */
+ insn->opcode = 0x91;
+
+ if (getIDWithAttrMask(&instructionIDWithNewOpcode,
+ insn,
+ attrMask)) {
+ insn->opcode = 0x90;
+
+ insn->instructionID = instructionID;
+ insn->spec = spec;
+ return 0;
+ }
+
+ specWithNewOpcode = specifierForUID(instructionIDWithNewOpcode);
+
+ /* Change back */
+ insn->opcode = 0x90;
+
+ insn->instructionID = instructionIDWithNewOpcode;
+ insn->spec = specWithNewOpcode;
+
+ return 0;
+ }
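Concretely, the byte pair 41 90 (REX.B + 90) must decode as xchg %r8d, %eax rather than as nop, which is why the code momentarily borrows opcode 0x91, one of the other xchg-with-eAX encodings, for the table lookup. A small sketch of the byte-level fact:

    // Sketch: 0x90 is NOP only when REX.B is clear.
    #include <cstdint>
    #include <cstdio>

    void classifyOpcode90(uint8_t RexPrefix) {
      if (RexPrefix & 0x01)              // REX.B redirects the opcode register
        std::printf("41 90 -> xchg %%r8d, %%eax\n");
      else
        std::printf("90    -> nop (xchg %%eax, %%eax)\n");
    }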
insn->instructionID = instructionID;
insn->spec = specifierForUID(insn->instructionID);
@@ -1434,11 +1437,10 @@ static int readImmediate(struct InternalInstruction* insn, uint8_t size) {
}
/*
- * readVVVV - Consumes an immediate operand from an instruction, given the
- * desired operand size.
+ * readVVVV - Consumes vvvv from an instruction if it has a VEX prefix.
*
* @param insn - The instruction whose operand is to be read.
- * @return - 0 if the immediate was successfully consumed; nonzero
+ * @return - 0 if the vvvv was successfully consumed; nonzero
* otherwise.
*/
static int readVVVV(struct InternalInstruction* insn) {
@@ -1451,6 +1453,9 @@ static int readVVVV(struct InternalInstruction* insn) {
else
return -1;
+ if (insn->mode != MODE_64BIT)
+ insn->vvvv &= 0x7;
+
return 0;
}
@@ -1463,8 +1468,14 @@ static int readVVVV(struct InternalInstruction* insn) {
*/
static int readOperands(struct InternalInstruction* insn) {
int index;
+ int hasVVVV, needVVVV;
dbgprintf(insn, "readOperands()");
+
+  /* If a non-zero vvvv is specified, we need to make sure one of the
+     operands uses it. */
+ hasVVVV = !readVVVV(insn);
+ needVVVV = hasVVVV && (insn->vvvv != 0);
for (index = 0; index < X86_MAX_OPERANDS; ++index) {
switch (insn->spec->operands[index].encoding) {
@@ -1537,7 +1548,8 @@ static int readOperands(struct InternalInstruction* insn) {
return -1;
break;
case ENCODING_VVVV:
- if (readVVVV(insn))
+ needVVVV = 0; /* Mark that we have found a VVVV operand. */
+ if (!hasVVVV)
return -1;
if (fixupReg(insn, &insn->spec->operands[index]))
return -1;
@@ -1549,6 +1561,9 @@ static int readOperands(struct InternalInstruction* insn) {
return -1;
}
}
+
+  /* If we didn't find an ENCODING_VVVV operand but a non-zero vvvv was
+     present, fail. */
+ if (needVVVV) return -1;
return 0;
}
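The hasVVVV/needVVVV contract above is: consume vvvv once up front, then fail the decode if a non-zero vvvv was encoded but no operand ever claimed it. A compact sketch (operand encodings are illustrative):

    // Sketch: reject an unclaimed non-zero vvvv field.
    #include <vector>

    enum Encoding { ENCODING_NONE, ENCODING_VVVV };

    int readOperandsSketch(bool HasVVVV, unsigned VVVV,
                           const std::vector<Encoding> &Ops) {
      bool NeedVVVV = HasVVVV && VVVV != 0;
      for (Encoding E : Ops) {
        if (E == ENCODING_VVVV) {
          if (!HasVVVV)
            return -1;                   // operand wants vvvv, none present
          NeedVVVV = false;              // vvvv has been claimed
        }
      }
      return NeedVVVV ? -1 : 0;          // unclaimed non-zero vvvv fails
    }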
diff --git a/lib/Target/X86/Disassembler/X86DisassemblerDecoderCommon.h b/lib/Target/X86/Disassembler/X86DisassemblerDecoderCommon.h
index 70315ed..8b79335 100644
--- a/lib/Target/X86/Disassembler/X86DisassemblerDecoderCommon.h
+++ b/lib/Target/X86/Disassembler/X86DisassemblerDecoderCommon.h
@@ -81,12 +81,18 @@ enum attributeBits {
"but not the operands") \
ENUM_ENTRY(IC_XS, 2, "may say something about the opcode " \
"but not the operands") \
+ ENUM_ENTRY(IC_XD_OPSIZE, 3, "requires an OPSIZE prefix, so " \
+ "operands change width") \
+ ENUM_ENTRY(IC_XS_OPSIZE, 3, "requires an OPSIZE prefix, so " \
+ "operands change width") \
ENUM_ENTRY(IC_64BIT_REXW, 4, "requires a REX.W prefix, so operands "\
"change width; overrides IC_OPSIZE") \
ENUM_ENTRY(IC_64BIT_OPSIZE, 3, "Just as meaningful as IC_OPSIZE") \
ENUM_ENTRY(IC_64BIT_XD, 5, "XD instructions are SSE; REX.W is " \
"secondary") \
ENUM_ENTRY(IC_64BIT_XS, 5, "Just as meaningful as IC_64BIT_XD") \
+ ENUM_ENTRY(IC_64BIT_XD_OPSIZE, 3, "Just as meaningful as IC_XD_OPSIZE") \
+ ENUM_ENTRY(IC_64BIT_XS_OPSIZE, 3, "Just as meaningful as IC_XS_OPSIZE") \
ENUM_ENTRY(IC_64BIT_REXW_XS, 6, "OPSIZE could mean a different " \
"opcode") \
ENUM_ENTRY(IC_64BIT_REXW_XD, 6, "Just as meaningful as " \
@@ -104,7 +110,7 @@ enum attributeBits {
ENUM_ENTRY(IC_VEX_W_OPSIZE, 4, "requires VEX, W, and OpSize") \
ENUM_ENTRY(IC_VEX_L, 3, "requires VEX and the L prefix") \
ENUM_ENTRY(IC_VEX_L_XS, 4, "requires VEX and the L and XS prefix")\
- ENUM_ENTRY(IC_VEX_L_XD, 4, "requires VEX and the L and XS prefix")\
+ ENUM_ENTRY(IC_VEX_L_XD, 4, "requires VEX and the L and XD prefix")\
ENUM_ENTRY(IC_VEX_L_OPSIZE, 4, "requires VEX, L, and OpSize")
diff --git a/lib/Target/X86/InstPrinter/CMakeLists.txt b/lib/Target/X86/InstPrinter/CMakeLists.txt
index 033973e..2a2b5db 100644
--- a/lib/Target/X86/InstPrinter/CMakeLists.txt
+++ b/lib/Target/X86/InstPrinter/CMakeLists.txt
@@ -5,4 +5,11 @@ add_llvm_library(LLVMX86AsmPrinter
X86IntelInstPrinter.cpp
X86InstComments.cpp
)
-add_dependencies(LLVMX86AsmPrinter X86CodeGenTable_gen)
+
+add_llvm_library_dependencies(LLVMX86AsmPrinter
+ LLVMMC
+ LLVMSupport
+ LLVMX86Utils
+ )
+
+add_dependencies(LLVMX86AsmPrinter X86CommonTableGen)
diff --git a/lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp b/lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp
index c37d879..029d491 100644
--- a/lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp
+++ b/lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp
@@ -39,14 +39,17 @@ void X86ATTInstPrinter::printRegName(raw_ostream &OS,
OS << '%' << getRegisterName(RegNo);
}
-void X86ATTInstPrinter::printInst(const MCInst *MI, raw_ostream &OS) {
+void X86ATTInstPrinter::printInst(const MCInst *MI, raw_ostream &OS,
+ StringRef Annot) {
// Try to print any aliases first.
if (!printAliasInstr(MI, OS))
printInstruction(MI, OS);
// If verbose assembly is enabled, we can print some informative comments.
- if (CommentStream)
+ if (CommentStream) {
+ printAnnotation(OS, Annot);
EmitAnyX86InstComments(MI, *CommentStream, getRegisterName);
+ }
}
StringRef X86ATTInstPrinter::getOpcodeName(unsigned Opcode) const {
@@ -90,7 +93,8 @@ void X86ATTInstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
if (Op.isReg()) {
O << '%' << getRegisterName(Op.getReg());
} else if (Op.isImm()) {
- O << '$' << Op.getImm();
+ // Print X86 immediates as signed values.
+ O << '$' << (int64_t)Op.getImm();
if (CommentStream && (Op.getImm() > 255 || Op.getImm() < -256))
*CommentStream << format("imm = 0x%llX\n", (long long)Op.getImm());
diff --git a/lib/Target/X86/InstPrinter/X86ATTInstPrinter.h b/lib/Target/X86/InstPrinter/X86ATTInstPrinter.h
index 5426e5c..0293869 100644
--- a/lib/Target/X86/InstPrinter/X86ATTInstPrinter.h
+++ b/lib/Target/X86/InstPrinter/X86ATTInstPrinter.h
@@ -25,7 +25,7 @@ public:
X86ATTInstPrinter(const MCAsmInfo &MAI);
virtual void printRegName(raw_ostream &OS, unsigned RegNo) const;
- virtual void printInst(const MCInst *MI, raw_ostream &OS);
+ virtual void printInst(const MCInst *MI, raw_ostream &OS, StringRef Annot);
virtual StringRef getOpcodeName(unsigned Opcode) const;
// Autogenerated by tblgen, returns true if we successfully printed an
diff --git a/lib/Target/X86/InstPrinter/X86InstComments.cpp b/lib/Target/X86/InstPrinter/X86InstComments.cpp
index 4e28dfe..8d85b95 100644
--- a/lib/Target/X86/InstPrinter/X86InstComments.cpp
+++ b/lib/Target/X86/InstPrinter/X86InstComments.cpp
@@ -14,9 +14,9 @@
#include "X86InstComments.h"
#include "MCTargetDesc/X86MCTargetDesc.h"
+#include "Utils/X86ShuffleDecode.h"
#include "llvm/MC/MCInst.h"
#include "llvm/Support/raw_ostream.h"
-#include "../Utils/X86ShuffleDecode.h"
using namespace llvm;
//===----------------------------------------------------------------------===//
@@ -136,9 +136,11 @@ void llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS,
break;
case X86::SHUFPDrri:
+ Src2Name = getRegName(MI->getOperand(2).getReg());
+ // FALL THROUGH.
+ case X86::SHUFPDrmi:
DecodeSHUFPSMask(2, MI->getOperand(3).getImm(), ShuffleMask);
Src1Name = getRegName(MI->getOperand(0).getReg());
- Src2Name = getRegName(MI->getOperand(2).getReg());
break;
case X86::SHUFPSrri:
@@ -205,6 +207,31 @@ void llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS,
DecodeUNPCKHPMask(4, ShuffleMask);
Src1Name = getRegName(MI->getOperand(0).getReg());
break;
+ case X86::VPERMILPSri:
+ DecodeVPERMILPSMask(4, MI->getOperand(2).getImm(),
+ ShuffleMask);
+ Src1Name = getRegName(MI->getOperand(0).getReg());
+ break;
+ case X86::VPERMILPSYri:
+ DecodeVPERMILPSMask(8, MI->getOperand(2).getImm(),
+ ShuffleMask);
+ Src1Name = getRegName(MI->getOperand(0).getReg());
+ break;
+ case X86::VPERMILPDri:
+ DecodeVPERMILPDMask(2, MI->getOperand(2).getImm(),
+ ShuffleMask);
+ Src1Name = getRegName(MI->getOperand(0).getReg());
+ break;
+ case X86::VPERMILPDYri:
+ DecodeVPERMILPDMask(4, MI->getOperand(2).getImm(),
+ ShuffleMask);
+ Src1Name = getRegName(MI->getOperand(0).getReg());
+ break;
+ case X86::VPERM2F128rr:
+ DecodeVPERM2F128Mask(MI->getOperand(3).getImm(), ShuffleMask);
+ Src1Name = getRegName(MI->getOperand(1).getReg());
+ Src2Name = getRegName(MI->getOperand(2).getReg());
+ break;
}
diff --git a/lib/Target/X86/InstPrinter/X86IntelInstPrinter.cpp b/lib/Target/X86/InstPrinter/X86IntelInstPrinter.cpp
index 506e26c..f9ab5ae 100644
--- a/lib/Target/X86/InstPrinter/X86IntelInstPrinter.cpp
+++ b/lib/Target/X86/InstPrinter/X86IntelInstPrinter.cpp
@@ -32,12 +32,15 @@ void X86IntelInstPrinter::printRegName(raw_ostream &OS, unsigned RegNo) const {
OS << getRegisterName(RegNo);
}
-void X86IntelInstPrinter::printInst(const MCInst *MI, raw_ostream &OS) {
+void X86IntelInstPrinter::printInst(const MCInst *MI, raw_ostream &OS,
+ StringRef Annot) {
printInstruction(MI, OS);
// If verbose assembly is enabled, we can print some informative comments.
- if (CommentStream)
+ if (CommentStream) {
+ printAnnotation(OS, Annot);
EmitAnyX86InstComments(MI, *CommentStream, getRegisterName);
+ }
}
StringRef X86IntelInstPrinter::getOpcodeName(unsigned Opcode) const {
return getInstructionName(Opcode);
diff --git a/lib/Target/X86/InstPrinter/X86IntelInstPrinter.h b/lib/Target/X86/InstPrinter/X86IntelInstPrinter.h
index e84a194..6d5ec62 100644
--- a/lib/Target/X86/InstPrinter/X86IntelInstPrinter.h
+++ b/lib/Target/X86/InstPrinter/X86IntelInstPrinter.h
@@ -27,7 +27,7 @@ public:
: MCInstPrinter(MAI) {}
virtual void printRegName(raw_ostream &OS, unsigned RegNo) const;
- virtual void printInst(const MCInst *MI, raw_ostream &OS);
+ virtual void printInst(const MCInst *MI, raw_ostream &OS, StringRef Annot);
virtual StringRef getOpcodeName(unsigned Opcode) const;
// Autogenerated by tblgen.
diff --git a/lib/Target/X86/MCTargetDesc/CMakeLists.txt b/lib/Target/X86/MCTargetDesc/CMakeLists.txt
index ca88f8f..8721912 100644
--- a/lib/Target/X86/MCTargetDesc/CMakeLists.txt
+++ b/lib/Target/X86/MCTargetDesc/CMakeLists.txt
@@ -1,7 +1,20 @@
add_llvm_library(LLVMX86Desc
+ X86AsmBackend.cpp
X86MCTargetDesc.cpp
X86MCAsmInfo.cpp
+ X86MCCodeEmitter.cpp
+ X86MachObjectWriter.cpp
)
+add_llvm_library_dependencies(LLVMX86Desc
+ LLVMMC
+ LLVMSupport
+  LLVMX86AsmPrinter
+ LLVMX86Info
+ )
+
+add_dependencies(LLVMX86Desc X86CommonTableGen)
+
# Hack: we need to include 'main' target directory to grab private headers
include_directories(${CMAKE_CURRENT_SOURCE_DIR}/.. ${CMAKE_CURRENT_BINARY_DIR}/..)
diff --git a/lib/Target/X86/X86AsmBackend.cpp b/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp
index 9b556a5..69ad7d7 100644
--- a/lib/Target/X86/X86AsmBackend.cpp
+++ b/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp
@@ -7,9 +7,9 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/Target/TargetAsmBackend.h"
-#include "X86.h"
-#include "X86FixupKinds.h"
+#include "llvm/MC/MCAsmBackend.h"
+#include "MCTargetDesc/X86BaseInfo.h"
+#include "MCTargetDesc/X86FixupKinds.h"
#include "llvm/ADT/Twine.h"
#include "llvm/MC/MCAssembler.h"
#include "llvm/MC/MCELFObjectWriter.h"
@@ -24,9 +24,8 @@
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ELF.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetRegistry.h"
-#include "llvm/Target/TargetAsmBackend.h"
using namespace llvm;
// Option to allow disabling arithmetic relaxation to work around PR9807, which
@@ -63,10 +62,10 @@ public:
: MCELFObjectTargetWriter(is64Bit, OSType, EMachine, HasRelocationAddend) {}
};
-class X86AsmBackend : public TargetAsmBackend {
+class X86AsmBackend : public MCAsmBackend {
public:
X86AsmBackend(const Target &T)
- : TargetAsmBackend() {}
+ : MCAsmBackend() {}
unsigned getNumFixupKinds() const {
return X86::NumTargetFixupKinds;
@@ -81,7 +80,7 @@ public:
};
if (Kind < FirstTargetFixupKind)
- return TargetAsmBackend::getFixupKindInfo(Kind);
+ return MCAsmBackend::getFixupKindInfo(Kind);
assert(unsigned(Kind - FirstTargetFixupKind) < getNumFixupKinds() &&
"Invalid kind!");
@@ -94,6 +93,14 @@ public:
assert(Fixup.getOffset() + Size <= DataSize &&
"Invalid fixup offset!");
+
+    // Check that the upper bits are either all zeros or all ones.
+ // Specifically ignore overflow/underflow as long as the leakage is
+ // limited to the lower bits. This is to remain compatible with
+ // other assemblers.
+ assert(isIntN(Size * 8 + 1, Value) &&
+ "Value does not fit in the Fixup field");
+
for (unsigned i = 0; i != Size; ++i)
Data[Fixup.getOffset() + i] = uint8_t(Value >> (i * 8));
}
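The assert accepts any value representable as a signed (8*Size+1)-bit integer, i.e. the union of the signed and unsigned ranges of the fixup width, which is what tolerating limited overflow/underflow amounts to. A standalone sketch equivalent in spirit to the isIntN() check:

    // Sketch: a value "fits" a Size-byte fixup if it lies in the signed
    // (8*Size+1)-bit range, so both -128 and 255 fit a one-byte field.
    #include <cstdint>

    bool fitsFixup(int64_t Value, unsigned Size) {
      unsigned N = Size * 8 + 1;
      if (N >= 64)
        return true;
      int64_t Lo = -(INT64_C(1) << (N - 1));
      int64_t Hi = (INT64_C(1) << (N - 1)) - 1;
      return Value >= Lo && Value <= Hi;
    }

    // fitsFixup(255, 1) and fitsFixup(-128, 1) hold; fitsFixup(256, 1) fails.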
@@ -426,8 +433,7 @@ public:
} // end anonymous namespace
-TargetAsmBackend *llvm::createX86_32AsmBackend(const Target &T,
- const std::string &TT) {
+MCAsmBackend *llvm::createX86_32AsmBackend(const Target &T, StringRef TT) {
Triple TheTriple(TT);
if (TheTriple.isOSDarwin() || TheTriple.getEnvironment() == Triple::MachO)
@@ -439,8 +445,7 @@ TargetAsmBackend *llvm::createX86_32AsmBackend(const Target &T,
return new ELFX86_32AsmBackend(T, TheTriple.getOS());
}
-TargetAsmBackend *llvm::createX86_64AsmBackend(const Target &T,
- const std::string &TT) {
+MCAsmBackend *llvm::createX86_64AsmBackend(const Target &T, StringRef TT) {
Triple TheTriple(TT);
if (TheTriple.isOSDarwin() || TheTriple.getEnvironment() == Triple::MachO)
diff --git a/lib/Target/X86/MCTargetDesc/X86BaseInfo.h b/lib/Target/X86/MCTargetDesc/X86BaseInfo.h
new file mode 100644
index 0000000..e6ba705
--- /dev/null
+++ b/lib/Target/X86/MCTargetDesc/X86BaseInfo.h
@@ -0,0 +1,548 @@
+//===-- X86BaseInfo.h - Top level definitions for X86 ----------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains small standalone helper functions and enum definitions for
+// the X86 target useful for the compiler back-end and the MC libraries.
+// As such, it deliberately does not include references to LLVM core
+// code gen types, passes, etc.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef X86BASEINFO_H
+#define X86BASEINFO_H
+
+#include "X86MCTargetDesc.h"
+#include "llvm/Support/DataTypes.h"
+#include <cassert>
+
+namespace llvm {
+
+namespace X86 {
+ // Enums for memory operand decoding. Each memory operand is represented with
+ // a 5 operand sequence in the form:
+ // [BaseReg, ScaleAmt, IndexReg, Disp, Segment]
+ // These enums help decode this.
+ enum {
+ AddrBaseReg = 0,
+ AddrScaleAmt = 1,
+ AddrIndexReg = 2,
+ AddrDisp = 3,
+
+ /// AddrSegmentReg - The operand # of the segment in the memory operand.
+ AddrSegmentReg = 4,
+
+ /// AddrNumOperands - Total number of operands in a memory reference.
+ AddrNumOperands = 5
+ };
+} // end namespace X86;
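Given that fixed layout, a consumer can unpack a memory reference from its first operand index; a hedged sketch (Operand stands in for MCOperand, and MemStart would come from a query like getMemoryOperandNo() further down):

    // Sketch: unpack [BaseReg, ScaleAmt, IndexReg, Disp, Segment].
    #include <cstdint>
    #include <vector>

    struct Operand { int64_t Val; };     // stand-in for MCOperand

    struct MemRef { int64_t Base, Scale, Index, Disp, Segment; };

    MemRef unpackMem(const std::vector<Operand> &Ops, unsigned MemStart) {
      MemRef M;
      M.Base    = Ops[MemStart + 0].Val; // AddrBaseReg
      M.Scale   = Ops[MemStart + 1].Val; // AddrScaleAmt
      M.Index   = Ops[MemStart + 2].Val; // AddrIndexReg
      M.Disp    = Ops[MemStart + 3].Val; // AddrDisp
      M.Segment = Ops[MemStart + 4].Val; // AddrSegmentReg
      return M;
    }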
+
+
+/// X86II - This namespace holds all of the target specific flags that
+/// instruction info tracks.
+///
+namespace X86II {
+ /// Target Operand Flag enum.
+ enum TOF {
+ //===------------------------------------------------------------------===//
+ // X86 Specific MachineOperand flags.
+
+ MO_NO_FLAG,
+
+ /// MO_GOT_ABSOLUTE_ADDRESS - On a symbol operand, this represents a
+ /// relocation of:
+ /// SYMBOL_LABEL + [. - PICBASELABEL]
+ MO_GOT_ABSOLUTE_ADDRESS,
+
+ /// MO_PIC_BASE_OFFSET - On a symbol operand this indicates that the
+ /// immediate should get the value of the symbol minus the PIC base label:
+ /// SYMBOL_LABEL - PICBASELABEL
+ MO_PIC_BASE_OFFSET,
+
+ /// MO_GOT - On a symbol operand this indicates that the immediate is the
+ /// offset to the GOT entry for the symbol name from the base of the GOT.
+ ///
+ /// See the X86-64 ELF ABI supplement for more details.
+ /// SYMBOL_LABEL @GOT
+ MO_GOT,
+
+ /// MO_GOTOFF - On a symbol operand this indicates that the immediate is
+ /// the offset to the location of the symbol name from the base of the GOT.
+ ///
+ /// See the X86-64 ELF ABI supplement for more details.
+ /// SYMBOL_LABEL @GOTOFF
+ MO_GOTOFF,
+
+ /// MO_GOTPCREL - On a symbol operand this indicates that the immediate is
+ /// offset to the GOT entry for the symbol name from the current code
+ /// location.
+ ///
+ /// See the X86-64 ELF ABI supplement for more details.
+ /// SYMBOL_LABEL @GOTPCREL
+ MO_GOTPCREL,
+
+ /// MO_PLT - On a symbol operand this indicates that the immediate is
+ /// offset to the PLT entry of symbol name from the current code location.
+ ///
+ /// See the X86-64 ELF ABI supplement for more details.
+ /// SYMBOL_LABEL @PLT
+ MO_PLT,
+
+ /// MO_TLSGD - On a symbol operand this indicates that the immediate is
+ /// some TLS offset.
+ ///
+ /// See 'ELF Handling for Thread-Local Storage' for more details.
+ /// SYMBOL_LABEL @TLSGD
+ MO_TLSGD,
+
+ /// MO_GOTTPOFF - On a symbol operand this indicates that the immediate is
+ /// some TLS offset.
+ ///
+ /// See 'ELF Handling for Thread-Local Storage' for more details.
+ /// SYMBOL_LABEL @GOTTPOFF
+ MO_GOTTPOFF,
+
+ /// MO_INDNTPOFF - On a symbol operand this indicates that the immediate is
+ /// some TLS offset.
+ ///
+ /// See 'ELF Handling for Thread-Local Storage' for more details.
+ /// SYMBOL_LABEL @INDNTPOFF
+ MO_INDNTPOFF,
+
+ /// MO_TPOFF - On a symbol operand this indicates that the immediate is
+ /// some TLS offset.
+ ///
+ /// See 'ELF Handling for Thread-Local Storage' for more details.
+ /// SYMBOL_LABEL @TPOFF
+ MO_TPOFF,
+
+ /// MO_NTPOFF - On a symbol operand this indicates that the immediate is
+ /// some TLS offset.
+ ///
+ /// See 'ELF Handling for Thread-Local Storage' for more details.
+ /// SYMBOL_LABEL @NTPOFF
+ MO_NTPOFF,
+
+ /// MO_DLLIMPORT - On a symbol operand "FOO", this indicates that the
+ /// reference is actually to the "__imp_FOO" symbol. This is used for
+ /// dllimport linkage on windows.
+ MO_DLLIMPORT,
+
+ /// MO_DARWIN_STUB - On a symbol operand "FOO", this indicates that the
+ /// reference is actually to the "FOO$stub" symbol. This is used for calls
+ /// and jumps to external functions on Tiger and earlier.
+ MO_DARWIN_STUB,
+
+ /// MO_DARWIN_NONLAZY - On a symbol operand "FOO", this indicates that the
+ /// reference is actually to the "FOO$non_lazy_ptr" symbol, which is a
+ /// non-PIC-base-relative reference to a non-hidden dyld lazy pointer stub.
+ MO_DARWIN_NONLAZY,
+
+ /// MO_DARWIN_NONLAZY_PIC_BASE - On a symbol operand "FOO", this indicates
+ /// that the reference is actually to "FOO$non_lazy_ptr - PICBASE", which is
+ /// a PIC-base-relative reference to a non-hidden dyld lazy pointer stub.
+ MO_DARWIN_NONLAZY_PIC_BASE,
+
+ /// MO_DARWIN_HIDDEN_NONLAZY_PIC_BASE - On a symbol operand "FOO", this
+    /// indicates that the reference is actually to "FOO$non_lazy_ptr - PICBASE",
+ /// which is a PIC-base-relative reference to a hidden dyld lazy pointer
+ /// stub.
+ MO_DARWIN_HIDDEN_NONLAZY_PIC_BASE,
+
+ /// MO_TLVP - On a symbol operand this indicates that the immediate is
+ /// some TLS offset.
+ ///
+ /// This is the TLS offset for the Darwin TLS mechanism.
+ MO_TLVP,
+
+ /// MO_TLVP_PIC_BASE - On a symbol operand this indicates that the immediate
+ /// is some TLS offset from the picbase.
+ ///
+ /// This is the 32-bit TLS offset for Darwin TLS in PIC mode.
+ MO_TLVP_PIC_BASE
+ };
+
+ enum {
+ //===------------------------------------------------------------------===//
+ // Instruction encodings. These are the standard/most common forms for X86
+ // instructions.
+ //
+
+ // PseudoFrm - This represents an instruction that is a pseudo instruction
+ // or one that has not been implemented yet. It is illegal to code generate
+ // it, but tolerated for intermediate implementation stages.
+ Pseudo = 0,
+
+ /// Raw - This form is for instructions that don't have any operands, so
+ /// they are just a fixed opcode value, like 'leave'.
+ RawFrm = 1,
+
+ /// AddRegFrm - This form is used for instructions like 'push r32' that have
+ /// their one register operand added to their opcode.
+ AddRegFrm = 2,
+
+ /// MRMDestReg - This form is used for instructions that use the Mod/RM byte
+ /// to specify a destination, which in this case is a register.
+ ///
+ MRMDestReg = 3,
+
+ /// MRMDestMem - This form is used for instructions that use the Mod/RM byte
+ /// to specify a destination, which in this case is memory.
+ ///
+ MRMDestMem = 4,
+
+ /// MRMSrcReg - This form is used for instructions that use the Mod/RM byte
+ /// to specify a source, which in this case is a register.
+ ///
+ MRMSrcReg = 5,
+
+ /// MRMSrcMem - This form is used for instructions that use the Mod/RM byte
+ /// to specify a source, which in this case is memory.
+ ///
+ MRMSrcMem = 6,
+
+ /// MRM[0-7][rm] - These forms are used to represent instructions that use
+ /// a Mod/RM byte, and use the middle field to hold extended opcode
+    /// information. In the Intel manual these are represented as /0, /1, ...
+ ///
+
+ // First, instructions that operate on a register r/m operand...
+ MRM0r = 16, MRM1r = 17, MRM2r = 18, MRM3r = 19, // Format /0 /1 /2 /3
+ MRM4r = 20, MRM5r = 21, MRM6r = 22, MRM7r = 23, // Format /4 /5 /6 /7
+
+ // Next, instructions that operate on a memory r/m operand...
+ MRM0m = 24, MRM1m = 25, MRM2m = 26, MRM3m = 27, // Format /0 /1 /2 /3
+ MRM4m = 28, MRM5m = 29, MRM6m = 30, MRM7m = 31, // Format /4 /5 /6 /7
+
+ // MRMInitReg - This form is used for instructions whose source and
+ // destinations are the same register.
+ MRMInitReg = 32,
+
+    /// MRM_C1 - A mod/rm byte of exactly 0xC1.
+ MRM_C1 = 33,
+ MRM_C2 = 34,
+ MRM_C3 = 35,
+ MRM_C4 = 36,
+ MRM_C8 = 37,
+ MRM_C9 = 38,
+ MRM_E8 = 39,
+ MRM_F0 = 40,
+ MRM_F8 = 41,
+ MRM_F9 = 42,
+ MRM_D0 = 45,
+ MRM_D1 = 46,
+
+ /// RawFrmImm8 - This is used for the ENTER instruction, which has two
+ /// immediates, the first of which is a 16-bit immediate (specified by
+ /// the imm encoding) and the second is a 8-bit fixed value.
+ RawFrmImm8 = 43,
+
+ /// RawFrmImm16 - This is used for CALL FAR instructions, which have two
+ /// immediates, the first of which is a 16 or 32-bit immediate (specified by
+ /// the imm encoding) and the second is a 16-bit fixed value. In the AMD
+ /// manual, this operand is described as pntr16:32 and pntr16:16
+ RawFrmImm16 = 44,
+
+ FormMask = 63,
+
+ //===------------------------------------------------------------------===//
+ // Actual flags...
+
+ // OpSize - Set if this instruction requires an operand size prefix (0x66),
+ // which most often indicates that the instruction operates on 16 bit data
+ // instead of 32 bit data.
+ OpSize = 1 << 6,
+
+    // AdSize - Set if this instruction requires an address size prefix (0x67),
+    // which most often indicates that the instruction uses 16-bit addresses
+    // instead of 32-bit addresses (or 32-bit addresses in 64-bit mode).
+ AdSize = 1 << 7,
+
+ //===------------------------------------------------------------------===//
+ // Op0Mask - There are several prefix bytes that are used to form two byte
+ // opcodes. These are currently 0x0F, 0xF3, and 0xD8-0xDF. This mask is
+    // used to obtain the setting of this field. If no bits in this field are
+ // set, there is no prefix byte for obtaining a multibyte opcode.
+ //
+ Op0Shift = 8,
+ Op0Mask = 0x1F << Op0Shift,
+
+ // TB - TwoByte - Set if this instruction has a two byte opcode, which
+ // starts with a 0x0F byte before the real opcode.
+ TB = 1 << Op0Shift,
+
+ // REP - The 0xF3 prefix byte indicating repetition of the following
+ // instruction.
+ REP = 2 << Op0Shift,
+
+ // D8-DF - These escape opcodes are used by the floating point unit. These
+ // values must remain sequential.
+ D8 = 3 << Op0Shift, D9 = 4 << Op0Shift,
+ DA = 5 << Op0Shift, DB = 6 << Op0Shift,
+ DC = 7 << Op0Shift, DD = 8 << Op0Shift,
+ DE = 9 << Op0Shift, DF = 10 << Op0Shift,
+
+ // XS, XD - These prefix codes are for single and double precision scalar
+ // floating point operations performed in the SSE registers.
+ XD = 11 << Op0Shift, XS = 12 << Op0Shift,
+
+ // T8, TA, A6, A7 - Prefix after the 0x0F prefix.
+ T8 = 13 << Op0Shift, TA = 14 << Op0Shift,
+ A6 = 15 << Op0Shift, A7 = 16 << Op0Shift,
+
+ // TF - Prefix before and after 0x0F
+ TF = 17 << Op0Shift,
+
+ //===------------------------------------------------------------------===//
+ // REX_W - REX prefixes are instruction prefixes used in 64-bit mode.
+ // They are used to specify GPRs and SSE registers, 64-bit operand size,
+    // etc. We only care about the REX.W and REX.R bits, and only the former is
+ // statically determined.
+ //
+ REXShift = Op0Shift + 5,
+ REX_W = 1 << REXShift,
+
+ //===------------------------------------------------------------------===//
+ // This three-bit field describes the size of an immediate operand. Zero is
+ // unused so that we can tell if we forgot to set a value.
+ ImmShift = REXShift + 1,
+ ImmMask = 7 << ImmShift,
+ Imm8 = 1 << ImmShift,
+ Imm8PCRel = 2 << ImmShift,
+ Imm16 = 3 << ImmShift,
+ Imm16PCRel = 4 << ImmShift,
+ Imm32 = 5 << ImmShift,
+ Imm32PCRel = 6 << ImmShift,
+ Imm64 = 7 << ImmShift,
+
+ //===------------------------------------------------------------------===//
+ // FP Instruction Classification... Zero is non-fp instruction.
+
+ // FPTypeMask - Mask for all of the FP types...
+ FPTypeShift = ImmShift + 3,
+ FPTypeMask = 7 << FPTypeShift,
+
+ // NotFP - The default, set for instructions that do not use FP registers.
+ NotFP = 0 << FPTypeShift,
+
+ // ZeroArgFP - 0 arg FP instruction which implicitly pushes ST(0), f.e. fld0
+ ZeroArgFP = 1 << FPTypeShift,
+
+ // OneArgFP - 1 arg FP instructions which implicitly read ST(0), such as fst
+ OneArgFP = 2 << FPTypeShift,
+
+ // OneArgFPRW - 1 arg FP instruction which implicitly read ST(0) and write a
+ // result back to ST(0). For example, fcos, fsqrt, etc.
+ //
+ OneArgFPRW = 3 << FPTypeShift,
+
+ // TwoArgFP - 2 arg FP instructions which implicitly read ST(0), and an
+ // explicit argument, storing the result to either ST(0) or the implicit
+ // argument. For example: fadd, fsub, fmul, etc...
+ TwoArgFP = 4 << FPTypeShift,
+
+ // CompareFP - 2 arg FP instructions which implicitly read ST(0) and an
+ // explicit argument, but have no destination. Example: fucom, fucomi, ...
+ CompareFP = 5 << FPTypeShift,
+
+ // CondMovFP - "2 operand" floating point conditional move instructions.
+ CondMovFP = 6 << FPTypeShift,
+
+ // SpecialFP - Special instruction forms. Dispatch by opcode explicitly.
+ SpecialFP = 7 << FPTypeShift,
+
+ // Lock prefix
+ LOCKShift = FPTypeShift + 3,
+ LOCK = 1 << LOCKShift,
+
+    // Segment override prefixes. Currently we just need the ability to address
+    // stuff in the gs and fs segments.
+ SegOvrShift = LOCKShift + 1,
+ SegOvrMask = 3 << SegOvrShift,
+ FS = 1 << SegOvrShift,
+ GS = 2 << SegOvrShift,
+
+ // Execution domain for SSE instructions in bits 23, 24.
+ // 0 in bits 23-24 means normal, non-SSE instruction.
+ SSEDomainShift = SegOvrShift + 2,
+
+ OpcodeShift = SSEDomainShift + 2,
+
+ //===------------------------------------------------------------------===//
+ /// VEX - The opcode prefix used by AVX instructions
+ VEXShift = OpcodeShift + 8,
+ VEX = 1U << 0,
+
+    /// VEX_W - Has opcode-specific functionality, but is used in the same
+ /// way as REX_W is for regular SSE instructions.
+ VEX_W = 1U << 1,
+
+    /// VEX_4V - Used to specify an additional AVX/SSE register. Several
+    /// two-address SSE instructions are represented as three-address ones in
+    /// AVX, and the additional register is encoded in the VEX_VVVV prefix.
+ VEX_4V = 1U << 2,
+
+    /// VEX_I8IMM - Specifies that the last register used in an AVX instruction
+ /// must be encoded in the i8 immediate field. This usually happens in
+ /// instructions with 4 operands.
+ VEX_I8IMM = 1U << 3,
+
+ /// VEX_L - Stands for a bit in the VEX opcode prefix meaning the current
+    /// instruction uses 256-bit wide registers. This is usually auto-detected
+    /// if a VR256 register is used, but some AVX instructions also have this
+    /// field marked when using an f256 memory reference.
+ VEX_L = 1U << 4,
+
+ // VEX_LIG - Specifies that this instruction ignores the L-bit in the VEX
+ // prefix. Usually used for scalar instructions. Needed by disassembler.
+ VEX_LIG = 1U << 5,
+
+ /// Has3DNow0F0FOpcode - This flag indicates that the instruction uses the
+ /// wacky 0x0F 0x0F prefix for 3DNow! instructions. The manual documents
+ /// this as having a 0x0F prefix with a 0x0F opcode, and each instruction
+ /// storing a classifier in the imm8 field. To simplify our implementation,
+    /// we handle this by storing the classifier in the opcode field and using
+ /// this flag to indicate that the encoder should do the wacky 3DNow! thing.
+ Has3DNow0F0FOpcode = 1U << 6
+ };
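All of these fields pack into a single 64-bit TSFlags word, so reading one back is a mask-and-shift; a brief sketch using the shift arithmetic defined above (constants copied from the enum, abridged):

    // Sketch: recover the form and immediate-size fields from TSFlags.
    #include <cstdint>

    const uint64_t FormMask = 63;
    const unsigned ImmShift = 14;        // REXShift (13) + 1, per the enum
    const uint64_t ImmMask  = 7ULL << ImmShift;

    unsigned formOf(uint64_t TSFlags) { return unsigned(TSFlags & FormMask); }

    unsigned immFieldOf(uint64_t TSFlags) {
      return unsigned((TSFlags & ImmMask) >> ImmShift); // 0 == no immediate
    }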
+
+ // getBaseOpcodeFor - This function returns the "base" X86 opcode for the
+ // specified machine instruction.
+ //
+ static inline unsigned char getBaseOpcodeFor(uint64_t TSFlags) {
+ return TSFlags >> X86II::OpcodeShift;
+ }
+
+ static inline bool hasImm(uint64_t TSFlags) {
+ return (TSFlags & X86II::ImmMask) != 0;
+ }
+
+ /// getSizeOfImm - Decode the "size of immediate" field from the TSFlags field
+ /// of the specified instruction.
+ static inline unsigned getSizeOfImm(uint64_t TSFlags) {
+ switch (TSFlags & X86II::ImmMask) {
+ default: assert(0 && "Unknown immediate size");
+ case X86II::Imm8:
+ case X86II::Imm8PCRel: return 1;
+ case X86II::Imm16:
+ case X86II::Imm16PCRel: return 2;
+ case X86II::Imm32:
+ case X86II::Imm32PCRel: return 4;
+ case X86II::Imm64: return 8;
+ }
+ }
+
+ /// isImmPCRel - Return true if the immediate of the specified instruction's
+ /// TSFlags indicates that it is pc relative.
+ static inline unsigned isImmPCRel(uint64_t TSFlags) {
+ switch (TSFlags & X86II::ImmMask) {
+ default: assert(0 && "Unknown immediate size");
+ case X86II::Imm8PCRel:
+ case X86II::Imm16PCRel:
+ case X86II::Imm32PCRel:
+ return true;
+ case X86II::Imm8:
+ case X86II::Imm16:
+ case X86II::Imm32:
+ case X86II::Imm64:
+ return false;
+ }
+ }
+
+ /// getMemoryOperandNo - The function returns the MCInst operand # for the
+ /// first field of the memory operand. If the instruction doesn't have a
+ /// memory operand, this returns -1.
+ ///
+ /// Note that this ignores tied operands. If there is a tied register which
+ /// is duplicated in the MCInst (e.g. "EAX = addl EAX, [mem]") it is only
+ /// counted as one operand.
+ ///
+ static inline int getMemoryOperandNo(uint64_t TSFlags) {
+ switch (TSFlags & X86II::FormMask) {
+ case X86II::MRMInitReg: assert(0 && "FIXME: Remove this form");
+ default: assert(0 && "Unknown FormMask value in getMemoryOperandNo!");
+ case X86II::Pseudo:
+ case X86II::RawFrm:
+ case X86II::AddRegFrm:
+ case X86II::MRMDestReg:
+ case X86II::MRMSrcReg:
+ case X86II::RawFrmImm8:
+ case X86II::RawFrmImm16:
+ return -1;
+ case X86II::MRMDestMem:
+ return 0;
+ case X86II::MRMSrcMem: {
+ bool HasVEX_4V = (TSFlags >> X86II::VEXShift) & X86II::VEX_4V;
+ unsigned FirstMemOp = 1;
+ if (HasVEX_4V)
+ ++FirstMemOp;// Skip the register source (which is encoded in VEX_VVVV).
+
+ // FIXME: Maybe lea should have its own form? This is a horrible hack.
+ //if (Opcode == X86::LEA64r || Opcode == X86::LEA64_32r ||
+ // Opcode == X86::LEA16r || Opcode == X86::LEA32r)
+ return FirstMemOp;
+ }
+ case X86II::MRM0r: case X86II::MRM1r:
+ case X86II::MRM2r: case X86II::MRM3r:
+ case X86II::MRM4r: case X86II::MRM5r:
+ case X86II::MRM6r: case X86II::MRM7r:
+ return -1;
+ case X86II::MRM0m: case X86II::MRM1m:
+ case X86II::MRM2m: case X86II::MRM3m:
+ case X86II::MRM4m: case X86II::MRM5m:
+ case X86II::MRM6m: case X86II::MRM7m:
+ return 0;
+ case X86II::MRM_C1:
+ case X86II::MRM_C2:
+ case X86II::MRM_C3:
+ case X86II::MRM_C4:
+ case X86II::MRM_C8:
+ case X86II::MRM_C9:
+ case X86II::MRM_E8:
+ case X86II::MRM_F0:
+ case X86II::MRM_F8:
+ case X86II::MRM_F9:
+ case X86II::MRM_D0:
+ case X86II::MRM_D1:
+ return -1;
+ }
+ }
+
+ /// isX86_64ExtendedReg - Is the MachineOperand a x86-64 extended (r8 or
+ /// higher) register? e.g. r8, xmm8, xmm13, etc.
+ static inline bool isX86_64ExtendedReg(unsigned RegNo) {
+ switch (RegNo) {
+ default: break;
+ case X86::R8: case X86::R9: case X86::R10: case X86::R11:
+ case X86::R12: case X86::R13: case X86::R14: case X86::R15:
+ case X86::R8D: case X86::R9D: case X86::R10D: case X86::R11D:
+ case X86::R12D: case X86::R13D: case X86::R14D: case X86::R15D:
+ case X86::R8W: case X86::R9W: case X86::R10W: case X86::R11W:
+ case X86::R12W: case X86::R13W: case X86::R14W: case X86::R15W:
+ case X86::R8B: case X86::R9B: case X86::R10B: case X86::R11B:
+ case X86::R12B: case X86::R13B: case X86::R14B: case X86::R15B:
+ case X86::XMM8: case X86::XMM9: case X86::XMM10: case X86::XMM11:
+ case X86::XMM12: case X86::XMM13: case X86::XMM14: case X86::XMM15:
+ case X86::YMM8: case X86::YMM9: case X86::YMM10: case X86::YMM11:
+ case X86::YMM12: case X86::YMM13: case X86::YMM14: case X86::YMM15:
+ case X86::CR8: case X86::CR9: case X86::CR10: case X86::CR11:
+ case X86::CR12: case X86::CR13: case X86::CR14: case X86::CR15:
+ return true;
+ }
+ return false;
+ }
+
+ static inline bool isX86_64NonExtLowByteReg(unsigned reg) {
+ return (reg == X86::SPL || reg == X86::BPL ||
+ reg == X86::SIL || reg == X86::DIL);
+ }
+}
+
+} // end namespace llvm;
+
+#endif
diff --git a/lib/Target/X86/X86FixupKinds.h b/lib/Target/X86/MCTargetDesc/X86FixupKinds.h
index 17d242a..17d242a 100644
--- a/lib/Target/X86/X86FixupKinds.h
+++ b/lib/Target/X86/MCTargetDesc/X86FixupKinds.h
diff --git a/lib/Target/X86/X86MCCodeEmitter.cpp b/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp
index ce8ef49..2eee112 100644
--- a/lib/Target/X86/X86MCCodeEmitter.cpp
+++ b/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp
@@ -12,12 +12,14 @@
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "mccodeemitter"
-#include "X86.h"
-#include "X86InstrInfo.h"
-#include "X86FixupKinds.h"
+#include "MCTargetDesc/X86MCTargetDesc.h"
+#include "MCTargetDesc/X86BaseInfo.h"
+#include "MCTargetDesc/X86FixupKinds.h"
#include "llvm/MC/MCCodeEmitter.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/Support/raw_ostream.h"
@@ -45,7 +47,7 @@ public:
}
static unsigned GetX86RegNum(const MCOperand &MO) {
- return X86RegisterInfo::getX86RegNum(MO.getReg());
+ return X86_MC::getX86RegNum(MO.getReg());
}
// On regular x86, both XMM0-XMM7 and XMM8-XMM15 are encoded in the range
@@ -159,9 +161,11 @@ static MCFixupKind getImmFixupKind(uint64_t TSFlags) {
static bool Is32BitMemOperand(const MCInst &MI, unsigned Op) {
const MCOperand &BaseReg = MI.getOperand(Op+X86::AddrBaseReg);
const MCOperand &IndexReg = MI.getOperand(Op+X86::AddrIndexReg);
-
- if ((BaseReg.getReg() != 0 && X86::GR32RegClass.contains(BaseReg.getReg())) ||
- (IndexReg.getReg() != 0 && X86::GR32RegClass.contains(IndexReg.getReg())))
+
+ if ((BaseReg.getReg() != 0 &&
+ X86MCRegisterClasses[X86::GR32RegClassID].contains(BaseReg.getReg())) ||
+ (IndexReg.getReg() != 0 &&
+ X86MCRegisterClasses[X86::GR32RegClassID].contains(IndexReg.getReg())))
return true;
return false;
}
@@ -191,11 +195,11 @@ EmitImmediate(const MCOperand &DispOp, unsigned Size, MCFixupKind FixupKind,
SmallVectorImpl<MCFixup> &Fixups, int ImmOffset) const {
const MCExpr *Expr = NULL;
if (DispOp.isImm()) {
- // If this is a simple integer displacement that doesn't require a relocation,
- // emit it now.
+ // If this is a simple integer displacement that doesn't require a
+ // relocation, emit it now.
if (FixupKind != FK_PCRel_1 &&
- FixupKind != FK_PCRel_2 &&
- FixupKind != FK_PCRel_4) {
+ FixupKind != FK_PCRel_2 &&
+ FixupKind != FK_PCRel_4) {
EmitConstant(DispOp.getImm()+ImmOffset, Size, CurByte, OS);
return;
}
@@ -205,7 +209,9 @@ EmitImmediate(const MCOperand &DispOp, unsigned Size, MCFixupKind FixupKind,
}
// If we have an immoffset, add it to the expression.
- if (FixupKind == FK_Data_4 && StartsWithGlobalOffsetTable(Expr)) {
+ if ((FixupKind == FK_Data_4 ||
+ FixupKind == MCFixupKind(X86::reloc_signed_4byte)) &&
+ StartsWithGlobalOffsetTable(Expr)) {
assert(ImmOffset == 0);
FixupKind = MCFixupKind(X86::reloc_global_offset_table);
@@ -346,7 +352,7 @@ void X86MCCodeEmitter::EmitMemModRMByte(const MCInst &MI, unsigned Op,
}
// Calculate what the SS field value should be...
- static const unsigned SSTable[] = { ~0, 0, 1, ~0, 2, ~0, ~0, ~0, 3 };
+ static const unsigned SSTable[] = { ~0U, 0, 1, ~0U, 2, ~0U, ~0U, ~0U, 3 };
unsigned SS = SSTable[Scale.getImm()];
if (BaseReg == 0) {
@@ -486,71 +492,100 @@ void X86MCCodeEmitter::EmitVEXOpcodePrefix(uint64_t TSFlags, unsigned &CurByte,
VEX_L = 1;
}
- unsigned NumOps = MI.getNumOperands();
+ // Classify VEX_B, VEX_4V, VEX_R, VEX_X
unsigned CurOp = 0;
- bool IsDestMem = false;
-
switch (TSFlags & X86II::FormMask) {
case X86II::MRMInitReg: assert(0 && "FIXME: Remove this!");
- case X86II::MRMDestMem:
- IsDestMem = true;
- // The important info for the VEX prefix is never beyond the address
- // registers. Don't check beyond that.
- NumOps = CurOp = X86::AddrNumOperands;
+ case X86II::MRMDestMem: {
+ // MRMDestMem instructions forms:
+ // MemAddr, src1(ModR/M)
+ // MemAddr, src1(VEX_4V), src2(ModR/M)
+ // MemAddr, src1(ModR/M), imm8
+ //
+ if (X86II::isX86_64ExtendedReg(MI.getOperand(X86::AddrBaseReg).getReg()))
+ VEX_B = 0x0;
+ if (X86II::isX86_64ExtendedReg(MI.getOperand(X86::AddrIndexReg).getReg()))
+ VEX_X = 0x0;
+
+ CurOp = X86::AddrNumOperands;
+ if (HasVEX_4V)
+ VEX_4V = getVEXRegisterEncoding(MI, CurOp++);
+
+ const MCOperand &MO = MI.getOperand(CurOp);
+ if (MO.isReg() && X86II::isX86_64ExtendedReg(MO.getReg()))
+ VEX_R = 0x0;
+ break;
+ }
+ case X86II::MRMSrcMem: {
+ // MRMSrcMem instructions forms:
+ // src1(ModR/M), MemAddr
+ // src1(ModR/M), src2(VEX_4V), MemAddr
+ // src1(ModR/M), MemAddr, imm8
+ // src1(ModR/M), MemAddr, src2(VEX_I8IMM)
+ //
+ if (X86II::isX86_64ExtendedReg(MI.getOperand(0).getReg()))
+ VEX_R = 0x0;
+
+ unsigned MemAddrOffset = 1;
+ if (HasVEX_4V) {
+ VEX_4V = getVEXRegisterEncoding(MI, 1);
+ MemAddrOffset++;
+ }
+
+ if (X86II::isX86_64ExtendedReg(
+ MI.getOperand(MemAddrOffset+X86::AddrBaseReg).getReg()))
+ VEX_B = 0x0;
+ if (X86II::isX86_64ExtendedReg(
+ MI.getOperand(MemAddrOffset+X86::AddrIndexReg).getReg()))
+ VEX_X = 0x0;
+ break;
+ }
case X86II::MRM0m: case X86II::MRM1m:
case X86II::MRM2m: case X86II::MRM3m:
case X86II::MRM4m: case X86II::MRM5m:
case X86II::MRM6m: case X86II::MRM7m:
- case X86II::MRMSrcMem:
+ // MRM[0-9]m instructions forms:
+ // MemAddr
+ if (X86II::isX86_64ExtendedReg(MI.getOperand(X86::AddrBaseReg).getReg()))
+ VEX_B = 0x0;
+ if (X86II::isX86_64ExtendedReg(MI.getOperand(X86::AddrIndexReg).getReg()))
+ VEX_X = 0x0;
+ break;
case X86II::MRMSrcReg:
- if (MI.getNumOperands() > CurOp && MI.getOperand(CurOp).isReg() &&
- X86InstrInfo::isX86_64ExtendedReg(MI.getOperand(CurOp).getReg()))
+ // MRMSrcReg instructions forms:
+ // dst(ModR/M), src1(VEX_4V), src2(ModR/M), src3(VEX_I8IMM)
+ // dst(ModR/M), src1(ModR/M)
+ // dst(ModR/M), src1(ModR/M), imm8
+ //
+ if (X86II::isX86_64ExtendedReg(MI.getOperand(CurOp).getReg()))
VEX_R = 0x0;
CurOp++;
- if (HasVEX_4V) {
- VEX_4V = getVEXRegisterEncoding(MI, IsDestMem ? CurOp-1 : CurOp);
- CurOp++;
- }
-
- // To only check operands before the memory address ones, start
- // the search from the beginning
- if (IsDestMem)
- CurOp = 0;
-
- // If the last register should be encoded in the immediate field
- // do not use any bit from VEX prefix to this register, ignore it
- if ((TSFlags >> X86II::VEXShift) & X86II::VEX_I8IMM)
- NumOps--;
-
- for (; CurOp != NumOps; ++CurOp) {
- const MCOperand &MO = MI.getOperand(CurOp);
- if (MO.isReg() && X86InstrInfo::isX86_64ExtendedReg(MO.getReg()))
- VEX_B = 0x0;
- if (!VEX_B && MO.isReg() &&
- ((TSFlags & X86II::FormMask) == X86II::MRMSrcMem) &&
- X86InstrInfo::isX86_64ExtendedReg(MO.getReg()))
- VEX_X = 0x0;
- }
- break;
- default: // MRMDestReg, MRM0r-MRM7r, RawFrm
- if (!MI.getNumOperands())
- break;
-
- if (MI.getOperand(CurOp).isReg() &&
- X86InstrInfo::isX86_64ExtendedReg(MI.getOperand(CurOp).getReg()))
- VEX_B = 0;
-
if (HasVEX_4V)
- VEX_4V = getVEXRegisterEncoding(MI, CurOp);
-
- CurOp++;
- for (; CurOp != NumOps; ++CurOp) {
- const MCOperand &MO = MI.getOperand(CurOp);
- if (MO.isReg() && !HasVEX_4V &&
- X86InstrInfo::isX86_64ExtendedReg(MO.getReg()))
- VEX_R = 0x0;
- }
+ VEX_4V = getVEXRegisterEncoding(MI, CurOp++);
+ if (X86II::isX86_64ExtendedReg(MI.getOperand(CurOp).getReg()))
+ VEX_B = 0x0;
+ break;
+ case X86II::MRMDestReg:
+ // MRMDestReg instructions forms:
+ // dst(ModR/M), src(ModR/M)
+ // dst(ModR/M), src(ModR/M), imm8
+ if (X86II::isX86_64ExtendedReg(MI.getOperand(0).getReg()))
+ VEX_B = 0x0;
+ if (X86II::isX86_64ExtendedReg(MI.getOperand(1).getReg()))
+ VEX_R = 0x0;
+ break;
+ case X86II::MRM0r: case X86II::MRM1r:
+ case X86II::MRM2r: case X86II::MRM3r:
+ case X86II::MRM4r: case X86II::MRM5r:
+ case X86II::MRM6r: case X86II::MRM7r:
+ // MRM0r-MRM7r instructions forms:
+ // dst(VEX_4V), src(ModR/M), imm8
+ VEX_4V = getVEXRegisterEncoding(MI, 0);
+ if (X86II::isX86_64ExtendedReg(MI.getOperand(1).getReg()))
+ VEX_B = 0x0;
+ break;
+ default: // RawFrm
break;
}
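One subtlety the rewritten classification relies on: VEX stores R, X and B inverted relative to REX, so the code clears a bit to 0 exactly when the corresponding register is extended (r8..r15, xmm8..xmm15, ...). A one-line sketch of that convention:

    // Sketch: VEX.R/X/B are the complements of REX.R/X/B.
    #include <cstdint>

    uint8_t vexBitFor(bool RegIsExtended) {
      return RegIsExtended ? 0x0 : 0x1;  // 0 means "extended", as in VEX_R = 0x0
    }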
@@ -604,7 +639,7 @@ static unsigned DetermineREXPrefix(const MCInst &MI, uint64_t TSFlags,
const MCOperand &MO = MI.getOperand(i);
if (!MO.isReg()) continue;
unsigned Reg = MO.getReg();
- if (!X86InstrInfo::isX86_64NonExtLowByteReg(Reg)) continue;
+ if (!X86II::isX86_64NonExtLowByteReg(Reg)) continue;
// FIXME: The caller of DetermineREXPrefix slaps this prefix onto anything
// that returns non-zero.
REX |= 0x40; // REX fixed encoding prefix
@@ -615,25 +650,25 @@ static unsigned DetermineREXPrefix(const MCInst &MI, uint64_t TSFlags,
case X86II::MRMInitReg: assert(0 && "FIXME: Remove this!");
case X86II::MRMSrcReg:
if (MI.getOperand(0).isReg() &&
- X86InstrInfo::isX86_64ExtendedReg(MI.getOperand(0).getReg()))
+ X86II::isX86_64ExtendedReg(MI.getOperand(0).getReg()))
REX |= 1 << 2; // set REX.R
i = isTwoAddr ? 2 : 1;
for (; i != NumOps; ++i) {
const MCOperand &MO = MI.getOperand(i);
- if (MO.isReg() && X86InstrInfo::isX86_64ExtendedReg(MO.getReg()))
+ if (MO.isReg() && X86II::isX86_64ExtendedReg(MO.getReg()))
REX |= 1 << 0; // set REX.B
}
break;
case X86II::MRMSrcMem: {
if (MI.getOperand(0).isReg() &&
- X86InstrInfo::isX86_64ExtendedReg(MI.getOperand(0).getReg()))
+ X86II::isX86_64ExtendedReg(MI.getOperand(0).getReg()))
REX |= 1 << 2; // set REX.R
unsigned Bit = 0;
i = isTwoAddr ? 2 : 1;
for (; i != NumOps; ++i) {
const MCOperand &MO = MI.getOperand(i);
if (MO.isReg()) {
- if (X86InstrInfo::isX86_64ExtendedReg(MO.getReg()))
+ if (X86II::isX86_64ExtendedReg(MO.getReg()))
REX |= 1 << Bit; // set REX.B (Bit=0) and REX.X (Bit=1)
Bit++;
}
@@ -648,13 +683,13 @@ static unsigned DetermineREXPrefix(const MCInst &MI, uint64_t TSFlags,
unsigned e = (isTwoAddr ? X86::AddrNumOperands+1 : X86::AddrNumOperands);
i = isTwoAddr ? 1 : 0;
if (NumOps > e && MI.getOperand(e).isReg() &&
- X86InstrInfo::isX86_64ExtendedReg(MI.getOperand(e).getReg()))
+ X86II::isX86_64ExtendedReg(MI.getOperand(e).getReg()))
REX |= 1 << 2; // set REX.R
unsigned Bit = 0;
for (; i != e; ++i) {
const MCOperand &MO = MI.getOperand(i);
if (MO.isReg()) {
- if (X86InstrInfo::isX86_64ExtendedReg(MO.getReg()))
+ if (X86II::isX86_64ExtendedReg(MO.getReg()))
REX |= 1 << Bit; // REX.B (Bit=0) and REX.X (Bit=1)
Bit++;
}
@@ -663,12 +698,12 @@ static unsigned DetermineREXPrefix(const MCInst &MI, uint64_t TSFlags,
}
default:
if (MI.getOperand(0).isReg() &&
- X86InstrInfo::isX86_64ExtendedReg(MI.getOperand(0).getReg()))
+ X86II::isX86_64ExtendedReg(MI.getOperand(0).getReg()))
REX |= 1 << 0; // set REX.B
i = isTwoAddr ? 2 : 1;
for (unsigned e = NumOps; i != e; ++i) {
const MCOperand &MO = MI.getOperand(i);
- if (MO.isReg() && X86InstrInfo::isX86_64ExtendedReg(MO.getReg()))
+ if (MO.isReg() && X86II::isX86_64ExtendedReg(MO.getReg()))
REX |= 1 << 2; // set REX.R
}
break;
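
The REX logic above applies the same extension bits in their non-inverted form. REX is the single byte 0100WRXB; a hedged sketch of its assembly, independent of the LLVM helpers used here:

#include <cstdint>

// REX = 0b0100WRXB: W selects 64-bit operand size, R extends ModRM.reg,
// X extends SIB.index, and B extends ModRM.rm / SIB.base.
uint8_t buildREX(bool W, bool R, bool X, bool B) {
  return (uint8_t)(0x40 | (W << 3) | (R << 2) | (X << 1) | (unsigned)B);
}
// buildREX(true, false, false, true) == 0x49, the prefix seen on "mov r9, rax".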
@@ -731,7 +766,7 @@ void X86MCCodeEmitter::EmitOpcodePrefix(uint64_t TSFlags, unsigned &CurByte,
if ((TSFlags & X86II::AdSize) ||
(MemOperand != -1 && is64BitMode() && Is32BitMemOperand(MI, MemOperand)))
EmitByte(0x67, CurByte, OS);
-
+
// Emit the operand size opcode prefix as needed.
if (TSFlags & X86II::OpSize)
EmitByte(0x66, CurByte, OS);
@@ -834,7 +869,6 @@ EncodeInstruction(const MCInst &MI, raw_ostream &OS,
if ((TSFlags >> X86II::VEXShift) & X86II::VEX_4V)
HasVEX_4V = true;
-
// Determine where the memory operand starts, if present.
int MemoryOperand = X86II::getMemoryOperandNo(TSFlags);
if (MemoryOperand != -1) MemoryOperand += CurOp;
@@ -844,12 +878,11 @@ EncodeInstruction(const MCInst &MI, raw_ostream &OS,
else
EmitVEXOpcodePrefix(TSFlags, CurByte, MemoryOperand, MI, Desc, OS);
-
unsigned char BaseOpcode = X86II::getBaseOpcodeFor(TSFlags);
-
+
if ((TSFlags >> X86II::VEXShift) & X86II::Has3DNow0F0FOpcode)
BaseOpcode = 0x0F; // Weird 3DNow! encoding.
-
+
unsigned SrcRegNum = 0;
switch (TSFlags & X86II::FormMask) {
case X86II::MRMInitReg:
@@ -861,7 +894,6 @@ EncodeInstruction(const MCInst &MI, raw_ostream &OS,
case X86II::RawFrm:
EmitByte(BaseOpcode, CurByte, OS);
break;
-
case X86II::RawFrmImm8:
EmitByte(BaseOpcode, CurByte, OS);
EmitImmediate(MI.getOperand(CurOp++),
@@ -1006,8 +1038,7 @@ EncodeInstruction(const MCInst &MI, raw_ostream &OS,
// in bits[7:4] of an immediate byte, and bits[3:0] are ignored.
if ((TSFlags >> X86II::VEXShift) & X86II::VEX_I8IMM) {
const MCOperand &MO = MI.getOperand(CurOp++);
- bool IsExtReg =
- X86InstrInfo::isX86_64ExtendedReg(MO.getReg());
+ bool IsExtReg = X86II::isX86_64ExtendedReg(MO.getReg());
unsigned RegNum = (IsExtReg ? (1 << 7) : 0);
RegNum |= GetX86RegNum(MO) << 4;
EmitImmediate(MCOperand::CreateImm(RegNum), 1, FK_Data_1, CurByte, OS,
@@ -1030,7 +1061,6 @@ EncodeInstruction(const MCInst &MI, raw_ostream &OS,
if ((TSFlags >> X86II::VEXShift) & X86II::Has3DNow0F0FOpcode)
EmitByte(X86II::getBaseOpcodeFor(TSFlags), CurByte, OS);
-
#ifndef NDEBUG
// FIXME: Verify.
diff --git a/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp b/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp
index b77f37b..f98d5e3 100644
--- a/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp
+++ b/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp
@@ -13,12 +13,18 @@
#include "X86MCTargetDesc.h"
#include "X86MCAsmInfo.h"
+#include "InstPrinter/X86ATTInstPrinter.h"
+#include "InstPrinter/X86IntelInstPrinter.h"
+#include "llvm/MC/MachineLocation.h"
+#include "llvm/MC/MCCodeGenInfo.h"
+#include "llvm/MC/MCInstrAnalysis.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSubtargetInfo.h"
-#include "llvm/Target/TargetRegistry.h"
#include "llvm/ADT/Triple.h"
#include "llvm/Support/Host.h"
+#include "llvm/Support/TargetRegistry.h"
#define GET_REGINFO_MC_DESC
#include "X86GenRegisterInfo.inc"
@@ -34,9 +40,16 @@ using namespace llvm;
std::string X86_MC::ParseX86Triple(StringRef TT) {
Triple TheTriple(TT);
+ std::string FS;
if (TheTriple.getArch() == Triple::x86_64)
- return "+64bit-mode";
- return "-64bit-mode";
+ FS = "+64bit-mode";
+ else
+ FS = "-64bit-mode";
+ if (TheTriple.getOS() == Triple::NativeClient)
+ FS += ",+nacl-mode";
+ else
+ FS += ",-nacl-mode";
+ return FS;
}
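
ParseX86Triple now builds a comma-separated feature string rather than returning a single flag, so more features can be appended later. A rough standalone restatement of the new behaviour (the helper name is illustrative, not the LLVM API):

#include <string>

// Always emits +/-64bit-mode first, then +/-nacl-mode, as in the patch.
std::string parseX86TripleFeatures(bool Is64Bit, bool IsNaCl) {
  std::string FS = Is64Bit ? "+64bit-mode" : "-64bit-mode";
  FS += IsNaCl ? ",+nacl-mode" : ",-nacl-mode";
  return FS;  // e.g. "+64bit-mode,-nacl-mode" for x86_64-unknown-linux-gnu
}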
/// GetCpuIDAndInfo - Execute the specified cpuid and return the 4 values in the
@@ -107,6 +120,135 @@ void X86_MC::DetectFamilyModel(unsigned EAX, unsigned &Family,
}
}
+unsigned X86_MC::getDwarfRegFlavour(StringRef TT, bool isEH) {
+ Triple TheTriple(TT);
+ if (TheTriple.getArch() == Triple::x86_64)
+ return DWARFFlavour::X86_64;
+
+ if (TheTriple.isOSDarwin())
+ return isEH ? DWARFFlavour::X86_32_DarwinEH : DWARFFlavour::X86_32_Generic;
+ if (TheTriple.getOS() == Triple::MinGW32 ||
+ TheTriple.getOS() == Triple::Cygwin)
+ // Unsupported for now; just a quick fallback
+ return DWARFFlavour::X86_32_Generic;
+ return DWARFFlavour::X86_32_Generic;
+}
+
+/// getX86RegNum - Map an LLVM register identifier to its X86-specific
+/// numbering, which is used in various places when encoding instructions.
+unsigned X86_MC::getX86RegNum(unsigned RegNo) {
+ switch(RegNo) {
+ case X86::RAX: case X86::EAX: case X86::AX: case X86::AL: return N86::EAX;
+ case X86::RCX: case X86::ECX: case X86::CX: case X86::CL: return N86::ECX;
+ case X86::RDX: case X86::EDX: case X86::DX: case X86::DL: return N86::EDX;
+ case X86::RBX: case X86::EBX: case X86::BX: case X86::BL: return N86::EBX;
+ case X86::RSP: case X86::ESP: case X86::SP: case X86::SPL: case X86::AH:
+ return N86::ESP;
+ case X86::RBP: case X86::EBP: case X86::BP: case X86::BPL: case X86::CH:
+ return N86::EBP;
+ case X86::RSI: case X86::ESI: case X86::SI: case X86::SIL: case X86::DH:
+ return N86::ESI;
+ case X86::RDI: case X86::EDI: case X86::DI: case X86::DIL: case X86::BH:
+ return N86::EDI;
+
+ case X86::R8: case X86::R8D: case X86::R8W: case X86::R8B:
+ return N86::EAX;
+ case X86::R9: case X86::R9D: case X86::R9W: case X86::R9B:
+ return N86::ECX;
+ case X86::R10: case X86::R10D: case X86::R10W: case X86::R10B:
+ return N86::EDX;
+ case X86::R11: case X86::R11D: case X86::R11W: case X86::R11B:
+ return N86::EBX;
+ case X86::R12: case X86::R12D: case X86::R12W: case X86::R12B:
+ return N86::ESP;
+ case X86::R13: case X86::R13D: case X86::R13W: case X86::R13B:
+ return N86::EBP;
+ case X86::R14: case X86::R14D: case X86::R14W: case X86::R14B:
+ return N86::ESI;
+ case X86::R15: case X86::R15D: case X86::R15W: case X86::R15B:
+ return N86::EDI;
+
+ case X86::ST0: case X86::ST1: case X86::ST2: case X86::ST3:
+ case X86::ST4: case X86::ST5: case X86::ST6: case X86::ST7:
+ return RegNo-X86::ST0;
+
+ case X86::XMM0: case X86::XMM8:
+ case X86::YMM0: case X86::YMM8: case X86::MM0:
+ return 0;
+ case X86::XMM1: case X86::XMM9:
+ case X86::YMM1: case X86::YMM9: case X86::MM1:
+ return 1;
+ case X86::XMM2: case X86::XMM10:
+ case X86::YMM2: case X86::YMM10: case X86::MM2:
+ return 2;
+ case X86::XMM3: case X86::XMM11:
+ case X86::YMM3: case X86::YMM11: case X86::MM3:
+ return 3;
+ case X86::XMM4: case X86::XMM12:
+ case X86::YMM4: case X86::YMM12: case X86::MM4:
+ return 4;
+ case X86::XMM5: case X86::XMM13:
+ case X86::YMM5: case X86::YMM13: case X86::MM5:
+ return 5;
+ case X86::XMM6: case X86::XMM14:
+ case X86::YMM6: case X86::YMM14: case X86::MM6:
+ return 6;
+ case X86::XMM7: case X86::XMM15:
+ case X86::YMM7: case X86::YMM15: case X86::MM7:
+ return 7;
+
+ case X86::ES: return 0;
+ case X86::CS: return 1;
+ case X86::SS: return 2;
+ case X86::DS: return 3;
+ case X86::FS: return 4;
+ case X86::GS: return 5;
+
+ case X86::CR0: case X86::CR8 : case X86::DR0: return 0;
+ case X86::CR1: case X86::CR9 : case X86::DR1: return 1;
+ case X86::CR2: case X86::CR10: case X86::DR2: return 2;
+ case X86::CR3: case X86::CR11: case X86::DR3: return 3;
+ case X86::CR4: case X86::CR12: case X86::DR4: return 4;
+ case X86::CR5: case X86::CR13: case X86::DR5: return 5;
+ case X86::CR6: case X86::CR14: case X86::DR6: return 6;
+ case X86::CR7: case X86::CR15: case X86::DR7: return 7;
+
+ // Pseudo index registers are equivalent to a "none"
+ // scaled index (See Intel Manual 2A, table 2-3)
+ case X86::EIZ:
+ case X86::RIZ:
+ return 4;
+
+ default:
+ assert((int(RegNo) > 0) && "Unknown physical register!");
+ return 0;
+ }
+}
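
Note that getX86RegNum yields only the three ModRM/SIB field bits: R8-R15 deliberately collapse onto the same 0-7 values as RAX-RDI, with the fourth bit carried by a REX or VEX prefix instead. For the plain GPRs the whole table reduces to a mask, as this hedged sketch shows (raw hardware encodings assumed; helper name illustrative):

// For GPRs with hardware encodings 0-15, the ModRM field is the low three
// bits; bit 3 travels in REX.R/X/B rather than in ModRM itself.
unsigned x86RegNumFromHWEnc(unsigned HWEnc) { return HWEnc & 7; }
// x86RegNumFromHWEnc(9 /* R9 */) == 1, the same field value as RCX above.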
+
+void X86_MC::InitLLVM2SEHRegisterMapping(MCRegisterInfo *MRI) {
+ // FIXME: TableGen these.
+ for (unsigned Reg = X86::NoRegister+1; Reg < X86::NUM_TARGET_REGS; ++Reg) {
+ int SEH = X86_MC::getX86RegNum(Reg);
+ switch (Reg) {
+ case X86::R8: case X86::R8D: case X86::R8W: case X86::R8B:
+ case X86::R9: case X86::R9D: case X86::R9W: case X86::R9B:
+ case X86::R10: case X86::R10D: case X86::R10W: case X86::R10B:
+ case X86::R11: case X86::R11D: case X86::R11W: case X86::R11B:
+ case X86::R12: case X86::R12D: case X86::R12W: case X86::R12B:
+ case X86::R13: case X86::R13D: case X86::R13W: case X86::R13B:
+ case X86::R14: case X86::R14D: case X86::R14W: case X86::R14B:
+ case X86::R15: case X86::R15D: case X86::R15W: case X86::R15B:
+ case X86::XMM8: case X86::XMM9: case X86::XMM10: case X86::XMM11:
+ case X86::XMM12: case X86::XMM13: case X86::XMM14: case X86::XMM15:
+ case X86::YMM8: case X86::YMM9: case X86::YMM10: case X86::YMM11:
+ case X86::YMM12: case X86::YMM13: case X86::YMM14: case X86::YMM15:
+ SEH += 8;
+ break;
+ }
+ MRI->mapLLVMRegToSEHReg(Reg, SEH);
+ }
+}
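
Win64 SEH unwind codes store the full four-bit register number, which is why the loop above adds 8 back for exactly the extended registers that getX86RegNum truncates. A compact restatement (helper name illustrative):

// SEH register numbers are the raw 4-bit encodings: the 3-bit ModRM number
// plus the extension bit that the instruction prefixes normally carry.
unsigned sehRegNum(unsigned ModRMNum, bool IsExtended) {
  return ModRMNum + (IsExtended ? 8 : 0);  // e.g. R10: 2 + 8 == 10
}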
+
MCSubtargetInfo *X86_MC::createX86MCSubtargetInfo(StringRef TT, StringRef CPU,
StringRef FS) {
std::string ArchFS = X86_MC::ParseX86Triple(TT);
@@ -131,55 +273,191 @@ MCSubtargetInfo *X86_MC::createX86MCSubtargetInfo(StringRef TT, StringRef CPU,
return X;
}
-// Force static initialization.
-extern "C" void LLVMInitializeX86MCSubtargetInfo() {
- TargetRegistry::RegisterMCSubtargetInfo(TheX86_32Target,
- X86_MC::createX86MCSubtargetInfo);
- TargetRegistry::RegisterMCSubtargetInfo(TheX86_64Target,
- X86_MC::createX86MCSubtargetInfo);
-}
-
static MCInstrInfo *createX86MCInstrInfo() {
MCInstrInfo *X = new MCInstrInfo();
InitX86MCInstrInfo(X);
return X;
}
-extern "C" void LLVMInitializeX86MCInstrInfo() {
- TargetRegistry::RegisterMCInstrInfo(TheX86_32Target, createX86MCInstrInfo);
- TargetRegistry::RegisterMCInstrInfo(TheX86_64Target, createX86MCInstrInfo);
-}
+static MCRegisterInfo *createX86MCRegisterInfo(StringRef TT) {
+ Triple TheTriple(TT);
+ unsigned RA = (TheTriple.getArch() == Triple::x86_64)
+ ? X86::RIP // Should have dwarf #16.
+ : X86::EIP; // Should have dwarf #8.
-static MCRegisterInfo *createX86MCRegisterInfo() {
MCRegisterInfo *X = new MCRegisterInfo();
- InitX86MCRegisterInfo(X);
+ InitX86MCRegisterInfo(X, RA,
+ X86_MC::getDwarfRegFlavour(TT, false),
+ X86_MC::getDwarfRegFlavour(TT, true));
+ X86_MC::InitLLVM2SEHRegisterMapping(X);
return X;
}
-extern "C" void LLVMInitializeX86MCRegInfo() {
- TargetRegistry::RegisterMCRegInfo(TheX86_32Target, createX86MCRegisterInfo);
- TargetRegistry::RegisterMCRegInfo(TheX86_64Target, createX86MCRegisterInfo);
-}
-
-
static MCAsmInfo *createX86MCAsmInfo(const Target &T, StringRef TT) {
Triple TheTriple(TT);
+ bool is64Bit = TheTriple.getArch() == Triple::x86_64;
+ MCAsmInfo *MAI;
if (TheTriple.isOSDarwin() || TheTriple.getEnvironment() == Triple::MachO) {
- if (TheTriple.getArch() == Triple::x86_64)
- return new X86_64MCAsmInfoDarwin(TheTriple);
+ if (is64Bit)
+ MAI = new X86_64MCAsmInfoDarwin(TheTriple);
else
- return new X86MCAsmInfoDarwin(TheTriple);
+ MAI = new X86MCAsmInfoDarwin(TheTriple);
+ } else if (TheTriple.isOSWindows()) {
+ MAI = new X86MCAsmInfoCOFF(TheTriple);
+ } else {
+ MAI = new X86ELFMCAsmInfo(TheTriple);
}
+ // Initialize the initial frame state.
+ // Calculate the number of bytes used to store the return address.
+ int stackGrowth = is64Bit ? -8 : -4;
+
+ // Initial state of the frame pointer is esp+stackGrowth.
+ MachineLocation Dst(MachineLocation::VirtualFP);
+ MachineLocation Src(is64Bit ? X86::RSP : X86::ESP, stackGrowth);
+ MAI->addInitialFrameState(0, Dst, Src);
+
+ // Add the return address to the move list.
+ MachineLocation CSDst(is64Bit ? X86::RSP : X86::ESP, stackGrowth);
+ MachineLocation CSSrc(is64Bit ? X86::RIP : X86::EIP);
+ MAI->addInitialFrameState(0, CSDst, CSSrc);
+
+ return MAI;
+}
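
The two addInitialFrameState calls record the DWARF view of function entry: the canonical frame address is the stack pointer plus one slot (the just-pushed return address), and the return address lives in that slot. A small program printing those facts, assuming only that the slot size matches -stackGrowth above:

#include <cstdio>

int main() {
  for (bool is64Bit : {false, true}) {
    int slot = is64Bit ? 8 : 4;  // matches -stackGrowth in the patch
    std::printf("%s: CFA = %s + %d, return address at [CFA - %d]\n",
                is64Bit ? "x86-64" : "x86-32",
                is64Bit ? "RSP" : "ESP", slot, slot);
  }
  return 0;
}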
+
+static MCCodeGenInfo *createX86MCCodeGenInfo(StringRef TT, Reloc::Model RM,
+ CodeModel::Model CM) {
+ MCCodeGenInfo *X = new MCCodeGenInfo();
+
+ Triple T(TT);
+ bool is64Bit = T.getArch() == Triple::x86_64;
+
+ if (RM == Reloc::Default) {
+ // Darwin defaults to PIC in 64-bit mode and dynamic-no-pic in 32-bit
+ // mode. Win64 requires rip-relative addressing, so we force PIC there.
+ // Otherwise we use the static relocation model by default.
+ if (T.isOSDarwin()) {
+ if (is64Bit)
+ RM = Reloc::PIC_;
+ else
+ RM = Reloc::DynamicNoPIC;
+ } else if (T.isOSWindows() && is64Bit)
+ RM = Reloc::PIC_;
+ else
+ RM = Reloc::Static;
+ }
+
+ // ELF and X86-64 don't have a distinct DynamicNoPIC model. DynamicNoPIC
+ // is defined as a model for code which may be used in static or dynamic
+ // executables but not necessarily a shared library. On X86-32 we just
+ // compile in -static mode; on X86-64 we use PIC.
+ if (RM == Reloc::DynamicNoPIC) {
+ if (is64Bit)
+ RM = Reloc::PIC_;
+ else if (!T.isOSDarwin())
+ RM = Reloc::Static;
+ }
+
+ // If we are on Darwin, disallow the static relocation model in X86-64
+ // mode, since the Mach-O file format doesn't support it.
+ if (RM == Reloc::Static && T.isOSDarwin() && is64Bit)
+ RM = Reloc::PIC_;
+
+ // If the code model is not already set, default to the small code model.
+ if (CM == CodeModel::Default)
+ CM = CodeModel::Small;
+ else if (CM == CodeModel::JITDefault)
+ // 64-bit JIT places everything in the same buffer except external funcs.
+ CM = is64Bit ? CodeModel::Large : CodeModel::Small;
+
+ X->InitMCCodeGenInfo(RM, CM);
+ return X;
+}
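
The relocation-model defaulting above reduces to a small decision table. A standalone restatement under the same conditions (the enum is illustrative, not LLVM's Reloc::Model):

enum class RelocModel { Static, PIC, DynamicNoPIC };

// Platform defaults chosen by createX86MCCodeGenInfo when RM is unset.
RelocModel defaultRelocModel(bool IsDarwin, bool IsWindows, bool Is64Bit) {
  if (IsDarwin)
    return Is64Bit ? RelocModel::PIC : RelocModel::DynamicNoPIC;
  if (IsWindows && Is64Bit)
    return RelocModel::PIC;  // Win64 needs rip-relative addressing
  return RelocModel::Static;
}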
+
+static MCStreamer *createMCStreamer(const Target &T, StringRef TT,
+ MCContext &Ctx, MCAsmBackend &MAB,
+ raw_ostream &_OS,
+ MCCodeEmitter *_Emitter,
+ bool RelaxAll,
+ bool NoExecStack) {
+ Triple TheTriple(TT);
+
+ if (TheTriple.isOSDarwin() || TheTriple.getEnvironment() == Triple::MachO)
+ return createMachOStreamer(Ctx, MAB, _OS, _Emitter, RelaxAll);
+
if (TheTriple.isOSWindows())
- return new X86MCAsmInfoCOFF(TheTriple);
+ return createWinCOFFStreamer(Ctx, MAB, *_Emitter, _OS, RelaxAll);
+
+ return createELFStreamer(Ctx, MAB, _OS, _Emitter, RelaxAll, NoExecStack);
+}
+
+static MCInstPrinter *createX86MCInstPrinter(const Target &T,
+ unsigned SyntaxVariant,
+ const MCAsmInfo &MAI,
+ const MCSubtargetInfo &STI) {
+ if (SyntaxVariant == 0)
+ return new X86ATTInstPrinter(MAI);
+ if (SyntaxVariant == 1)
+ return new X86IntelInstPrinter(MAI);
+ return 0;
+}
- return new X86ELFMCAsmInfo(TheTriple);
+static MCInstrAnalysis *createX86MCInstrAnalysis(const MCInstrInfo *Info) {
+ return new MCInstrAnalysis(Info);
}
-extern "C" void LLVMInitializeX86MCAsmInfo() {
- // Register the target asm info.
+// Force static initialization.
+extern "C" void LLVMInitializeX86TargetMC() {
+ // Register the MC asm info.
RegisterMCAsmInfoFn A(TheX86_32Target, createX86MCAsmInfo);
RegisterMCAsmInfoFn B(TheX86_64Target, createX86MCAsmInfo);
+
+ // Register the MC codegen info.
+ RegisterMCCodeGenInfoFn C(TheX86_32Target, createX86MCCodeGenInfo);
+ RegisterMCCodeGenInfoFn D(TheX86_64Target, createX86MCCodeGenInfo);
+
+ // Register the MC instruction info.
+ TargetRegistry::RegisterMCInstrInfo(TheX86_32Target, createX86MCInstrInfo);
+ TargetRegistry::RegisterMCInstrInfo(TheX86_64Target, createX86MCInstrInfo);
+
+ // Register the MC register info.
+ TargetRegistry::RegisterMCRegInfo(TheX86_32Target, createX86MCRegisterInfo);
+ TargetRegistry::RegisterMCRegInfo(TheX86_64Target, createX86MCRegisterInfo);
+
+ // Register the MC subtarget info.
+ TargetRegistry::RegisterMCSubtargetInfo(TheX86_32Target,
+ X86_MC::createX86MCSubtargetInfo);
+ TargetRegistry::RegisterMCSubtargetInfo(TheX86_64Target,
+ X86_MC::createX86MCSubtargetInfo);
+
+ // Register the MC instruction analyzer.
+ TargetRegistry::RegisterMCInstrAnalysis(TheX86_32Target,
+ createX86MCInstrAnalysis);
+ TargetRegistry::RegisterMCInstrAnalysis(TheX86_64Target,
+ createX86MCInstrAnalysis);
+
+ // Register the code emitter.
+ TargetRegistry::RegisterMCCodeEmitter(TheX86_32Target,
+ createX86MCCodeEmitter);
+ TargetRegistry::RegisterMCCodeEmitter(TheX86_64Target,
+ createX86MCCodeEmitter);
+
+ // Register the asm backend.
+ TargetRegistry::RegisterMCAsmBackend(TheX86_32Target,
+ createX86_32AsmBackend);
+ TargetRegistry::RegisterMCAsmBackend(TheX86_64Target,
+ createX86_64AsmBackend);
+
+ // Register the object streamer.
+ TargetRegistry::RegisterMCObjectStreamer(TheX86_32Target,
+ createMCStreamer);
+ TargetRegistry::RegisterMCObjectStreamer(TheX86_64Target,
+ createMCStreamer);
+
+ // Register the MCInstPrinter.
+ TargetRegistry::RegisterMCInstPrinter(TheX86_32Target,
+ createX86MCInstPrinter);
+ TargetRegistry::RegisterMCInstPrinter(TheX86_64Target,
+ createX86MCInstPrinter);
}
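
All the per-component initializers are now folded into this single LLVMInitializeX86TargetMC hook. The registry pattern underneath is simply named factory functions; a toy sketch of the idea (the real TargetRegistry hangs the factories off Target objects rather than a map):

#include <functional>
#include <map>
#include <string>

template <typename T>
class ToyRegistry {
  std::map<std::string, std::function<T *()>> Factories;
public:
  void Register(const std::string &Name, std::function<T *()> F) {
    Factories[Name] = std::move(F);
  }
  T *create(const std::string &Name) const {
    auto It = Factories.find(Name);
    return It == Factories.end() ? nullptr : It->second();
  }
};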
diff --git a/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.h b/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.h
index 89ea22b..c144c51 100644
--- a/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.h
+++ b/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.h
@@ -14,15 +14,39 @@
#ifndef X86MCTARGETDESC_H
#define X86MCTARGETDESC_H
+#include "llvm/Support/DataTypes.h"
#include <string>
namespace llvm {
+class MCAsmBackend;
+class MCCodeEmitter;
+class MCContext;
+class MCInstrInfo;
+class MCObjectWriter;
+class MCRegisterInfo;
class MCSubtargetInfo;
class Target;
class StringRef;
+class raw_ostream;
extern Target TheX86_32Target, TheX86_64Target;
+/// DWARFFlavour - Flavour of DWARF register numbers
+///
+namespace DWARFFlavour {
+ enum {
+ X86_64 = 0, X86_32_DarwinEH = 1, X86_32_Generic = 2
+ };
+}
+
+/// N86 namespace - Native X86 register numbers
+///
+namespace N86 {
+ enum {
+ EAX = 0, ECX = 1, EDX = 2, EBX = 3, ESP = 4, EBP = 5, ESI = 6, EDI = 7
+ };
+}
+
namespace X86_MC {
std::string ParseX86Triple(StringRef TT);
@@ -33,13 +57,32 @@ namespace X86_MC {
void DetectFamilyModel(unsigned EAX, unsigned &Family, unsigned &Model);
- /// createARMMCSubtargetInfo - Create a X86 MCSubtargetInfo instance.
+ unsigned getDwarfRegFlavour(StringRef TT, bool isEH);
+
+ unsigned getX86RegNum(unsigned RegNo);
+
+ void InitLLVM2SEHRegisterMapping(MCRegisterInfo *MRI);
+
+ /// createX86MCSubtargetInfo - Create an X86 MCSubtargetInfo instance.
/// This is exposed so Asm parser, etc. do not need to go through
/// TargetRegistry.
MCSubtargetInfo *createX86MCSubtargetInfo(StringRef TT, StringRef CPU,
StringRef FS);
}
+MCCodeEmitter *createX86MCCodeEmitter(const MCInstrInfo &MCII,
+ const MCSubtargetInfo &STI,
+ MCContext &Ctx);
+
+MCAsmBackend *createX86_32AsmBackend(const Target &T, StringRef TT);
+MCAsmBackend *createX86_64AsmBackend(const Target &T, StringRef TT);
+
+/// createX86MachObjectWriter - Construct an X86 Mach-O object writer.
+MCObjectWriter *createX86MachObjectWriter(raw_ostream &OS,
+ bool Is64Bit,
+ uint32_t CPUType,
+ uint32_t CPUSubtype);
+
} // End llvm namespace
diff --git a/lib/Target/X86/X86MachObjectWriter.cpp b/lib/Target/X86/MCTargetDesc/X86MachObjectWriter.cpp
index 3711038..f0f1982 100644
--- a/lib/Target/X86/X86MachObjectWriter.cpp
+++ b/lib/Target/X86/MCTargetDesc/X86MachObjectWriter.cpp
@@ -7,14 +7,14 @@
//
//===----------------------------------------------------------------------===//
-#include "X86.h"
-#include "X86FixupKinds.h"
-#include "llvm/ADT/Twine.h"
+#include "MCTargetDesc/X86FixupKinds.h"
+#include "MCTargetDesc/X86MCTargetDesc.h"
#include "llvm/MC/MCAssembler.h"
#include "llvm/MC/MCAsmLayout.h"
#include "llvm/MC/MCMachObjectWriter.h"
#include "llvm/MC/MCSectionMachO.h"
#include "llvm/MC/MCValue.h"
+#include "llvm/ADT/Twine.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Object/MachOFormat.h"
diff --git a/lib/Target/X86/README-SSE.txt b/lib/Target/X86/README-SSE.txt
index f16ec02..7d901af 100644
--- a/lib/Target/X86/README-SSE.txt
+++ b/lib/Target/X86/README-SSE.txt
@@ -862,7 +862,7 @@ define float @bar(float %x) nounwind {
This IR (from PR6194):
-target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
target triple = "x86_64-apple-darwin10.0.0"
%0 = type { double, double }
diff --git a/lib/Target/X86/README.txt b/lib/Target/X86/README.txt
index 560947a..b407955 100644
--- a/lib/Target/X86/README.txt
+++ b/lib/Target/X86/README.txt
@@ -2,11 +2,6 @@
// Random ideas for the X86 backend.
//===---------------------------------------------------------------------===//
-We should add support for the "movbe" instruction, which does a byte-swapping
-copy (3-addr bswap + memory support?) This is available on Atom processors.
-
-//===---------------------------------------------------------------------===//
-
This should be one DIV/IDIV instruction, not a libcall:
unsigned test(unsigned long long X, unsigned Y) {
@@ -1222,7 +1217,7 @@ Also check why xmm7 is not used at all in the function.
Take the following:
-target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128-S128"
target triple = "i386-apple-darwin8"
@in_exit.4870.b = internal global i1 false ; <i1*> [#uses=2]
define fastcc void @abort_gzip() noreturn nounwind {
@@ -2066,3 +2061,34 @@ The trick is to match "fetch_and_add(X, -C) == C".
//===---------------------------------------------------------------------===//
+unsigned log2(unsigned x) {
+ return x > 1 ? 32-__builtin_clz(x-1) : 0;
+}
+
+generates (x86_64):
+ xorl %eax, %eax
+ cmpl $2, %edi
+ jb LBB0_2
+## BB#1:
+ decl %edi
+ movl $63, %ecx
+ bsrl %edi, %eax
+ cmovel %ecx, %eax
+ xorl $-32, %eax
+ addl $33, %eax
+LBB0_2:
+ ret
+
+The cmov and the early test are redundant:
+ xorl %eax, %eax
+ cmpl $2, %edi
+ jb LBB0_2
+## BB#1:
+ decl %edi
+ bsrl %edi, %eax
+ xorl $-32, %eax
+ addl $33, %eax
+LBB0_2:
+ ret
+
+//===---------------------------------------------------------------------===//
diff --git a/lib/Target/X86/TargetInfo/CMakeLists.txt b/lib/Target/X86/TargetInfo/CMakeLists.txt
index 90be9f5..4da00fa 100644
--- a/lib/Target/X86/TargetInfo/CMakeLists.txt
+++ b/lib/Target/X86/TargetInfo/CMakeLists.txt
@@ -4,4 +4,10 @@ add_llvm_library(LLVMX86Info
X86TargetInfo.cpp
)
-add_dependencies(LLVMX86Info X86CodeGenTable_gen)
+add_llvm_library_dependencies(LLVMX86Info
+ LLVMMC
+ LLVMSupport
+ LLVMTarget
+ )
+
+add_dependencies(LLVMX86Info X86CommonTableGen)
diff --git a/lib/Target/X86/TargetInfo/X86TargetInfo.cpp b/lib/Target/X86/TargetInfo/X86TargetInfo.cpp
index 08d4d84..52a67f7 100644
--- a/lib/Target/X86/TargetInfo/X86TargetInfo.cpp
+++ b/lib/Target/X86/TargetInfo/X86TargetInfo.cpp
@@ -9,7 +9,7 @@
#include "X86.h"
#include "llvm/Module.h"
-#include "llvm/Target/TargetRegistry.h"
+#include "llvm/Support/TargetRegistry.h"
using namespace llvm;
Target llvm::TheX86_32Target, llvm::TheX86_64Target;
diff --git a/lib/Target/X86/Utils/CMakeLists.txt b/lib/Target/X86/Utils/CMakeLists.txt
index 3ad5f99..caffd8b 100644
--- a/lib/Target/X86/Utils/CMakeLists.txt
+++ b/lib/Target/X86/Utils/CMakeLists.txt
@@ -3,4 +3,10 @@ include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/
add_llvm_library(LLVMX86Utils
X86ShuffleDecode.cpp
)
-add_dependencies(LLVMX86Utils X86CodeGenTable_gen)
+
+add_llvm_library_dependencies(LLVMX86Utils
+ LLVMCore
+ LLVMSupport
+ )
+
+add_dependencies(LLVMX86Utils X86CommonTableGen)
diff --git a/lib/Target/X86/Utils/X86ShuffleDecode.cpp b/lib/Target/X86/Utils/X86ShuffleDecode.cpp
index cd06060..aeb3309 100644
--- a/lib/Target/X86/Utils/X86ShuffleDecode.cpp
+++ b/lib/Target/X86/Utils/X86ShuffleDecode.cpp
@@ -167,24 +167,77 @@ void DecodeUNPCKLPMask(EVT VT,
SmallVectorImpl<unsigned> &ShuffleMask) {
unsigned NumElts = VT.getVectorNumElements();
- // Handle vector lengths > 128 bits. Define a "section" as a set of
- // 128 bits. AVX defines UNPCK* to operate independently on 128-bit
- // sections.
- unsigned NumSections = VT.getSizeInBits() / 128;
- if (NumSections == 0 ) NumSections = 1; // Handle MMX
- unsigned NumSectionElts = NumElts / NumSections;
+ // Handle 128- and 256-bit vector lengths. AVX defines UNPCK* to operate
+ // independently on 128-bit lanes.
+ unsigned NumLanes = VT.getSizeInBits() / 128;
+ if (NumLanes == 0 ) NumLanes = 1; // Handle MMX
+ unsigned NumLaneElts = NumElts / NumLanes;
unsigned Start = 0;
- unsigned End = NumSectionElts / 2;
- for (unsigned s = 0; s < NumSections; ++s) {
+ unsigned End = NumLaneElts / 2;
+ for (unsigned s = 0; s < NumLanes; ++s) {
for (unsigned i = Start; i != End; ++i) {
ShuffleMask.push_back(i); // Reads from dest/src1
- ShuffleMask.push_back(i+NumSectionElts); // Reads from src/src2
+ ShuffleMask.push_back(i+NumLaneElts); // Reads from src/src2
}
// Process the next 128 bits.
- Start += NumSectionElts;
- End += NumSectionElts;
+ Start += NumLaneElts;
+ End += NumLaneElts;
}
}
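
For the single-lane v4f32 case the renamed loop still yields the classic unpack-low interleave. A minimal re-run of exactly that case:

#include <cstdio>
#include <vector>

int main() {
  unsigned NumLaneElts = 4;  // v4f32: one 128-bit lane of four elements
  std::vector<unsigned> Mask;
  for (unsigned i = 0; i != NumLaneElts / 2; ++i) {
    Mask.push_back(i);                // reads from dest/src1
    Mask.push_back(i + NumLaneElts);  // reads from src/src2
  }
  for (unsigned M : Mask)
    std::printf("%u ", M);            // prints: 0 4 1 5
  std::printf("\n");
  return 0;
}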
+// DecodeVPERMILPSMask - Decodes VPERMILPS permutes of 32-bit elements within
+// each 128-bit lane. A 256-bit vector is treated as two 128-bit lanes; the
+// referenced elements can't cross lanes, and the mask of the first lane must
+// be the same as that of the second.
+void DecodeVPERMILPSMask(unsigned NumElts, unsigned Imm,
+ SmallVectorImpl<unsigned> &ShuffleMask) {
+ unsigned NumLanes = (NumElts*32)/128;
+ unsigned LaneSize = NumElts/NumLanes;
+
+ for (unsigned l = 0; l != NumLanes; ++l) {
+ for (unsigned i = 0; i != LaneSize; ++i) {
+ unsigned Idx = (Imm >> (i*2)) & 0x3 ;
+ ShuffleMask.push_back(Idx+(l*LaneSize));
+ }
+ }
+}
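
A worked example of the decode just defined: each two-bit field of the immediate selects one 32-bit element within its lane, so an example immediate of 0x1B (0b00011011) reverses a single 128-bit lane:

#include <cstdio>

int main() {
  unsigned Imm = 0x1B;  // example immediate, four elements in one lane
  for (unsigned i = 0; i != 4; ++i)
    std::printf("%u ", (Imm >> (i * 2)) & 0x3);  // prints: 3 2 1 0
  std::printf("\n");
  return 0;
}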
+
+// DecodeVPERMILPDMask - Decodes VPERMILPD permutes of 64-bit elements within
+// each 128-bit lane. A 256-bit vector is treated as two 128-bit lanes; the
+// referenced elements can't cross lanes, but the mask of the first lane may
+// differ from that of the second (unlike VPERMILPS).
+void DecodeVPERMILPDMask(unsigned NumElts, unsigned Imm,
+ SmallVectorImpl<unsigned> &ShuffleMask) {
+ unsigned NumLanes = (NumElts*64)/128;
+ unsigned LaneSize = NumElts/NumLanes;
+
+ for (unsigned l = 0; l < NumLanes; ++l) {
+ for (unsigned i = l*LaneSize; i < LaneSize*(l+1); ++i) {
+ unsigned Idx = (Imm >> i) & 0x1;
+ ShuffleMask.push_back(Idx+(l*LaneSize));
+ }
+ }
+}
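
The PD variant uses one selector bit per 64-bit element, still per 128-bit lane. For v4f64 an example immediate of 0x5 swaps the element pair in both lanes:

#include <cstdio>

int main() {
  unsigned NumElts = 4, NumLanes = 2, LaneSize = NumElts / NumLanes;
  unsigned Imm = 0x5;  // example immediate
  for (unsigned l = 0; l != NumLanes; ++l)
    for (unsigned i = l * LaneSize; i != LaneSize * (l + 1); ++i)
      std::printf("%u ", ((Imm >> i) & 0x1) + l * LaneSize);
  std::printf("\n");  // prints: 1 0 3 2
  return 0;
}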
+
+void DecodeVPERM2F128Mask(EVT VT, unsigned Imm,
+ SmallVectorImpl<unsigned> &ShuffleMask) {
+ unsigned HalfSize = VT.getVectorNumElements()/2;
+ unsigned FstHalfBegin = (Imm & 0x3) * HalfSize;
+ unsigned SndHalfBegin = ((Imm >> 4) & 0x3) * HalfSize;
+
+ for (int i = FstHalfBegin, e = FstHalfBegin+HalfSize; i != e; ++i)
+ ShuffleMask.push_back(i);
+ for (int i = SndHalfBegin, e = SndHalfBegin+HalfSize; i != e; ++i)
+ ShuffleMask.push_back(i);
+}
+
+void DecodeVPERM2F128Mask(unsigned Imm,
+ SmallVectorImpl<unsigned> &ShuffleMask) {
+ // VPERM2F128 is used by any 256-bit EVT, but X86InstComments only
+ // has information about the instruction and not the types. So for
+ // instruction comment purposes, assume the 256-bit vector is v4i64.
+ return DecodeVPERM2F128Mask(MVT::v4i64, Imm, ShuffleMask);
+}
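
For VPERM2F128, each nibble of the immediate selects a whole 128-bit half across both sources. On v4i64 (HalfSize of 2) an example immediate of 0x21 picks the high half of src1 and the low half of src2:

#include <cstdio>

int main() {
  unsigned HalfSize = 2;  // v4i64
  unsigned Imm = 0x21;    // example immediate
  unsigned Fst = (Imm & 0x3) * HalfSize;
  unsigned Snd = ((Imm >> 4) & 0x3) * HalfSize;
  for (unsigned i = Fst; i != Fst + HalfSize; ++i) std::printf("%u ", i);
  for (unsigned i = Snd; i != Snd + HalfSize; ++i) std::printf("%u ", i);
  std::printf("\n");  // prints: 2 3 4 5 (indices >= 4 name the second source)
  return 0;
}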
+
} // llvm namespace
diff --git a/lib/Target/X86/Utils/X86ShuffleDecode.h b/lib/Target/X86/Utils/X86ShuffleDecode.h
index b18f670..58193e6 100644
--- a/lib/Target/X86/Utils/X86ShuffleDecode.h
+++ b/lib/Target/X86/Utils/X86ShuffleDecode.h
@@ -82,6 +82,26 @@ void DecodeUNPCKLPDMask(unsigned NElts,
void DecodeUNPCKLPMask(EVT VT,
SmallVectorImpl<unsigned> &ShuffleMask);
+
+// DecodeVPERMILPSMask - Decodes VPERMILPS permutes of 32-bit elements within
+// each 128-bit lane. A 256-bit vector is treated as two 128-bit lanes; the
+// referenced elements can't cross lanes, and the mask of the first lane must
+// be the same as that of the second.
+void DecodeVPERMILPSMask(unsigned NElts, unsigned Imm,
+ SmallVectorImpl<unsigned> &ShuffleMask);
+
+// DecodeVPERMILPDMask - Decodes VPERMILPD permutes of 64-bit elements within
+// each 128-bit lane. A 256-bit vector is treated as two 128-bit lanes; the
+// referenced elements can't cross lanes, but the mask of the first lane may
+// differ from that of the second (unlike VPERMILPS).
+void DecodeVPERMILPDMask(unsigned NElts, unsigned Imm,
+ SmallVectorImpl<unsigned> &ShuffleMask);
+
+void DecodeVPERM2F128Mask(unsigned Imm,
+ SmallVectorImpl<unsigned> &ShuffleMask);
+void DecodeVPERM2F128Mask(EVT VT, unsigned Imm,
+ SmallVectorImpl<unsigned> &ShuffleMask);
+
} // llvm namespace
#endif
diff --git a/lib/Target/X86/X86.h b/lib/Target/X86/X86.h
index ec52dfb..81e9422 100644
--- a/lib/Target/X86/X86.h
+++ b/lib/Target/X86/X86.h
@@ -15,6 +15,7 @@
#ifndef TARGET_X86_H
#define TARGET_X86_H
+#include "MCTargetDesc/X86BaseInfo.h"
#include "MCTargetDesc/X86MCTargetDesc.h"
#include "llvm/Support/DataTypes.h"
#include "llvm/Target/TargetMachine.h"
@@ -24,16 +25,8 @@ namespace llvm {
class FunctionPass;
class JITCodeEmitter;
class MachineCodeEmitter;
-class MCCodeEmitter;
-class MCContext;
-class MCInstrInfo;
-class MCObjectWriter;
-class MCSubtargetInfo;
class Target;
-class TargetAsmBackend;
class X86TargetMachine;
-class formatted_raw_ostream;
-class raw_ostream;
/// createX86ISelDag - This pass converts a legalized DAG into a
/// X86-specific DAG, ready for instruction scheduling.
@@ -51,22 +44,16 @@ FunctionPass* createGlobalBaseRegPass();
///
FunctionPass *createX86FloatingPointStackifierPass();
-/// createSSEDomainFixPass - This pass twiddles SSE opcodes to prevent domain
-/// crossings.
-FunctionPass *createSSEDomainFixPass();
+/// createX86IssueVZeroUpperPass - This pass inserts AVX vzeroupper instructions
+/// before each call to avoid the transition penalty between functions
+/// encoded with AVX and SSE.
+FunctionPass *createX86IssueVZeroUpperPass();
/// createX86CodeEmitterPass - Return a pass that emits the collected X86 code
/// to the specified MCE object.
FunctionPass *createX86JITCodeEmitterPass(X86TargetMachine &TM,
JITCodeEmitter &JCE);
-MCCodeEmitter *createX86MCCodeEmitter(const MCInstrInfo &MCII,
- const MCSubtargetInfo &STI,
- MCContext &Ctx);
-
-TargetAsmBackend *createX86_32AsmBackend(const Target &, const std::string &);
-TargetAsmBackend *createX86_64AsmBackend(const Target &, const std::string &);
-
/// createX86EmitCodeToMemory - Returns a pass that converts a register
/// allocated function into raw machine code in a dynamically
/// allocated chunk of memory.
@@ -79,13 +66,6 @@ FunctionPass *createEmitX86CodeToMemory();
///
FunctionPass *createX86MaxStackAlignmentHeuristicPass();
-
-/// createX86MachObjectWriter - Construct an X86 Mach-O object writer.
-MCObjectWriter *createX86MachObjectWriter(raw_ostream &OS,
- bool Is64Bit,
- uint32_t CPUType,
- uint32_t CPUSubtype);
-
} // End llvm namespace
#endif
diff --git a/lib/Target/X86/X86.td b/lib/Target/X86/X86.td
index 4ccb43f..104b91f 100644
--- a/lib/Target/X86/X86.td
+++ b/lib/Target/X86/X86.td
@@ -7,8 +7,8 @@
//
//===----------------------------------------------------------------------===//
//
-// This is a target description file for the Intel i386 architecture, referred to
-// here as the "X86" architecture.
+// This is a target description file for the Intel i386 architecture, referred
+// to here as the "X86" architecture.
//
//===----------------------------------------------------------------------===//
@@ -23,6 +23,9 @@ include "llvm/Target/Target.td"
def Mode64Bit : SubtargetFeature<"64bit-mode", "In64BitMode", "true",
"64-bit mode (x86_64)">;
+def ModeNaCl : SubtargetFeature<"nacl-mode", "InNaClMode", "true",
+ "Native Client mode">;
+
//===----------------------------------------------------------------------===//
// X86 Subtarget features.
//===----------------------------------------------------------------------===//
@@ -68,6 +71,9 @@ def Feature3DNowA : SubtargetFeature<"3dnowa", "X863DNowLevel", "ThreeDNowA",
def Feature64Bit : SubtargetFeature<"64bit", "HasX86_64", "true",
"Support 64-bit instructions",
[FeatureCMOV]>;
+def FeatureCMPXCHG16B : SubtargetFeature<"cmpxchg16b", "HasCmpxchg16b", "true",
+ "64-bit with cmpxchg16b",
+ [Feature64Bit]>;
def FeatureSlowBTMem : SubtargetFeature<"slow-bt-mem", "IsBTMemSlow", "true",
"Bit testing of memory is slow">;
def FeatureFastUAMem : SubtargetFeature<"fast-unaligned-mem",
@@ -90,6 +96,16 @@ def FeatureVectorUAMem : SubtargetFeature<"vector-unaligned-mem",
"Allow unaligned memory operands on vector/SIMD instructions">;
def FeatureAES : SubtargetFeature<"aes", "HasAES", "true",
"Enable AES instructions">;
+def FeatureMOVBE : SubtargetFeature<"movbe", "HasMOVBE", "true",
+ "Support MOVBE instruction">;
+def FeatureRDRAND : SubtargetFeature<"rdrand", "HasRDRAND", "true",
+ "Support RDRAND instruction">;
+def FeatureF16C : SubtargetFeature<"f16c", "HasF16C", "true",
+ "Support 16-bit floating point conversion instructions">;
+def FeatureLZCNT : SubtargetFeature<"lzcnt", "HasLZCNT", "true",
+ "Support LZCNT instruction">;
+def FeatureBMI : SubtargetFeature<"bmi", "HasBMI", "true",
+ "Support BMI instructions">;
//===----------------------------------------------------------------------===//
// X86 processors supported.
@@ -112,27 +128,43 @@ def : Proc<"pentium3m", [FeatureSSE1, FeatureSlowBTMem]>;
def : Proc<"pentium-m", [FeatureSSE2, FeatureSlowBTMem]>;
def : Proc<"pentium4", [FeatureSSE2]>;
def : Proc<"pentium4m", [FeatureSSE2, FeatureSlowBTMem]>;
-def : Proc<"x86-64", [FeatureSSE2, Feature64Bit, FeatureSlowBTMem]>;
+def : Proc<"x86-64", [FeatureSSE2, Feature64Bit, FeatureSlowBTMem]>;
def : Proc<"yonah", [FeatureSSE3, FeatureSlowBTMem]>;
def : Proc<"prescott", [FeatureSSE3, FeatureSlowBTMem]>;
-def : Proc<"nocona", [FeatureSSE3, Feature64Bit, FeatureSlowBTMem]>;
-def : Proc<"core2", [FeatureSSSE3, Feature64Bit, FeatureSlowBTMem]>;
-def : Proc<"penryn", [FeatureSSE41, Feature64Bit, FeatureSlowBTMem]>;
-def : Proc<"atom", [FeatureSSE3, Feature64Bit, FeatureSlowBTMem]>;
+def : Proc<"nocona", [FeatureSSE3, FeatureCMPXCHG16B,
+ FeatureSlowBTMem]>;
+def : Proc<"core2", [FeatureSSSE3, FeatureCMPXCHG16B,
+ FeatureSlowBTMem]>;
+def : Proc<"penryn", [FeatureSSE41, FeatureCMPXCHG16B,
+ FeatureSlowBTMem]>;
+def : Proc<"atom", [FeatureSSE3, FeatureCMPXCHG16B, FeatureMOVBE,
+ FeatureSlowBTMem]>;
// "Arrandale" along with corei3 and corei5
-def : Proc<"corei7", [FeatureSSE42, Feature64Bit, FeatureSlowBTMem,
- FeatureFastUAMem, FeatureAES]>;
-def : Proc<"nehalem", [FeatureSSE42, Feature64Bit, FeatureSlowBTMem,
- FeatureFastUAMem]>;
+def : Proc<"corei7", [FeatureSSE42, FeatureCMPXCHG16B,
+ FeatureSlowBTMem, FeatureFastUAMem, FeatureAES]>;
+def : Proc<"nehalem", [FeatureSSE42, FeatureCMPXCHG16B,
+ FeatureSlowBTMem, FeatureFastUAMem]>;
// Westmere is a similar machine to nehalem with some additional features.
// Westmere is the corei3/i5/i7 path from nehalem to sandybridge
-def : Proc<"westmere", [FeatureSSE42, Feature64Bit, FeatureSlowBTMem,
- FeatureFastUAMem, FeatureAES, FeatureCLMUL]>;
+def : Proc<"westmere", [FeatureSSE42, FeatureCMPXCHG16B,
+ FeatureSlowBTMem, FeatureFastUAMem, FeatureAES,
+ FeatureCLMUL]>;
+// Sandy Bridge
// SSE is not listed here since llvm treats AVX as a reimplementation of SSE,
// rather than a superset.
// FIXME: Disabling AVX for now since it's not ready.
-def : Proc<"corei7-avx", [FeatureSSE42, Feature64Bit,
+def : Proc<"corei7-avx", [FeatureSSE42, FeatureCMPXCHG16B,
FeatureAES, FeatureCLMUL]>;
+// Ivy Bridge
+def : Proc<"core-avx-i", [FeatureSSE42, FeatureCMPXCHG16B,
+ FeatureAES, FeatureCLMUL,
+ FeatureRDRAND, FeatureF16C]>;
+
+// Haswell
+def : Proc<"core-avx2", [FeatureSSE42, FeatureCMPXCHG16B, FeatureAES,
+ FeatureCLMUL, FeatureRDRAND, FeatureF16C,
+ FeatureFMA3, FeatureMOVBE, FeatureLZCNT,
+ FeatureBMI]>;
def : Proc<"k6", [FeatureMMX]>;
def : Proc<"k6-2", [Feature3DNow]>;
@@ -150,19 +182,21 @@ def : Proc<"athlon64", [FeatureSSE2, Feature3DNowA, Feature64Bit,
FeatureSlowBTMem]>;
def : Proc<"athlon-fx", [FeatureSSE2, Feature3DNowA, Feature64Bit,
FeatureSlowBTMem]>;
-def : Proc<"k8-sse3", [FeatureSSE3, Feature3DNowA, Feature64Bit,
+def : Proc<"k8-sse3", [FeatureSSE3, Feature3DNowA, FeatureCMPXCHG16B,
FeatureSlowBTMem]>;
-def : Proc<"opteron-sse3", [FeatureSSE3, Feature3DNowA, Feature64Bit,
+def : Proc<"opteron-sse3", [FeatureSSE3, Feature3DNowA, FeatureCMPXCHG16B,
FeatureSlowBTMem]>;
-def : Proc<"athlon64-sse3", [FeatureSSE3, Feature3DNowA, Feature64Bit,
+def : Proc<"athlon64-sse3", [FeatureSSE3, Feature3DNowA, FeatureCMPXCHG16B,
FeatureSlowBTMem]>;
def : Proc<"amdfam10", [FeatureSSE3, FeatureSSE4A,
- Feature3DNowA, Feature64Bit, FeatureSlowBTMem]>;
+ Feature3DNowA, FeatureCMPXCHG16B,
+ FeatureSlowBTMem]>;
def : Proc<"barcelona", [FeatureSSE3, FeatureSSE4A,
- Feature3DNowA, Feature64Bit, FeatureSlowBTMem]>;
-def : Proc<"istanbul", [Feature3DNowA, Feature64Bit, FeatureSSE4A,
- Feature3DNowA]>;
-def : Proc<"shanghai", [Feature3DNowA, Feature64Bit, FeatureSSE4A,
+ Feature3DNowA, FeatureCMPXCHG16B,
+ FeatureSlowBTMem]>;
+def : Proc<"istanbul", [Feature3DNowA, FeatureCMPXCHG16B,
+ FeatureSSE4A, Feature3DNowA]>;
+def : Proc<"shanghai", [Feature3DNowA, FeatureCMPXCHG16B, FeatureSSE4A,
Feature3DNowA]>;
def : Proc<"winchip-c6", [FeatureMMX]>;
diff --git a/lib/Target/X86/X86AsmPrinter.cpp b/lib/Target/X86/X86AsmPrinter.cpp
index 99b4479..4c3ff02 100644
--- a/lib/Target/X86/X86AsmPrinter.cpp
+++ b/lib/Target/X86/X86AsmPrinter.cpp
@@ -35,12 +35,12 @@
#include "llvm/CodeGen/MachineJumpTableInfo.h"
#include "llvm/CodeGen/MachineModuleInfoImpls.h"
#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
+#include "llvm/Target/Mangler.h"
+#include "llvm/Target/TargetOptions.h"
#include "llvm/Support/COFF.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Target/Mangler.h"
-#include "llvm/Target/TargetOptions.h"
-#include "llvm/Target/TargetRegistry.h"
+#include "llvm/Support/TargetRegistry.h"
#include "llvm/ADT/SmallString.h"
using namespace llvm;
@@ -504,8 +504,8 @@ void X86AsmPrinter::EmitEndOfAsmFile(Module &M) {
// .indirect_symbol _foo
OutStreamer.EmitSymbolAttribute(Stubs[i].second.getPointer(),
MCSA_IndirectSymbol);
- // hlt; hlt; hlt; hlt; hlt hlt = 0xf4 = -12.
- const char HltInsts[] = { -12, -12, -12, -12, -12 };
+ // hlt; hlt; hlt; hlt; hlt (hlt = 0xf4).
+ const char HltInsts[] = "\xf4\xf4\xf4\xf4\xf4";
OutStreamer.EmitBytes(StringRef(HltInsts, 5), 0/*addrspace*/);
}
@@ -708,21 +708,8 @@ void X86AsmPrinter::PrintDebugValueComment(const MachineInstr *MI,
// Target Registry Stuff
//===----------------------------------------------------------------------===//
-static MCInstPrinter *createX86MCInstPrinter(const Target &T,
- unsigned SyntaxVariant,
- const MCAsmInfo &MAI) {
- if (SyntaxVariant == 0)
- return new X86ATTInstPrinter(MAI);
- if (SyntaxVariant == 1)
- return new X86IntelInstPrinter(MAI);
- return 0;
-}
-
// Force static initialization.
extern "C" void LLVMInitializeX86AsmPrinter() {
RegisterAsmPrinter<X86AsmPrinter> X(TheX86_32Target);
RegisterAsmPrinter<X86AsmPrinter> Y(TheX86_64Target);
-
- TargetRegistry::RegisterMCInstPrinter(TheX86_32Target,createX86MCInstPrinter);
- TargetRegistry::RegisterMCInstPrinter(TheX86_64Target,createX86MCInstPrinter);
}
diff --git a/lib/Target/X86/X86CodeEmitter.cpp b/lib/Target/X86/X86CodeEmitter.cpp
index 4b11db7..aeff03a 100644
--- a/lib/Target/X86/X86CodeEmitter.cpp
+++ b/lib/Target/X86/X86CodeEmitter.cpp
@@ -98,8 +98,6 @@ namespace {
void emitMemModRMByte(const MachineInstr &MI,
unsigned Op, unsigned RegOpcodeField,
intptr_t PCAdj = 0);
-
- unsigned getX86RegNum(unsigned RegNo) const;
};
template<class CodeEmitter>
@@ -169,7 +167,7 @@ static unsigned determineREX(const MachineInstr &MI) {
const MachineOperand& MO = MI.getOperand(i);
if (MO.isReg()) {
unsigned Reg = MO.getReg();
- if (X86InstrInfo::isX86_64NonExtLowByteReg(Reg))
+ if (X86II::isX86_64NonExtLowByteReg(Reg))
REX |= 0x40;
}
}
@@ -346,11 +344,6 @@ void Emitter<CodeEmitter>::emitJumpTableAddress(unsigned JTI, unsigned Reloc,
MCE.emitWordLE(0);
}
-template<class CodeEmitter>
-unsigned Emitter<CodeEmitter>::getX86RegNum(unsigned RegNo) const {
- return X86RegisterInfo::getX86RegNum(RegNo);
-}
-
inline static unsigned char ModRMByte(unsigned Mod, unsigned RegOpcode,
unsigned RM) {
assert(Mod < 4 && RegOpcode < 8 && RM < 8 && "ModRM Fields out of range!");
@@ -360,7 +353,7 @@ inline static unsigned char ModRMByte(unsigned Mod, unsigned RegOpcode,
template<class CodeEmitter>
void Emitter<CodeEmitter>::emitRegModRMByte(unsigned ModRMReg,
unsigned RegOpcodeFld){
- MCE.emitByte(ModRMByte(3, RegOpcodeFld, getX86RegNum(ModRMReg)));
+ MCE.emitByte(ModRMByte(3, RegOpcodeFld, X86_MC::getX86RegNum(ModRMReg)));
}
template<class CodeEmitter>
@@ -498,7 +491,7 @@ void Emitter<CodeEmitter>::emitMemModRMByte(const MachineInstr &MI,
// 2-7) and absolute references.
unsigned BaseRegNo = -1U;
if (BaseReg != 0 && BaseReg != X86::RIP)
- BaseRegNo = getX86RegNum(BaseReg);
+ BaseRegNo = X86_MC::getX86RegNum(BaseReg);
if (// The SIB byte must be used if there is an index register.
IndexReg.getReg() == 0 &&
@@ -566,7 +559,7 @@ void Emitter<CodeEmitter>::emitMemModRMByte(const MachineInstr &MI,
}
// Calculate what the SS field value should be...
- static const unsigned SSTable[] = { ~0, 0, 1, ~0, 2, ~0, ~0, ~0, 3 };
+ static const unsigned SSTable[] = { ~0U, 0, 1, ~0U, 2, ~0U, ~0U, ~0U, 3 };
unsigned SS = SSTable[Scale.getImm()];
if (BaseReg == 0) {
@@ -574,15 +567,15 @@ void Emitter<CodeEmitter>::emitMemModRMByte(const MachineInstr &MI,
// Manual 2A, table 2-7. The displacement has already been output.
unsigned IndexRegNo;
if (IndexReg.getReg())
- IndexRegNo = getX86RegNum(IndexReg.getReg());
+ IndexRegNo = X86_MC::getX86RegNum(IndexReg.getReg());
else // Examples: [ESP+1*<noreg>+4] or [scaled idx]+disp32 (MOD=0,BASE=5)
IndexRegNo = 4;
emitSIBByte(SS, IndexRegNo, 5);
} else {
- unsigned BaseRegNo = getX86RegNum(BaseReg);
+ unsigned BaseRegNo = X86_MC::getX86RegNum(BaseReg);
unsigned IndexRegNo;
if (IndexReg.getReg())
- IndexRegNo = getX86RegNum(IndexReg.getReg());
+ IndexRegNo = X86_MC::getX86RegNum(IndexReg.getReg());
else
IndexRegNo = 4; // For example [ESP+1*<noreg>+4]
emitSIBByte(SS, IndexRegNo, BaseRegNo);
@@ -809,7 +802,8 @@ void Emitter<CodeEmitter>::emitInstruction(MachineInstr &MI,
}
case X86II::AddRegFrm: {
- MCE.emitByte(BaseOpcode + getX86RegNum(MI.getOperand(CurOp++).getReg()));
+ MCE.emitByte(BaseOpcode +
+ X86_MC::getX86RegNum(MI.getOperand(CurOp++).getReg()));
if (CurOp == NumOps)
break;
@@ -844,7 +838,7 @@ void Emitter<CodeEmitter>::emitInstruction(MachineInstr &MI,
case X86II::MRMDestReg: {
MCE.emitByte(BaseOpcode);
emitRegModRMByte(MI.getOperand(CurOp).getReg(),
- getX86RegNum(MI.getOperand(CurOp+1).getReg()));
+ X86_MC::getX86RegNum(MI.getOperand(CurOp+1).getReg()));
CurOp += 2;
if (CurOp != NumOps)
emitConstant(MI.getOperand(CurOp++).getImm(),
@@ -854,7 +848,7 @@ void Emitter<CodeEmitter>::emitInstruction(MachineInstr &MI,
case X86II::MRMDestMem: {
MCE.emitByte(BaseOpcode);
emitMemModRMByte(MI, CurOp,
- getX86RegNum(MI.getOperand(CurOp + X86::AddrNumOperands)
+ X86_MC::getX86RegNum(MI.getOperand(CurOp + X86::AddrNumOperands)
.getReg()));
CurOp += X86::AddrNumOperands + 1;
if (CurOp != NumOps)
@@ -866,7 +860,7 @@ void Emitter<CodeEmitter>::emitInstruction(MachineInstr &MI,
case X86II::MRMSrcReg:
MCE.emitByte(BaseOpcode);
emitRegModRMByte(MI.getOperand(CurOp+1).getReg(),
- getX86RegNum(MI.getOperand(CurOp).getReg()));
+ X86_MC::getX86RegNum(MI.getOperand(CurOp).getReg()));
CurOp += 2;
if (CurOp != NumOps)
emitConstant(MI.getOperand(CurOp++).getImm(),
@@ -880,8 +874,8 @@ void Emitter<CodeEmitter>::emitInstruction(MachineInstr &MI,
X86II::getSizeOfImm(Desc->TSFlags) : 0;
MCE.emitByte(BaseOpcode);
- emitMemModRMByte(MI, CurOp+1, getX86RegNum(MI.getOperand(CurOp).getReg()),
- PCAdj);
+ emitMemModRMByte(MI, CurOp+1,
+ X86_MC::getX86RegNum(MI.getOperand(CurOp).getReg()),PCAdj);
CurOp += AddrOperands + 1;
if (CurOp != NumOps)
emitConstant(MI.getOperand(CurOp++).getImm(),
@@ -968,7 +962,7 @@ void Emitter<CodeEmitter>::emitInstruction(MachineInstr &MI,
MCE.emitByte(BaseOpcode);
// Duplicate register, used by things like MOV8r0 (aka xor reg,reg).
emitRegModRMByte(MI.getOperand(CurOp).getReg(),
- getX86RegNum(MI.getOperand(CurOp).getReg()));
+ X86_MC::getX86RegNum(MI.getOperand(CurOp).getReg()));
++CurOp;
break;
diff --git a/lib/Target/X86/X86ELFWriterInfo.cpp b/lib/Target/X86/X86ELFWriterInfo.cpp
index f1d7ede..4a72d15 100644
--- a/lib/Target/X86/X86ELFWriterInfo.cpp
+++ b/lib/Target/X86/X86ELFWriterInfo.cpp
@@ -147,7 +147,7 @@ long int X86ELFWriterInfo::computeRelocation(unsigned SymOffset,
if (RelTy == ELF::R_X86_64_PC32 || RelTy == ELF::R_386_PC32)
return SymOffset - (RelOffset + 4);
else
- assert("computeRelocation unknown for this relocation type");
+ assert(0 && "computeRelocation unknown for this relocation type");
return 0;
}
diff --git a/lib/Target/X86/X86FastISel.cpp b/lib/Target/X86/X86FastISel.cpp
index 21e163a..f912b28 100644
--- a/lib/Target/X86/X86FastISel.cpp
+++ b/lib/Target/X86/X86FastISel.cpp
@@ -22,6 +22,7 @@
#include "llvm/CallingConv.h"
#include "llvm/DerivedTypes.h"
#include "llvm/GlobalVariable.h"
+#include "llvm/GlobalAlias.h"
#include "llvm/Instructions.h"
#include "llvm/IntrinsicInst.h"
#include "llvm/Operator.h"
@@ -59,8 +60,8 @@ public:
explicit X86FastISel(FunctionLoweringInfo &funcInfo) : FastISel(funcInfo) {
Subtarget = &TM.getSubtarget<X86Subtarget>();
StackPtr = Subtarget->is64Bit() ? X86::RSP : X86::ESP;
- X86ScalarSSEf64 = Subtarget->hasSSE2();
- X86ScalarSSEf32 = Subtarget->hasSSE1();
+ X86ScalarSSEf64 = Subtarget->hasSSE2() || Subtarget->hasAVX();
+ X86ScalarSSEf32 = Subtarget->hasSSE1() || Subtarget->hasAVX();
}
virtual bool TargetSelectInstruction(const Instruction *I);
@@ -134,7 +135,7 @@ private:
(VT == MVT::f32 && X86ScalarSSEf32); // f32 is when SSE1
}
- bool isTypeLegal(const Type *Ty, MVT &VT, bool AllowI1 = false);
+ bool isTypeLegal(Type *Ty, MVT &VT, bool AllowI1 = false);
bool IsMemcpySmall(uint64_t Len);
@@ -144,7 +145,7 @@ private:
} // end anonymous namespace.
-bool X86FastISel::isTypeLegal(const Type *Ty, MVT &VT, bool AllowI1) {
+bool X86FastISel::isTypeLegal(Type *Ty, MVT &VT, bool AllowI1) {
EVT evt = TLI.getValueType(Ty, /*HandleUnknown=*/true);
if (evt == MVT::Other || !evt.isSimple())
// Unhandled type. Halt "fast" selection and bail.
@@ -198,8 +199,8 @@ bool X86FastISel::X86FastEmitLoad(EVT VT, const X86AddressMode &AM,
RC = X86::GR64RegisterClass;
break;
case MVT::f32:
- if (Subtarget->hasSSE1()) {
- Opc = X86::MOVSSrm;
+ if (X86ScalarSSEf32) {
+ Opc = Subtarget->hasAVX() ? X86::VMOVSSrm : X86::MOVSSrm;
RC = X86::FR32RegisterClass;
} else {
Opc = X86::LD_Fp32m;
@@ -207,8 +208,8 @@ bool X86FastISel::X86FastEmitLoad(EVT VT, const X86AddressMode &AM,
}
break;
case MVT::f64:
- if (Subtarget->hasSSE2()) {
- Opc = X86::MOVSDrm;
+ if (X86ScalarSSEf64) {
+ Opc = Subtarget->hasAVX() ? X86::VMOVSDrm : X86::MOVSDrm;
RC = X86::FR64RegisterClass;
} else {
Opc = X86::LD_Fp64m;
@@ -250,10 +251,12 @@ X86FastISel::X86FastEmitStore(EVT VT, unsigned Val, const X86AddressMode &AM) {
case MVT::i32: Opc = X86::MOV32mr; break;
case MVT::i64: Opc = X86::MOV64mr; break; // Must be in x86-64 mode.
case MVT::f32:
- Opc = Subtarget->hasSSE1() ? X86::MOVSSmr : X86::ST_Fp32m;
+ Opc = X86ScalarSSEf32 ?
+ (Subtarget->hasAVX() ? X86::VMOVSSmr : X86::MOVSSmr) : X86::ST_Fp32m;
break;
case MVT::f64:
- Opc = Subtarget->hasSSE2() ? X86::MOVSDmr : X86::ST_Fp64m;
+ Opc = X86ScalarSSEf64 ?
+ (Subtarget->hasAVX() ? X86::VMOVSDmr : X86::MOVSDmr) : X86::ST_Fp64m;
break;
}
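
The recurring pattern in these X86FastISel hunks is a three-way pick: the VEX-encoded AVX opcode when AVX is enabled, otherwise the plain SSE opcode, otherwise the x87 fallback. A hedged helper capturing the shape (the opcode parameters stand in for enumerators like X86::VMOVSSmr):

// Pick a scalar FP opcode: AVX form if available, SSE form if the required
// scalar-SSE level is met, otherwise the x87 fallback.
unsigned pickFPOpcode(bool HasScalarSSE, bool HasAVX,
                      unsigned AVXOpc, unsigned SSEOpc, unsigned X87Opc) {
  return HasScalarSSE ? (HasAVX ? AVXOpc : SSEOpc) : X87Opc;
}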
@@ -336,7 +339,7 @@ bool X86FastISel::X86SelectAddress(const Value *V, X86AddressMode &AM) {
U = C;
}
- if (const PointerType *Ty = dyn_cast<PointerType>(V->getType()))
+ if (PointerType *Ty = dyn_cast<PointerType>(V->getType()))
if (Ty->getAddressSpace() > 255)
// Fast instruction selection doesn't support the special
// address spaces.
@@ -399,7 +402,7 @@ bool X86FastISel::X86SelectAddress(const Value *V, X86AddressMode &AM) {
for (User::const_op_iterator i = U->op_begin() + 1, e = U->op_end();
i != e; ++i, ++GTI) {
const Value *Op = *i;
- if (const StructType *STy = dyn_cast<StructType>(*GTI)) {
+ if (StructType *STy = dyn_cast<StructType>(*GTI)) {
const StructLayout *SL = TD.getStructLayout(STy);
Disp += SL->getElementOffset(cast<ConstantInt>(Op)->getZExtValue());
continue;
@@ -465,14 +468,23 @@ bool X86FastISel::X86SelectAddress(const Value *V, X86AddressMode &AM) {
// Handle constant address.
if (const GlobalValue *GV = dyn_cast<GlobalValue>(V)) {
- // Can't handle alternate code models or TLS yet.
+ // Can't handle alternate code models yet.
if (TM.getCodeModel() != CodeModel::Small)
return false;
+ // Can't handle TLS yet.
if (const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV))
if (GVar->isThreadLocal())
return false;
+ // Can't handle TLS yet, part 2 (this is slightly crazy, but this is how
+ // it works...).
+ if (const GlobalAlias *GA = dyn_cast<GlobalAlias>(GV))
+ if (const GlobalVariable *GVar =
+ dyn_cast_or_null<GlobalVariable>(GA->resolveAliasedGlobal(false)))
+ if (GVar->isThreadLocal())
+ return false;
+
// RIP-relative addresses can't have additional register operands, so if
// we've already folded stuff into the addressing mode, just force the
// global value into its own register, which we can use as the basereg.
@@ -658,6 +670,10 @@ bool X86FastISel::X86SelectCallAddress(const Value *V, X86AddressMode &AM) {
/// X86SelectStore - Select and emit code to implement store instructions.
bool X86FastISel::X86SelectStore(const Instruction *I) {
+ // Atomic stores need special handling.
+ if (cast<StoreInst>(I)->isAtomic())
+ return false;
+
MVT VT;
if (!isTypeLegal(I->getOperand(0)->getType(), VT, /*AllowI1=*/true))
return false;
@@ -780,6 +796,10 @@ bool X86FastISel::X86SelectRet(const Instruction *I) {
/// X86SelectLoad - Select and emit code to implement load instructions.
///
bool X86FastISel::X86SelectLoad(const Instruction *I) {
+ // Atomic loads need special handling.
+ if (cast<LoadInst>(I)->isAtomic())
+ return false;
+
MVT VT;
if (!isTypeLegal(I->getType(), VT, /*AllowI1=*/true))
return false;
@@ -797,14 +817,20 @@ bool X86FastISel::X86SelectLoad(const Instruction *I) {
}
static unsigned X86ChooseCmpOpcode(EVT VT, const X86Subtarget *Subtarget) {
+ bool HasAVX = Subtarget->hasAVX();
+ bool X86ScalarSSEf32 = HasAVX || Subtarget->hasSSE1();
+ bool X86ScalarSSEf64 = HasAVX || Subtarget->hasSSE2();
+
switch (VT.getSimpleVT().SimpleTy) {
default: return 0;
case MVT::i8: return X86::CMP8rr;
case MVT::i16: return X86::CMP16rr;
case MVT::i32: return X86::CMP32rr;
case MVT::i64: return X86::CMP64rr;
- case MVT::f32: return Subtarget->hasSSE1() ? X86::UCOMISSrr : 0;
- case MVT::f64: return Subtarget->hasSSE2() ? X86::UCOMISDrr : 0;
+ case MVT::f32:
+ return X86ScalarSSEf32 ? (HasAVX ? X86::VUCOMISSrr : X86::UCOMISSrr) : 0;
+ case MVT::f64:
+ return X86ScalarSSEf64 ? (HasAVX ? X86::VUCOMISDrr : X86::UCOMISDrr) : 0;
}
}
@@ -1207,7 +1233,7 @@ bool X86FastISel::X86SelectSelect(const Instruction *I) {
bool X86FastISel::X86SelectFPExt(const Instruction *I) {
// fpext from float to double.
- if (Subtarget->hasSSE2() &&
+ if (X86ScalarSSEf64 &&
I->getType()->isDoubleTy()) {
const Value *V = I->getOperand(0);
if (V->getType()->isFloatTy()) {
@@ -1226,7 +1252,7 @@ bool X86FastISel::X86SelectFPExt(const Instruction *I) {
}
bool X86FastISel::X86SelectFPTrunc(const Instruction *I) {
- if (Subtarget->hasSSE2()) {
+ if (X86ScalarSSEf64) {
if (I->getType()->isFloatTy()) {
const Value *V = I->getOperand(0);
if (V->getType()->isDoubleTy()) {
@@ -1365,6 +1391,9 @@ bool X86FastISel::X86VisitIntrinsicCall(const IntrinsicInst &I) {
case Intrinsic::memset: {
const MemSetInst &MSI = cast<MemSetInst>(I);
+ if (MSI.isVolatile())
+ return false;
+
unsigned SizeWidth = Subtarget->is64Bit() ? 64 : 32;
if (!MSI.getLength()->getType()->isIntegerTy(SizeWidth))
return false;
@@ -1411,7 +1440,7 @@ bool X86FastISel::X86VisitIntrinsicCall(const IntrinsicInst &I) {
// Replace "add with overflow" intrinsics with an "add" instruction followed
// by a seto/setc instruction.
const Function *Callee = I.getCalledFunction();
- const Type *RetTy =
+ Type *RetTy =
cast<StructType>(Callee->getReturnType())->getTypeAtIndex(unsigned(0));
MVT VT;
@@ -1484,8 +1513,8 @@ bool X86FastISel::DoSelectCall(const Instruction *I, const char *MemIntName) {
if (CC == CallingConv::Fast && GuaranteedTailCallOpt)
return false;
- const PointerType *PT = cast<PointerType>(CS.getCalledValue()->getType());
- const FunctionType *FTy = cast<FunctionType>(PT->getElementType());
+ PointerType *PT = cast<PointerType>(CS.getCalledValue()->getType());
+ FunctionType *FTy = cast<FunctionType>(PT->getElementType());
bool isVarArg = FTy->isVarArg();
// Don't know how to handle Win64 varargs yet. Nothing special needed for
@@ -1547,8 +1576,8 @@ bool X86FastISel::DoSelectCall(const Instruction *I, const char *MemIntName) {
Flags.setZExt();
if (CS.paramHasAttr(AttrInd, Attribute::ByVal)) {
- const PointerType *Ty = cast<PointerType>(ArgVal->getType());
- const Type *ElementTy = Ty->getElementType();
+ PointerType *Ty = cast<PointerType>(ArgVal->getType());
+ Type *ElementTy = Ty->getElementType();
unsigned FrameSize = TD.getTypeAllocSize(ElementTy);
unsigned FrameAlign = CS.getParamAlignment(AttrInd);
if (!FrameAlign)
@@ -1600,7 +1629,7 @@ bool X86FastISel::DoSelectCall(const Instruction *I, const char *MemIntName) {
if (ArgReg == 0) return false;
- const Type *ArgTy = ArgVal->getType();
+ Type *ArgTy = ArgVal->getType();
MVT ArgVT;
if (!isTypeLegal(ArgTy, ArgVT))
return false;
@@ -1709,7 +1738,7 @@ bool X86FastISel::DoSelectCall(const Instruction *I, const char *MemIntName) {
assert(Res && "memcpy length already checked!"); (void)Res;
} else if (isa<ConstantInt>(ArgVal) || isa<ConstantPointerNull>(ArgVal)) {
// If this is a really simple value, emit this with the Value* version
- //of X86FastEmitStore. If it isn't simple, we don't want to do this,
+ // of X86FastEmitStore. If it isn't simple, we don't want to do this,
// as it can cause us to reevaluate the argument.
X86FastEmitStore(ArgVT, ArgVal, AM);
} else {
@@ -1965,8 +1994,8 @@ unsigned X86FastISel::TargetMaterializeConstant(const Constant *C) {
RC = X86::GR64RegisterClass;
break;
case MVT::f32:
- if (Subtarget->hasSSE1()) {
- Opc = X86::MOVSSrm;
+ if (X86ScalarSSEf32) {
+ Opc = Subtarget->hasAVX() ? X86::VMOVSSrm : X86::MOVSSrm;
RC = X86::FR32RegisterClass;
} else {
Opc = X86::LD_Fp32m;
@@ -1974,8 +2003,8 @@ unsigned X86FastISel::TargetMaterializeConstant(const Constant *C) {
}
break;
case MVT::f64:
- if (Subtarget->hasSSE2()) {
- Opc = X86::MOVSDrm;
+ if (X86ScalarSSEf64) {
+ Opc = Subtarget->hasAVX() ? X86::VMOVSDrm : X86::MOVSDrm;
RC = X86::FR64RegisterClass;
} else {
Opc = X86::LD_Fp64m;
@@ -2070,8 +2099,8 @@ unsigned X86FastISel::TargetMaterializeFloatZero(const ConstantFP *CF) {
switch (VT.SimpleTy) {
default: return false;
case MVT::f32:
- if (Subtarget->hasSSE1()) {
- Opc = X86::FsFLD0SS;
+ if (X86ScalarSSEf32) {
+ Opc = Subtarget->hasAVX() ? X86::VFsFLD0SS : X86::FsFLD0SS;
RC = X86::FR32RegisterClass;
} else {
Opc = X86::LD_Fp032;
@@ -2079,8 +2108,8 @@ unsigned X86FastISel::TargetMaterializeFloatZero(const ConstantFP *CF) {
}
break;
case MVT::f64:
- if (Subtarget->hasSSE2()) {
- Opc = X86::FsFLD0SD;
+ if (X86ScalarSSEf64) {
+ Opc = Subtarget->hasAVX() ? X86::VFsFLD0SD : X86::FsFLD0SD;
RC = X86::FR64RegisterClass;
} else {
Opc = X86::LD_Fp064;
diff --git a/lib/Target/X86/X86FloatingPoint.cpp b/lib/Target/X86/X86FloatingPoint.cpp
index 6eed6abd..e3461c8 100644
--- a/lib/Target/X86/X86FloatingPoint.cpp
+++ b/lib/Target/X86/X86FloatingPoint.cpp
@@ -260,6 +260,21 @@ namespace {
BuildMI(*MBB, I, dl, TII->get(X86::LD_Frr)).addReg(STReg);
}
+ /// duplicatePendingSTBeforeKill - The instruction at I is about to kill
+ /// RegNo. If any PendingST registers still need the RegNo value, duplicate
+ /// them to new scratch registers.
+ void duplicatePendingSTBeforeKill(unsigned RegNo, MachineInstr *I) {
+ for (unsigned i = 0; i != NumPendingSTs; ++i) {
+ if (PendingST[i] != RegNo)
+ continue;
+ unsigned SR = getScratchReg();
+ DEBUG(dbgs() << "Duplicating pending ST" << i
+ << " in FP" << RegNo << " to FP" << SR << '\n');
+ duplicateToTop(RegNo, SR, I);
+ PendingST[i] = SR;
+ }
+ }
+
/// popStackAfter - Pop the current value off of the top of the FP stack
/// after the specified instruction.
void popStackAfter(MachineBasicBlock::iterator &I);
@@ -406,6 +421,10 @@ bool FPS::processBasicBlock(MachineFunction &MF, MachineBasicBlock &BB) {
if (MI->isCopy() && isFPCopy(MI))
FPInstClass = X86II::SpecialFP;
+ if (MI->isImplicitDef() &&
+ X86::RFP80RegClass.contains(MI->getOperand(0).getReg()))
+ FPInstClass = X86II::SpecialFP;
+
if (FPInstClass == X86II::NotFP)
continue; // Efficiently ignore non-fp insts!
@@ -461,6 +480,7 @@ bool FPS::processBasicBlock(MachineFunction &MF, MachineBasicBlock &BB) {
}
dumpStack();
);
+ (void)PrevMI;
Changed = true;
}
@@ -969,6 +989,9 @@ void FPS::handleOneArgFP(MachineBasicBlock::iterator &I) {
unsigned Reg = getFPReg(MI->getOperand(NumOps-1));
bool KillsSrc = MI->killsRegister(X86::FP0+Reg);
+ if (KillsSrc)
+ duplicatePendingSTBeforeKill(Reg, I);
+
// FISTP64m is strange because there isn't a non-popping version.
// If we have one _and_ we don't want to pop the operand, duplicate the value
// on the stack instead of moving it. This ensures that popping the value is
@@ -1032,6 +1055,7 @@ void FPS::handleOneArgFPRW(MachineBasicBlock::iterator &I) {
bool KillsSrc = MI->killsRegister(X86::FP0+Reg);
if (KillsSrc) {
+ duplicatePendingSTBeforeKill(Reg, I);
// If this is the last use of the source register, just make sure it's on
// the top of the stack.
moveToTop(Reg, I);
@@ -1318,6 +1342,7 @@ void FPS::handleSpecialFP(MachineBasicBlock::iterator &I) {
// When the source is killed, allocate a scratch FP register.
if (KillsSrc) {
+ duplicatePendingSTBeforeKill(SrcFP, I);
unsigned Slot = getSlot(SrcFP);
unsigned SR = getScratchReg();
PendingST[DstST] = SR;
@@ -1369,6 +1394,15 @@ void FPS::handleSpecialFP(MachineBasicBlock::iterator &I) {
break;
}
+ case TargetOpcode::IMPLICIT_DEF: {
+ // All FP registers must be explicitly defined, so load a 0 instead.
+ unsigned Reg = MI->getOperand(0).getReg() - X86::FP0;
+ DEBUG(dbgs() << "Emitting LD_F0 for implicit FP" << Reg << '\n');
+ BuildMI(*MBB, I, MI->getDebugLoc(), TII->get(X86::LD_F0));
+ pushReg(Reg);
+ break;
+ }
+
case X86::FpPOP_RETVAL: {
// The FpPOP_RETVAL instruction is used after calls that return a value on
// the floating point stack. We cannot model this with ST defs since CALL
diff --git a/lib/Target/X86/X86FrameLowering.cpp b/lib/Target/X86/X86FrameLowering.cpp
index ed45a9a..d54f4ae 100644
--- a/lib/Target/X86/X86FrameLowering.cpp
+++ b/lib/Target/X86/X86FrameLowering.cpp
@@ -15,6 +15,7 @@
#include "X86InstrBuilder.h"
#include "X86InstrInfo.h"
#include "X86MachineFunctionInfo.h"
+#include "X86Subtarget.h"
#include "X86TargetMachine.h"
#include "llvm/Function.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
@@ -91,12 +92,12 @@ static unsigned findDeadCallerSavedReg(MachineBasicBlock &MBB,
return 0;
static const unsigned CallerSavedRegs32Bit[] = {
- X86::EAX, X86::EDX, X86::ECX
+ X86::EAX, X86::EDX, X86::ECX, 0
};
static const unsigned CallerSavedRegs64Bit[] = {
X86::RAX, X86::RDX, X86::RCX, X86::RSI, X86::RDI,
- X86::R8, X86::R9, X86::R10, X86::R11
+ X86::R8, X86::R9, X86::R10, X86::R11, 0
};
unsigned Opc = MBBI->getOpcode();
@@ -283,8 +284,8 @@ static bool isEAXLiveIn(MachineFunction &MF) {
}
void X86FrameLowering::emitCalleeSavedFrameMoves(MachineFunction &MF,
- MCSymbol *Label,
- unsigned FramePtr) const {
+ MCSymbol *Label,
+ unsigned FramePtr) const {
MachineFrameInfo *MFI = MF.getFrameInfo();
MachineModuleInfo &MMI = MF.getMMI();
@@ -346,6 +347,247 @@ void X86FrameLowering::emitCalleeSavedFrameMoves(MachineFunction &MF,
}
}
+/// getCompactUnwindRegNum - Get the compact unwind number for a given
+/// register. The number corresponds to the enum lists in
+/// compact_unwind_encoding.h.
+static int getCompactUnwindRegNum(const unsigned *CURegs, unsigned Reg) {
+ int Idx = 1;
+ for (; *CURegs; ++CURegs, ++Idx)
+ if (*CURegs == Reg)
+ return Idx;
+
+ return -1;
+}
+
+/// encodeCompactUnwindRegistersWithoutFrame - Create the permutation encoding
+/// used with frameless stacks. It is passed the number of registers to be saved
+/// and an array of the registers saved.
+static uint32_t encodeCompactUnwindRegistersWithoutFrame(unsigned SavedRegs[6],
+ unsigned RegCount,
+ bool Is64Bit) {
+ // The saved registers are numbered from 1 to 6. In order to encode the order
+ // in which they were saved, we re-number them according to their place in the
+ // register order. The re-numbering is relative to the last re-numbered
+ // register. E.g., if we have registers {6, 2, 4, 5} saved in that order:
+ //
+ // Orig Re-Num
+ // ---- ------
+ // 6 6
+ // 2 2
+ // 4 3
+ // 5 3
+ //
+ static const unsigned CU32BitRegs[] = {
+ X86::EBX, X86::ECX, X86::EDX, X86::EDI, X86::ESI, X86::EBP, 0
+ };
+ static const unsigned CU64BitRegs[] = {
+ X86::RBX, X86::R12, X86::R13, X86::R14, X86::R15, X86::RBP, 0
+ };
+ const unsigned *CURegs = (Is64Bit ? CU64BitRegs : CU32BitRegs);
+
+ uint32_t RenumRegs[6];
+ for (unsigned i = 6 - RegCount; i < 6; ++i) {
+ int CUReg = getCompactUnwindRegNum(CURegs, SavedRegs[i]);
+ if (CUReg == -1) return ~0U;
+ SavedRegs[i] = CUReg;
+
+ unsigned Countless = 0;
+ for (unsigned j = 6 - RegCount; j < i; ++j)
+ if (SavedRegs[j] < SavedRegs[i])
+ ++Countless;
+
+ RenumRegs[i] = SavedRegs[i] - Countless - 1;
+ }
+
+ // Take the renumbered values and encode them into a 10-bit number.
+ uint32_t permutationEncoding = 0;
+ switch (RegCount) {
+ case 6:
+ permutationEncoding |= 120 * RenumRegs[0] + 24 * RenumRegs[1]
+ + 6 * RenumRegs[2] + 2 * RenumRegs[3]
+ + RenumRegs[4];
+ break;
+ case 5:
+ permutationEncoding |= 120 * RenumRegs[1] + 24 * RenumRegs[2]
+ + 6 * RenumRegs[3] + 2 * RenumRegs[4]
+ + RenumRegs[5];
+ break;
+ case 4:
+ permutationEncoding |= 60 * RenumRegs[2] + 12 * RenumRegs[3]
+ + 3 * RenumRegs[4] + RenumRegs[5];
+ break;
+ case 3:
+ permutationEncoding |= 20 * RenumRegs[3] + 4 * RenumRegs[4]
+ + RenumRegs[5];
+ break;
+ case 2:
+ permutationEncoding |= 5 * RenumRegs[4] + RenumRegs[5];
+ break;
+ case 1:
+ permutationEncoding |= RenumRegs[5];
+ break;
+ }
+
+ assert((permutationEncoding & 0x3FF) == permutationEncoding &&
+ "Invalid compact register encoding!");
+ return permutationEncoding;
+}
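A minimal standalone sketch of the renumber-and-pack scheme above for the
three-register case (illustrative only: it stores the registers 0-based
instead of in the tail of a six-entry array, and uses plain integers in place
of the X86 register enums):

  #include <cassert>
  #include <cstdint>
  #include <cstdio>

  // Renumber each saved register relative to the smaller ones already seen,
  // then pack the result into a 10-bit factorial-base number, mirroring the
  // RegCount == 3 case of encodeCompactUnwindRegistersWithoutFrame.
  static uint32_t encodePermutation3(const uint32_t Regs[3]) {
    uint32_t Renum[3];
    for (unsigned i = 0; i != 3; ++i) {
      unsigned Smaller = 0;
      for (unsigned j = 0; j != i; ++j)
        if (Regs[j] < Regs[i])
          ++Smaller;
      Renum[i] = Regs[i] - Smaller - 1;
    }
    return 20 * Renum[0] + 4 * Renum[1] + Renum[2];
  }

  int main() {
    const uint32_t Saved[3] = {6, 2, 4}; // compact-unwind numbers, save order
    uint32_t Enc = encodePermutation3(Saved);
    assert((Enc & 0x3FF) == Enc && "must fit in 10 bits");
    printf("permutation encoding: %u\n", Enc); // prints 106
  }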
+
+/// encodeCompactUnwindRegistersWithFrame - Return the registers encoded for a
+/// compact encoding with a frame pointer.
+static uint32_t encodeCompactUnwindRegistersWithFrame(unsigned SavedRegs[6],
+ bool Is64Bit) {
+ static const unsigned CU32BitRegs[] = {
+ X86::EBX, X86::ECX, X86::EDX, X86::EDI, X86::ESI, X86::EBP, 0
+ };
+ static const unsigned CU64BitRegs[] = {
+ X86::RBX, X86::R12, X86::R13, X86::R14, X86::R15, X86::RBP, 0
+ };
+ const unsigned *CURegs = (Is64Bit ? CU64BitRegs : CU32BitRegs);
+
+ // Encode the registers in the order they were saved, 3-bits per register. The
+ // registers are numbered from 1 to 6.
+ uint32_t RegEnc = 0;
+ for (int I = 5; I >= 0; --I) {
+ unsigned Reg = SavedRegs[I];
+ if (Reg == 0) break;
+ int CURegNum = getCompactUnwindRegNum(CURegs, Reg);
+ if (CURegNum == -1)
+ return ~0U;
+ RegEnc |= (CURegNum & 0x7) << (5 - I);
+ }
+
+ assert((RegEnc & 0x7FFF) == RegEnc && "Invalid compact register encoding!");
+ return RegEnc;
+}
+
+uint32_t X86FrameLowering::getCompactUnwindEncoding(MachineFunction &MF) const {
+ const X86RegisterInfo *RegInfo = TM.getRegisterInfo();
+ unsigned FramePtr = RegInfo->getFrameRegister(MF);
+ unsigned StackPtr = RegInfo->getStackRegister();
+
+ X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
+ int TailCallReturnAddrDelta = X86FI->getTCReturnAddrDelta();
+
+ bool Is64Bit = STI.is64Bit();
+ bool HasFP = hasFP(MF);
+
+ unsigned SavedRegs[6] = { 0, 0, 0, 0, 0, 0 };
+ int SavedRegIdx = 6;
+
+ unsigned OffsetSize = (Is64Bit ? 8 : 4);
+
+ unsigned PushInstr = (Is64Bit ? X86::PUSH64r : X86::PUSH32r);
+ unsigned PushInstrSize = 1;
+ unsigned MoveInstr = (Is64Bit ? X86::MOV64rr : X86::MOV32rr);
+ unsigned MoveInstrSize = (Is64Bit ? 3 : 2);
+ unsigned SubtractInstr = getSUBriOpcode(Is64Bit, -TailCallReturnAddrDelta);
+ unsigned SubtractInstrIdx = (Is64Bit ? 3 : 2);
+
+ unsigned StackDivide = (Is64Bit ? 8 : 4);
+
+ unsigned InstrOffset = 0;
+ unsigned CFAOffset = 0;
+ unsigned StackAdjust = 0;
+
+ MachineBasicBlock &MBB = MF.front(); // Prologue is in entry BB.
+ bool ExpectEnd = false;
+ for (MachineBasicBlock::iterator
+ MBBI = MBB.begin(), MBBE = MBB.end(); MBBI != MBBE; ++MBBI) {
+ MachineInstr &MI = *MBBI;
+ unsigned Opc = MI.getOpcode();
+ if (Opc == X86::PROLOG_LABEL) continue;
+ if (!MI.getFlag(MachineInstr::FrameSetup)) break;
+
+ // We don't expect any more prologue instructions.
+ if (ExpectEnd) return 0;
+
+ if (Opc == PushInstr) {
+ // If there are too many saved registers, we cannot use compact encoding.
+ if (--SavedRegIdx < 0) return 0;
+
+ SavedRegs[SavedRegIdx] = MI.getOperand(0).getReg();
+ CFAOffset += OffsetSize;
+ InstrOffset += PushInstrSize;
+ } else if (Opc == MoveInstr) {
+ unsigned SrcReg = MI.getOperand(1).getReg();
+ unsigned DstReg = MI.getOperand(0).getReg();
+
+ if (DstReg != FramePtr || SrcReg != StackPtr)
+ return 0;
+
+ CFAOffset = 0;
+ memset(SavedRegs, 0, sizeof(SavedRegs));
+ InstrOffset += MoveInstrSize;
+ } else if (Opc == SubtractInstr) {
+ if (StackAdjust)
+ // We already have a stack pointer adjustment.
+ return 0;
+
+ if (!MI.getOperand(0).isReg() ||
+ MI.getOperand(0).getReg() != MI.getOperand(1).getReg() ||
+ MI.getOperand(0).getReg() != StackPtr || !MI.getOperand(2).isImm())
+ // We need this to be a stack pointer adjustment. Something like:
+ //
+ // %RSP<def> = SUB64ri8 %RSP, 48
+ return 0;
+
+ StackAdjust = MI.getOperand(2).getImm() / StackDivide;
+ SubtractInstrIdx += InstrOffset;
+ ExpectEnd = true;
+ }
+ }
+
+ // Encode that we are using EBP/RBP as the frame pointer.
+ uint32_t CompactUnwindEncoding = 0;
+ CFAOffset /= StackDivide;
+ if (HasFP) {
+ if ((CFAOffset & 0xFF) != CFAOffset)
+ // Offset was too big for compact encoding.
+ return 0;
+
+ // Get the encoding of the saved registers when we have a frame pointer.
+ uint32_t RegEnc = encodeCompactUnwindRegistersWithFrame(SavedRegs, Is64Bit);
+ if (RegEnc == ~0U)
+ return 0;
+
+ CompactUnwindEncoding |= 0x01000000;
+ CompactUnwindEncoding |= (CFAOffset & 0xFF) << 16;
+ CompactUnwindEncoding |= RegEnc & 0x7FFF;
+ } else {
+ unsigned FullOffset = CFAOffset + StackAdjust;
+ if ((FullOffset & 0xFF) == FullOffset) {
+ // Frameless stack.
+ CompactUnwindEncoding |= 0x02000000;
+ CompactUnwindEncoding |= (FullOffset & 0xFF) << 16;
+ } else {
+ if ((CFAOffset & 0x7) != CFAOffset)
+ // The extra stack adjustments are too big for us to handle.
+ return 0;
+
+ // Frameless stack with an offset too large for us to encode compactly.
+ CompactUnwindEncoding |= 0x03000000;
+
+ // Encode the offset to the nnnnnn value in the 'subl $nnnnnn, ESP'
+ // instruction.
+ CompactUnwindEncoding |= (SubtractInstrIdx & 0xFF) << 16;
+
+ // Encode any extra stack changes (done via push instructions).
+ CompactUnwindEncoding |= (CFAOffset & 0x7) << 13;
+ }
+
+ // Get the encoding of the saved registers when we don't have a frame
+ // pointer.
+ uint32_t RegEnc = encodeCompactUnwindRegistersWithoutFrame(SavedRegs,
+ 6 - SavedRegIdx,
+ Is64Bit);
+ if (RegEnc == ~0U) return 0;
+ CompactUnwindEncoding |= RegEnc & 0x3FF;
+ }
+
+ return CompactUnwindEncoding;
+}
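As a worked example of the bit layout this function produces (a sketch; the
mode bits and field positions are read off the code above, the field values
are made up):

  #include <cstdint>
  #include <cstdio>

  int main() {
    // Frame-based function, CFA offset of 6 stack slots, register field 0x123:
    uint32_t Enc = 0;
    Enc |= 0x01000000;        // EBP/RBP frame mode
    Enc |= (6 & 0xFF) << 16;  // stack-slot-scaled CFA offset
    Enc |= 0x123 & 0x7FFF;    // saved-register encoding
    printf("0x%08x\n", Enc);  // prints 0x01060123
  }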
+
/// emitPrologue - Push callee-saved registers onto the stack, which
/// automatically adjusts the stack pointer. Adjust the stack pointer to allocate
/// space for local variables. Also emit labels used by the exception handler to
@@ -370,7 +612,6 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF) const {
unsigned SlotSize = RegInfo->getSlotSize();
unsigned FramePtr = RegInfo->getFrameRegister(MF);
unsigned StackPtr = RegInfo->getStackRegister();
-
DebugLoc DL;
// If we're forcing a stack realignment we can't rely on just the frame
@@ -398,7 +639,8 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF) const {
!RegInfo->needsStackRealignment(MF) &&
!MFI->hasVarSizedObjects() && // No dynamic alloca.
!MFI->adjustsStack() && // No calls.
- !IsWin64) { // Win64 has no Red Zone
+ !IsWin64 && // Win64 has no Red Zone
+ !EnableSegmentedStacks) { // Regular stack
uint64_t MinSize = X86FI->getCalleeSavedFrameSize();
if (HasFP) MinSize += SlotSize;
StackSize = std::max(MinSize, StackSize > 128 ? StackSize - 128 : 0);
@@ -459,7 +701,8 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF) const {
if (needsFrameMoves) {
// Mark the place where EBP/RBP was saved.
MCSymbol *FrameLabel = MMI.getContext().CreateTempSymbol();
- BuildMI(MBB, MBBI, DL, TII.get(X86::PROLOG_LABEL)).addSym(FrameLabel);
+ BuildMI(MBB, MBBI, DL, TII.get(X86::PROLOG_LABEL))
+ .addSym(FrameLabel);
// Define the current CFA rule to use the provided offset.
if (StackSize) {
@@ -478,7 +721,7 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF) const {
Moves.push_back(MachineMove(FrameLabel, FPDst, FPSrc));
}
- // Update EBP with the new base value...
+ // Update EBP with the new base value.
BuildMI(MBB, MBBI, DL,
TII.get(Is64Bit ? X86::MOV64rr : X86::MOV32rr), FramePtr)
.addReg(StackPtr)
@@ -487,7 +730,8 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF) const {
if (needsFrameMoves) {
// Mark effective beginning of when frame pointer becomes valid.
MCSymbol *FrameLabel = MMI.getContext().CreateTempSymbol();
- BuildMI(MBB, MBBI, DL, TII.get(X86::PROLOG_LABEL)).addSym(FrameLabel);
+ BuildMI(MBB, MBBI, DL, TII.get(X86::PROLOG_LABEL))
+ .addSym(FrameLabel);
// Define the current CFA to use the EBP/RBP register.
MachineLocation FPDst(FramePtr);
@@ -504,8 +748,10 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF) const {
if (RegInfo->needsStackRealignment(MF)) {
MachineInstr *MI =
BuildMI(MBB, MBBI, DL,
- TII.get(Is64Bit ? X86::AND64ri32 : X86::AND32ri),
- StackPtr).addReg(StackPtr).addImm(-MaxAlign);
+ TII.get(Is64Bit ? X86::AND64ri32 : X86::AND32ri), StackPtr)
+ .addReg(StackPtr)
+ .addImm(-MaxAlign)
+ .setMIFlag(MachineInstr::FrameSetup);
// The EFLAGS implicit def is dead.
MI->getOperand(3).setIsDead();
@@ -522,6 +768,7 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF) const {
(MBBI->getOpcode() == X86::PUSH32r ||
MBBI->getOpcode() == X86::PUSH64r)) {
PushedRegs = true;
+ MBBI->setFlag(MachineInstr::FrameSetup);
++MBBI;
if (!HasFP && needsFrameMoves) {
@@ -530,8 +777,7 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF) const {
BuildMI(MBB, MBBI, DL, TII.get(X86::PROLOG_LABEL)).addSym(Label);
// Define the current CFA rule to use the provided offset.
- unsigned Ptr = StackSize ?
- MachineLocation::VirtualFP : StackPtr;
+ unsigned Ptr = StackSize ? MachineLocation::VirtualFP : StackPtr;
MachineLocation SPDst(Ptr);
MachineLocation SPSrc(Ptr, StackOffset);
Moves.push_back(MachineMove(Label, SPDst, SPSrc));
@@ -586,26 +832,30 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF) const {
// Save EAX
BuildMI(MBB, MBBI, DL, TII.get(X86::PUSH32r))
- .addReg(X86::EAX, RegState::Kill);
+ .addReg(X86::EAX, RegState::Kill)
+ .setMIFlag(MachineInstr::FrameSetup);
}
if (Is64Bit) {
// Handle the 64-bit Windows ABI case where we need to call __chkstk.
// Function prologue is responsible for adjusting the stack pointer.
BuildMI(MBB, MBBI, DL, TII.get(X86::MOV64ri), X86::RAX)
- .addImm(NumBytes);
+ .addImm(NumBytes)
+ .setMIFlag(MachineInstr::FrameSetup);
} else {
// Allocate NumBytes-4 bytes on stack in case of isEAXAlive.
// We'll also use 4 already allocated bytes for EAX.
BuildMI(MBB, MBBI, DL, TII.get(X86::MOV32ri), X86::EAX)
- .addImm(isEAXAlive ? NumBytes - 4 : NumBytes);
+ .addImm(isEAXAlive ? NumBytes - 4 : NumBytes)
+ .setMIFlag(MachineInstr::FrameSetup);
}
BuildMI(MBB, MBBI, DL,
TII.get(Is64Bit ? X86::W64ALLOCA : X86::CALLpcrel32))
.addExternalSymbol(StackProbeSymbol)
.addReg(StackPtr, RegState::Define | RegState::Implicit)
- .addReg(X86::EFLAGS, RegState::Define | RegState::Implicit);
+ .addReg(X86::EFLAGS, RegState::Define | RegState::Implicit)
+ .setMIFlag(MachineInstr::FrameSetup);
// MSVC x64's __chkstk needs to adjust %rsp.
// FIXME: %rax preserves the offset and should be available.
@@ -618,6 +868,7 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF) const {
MachineInstr *MI = addRegOffset(BuildMI(MF, DL, TII.get(X86::MOV32rm),
X86::EAX),
StackPtr, false, NumBytes - 4);
+ MI->setFlag(MachineInstr::FrameSetup);
MBB.insert(MBBI, MI);
}
} else if (NumBytes)
@@ -627,7 +878,8 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF) const {
if (( (!HasFP && NumBytes) || PushedRegs) && needsFrameMoves) {
// Mark end of stack pointer adjustment.
MCSymbol *Label = MMI.getContext().CreateTempSymbol();
- BuildMI(MBB, MBBI, DL, TII.get(X86::PROLOG_LABEL)).addSym(Label);
+ BuildMI(MBB, MBBI, DL, TII.get(X86::PROLOG_LABEL))
+ .addSym(Label);
if (!HasFP && NumBytes) {
// Define the current CFA rule to use the provided offset.
@@ -647,6 +899,11 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF) const {
if (PushedRegs)
emitCalleeSavedFrameMoves(MF, Label, HasFP ? FramePtr : StackPtr);
}
+
+ // Darwin 10.7 and greater has support for compact unwind encoding.
+ if (STI.getTargetTriple().isMacOSX() &&
+ !STI.getTargetTriple().isMacOSXVersionLT(10, 7))
+ MMI.setCompactUnwindEncoding(getCompactUnwindEncoding(MF));
}
void X86FrameLowering::emitEpilogue(MachineFunction &MF,
@@ -844,23 +1101,6 @@ void X86FrameLowering::emitEpilogue(MachineFunction &MF,
}
}
-void
-X86FrameLowering::getInitialFrameState(std::vector<MachineMove> &Moves) const {
- // Calculate amount of bytes used for return address storing
- int stackGrowth = (STI.is64Bit() ? -8 : -4);
- const X86RegisterInfo *RI = TM.getRegisterInfo();
-
- // Initial state of the frame pointer is esp+stackGrowth.
- MachineLocation Dst(MachineLocation::VirtualFP);
- MachineLocation Src(RI->getStackRegister(), stackGrowth);
- Moves.push_back(MachineMove(0, Dst, Src));
-
- // Add return address to move list
- MachineLocation CSDst(RI->getStackRegister(), stackGrowth);
- MachineLocation CSSrc(RI->getRARegister());
- Moves.push_back(MachineMove(0, CSDst, CSSrc));
-}
-
int X86FrameLowering::getFrameIndexOffset(const MachineFunction &MF, int FI) const {
const X86RegisterInfo *RI =
static_cast<const X86RegisterInfo*>(MF.getTarget().getRegisterInfo());
@@ -873,9 +1113,7 @@ int X86FrameLowering::getFrameIndexOffset(const MachineFunction &MF, int FI) con
// Skip the saved EBP.
Offset += RI->getSlotSize();
} else {
- unsigned Align = MFI->getObjectAlignment(FI);
- assert((-(Offset + StackSize)) % Align == 0);
- Align = 0;
+ assert((-(Offset + StackSize)) % MFI->getObjectAlignment(FI) == 0);
return Offset + StackSize;
}
// FIXME: Support tail calls
@@ -1027,184 +1265,183 @@ X86FrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
true);
assert(FrameIdx == MFI->getObjectIndexBegin() &&
"Slot for EBP register must be last in order to be found!");
- FrameIdx = 0;
+ (void)FrameIdx;
}
}
-/// permuteEncode - Create the permutation encoding used with frameless
-/// stacks. It is passed the number of registers to be saved and an array of the
-/// registers saved.
-static uint32_t permuteEncode(unsigned SavedCount, unsigned Registers[6]) {
- // The saved registers are numbered from 1 to 6. In order to encode the order
- // in which they were saved, we re-number them according to their place in the
- // register order. The re-numbering is relative to the last re-numbered
- // register. E.g., if we have registers {6, 2, 4, 5} saved in that order:
- //
- // Orig Re-Num
- // ---- ------
- // 6 6
- // 2 2
- // 4 3
- // 5 3
- //
- bool Used[7] = { false, false, false, false, false, false, false };
- uint32_t RenumRegs[6];
- for (unsigned I = 0; I < SavedCount; ++I) {
- uint32_t Renum = 0;
- for (unsigned U = 1; U < 7; ++U) {
- if (U == Registers[I])
- break;
- if (!Used[U])
- ++Renum;
- }
-
- Used[Registers[I]] = true;
- RenumRegs[I] = Renum;
+static bool
+HasNestArgument(const MachineFunction *MF) {
+ const Function *F = MF->getFunction();
+ for (Function::const_arg_iterator I = F->arg_begin(), E = F->arg_end();
+ I != E; I++) {
+ if (I->hasNestAttr())
+ return true;
}
+ return false;
+}
- // Take the renumbered values and encode them into a 10-bit number.
- uint32_t permutationEncoding = 0;
- switch (SavedCount) {
- case 6:
- permutationEncoding |= 120 * RenumRegs[0] + 24 * RenumRegs[1]
- + 6 * RenumRegs[2] + 2 * RenumRegs[3]
- + RenumRegs[4];
- break;
- case 5:
- permutationEncoding |= 120 * RenumRegs[0] + 24 * RenumRegs[1]
- + 6 * RenumRegs[2] + 2 * RenumRegs[3]
- + RenumRegs[4];
- break;
- case 4:
- permutationEncoding |= 60 * RenumRegs[0] + 12 * RenumRegs[1]
- + 3 * RenumRegs[2] + RenumRegs[3];
- break;
- case 3:
- permutationEncoding |= 20 * RenumRegs[0] + 4 * RenumRegs[1]
- + RenumRegs[2];
- break;
- case 2:
- permutationEncoding |= 5 * RenumRegs[0] + RenumRegs[1];
- break;
- case 1:
- permutationEncoding |= RenumRegs[0];
- break;
+static unsigned
+GetScratchRegister(bool Is64Bit, const MachineFunction &MF) {
+ if (Is64Bit) {
+ return X86::R11;
+ } else {
+ CallingConv::ID CallingConvention = MF.getFunction()->getCallingConv();
+ bool IsNested = HasNestArgument(&MF);
+
+ if (CallingConvention == CallingConv::X86_FastCall) {
+ if (IsNested) {
+ report_fatal_error("Segmented stacks does not support fastcall with "
+ "nested function.");
+ return -1;
+ } else {
+ return X86::EAX;
+ }
+ } else {
+ if (IsNested)
+ return X86::EDX;
+ else
+ return X86::ECX;
+ }
}
-
- return permutationEncoding;
}
-uint32_t X86FrameLowering::
-getCompactUnwindEncoding(ArrayRef<MCCFIInstruction> Instrs,
- int DataAlignmentFactor, bool IsEH) const {
- uint32_t Encoding = 0;
- int CFAOffset = 0;
- const TargetRegisterInfo *TRI = TM.getRegisterInfo();
- unsigned SavedRegs[6] = { 0, 0, 0, 0, 0, 0 };
- unsigned SavedRegIdx = 0;
- int FramePointerReg = -1;
+void
+X86FrameLowering::adjustForSegmentedStacks(MachineFunction &MF) const {
+ MachineBasicBlock &prologueMBB = MF.front();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ const X86InstrInfo &TII = *TM.getInstrInfo();
+ uint64_t StackSize;
+ bool Is64Bit = STI.is64Bit();
+ unsigned TlsReg, TlsOffset;
+ DebugLoc DL;
+ const X86Subtarget *ST = &MF.getTarget().getSubtarget<X86Subtarget>();
- for (ArrayRef<MCCFIInstruction>::const_iterator
- I = Instrs.begin(), E = Instrs.end(); I != E; ++I) {
- const MCCFIInstruction &Inst = *I;
- MCSymbol *Label = Inst.getLabel();
+ unsigned ScratchReg = GetScratchRegister(Is64Bit, MF);
+ assert(!MF.getRegInfo().isLiveIn(ScratchReg) &&
+ "Scratch register is live-in");
- // Ignore invalid labels.
- if (Label && !Label->isDefined()) continue;
+ if (MF.getFunction()->isVarArg())
+ report_fatal_error("Segmented stacks do not support vararg functions.");
+ if (!ST->isTargetLinux())
+ report_fatal_error("Segmented stacks supported only on linux.");
- unsigned Operation = Inst.getOperation();
- if (Operation != MCCFIInstruction::Move &&
- Operation != MCCFIInstruction::RelMove)
- // FIXME: We can't handle this frame just yet.
- return 0;
-
- const MachineLocation &Dst = Inst.getDestination();
- const MachineLocation &Src = Inst.getSource();
- const bool IsRelative = (Operation == MCCFIInstruction::RelMove);
-
- if (Dst.isReg() && Dst.getReg() == MachineLocation::VirtualFP) {
- if (Src.getReg() != MachineLocation::VirtualFP) {
- // DW_CFA_def_cfa
- assert(FramePointerReg == -1 &&"Defining more than one frame pointer?");
- if (TRI->getLLVMRegNum(Src.getReg(), IsEH) != X86::EBP &&
- TRI->getLLVMRegNum(Src.getReg(), IsEH) != X86::RBP)
- // The frame pointer isn't EBP/RBP. Cannot make unwind information
- // compact.
- return 0;
- FramePointerReg = TRI->getCompactUnwindRegNum(Src.getReg(), IsEH);
- } // else DW_CFA_def_cfa_offset
-
- if (IsRelative)
- CFAOffset += Src.getOffset();
- else
- CFAOffset -= Src.getOffset();
+ MachineBasicBlock *allocMBB = MF.CreateMachineBasicBlock();
+ MachineBasicBlock *checkMBB = MF.CreateMachineBasicBlock();
+ X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
+ bool IsNested = false;
- continue;
- }
+ // We need to know if the function has a nest argument only in 64 bit mode.
+ if (Is64Bit)
+ IsNested = HasNestArgument(&MF);
- if (Src.isReg() && Src.getReg() == MachineLocation::VirtualFP) {
- // DW_CFA_def_cfa_register
- assert(FramePointerReg == -1 && "Defining more than one frame pointer?");
+ // The MOV R10, RAX needs to be in a different block, since the RET we emit in
+ // allocMBB needs to be the last (terminating) instruction.
+ MachineBasicBlock *restoreR10MBB = NULL;
+ if (IsNested)
+ restoreR10MBB = MF.CreateMachineBasicBlock();
- if (TRI->getLLVMRegNum(Dst.getReg(), IsEH) != X86::EBP &&
- TRI->getLLVMRegNum(Dst.getReg(), IsEH) != X86::RBP)
- // The frame pointer isn't EBP/RBP. Cannot make unwind information
- // compact.
- return 0;
+ for (MachineBasicBlock::livein_iterator i = prologueMBB.livein_begin(),
+ e = prologueMBB.livein_end(); i != e; i++) {
+ allocMBB->addLiveIn(*i);
+ checkMBB->addLiveIn(*i);
- FramePointerReg = TRI->getCompactUnwindRegNum(Dst.getReg(), IsEH);
- if (SavedRegIdx != 1 || SavedRegs[0] != unsigned(FramePointerReg))
- return 0;
+ if (IsNested)
+ restoreR10MBB->addLiveIn(*i);
+ }
- SavedRegs[0] = 0;
- SavedRegIdx = 0;
- continue;
- }
+ if (IsNested) {
+ allocMBB->addLiveIn(X86::R10);
+ restoreR10MBB->addLiveIn(X86::RAX);
+ }
- unsigned Reg = Src.getReg();
- int Offset = Dst.getOffset();
- if (IsRelative)
- Offset -= CFAOffset;
- Offset /= DataAlignmentFactor;
+ if (IsNested)
+ MF.push_front(restoreR10MBB);
+ MF.push_front(allocMBB);
+ MF.push_front(checkMBB);
+
+ // Eventually StackSize will be calculated by a link-time pass, which will
+ // also decide whether checking code needs to be injected into this particular
+ // prologue.
+ StackSize = MFI->getStackSize();
+
+ // Read the limit of the current stacklet from the stack_guard location.
+ if (Is64Bit) {
+ TlsReg = X86::FS;
+ TlsOffset = 0x70;
+
+ BuildMI(checkMBB, DL, TII.get(X86::LEA64r), ScratchReg).addReg(X86::RSP)
+ .addImm(0).addReg(0).addImm(-StackSize).addReg(0);
+ BuildMI(checkMBB, DL, TII.get(X86::CMP64rm)).addReg(ScratchReg)
+ .addReg(0).addImm(0).addReg(0).addImm(TlsOffset).addReg(TlsReg);
+ } else {
+ TlsReg = X86::GS;
+ TlsOffset = 0x30;
- if (Offset < 0) {
- // FIXME: Handle?
- // DW_CFA_offset_extended_sf
- return 0;
- } else if (Reg < 64) {
- // DW_CFA_offset + Reg
- if (SavedRegIdx >= 6) return 0;
- int CURegNum = TRI->getCompactUnwindRegNum(Reg, IsEH);
- if (CURegNum == -1) return 0;
- SavedRegs[SavedRegIdx++] = CURegNum;
- } else {
- // FIXME: Handle?
- // DW_CFA_offset_extended
- return 0;
- }
+ BuildMI(checkMBB, DL, TII.get(X86::LEA32r), ScratchReg).addReg(X86::ESP)
+ .addImm(0).addReg(0).addImm(-StackSize).addReg(0);
+ BuildMI(checkMBB, DL, TII.get(X86::CMP32rm)).addReg(ScratchReg)
+ .addReg(0).addImm(0).addReg(0).addImm(TlsOffset).addReg(TlsReg);
}
- // Bail if there are too many registers to encode.
- if (SavedRegIdx > 6) return 0;
+ // This jump is taken if SP >= (Stacklet Limit + Stack Space required).
+ // It jumps to normal execution of the function body.
+ BuildMI(checkMBB, DL, TII.get(X86::JG_4)).addMBB(&prologueMBB);
+
+ // On 32 bit we first push the argument size and then the frame size. On 64
+ // bit, we pass the stack frame size in r10 and the argument size in r11.
+ if (Is64Bit) {
+ // Functions with nested arguments use R10, so it needs to be saved across
+ // the call to __morestack.
+
+ if (IsNested)
+ BuildMI(allocMBB, DL, TII.get(X86::MOV64rr), X86::RAX).addReg(X86::R10);
+
+ BuildMI(allocMBB, DL, TII.get(X86::MOV64ri), X86::R10)
+ .addImm(StackSize);
+ BuildMI(allocMBB, DL, TII.get(X86::MOV64ri), X86::R11)
+ .addImm(X86FI->getArgumentStackSize());
+ MF.getRegInfo().setPhysRegUsed(X86::R10);
+ MF.getRegInfo().setPhysRegUsed(X86::R11);
+ } else {
+ // Since we'll call __morestack, stack alignment needs to be preserved.
+ BuildMI(allocMBB, DL, TII.get(X86::SUB32ri), X86::ESP).addReg(X86::ESP)
+ .addImm(8);
+ BuildMI(allocMBB, DL, TII.get(X86::PUSHi32))
+ .addImm(X86FI->getArgumentStackSize());
+ BuildMI(allocMBB, DL, TII.get(X86::PUSHi32))
+ .addImm(StackSize);
+ }
- // Check if the offset is too big.
- CFAOffset /= 4;
- if ((CFAOffset & 0xFF) != CFAOffset)
- return 0;
- Encoding |= (CFAOffset & 0xFF) << 16; // Size encoding.
-
- if (FramePointerReg != -1) {
- Encoding |= 0x01000000; // EBP/RBP Unwind Frame
- for (unsigned I = 0; I != SavedRegIdx; ++I) {
- unsigned Reg = SavedRegs[I];
- if (Reg == unsigned(FramePointerReg)) continue;
- Encoding |= (Reg & 0x7) << (I * 3); // Register encoding
- }
+ // __morestack is in libgcc
+ if (Is64Bit)
+ BuildMI(allocMBB, DL, TII.get(X86::CALL64pcrel32))
+ .addExternalSymbol("__morestack");
+ else
+ BuildMI(allocMBB, DL, TII.get(X86::CALLpcrel32))
+ .addExternalSymbol("__morestack");
+
+ // __morestack only seems to remove 8 bytes from the stack. Add back the
+ // additional 8 bytes we added before pushing the arguments.
+ if (!Is64Bit)
+ BuildMI(allocMBB, DL, TII.get(X86::ADD32ri), X86::ESP).addReg(X86::ESP)
+ .addImm(8);
+ BuildMI(allocMBB, DL, TII.get(X86::RET));
+
+ if (IsNested)
+ BuildMI(restoreR10MBB, DL, TII.get(X86::MOV64rr), X86::R10)
+ .addReg(X86::RAX);
+
+ if (IsNested) {
+ allocMBB->addSuccessor(restoreR10MBB);
+ restoreR10MBB->addSuccessor(&prologueMBB);
} else {
- Encoding |= 0x02000000; // Frameless unwind with small stack
- Encoding |= (SavedRegIdx & 0x7) << 10;
- Encoding |= permuteEncode(SavedRegIdx, SavedRegs);
+ allocMBB->addSuccessor(&prologueMBB);
}
- return Encoding;
+ checkMBB->addSuccessor(allocMBB);
+ checkMBB->addSuccessor(&prologueMBB);
+
+#ifdef XDEBUG
+ MF.verify();
+#endif
}
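At the C level, the prologue emitted above amounts to the following check (an
illustrative sketch only; in the generated code the limit lives in a TLS slot,
%fs:0x70 on 64-bit or %gs:0x30 on 32-bit, and __morestack performs the
stacklet allocation):

  #include <cstdint>

  // checkMBB: LEA computes SP - StackSize, CMP compares it against the
  // stacklet limit, and JG branches to the function body when there is room.
  static bool needsNewStacklet(uintptr_t SP, uintptr_t StackletLimit,
                               uint64_t StackSize) {
    return SP - StackSize < StackletLimit; // true => allocMBB calls __morestack
  }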
diff --git a/lib/Target/X86/X86FrameLowering.h b/lib/Target/X86/X86FrameLowering.h
index 14c31ed..6f49064 100644
--- a/lib/Target/X86/X86FrameLowering.h
+++ b/lib/Target/X86/X86FrameLowering.h
@@ -41,6 +41,8 @@ public:
void emitPrologue(MachineFunction &MF) const;
void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const;
+ void adjustForSegmentedStacks(MachineFunction &MF) const;
+
void processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
RegScavenger *RS = NULL) const;
@@ -57,11 +59,8 @@ public:
bool hasFP(const MachineFunction &MF) const;
bool hasReservedCallFrame(const MachineFunction &MF) const;
- void getInitialFrameState(std::vector<MachineMove> &Moves) const;
int getFrameIndexOffset(const MachineFunction &MF, int FI) const;
-
- uint32_t getCompactUnwindEncoding(ArrayRef<MCCFIInstruction> Instrs,
- int DataAlignmentFactor, bool IsEH) const;
+ uint32_t getCompactUnwindEncoding(MachineFunction &MF) const;
};
} // End llvm namespace
diff --git a/lib/Target/X86/X86ISelDAGToDAG.cpp b/lib/Target/X86/X86ISelDAGToDAG.cpp
index 2b0f283..02b0ff2 100644
--- a/lib/Target/X86/X86ISelDAGToDAG.cpp
+++ b/lib/Target/X86/X86ISelDAGToDAG.cpp
@@ -474,10 +474,15 @@ void X86DAGToDAGISel::PreprocessISelDAG() {
if (N->getOpcode() != ISD::FP_ROUND && N->getOpcode() != ISD::FP_EXTEND)
continue;
- // If the source and destination are SSE registers, then this is a legal
- // conversion that should not be lowered.
EVT SrcVT = N->getOperand(0).getValueType();
EVT DstVT = N->getValueType(0);
+
+ // If any of the sources are vectors, the FP stack is not involved.
+ if (SrcVT.isVector() || DstVT.isVector())
+ continue;
+
+ // If the source and destination are SSE registers, then this is a legal
+ // conversion that should not be lowered.
bool SrcIsSSE = X86Lowering.isScalarFPTypeInSSEReg(SrcVT);
bool DstIsSSE = X86Lowering.isScalarFPTypeInSSEReg(DstVT);
if (SrcIsSSE && DstIsSSE)
@@ -2168,9 +2173,10 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) {
SDValue Subreg = CurDAG->getTargetExtractSubreg(X86::sub_8bit_hi, dl,
MVT::i8, Reg);
- // Emit a testb. No special NOREX tricks are needed since there's
- // only one GPR operand!
- return CurDAG->getMachineNode(X86::TEST8ri, dl, MVT::i32,
+ // Emit a testb. The EXTRACT_SUBREG becomes a COPY that can only
+ // target GR8_NOREX registers, so make sure the register class is
+ // forced.
+ return CurDAG->getMachineNode(X86::TEST8ri_NOREX, dl, MVT::i32,
Subreg, ShiftedImm);
}
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index 5096d9a..7c8ce17 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -51,6 +51,7 @@
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetOptions.h"
using namespace llvm;
using namespace dwarf;
@@ -71,9 +72,6 @@ static SDValue Extract128BitVector(SDValue Vec,
SelectionDAG &DAG,
DebugLoc dl);
-static SDValue ConcatVectors(SDValue Lower, SDValue Upper, SelectionDAG &DAG);
-
-
/// Generate a DAG to grab 128-bits from a vector > 128 bits. This
/// sets things up to match to an AVX VEXTRACTF128 instruction or a
/// simple subregister reference. Idx is an index in the 128 bits we
@@ -85,14 +83,10 @@ static SDValue Extract128BitVector(SDValue Vec,
DebugLoc dl) {
EVT VT = Vec.getValueType();
assert(VT.getSizeInBits() == 256 && "Unexpected vector size!");
-
EVT ElVT = VT.getVectorElementType();
-
- int Factor = VT.getSizeInBits() / 128;
-
- EVT ResultVT = EVT::getVectorVT(*DAG.getContext(),
- ElVT,
- VT.getVectorNumElements() / Factor);
+ int Factor = VT.getSizeInBits()/128;
+ EVT ResultVT = EVT::getVectorVT(*DAG.getContext(), ElVT,
+ VT.getVectorNumElements()/Factor);
// Extract from UNDEF is UNDEF.
if (Vec.getOpcode() == ISD::UNDEF)
@@ -111,7 +105,6 @@ static SDValue Extract128BitVector(SDValue Vec,
* ElemsPerChunk);
SDValue VecIdx = DAG.getConstant(NormalizedIdxVal, MVT::i32);
-
SDValue Result = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, ResultVT, Vec,
VecIdx);
@@ -136,21 +129,18 @@ static SDValue Insert128BitVector(SDValue Result,
assert(VT.getSizeInBits() == 128 && "Unexpected vector size!");
EVT ElVT = VT.getVectorElementType();
-
unsigned IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue();
-
EVT ResultVT = Result.getValueType();
// Insert the relevant 128 bits.
- unsigned ElemsPerChunk = 128 / ElVT.getSizeInBits();
+ unsigned ElemsPerChunk = 128/ElVT.getSizeInBits();
// This is the index of the first element of the 128-bit chunk
// we want.
- unsigned NormalizedIdxVal = (((IdxVal * ElVT.getSizeInBits()) / 128)
+ unsigned NormalizedIdxVal = (((IdxVal * ElVT.getSizeInBits())/128)
* ElemsPerChunk);
SDValue VecIdx = DAG.getConstant(NormalizedIdxVal, MVT::i32);
-
Result = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, ResultVT, Result, Vec,
VecIdx);
return Result;
@@ -159,34 +149,6 @@ static SDValue Insert128BitVector(SDValue Result,
return SDValue();
}
-/// Given two vectors, concat them.
-static SDValue ConcatVectors(SDValue Lower, SDValue Upper, SelectionDAG &DAG) {
- DebugLoc dl = Lower.getDebugLoc();
-
- assert(Lower.getValueType() == Upper.getValueType() && "Mismatched vectors!");
-
- EVT VT = EVT::getVectorVT(*DAG.getContext(),
- Lower.getValueType().getVectorElementType(),
- Lower.getValueType().getVectorNumElements() * 2);
-
- // TODO: Generalize to arbitrary vector length (this assumes 256-bit vectors).
- assert(VT.getSizeInBits() == 256 && "Unsupported vector concat!");
-
- // Insert the upper subvector.
- SDValue Vec = Insert128BitVector(DAG.getNode(ISD::UNDEF, dl, VT), Upper,
- DAG.getConstant(
- // This is half the length of the result
- // vector. Start inserting the upper 128
- // bits here.
- Lower.getValueType().getVectorNumElements(),
- MVT::i32),
- DAG, dl);
-
- // Insert the lower subvector.
- Vec = Insert128BitVector(Vec, Lower, DAG.getConstant(0, MVT::i32), DAG, dl);
- return Vec;
-}
-
static TargetLoweringObjectFile *createTLOF(X86TargetMachine &TM) {
const X86Subtarget *Subtarget = &TM.getSubtarget<X86Subtarget>();
bool is64Bit = Subtarget->is64Bit();
@@ -197,11 +159,8 @@ static TargetLoweringObjectFile *createTLOF(X86TargetMachine &TM) {
return new TargetLoweringObjectFileMachO();
}
- if (Subtarget->isTargetELF()) {
- if (is64Bit)
- return new X8664_ELFTargetObjectFile(TM);
- return new X8632_ELFTargetObjectFile(TM);
- }
+ if (Subtarget->isTargetELF())
+ return new TargetLoweringObjectFileELF();
if (Subtarget->isTargetCOFF() && !Subtarget->isTargetEnvMacho())
return new TargetLoweringObjectFileCOFF();
llvm_unreachable("unknown subtarget type");
@@ -222,6 +181,8 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
// X86 is weird, it always uses i8 for shift amounts and setcc results.
setBooleanContents(ZeroOrOneBooleanContent);
+ // X86-SSE is even stranger. It uses -1 or 0 for vector masks.
+ setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
// For 64-bit since we have so many registers use the ILP scheduler, for
// 32-bit code use the register pressure specific scheduling.
@@ -354,7 +315,8 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
setOperationAction(ISD::FP_TO_UINT , MVT::i64 , Expand);
setOperationAction(ISD::FP_TO_UINT , MVT::i32 , Promote);
} else if (!UseSoftFloat) {
- if (X86ScalarSSEf32 && !Subtarget->hasSSE3())
+ // Since AVX is a superset of SSE3, only check for SSE here.
+ if (Subtarget->hasSSE1() && !Subtarget->hasSSE3())
// Expand FP_TO_UINT into a select.
// FIXME: We would like to use a Custom expander here eventually to do
// the optimal thing for SSE vs. the default expansion in the legalizer.
@@ -417,15 +379,24 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
setOperationAction(ISD::FREM , MVT::f80 , Expand);
setOperationAction(ISD::FLT_ROUNDS_ , MVT::i32 , Custom);
- setOperationAction(ISD::CTTZ , MVT::i8 , Custom);
- setOperationAction(ISD::CTLZ , MVT::i8 , Custom);
- setOperationAction(ISD::CTTZ , MVT::i16 , Custom);
- setOperationAction(ISD::CTLZ , MVT::i16 , Custom);
- setOperationAction(ISD::CTTZ , MVT::i32 , Custom);
- setOperationAction(ISD::CTLZ , MVT::i32 , Custom);
- if (Subtarget->is64Bit()) {
- setOperationAction(ISD::CTTZ , MVT::i64 , Custom);
- setOperationAction(ISD::CTLZ , MVT::i64 , Custom);
+ if (Subtarget->hasBMI()) {
+ setOperationAction(ISD::CTTZ , MVT::i8 , Promote);
+ } else {
+ setOperationAction(ISD::CTTZ , MVT::i8 , Custom);
+ setOperationAction(ISD::CTTZ , MVT::i16 , Custom);
+ setOperationAction(ISD::CTTZ , MVT::i32 , Custom);
+ if (Subtarget->is64Bit())
+ setOperationAction(ISD::CTTZ , MVT::i64 , Custom);
+ }
+
+ if (Subtarget->hasLZCNT()) {
+ setOperationAction(ISD::CTLZ , MVT::i8 , Promote);
+ } else {
+ setOperationAction(ISD::CTLZ , MVT::i8 , Custom);
+ setOperationAction(ISD::CTLZ , MVT::i16 , Custom);
+ setOperationAction(ISD::CTLZ , MVT::i32 , Custom);
+ if (Subtarget->is64Bit())
+ setOperationAction(ISD::CTLZ , MVT::i64 , Custom);
}
if (Subtarget->hasPOPCNT()) {
@@ -491,8 +462,8 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
if (Subtarget->hasXMM())
setOperationAction(ISD::PREFETCH , MVT::Other, Legal);
- // We may not have a libcall for MEMBARRIER so we should lower this.
setOperationAction(ISD::MEMBARRIER , MVT::Other, Custom);
+ setOperationAction(ISD::ATOMIC_FENCE , MVT::Other, Custom);
// On X86 and X86-64, atomic operations are lowered to locked instructions.
// Locked instructions, in turn, have implicit fence semantics (all memory
@@ -506,9 +477,11 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
MVT VT = IntVTs[i];
setOperationAction(ISD::ATOMIC_CMP_SWAP, VT, Custom);
setOperationAction(ISD::ATOMIC_LOAD_SUB, VT, Custom);
+ setOperationAction(ISD::ATOMIC_STORE, VT, Custom);
}
if (!Subtarget->is64Bit()) {
+ setOperationAction(ISD::ATOMIC_LOAD, MVT::i64, Custom);
setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i64, Custom);
setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i64, Custom);
setOperationAction(ISD::ATOMIC_LOAD_AND, MVT::i64, Custom);
@@ -518,6 +491,10 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
setOperationAction(ISD::ATOMIC_SWAP, MVT::i64, Custom);
}
+ if (Subtarget->hasCmpxchg16b()) {
+ setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i128, Custom);
+ }
+
// FIXME - use subtarget debug flags
if (!Subtarget->isTargetDarwin() &&
!Subtarget->isTargetELF() &&
@@ -539,7 +516,8 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
setOperationAction(ISD::FRAME_TO_ARGS_OFFSET, MVT::i32, Custom);
setOperationAction(ISD::FRAME_TO_ARGS_OFFSET, MVT::i64, Custom);
- setOperationAction(ISD::TRAMPOLINE, MVT::Other, Custom);
+ setOperationAction(ISD::INIT_TRAMPOLINE, MVT::Other, Custom);
+ setOperationAction(ISD::ADJUST_TRAMPOLINE, MVT::Other, Custom);
setOperationAction(ISD::TRAP, MVT::Other, Legal);
@@ -556,11 +534,16 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
- setOperationAction(ISD::DYNAMIC_STACKALLOC,
- (Subtarget->is64Bit() ? MVT::i64 : MVT::i32),
- (Subtarget->isTargetCOFF()
- && !Subtarget->isTargetEnvMacho()
- ? Custom : Expand));
+
+ if (Subtarget->isTargetCOFF() && !Subtarget->isTargetEnvMacho())
+ setOperationAction(ISD::DYNAMIC_STACKALLOC, Subtarget->is64Bit() ?
+ MVT::i64 : MVT::i32, Custom);
+ else if (EnableSegmentedStacks)
+ setOperationAction(ISD::DYNAMIC_STACKALLOC, Subtarget->is64Bit() ?
+ MVT::i64 : MVT::i32, Custom);
+ else
+ setOperationAction(ISD::DYNAMIC_STACKALLOC, Subtarget->is64Bit() ?
+ MVT::i64 : MVT::i32, Expand);
if (!UseSoftFloat && X86ScalarSSEf64) {
// f32 and f64 use SSE.
@@ -739,7 +722,7 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
setOperationAction(ISD::ROTL, (MVT::SimpleValueType)VT, Expand);
setOperationAction(ISD::ROTR, (MVT::SimpleValueType)VT, Expand);
setOperationAction(ISD::BSWAP, (MVT::SimpleValueType)VT, Expand);
- setOperationAction(ISD::VSETCC, (MVT::SimpleValueType)VT, Expand);
+ setOperationAction(ISD::SETCC, (MVT::SimpleValueType)VT, Expand);
setOperationAction(ISD::FLOG, (MVT::SimpleValueType)VT, Expand);
setOperationAction(ISD::FLOG2, (MVT::SimpleValueType)VT, Expand);
setOperationAction(ISD::FLOG10, (MVT::SimpleValueType)VT, Expand);
@@ -754,6 +737,7 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
setOperationAction(ISD::SIGN_EXTEND, (MVT::SimpleValueType)VT, Expand);
setOperationAction(ISD::ZERO_EXTEND, (MVT::SimpleValueType)VT, Expand);
setOperationAction(ISD::ANY_EXTEND, (MVT::SimpleValueType)VT, Expand);
+ setOperationAction(ISD::VSELECT, (MVT::SimpleValueType)VT, Expand);
for (unsigned InnerVT = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
InnerVT <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++InnerVT)
setTruncStoreAction((MVT::SimpleValueType)VT,
@@ -816,7 +800,7 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v4f32, Custom);
setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Custom);
setOperationAction(ISD::SELECT, MVT::v4f32, Custom);
- setOperationAction(ISD::VSETCC, MVT::v4f32, Custom);
+ setOperationAction(ISD::SETCC, MVT::v4f32, Custom);
}
if (!UseSoftFloat && Subtarget->hasXMMInt()) {
@@ -846,10 +830,10 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
setOperationAction(ISD::FSQRT, MVT::v2f64, Legal);
setOperationAction(ISD::FNEG, MVT::v2f64, Custom);
- setOperationAction(ISD::VSETCC, MVT::v2f64, Custom);
- setOperationAction(ISD::VSETCC, MVT::v16i8, Custom);
- setOperationAction(ISD::VSETCC, MVT::v8i16, Custom);
- setOperationAction(ISD::VSETCC, MVT::v4i32, Custom);
+ setOperationAction(ISD::SETCC, MVT::v2i64, Custom);
+ setOperationAction(ISD::SETCC, MVT::v16i8, Custom);
+ setOperationAction(ISD::SETCC, MVT::v8i16, Custom);
+ setOperationAction(ISD::SETCC, MVT::v4i32, Custom);
setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v16i8, Custom);
setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v8i16, Custom);
@@ -925,7 +909,7 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Legal);
}
- if (Subtarget->hasSSE41()) {
+ if (Subtarget->hasSSE41() || Subtarget->hasAVX()) {
setOperationAction(ISD::FFLOOR, MVT::f32, Legal);
setOperationAction(ISD::FCEIL, MVT::f32, Legal);
setOperationAction(ISD::FTRUNC, MVT::f32, Legal);
@@ -944,6 +928,12 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
setOperationAction(ISD::SHL, MVT::v4i32, Custom);
setOperationAction(ISD::SHL, MVT::v16i8, Custom);
+ setOperationAction(ISD::VSELECT, MVT::v2f64, Legal);
+ setOperationAction(ISD::VSELECT, MVT::v2i64, Legal);
+ setOperationAction(ISD::VSELECT, MVT::v16i8, Legal);
+ setOperationAction(ISD::VSELECT, MVT::v4i32, Legal);
+ setOperationAction(ISD::VSELECT, MVT::v4f32, Legal);
+
// i8 and i16 vectors are custom, because the source register and memory
// operand types are not the same width. f32 vectors are
// custom since the immediate controlling the insert encodes additional
@@ -964,10 +954,11 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
}
}
- if (Subtarget->hasSSE2()) {
+ if (Subtarget->hasXMMInt()) {
setOperationAction(ISD::SRL, MVT::v2i64, Custom);
setOperationAction(ISD::SRL, MVT::v4i32, Custom);
setOperationAction(ISD::SRL, MVT::v16i8, Custom);
+ setOperationAction(ISD::SRL, MVT::v8i16, Custom);
setOperationAction(ISD::SHL, MVT::v2i64, Custom);
setOperationAction(ISD::SHL, MVT::v4i32, Custom);
@@ -977,15 +968,16 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
setOperationAction(ISD::SRA, MVT::v8i16, Custom);
}
- if (Subtarget->hasSSE42())
- setOperationAction(ISD::VSETCC, MVT::v2i64, Custom);
+ if (Subtarget->hasSSE42() || Subtarget->hasAVX())
+ setOperationAction(ISD::SETCC, MVT::v2i64, Custom);
if (!UseSoftFloat && Subtarget->hasAVX()) {
- addRegisterClass(MVT::v8f32, X86::VR256RegisterClass);
- addRegisterClass(MVT::v4f64, X86::VR256RegisterClass);
- addRegisterClass(MVT::v8i32, X86::VR256RegisterClass);
- addRegisterClass(MVT::v4i64, X86::VR256RegisterClass);
- addRegisterClass(MVT::v32i8, X86::VR256RegisterClass);
+ addRegisterClass(MVT::v32i8, X86::VR256RegisterClass);
+ addRegisterClass(MVT::v16i16, X86::VR256RegisterClass);
+ addRegisterClass(MVT::v8i32, X86::VR256RegisterClass);
+ addRegisterClass(MVT::v8f32, X86::VR256RegisterClass);
+ addRegisterClass(MVT::v4i64, X86::VR256RegisterClass);
+ addRegisterClass(MVT::v4f64, X86::VR256RegisterClass);
setOperationAction(ISD::LOAD, MVT::v8f32, Legal);
setOperationAction(ISD::LOAD, MVT::v4f64, Legal);
@@ -1005,6 +997,59 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
setOperationAction(ISD::FSQRT, MVT::v4f64, Legal);
setOperationAction(ISD::FNEG, MVT::v4f64, Custom);
+ setOperationAction(ISD::FP_TO_SINT, MVT::v8i32, Legal);
+ setOperationAction(ISD::SINT_TO_FP, MVT::v8i32, Legal);
+ setOperationAction(ISD::FP_ROUND, MVT::v4f32, Legal);
+
+ setOperationAction(ISD::CONCAT_VECTORS, MVT::v4f64, Custom);
+ setOperationAction(ISD::CONCAT_VECTORS, MVT::v4i64, Custom);
+ setOperationAction(ISD::CONCAT_VECTORS, MVT::v8f32, Custom);
+ setOperationAction(ISD::CONCAT_VECTORS, MVT::v8i32, Custom);
+ setOperationAction(ISD::CONCAT_VECTORS, MVT::v32i8, Custom);
+ setOperationAction(ISD::CONCAT_VECTORS, MVT::v16i16, Custom);
+
+ setOperationAction(ISD::SRL, MVT::v4i64, Custom);
+ setOperationAction(ISD::SRL, MVT::v8i32, Custom);
+ setOperationAction(ISD::SRL, MVT::v16i16, Custom);
+ setOperationAction(ISD::SRL, MVT::v32i8, Custom);
+
+ setOperationAction(ISD::SHL, MVT::v4i64, Custom);
+ setOperationAction(ISD::SHL, MVT::v8i32, Custom);
+ setOperationAction(ISD::SHL, MVT::v16i16, Custom);
+ setOperationAction(ISD::SHL, MVT::v32i8, Custom);
+
+ setOperationAction(ISD::SRA, MVT::v8i32, Custom);
+ setOperationAction(ISD::SRA, MVT::v16i16, Custom);
+
+ setOperationAction(ISD::SETCC, MVT::v32i8, Custom);
+ setOperationAction(ISD::SETCC, MVT::v16i16, Custom);
+ setOperationAction(ISD::SETCC, MVT::v8i32, Custom);
+ setOperationAction(ISD::SETCC, MVT::v4i64, Custom);
+
+ setOperationAction(ISD::SELECT, MVT::v4f64, Custom);
+ setOperationAction(ISD::SELECT, MVT::v4i64, Custom);
+ setOperationAction(ISD::SELECT, MVT::v8f32, Custom);
+
+ setOperationAction(ISD::VSELECT, MVT::v4f64, Legal);
+ setOperationAction(ISD::VSELECT, MVT::v4i64, Legal);
+ setOperationAction(ISD::VSELECT, MVT::v8i32, Legal);
+ setOperationAction(ISD::VSELECT, MVT::v8f32, Legal);
+
+ setOperationAction(ISD::ADD, MVT::v4i64, Custom);
+ setOperationAction(ISD::ADD, MVT::v8i32, Custom);
+ setOperationAction(ISD::ADD, MVT::v16i16, Custom);
+ setOperationAction(ISD::ADD, MVT::v32i8, Custom);
+
+ setOperationAction(ISD::SUB, MVT::v4i64, Custom);
+ setOperationAction(ISD::SUB, MVT::v8i32, Custom);
+ setOperationAction(ISD::SUB, MVT::v16i16, Custom);
+ setOperationAction(ISD::SUB, MVT::v32i8, Custom);
+
+ setOperationAction(ISD::MUL, MVT::v4i64, Custom);
+ setOperationAction(ISD::MUL, MVT::v8i32, Custom);
+ setOperationAction(ISD::MUL, MVT::v16i16, Custom);
+ // Don't lower v32i8 because there is no 128-bit byte mul
+
// Custom lower several nodes for 256-bit types.
for (unsigned i = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
i <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++i) {
@@ -1093,6 +1138,7 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
setTargetDAGCombine(ISD::VECTOR_SHUFFLE);
setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT);
setTargetDAGCombine(ISD::BUILD_VECTOR);
+ setTargetDAGCombine(ISD::VSELECT);
setTargetDAGCombine(ISD::SELECT);
setTargetDAGCombine(ISD::SHL);
setTargetDAGCombine(ISD::SRA);
@@ -1100,7 +1146,10 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
setTargetDAGCombine(ISD::OR);
setTargetDAGCombine(ISD::AND);
setTargetDAGCombine(ISD::ADD);
+ setTargetDAGCombine(ISD::FADD);
+ setTargetDAGCombine(ISD::FSUB);
setTargetDAGCombine(ISD::SUB);
+ setTargetDAGCombine(ISD::LOAD);
setTargetDAGCombine(ISD::STORE);
setTargetDAGCombine(ISD::ZERO_EXTEND);
setTargetDAGCombine(ISD::SINT_TO_FP);
@@ -1124,25 +1173,26 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
}
-MVT::SimpleValueType X86TargetLowering::getSetCCResultType(EVT VT) const {
- return MVT::i8;
+EVT X86TargetLowering::getSetCCResultType(EVT VT) const {
+ if (!VT.isVector()) return MVT::i8;
+ return VT.changeVectorElementTypeToInteger();
}
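For example, under the new hook a scalar f64 compare still yields an i8, while
a v4f32 compare yields a v4i32 lane mask (all-ones or zero per element,
matching the ZeroOrNegativeOneBooleanContent setting above). A toy model of
the rule (names illustrative, not LLVM API):

  #include <cstdio>
  #include <string>

  // Scalars compare to i8; vectors compare to an integer vector with the
  // same lane count and lane width.
  static std::string setCCResultType(bool IsVector, unsigned Lanes,
                                     unsigned LaneBits) {
    if (!IsVector)
      return "i8";
    return "v" + std::to_string(Lanes) + "i" + std::to_string(LaneBits);
  }

  int main() {
    printf("%s\n", setCCResultType(false, 1, 64).c_str()); // f64   -> i8
    printf("%s\n", setCCResultType(true, 4, 32).c_str());  // v4f32 -> v4i32
  }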
/// getMaxByValAlign - Helper for getByValTypeAlignment to determine
/// the desired ByVal argument alignment.
-static void getMaxByValAlign(const Type *Ty, unsigned &MaxAlign) {
+static void getMaxByValAlign(Type *Ty, unsigned &MaxAlign) {
if (MaxAlign == 16)
return;
- if (const VectorType *VTy = dyn_cast<VectorType>(Ty)) {
+ if (VectorType *VTy = dyn_cast<VectorType>(Ty)) {
if (VTy->getBitWidth() == 128)
MaxAlign = 16;
- } else if (const ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
+ } else if (ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
unsigned EltAlign = 0;
getMaxByValAlign(ATy->getElementType(), EltAlign);
if (EltAlign > MaxAlign)
MaxAlign = EltAlign;
- } else if (const StructType *STy = dyn_cast<StructType>(Ty)) {
+ } else if (StructType *STy = dyn_cast<StructType>(Ty)) {
for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
unsigned EltAlign = 0;
getMaxByValAlign(STy->getElementType(i), EltAlign);
@@ -1159,7 +1209,7 @@ static void getMaxByValAlign(const Type *Ty, unsigned &MaxAlign) {
/// function arguments in the caller parameter area. For X86, aggregates
/// that contain SSE vectors are placed at 16-byte boundaries while the rest
/// are at 4-byte boundaries.
-unsigned X86TargetLowering::getByValTypeAlignment(const Type *Ty) const {
+unsigned X86TargetLowering::getByValTypeAlignment(Type *Ty) const {
if (Subtarget->is64Bit()) {
// Max of 8 and alignment of type.
unsigned TyAlign = TD->getABITypeAlignment(Ty);
@@ -1203,9 +1253,12 @@ X86TargetLowering::getOptimalMemOpType(uint64_t Size,
((DstAlign == 0 || DstAlign >= 16) &&
(SrcAlign == 0 || SrcAlign >= 16))) &&
Subtarget->getStackAlignment() >= 16) {
- if (Subtarget->hasSSE2())
+ if (Subtarget->hasAVX() &&
+ Subtarget->getStackAlignment() >= 32)
+ return MVT::v8f32;
+ if (Subtarget->hasXMMInt())
return MVT::v4i32;
- if (Subtarget->hasSSE1())
+ if (Subtarget->hasXMM())
return MVT::v4f32;
} else if (!MemcpyStrSrc && Size >= 8 &&
!Subtarget->is64Bit() &&
@@ -1408,7 +1461,7 @@ X86TargetLowering::LowerReturn(SDValue Chain,
ValToCopy);
// If we don't have SSE2 available, convert to v4f32 so the generated
// register is legal.
- if (!Subtarget->hasSSE2())
+ if (!Subtarget->hasXMMInt())
ValToCopy = DAG.getNode(ISD::BITCAST, dl, MVT::v4f32,ValToCopy);
}
}
@@ -1700,6 +1753,7 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain,
// places.
assert(VA.getValNo() != LastVal &&
"Don't support value assigned to multiple locs yet");
+ (void)LastVal;
LastVal = VA.getValNo();
if (VA.isRegLoc()) {
@@ -1917,6 +1971,8 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain,
FuncInfo->setVarArgsFrameIndex(0xAAAAAAA);
}
+ FuncInfo->setArgumentStackSize(StackSize);
+
return Chain;
}
@@ -2744,8 +2800,6 @@ static bool isTargetShuffle(unsigned Opcode) {
case X86ISD::MOVSD:
case X86ISD::UNPCKLPS:
case X86ISD::UNPCKLPD:
- case X86ISD::VUNPCKLPS:
- case X86ISD::VUNPCKLPD:
case X86ISD::VUNPCKLPSY:
case X86ISD::VUNPCKLPDY:
case X86ISD::PUNPCKLWD:
@@ -2754,10 +2808,17 @@ static bool isTargetShuffle(unsigned Opcode) {
case X86ISD::PUNPCKLQDQ:
case X86ISD::UNPCKHPS:
case X86ISD::UNPCKHPD:
+ case X86ISD::VUNPCKHPSY:
+ case X86ISD::VUNPCKHPDY:
case X86ISD::PUNPCKHWD:
case X86ISD::PUNPCKHBW:
case X86ISD::PUNPCKHDQ:
case X86ISD::PUNPCKHQDQ:
+ case X86ISD::VPERMILPS:
+ case X86ISD::VPERMILPSY:
+ case X86ISD::VPERMILPD:
+ case X86ISD::VPERMILPDY:
+ case X86ISD::VPERM2F128:
return true;
}
return false;
@@ -2783,6 +2844,10 @@ static SDValue getTargetShuffleNode(unsigned Opc, DebugLoc dl, EVT VT,
case X86ISD::PSHUFD:
case X86ISD::PSHUFHW:
case X86ISD::PSHUFLW:
+ case X86ISD::VPERMILPS:
+ case X86ISD::VPERMILPSY:
+ case X86ISD::VPERMILPD:
+ case X86ISD::VPERMILPDY:
return DAG.getNode(Opc, dl, VT, V1, DAG.getConstant(TargetMask, MVT::i8));
}
@@ -2796,6 +2861,7 @@ static SDValue getTargetShuffleNode(unsigned Opc, DebugLoc dl, EVT VT,
case X86ISD::PALIGN:
case X86ISD::SHUFPD:
case X86ISD::SHUFPS:
+ case X86ISD::VPERM2F128:
return DAG.getNode(Opc, dl, VT, V1, V2,
DAG.getConstant(TargetMask, MVT::i8));
}
@@ -2815,8 +2881,6 @@ static SDValue getTargetShuffleNode(unsigned Opc, DebugLoc dl, EVT VT,
case X86ISD::MOVSD:
case X86ISD::UNPCKLPS:
case X86ISD::UNPCKLPD:
- case X86ISD::VUNPCKLPS:
- case X86ISD::VUNPCKLPD:
case X86ISD::VUNPCKLPSY:
case X86ISD::VUNPCKLPDY:
case X86ISD::PUNPCKLWD:
@@ -2825,6 +2889,8 @@ static SDValue getTargetShuffleNode(unsigned Opc, DebugLoc dl, EVT VT,
case X86ISD::PUNPCKLQDQ:
case X86ISD::UNPCKHPS:
case X86ISD::UNPCKHPD:
+ case X86ISD::VUNPCKHPSY:
+ case X86ISD::VUNPCKHPDY:
case X86ISD::PUNPCKHWD:
case X86ISD::PUNPCKHBW:
case X86ISD::PUNPCKHDQ:
@@ -3026,6 +3092,17 @@ static bool isUndefOrInRange(int Val, int Low, int Hi) {
return (Val < 0) || (Val >= Low && Val < Hi);
}
+/// isUndefOrInRange - Return true if every element in Mask, beginning
+/// at position Pos and ending at Pos+Size, falls within the specified
+/// range [Low, Hi) or is undef.
+static bool isUndefOrInRange(const SmallVectorImpl<int> &Mask,
+ int Pos, int Size, int Low, int Hi) {
+ for (int i = Pos, e = Pos+Size; i != e; ++i)
+ if (!isUndefOrInRange(Mask[i], Low, Hi))
+ return false;
+ return true;
+}
+
/// isUndefOrEqual - Val is either less than zero (undef) or equal to the
/// specified value.
static bool isUndefOrEqual(int Val, int CmpVal) {
@@ -3034,6 +3111,17 @@ static bool isUndefOrEqual(int Val, int CmpVal) {
return false;
}
+/// isSequentialOrUndefInRange - Return true if every element in Mask,
+/// beginning at position Pos and ending at Pos+Size, matches the sequential
+/// range starting at Low in order, or is undef.
+static bool isSequentialOrUndefInRange(const SmallVectorImpl<int> &Mask,
+ int Pos, int Size, int Low) {
+ for (int i = Pos, e = Pos+Size; i != e; ++i, ++Low)
+ if (!isUndefOrEqual(Mask[i], Low))
+ return false;
+ return true;
+}
+
/// isPSHUFDMask - Return true if the node specifies a shuffle of elements that
/// is suitable for input to PSHUFD or PSHUFW. That is, it doesn't reference
/// the second operand.
@@ -3104,11 +3192,13 @@ bool X86::isPSHUFLWMask(ShuffleVectorSDNode *N) {
/// isPALIGNRMask - Return true if the node specifies a shuffle of elements that
/// is suitable for input to PALIGNR.
static bool isPALIGNRMask(const SmallVectorImpl<int> &Mask, EVT VT,
- bool hasSSSE3) {
+ bool hasSSSE3OrAVX) {
int i, e = VT.getVectorNumElements();
+ if (VT.getSizeInBits() != 128 && VT.getSizeInBits() != 64)
+ return false;
// Do not handle v2i64 / v2f64 shuffles with palignr.
- if (e < 4 || !hasSSSE3)
+ if (e < 4 || !hasSSSE3OrAVX)
return false;
for (i = 0; i != e; ++i)
@@ -3119,42 +3209,176 @@ static bool isPALIGNRMask(const SmallVectorImpl<int> &Mask, EVT VT,
if (i == e)
return false;
- // Determine if it's ok to perform a palignr with only the LHS, since we
- // don't have access to the actual shuffle elements to see if RHS is undef.
- bool Unary = Mask[i] < (int)e;
- bool NeedsUnary = false;
+ // Make sure we're shifting in the right direction.
+ if (Mask[i] <= i)
+ return false;
int s = Mask[i] - i;
// Check the rest of the elements to see if they are consecutive.
for (++i; i != e; ++i) {
int m = Mask[i];
- if (m < 0)
- continue;
+ if (m >= 0 && m != s+i)
+ return false;
+ }
+ return true;
+}
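+// For illustration, on a v4i32 shuffle: Mask = <1, 2, 3, 4> has its first
+// defined element at i = 0 with Mask[0] = 1 > 0, so s = 1, and the remaining
+// elements are consecutive (2 = s+1, 3 = s+2, 4 = s+3), so the mask matches;
+// getShufflePALIGNRImmediate would then yield (1 - 0) * EltSize = 4 bytes
+// for 32-bit elements.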
+
+/// isVSHUFPSYMask - Return true if the specified VECTOR_SHUFFLE operand
+/// specifies a shuffle of elements that is suitable for input to 256-bit
+/// VSHUFPSY.
+static bool isVSHUFPSYMask(const SmallVectorImpl<int> &Mask, EVT VT,
+ const X86Subtarget *Subtarget) {
+ int NumElems = VT.getVectorNumElements();
+
+ if (!Subtarget->hasAVX() || VT.getSizeInBits() != 256)
+ return false;
+
+ if (NumElems != 8)
+ return false;
- Unary = Unary && (m < (int)e);
- NeedsUnary = NeedsUnary || (m < s);
+ // VSHUFPSY divides the resulting vector into 4 chunks.
+ // The sources are also split into 4 chunks, and each destination
+ // chunk must come from a different source chunk.
+ //
+ // SRC1 => X7 X6 X5 X4 X3 X2 X1 X0
+ // SRC2 => Y7 Y6 Y5 Y4 Y3 Y2 Y1 Y0
+ //
+ // DST => Y7..Y4, Y7..Y4, X7..X4, X7..X4,
+ // Y3..Y0, Y3..Y0, X3..X0, X3..X0
+ //
+ int QuarterSize = NumElems/4;
+ int HalfSize = QuarterSize*2;
+ for (int i = 0; i < QuarterSize; ++i)
+ if (!isUndefOrInRange(Mask[i], 0, HalfSize))
+ return false;
+ for (int i = QuarterSize; i < QuarterSize*2; ++i)
+ if (!isUndefOrInRange(Mask[i], NumElems, NumElems+HalfSize))
+ return false;
- if (NeedsUnary && !Unary)
+ // The mask of the second half must be the same as the first but with
+ // the appropriate offsets. This works in the same way as VPERMILPS
+ // works with masks.
+ for (int i = QuarterSize*2; i < QuarterSize*3; ++i) {
+ if (!isUndefOrInRange(Mask[i], HalfSize, NumElems))
+ return false;
+ int FstHalfIdx = i-HalfSize;
+ if (Mask[FstHalfIdx] < 0)
+ continue;
+ if (!isUndefOrEqual(Mask[i], Mask[FstHalfIdx]+HalfSize))
return false;
- if (Unary && m != ((s+i) & (e-1)))
+ }
+ for (int i = QuarterSize*3; i < NumElems; ++i) {
+ if (!isUndefOrInRange(Mask[i], NumElems+HalfSize, NumElems*2))
return false;
- if (!Unary && m != (s+i))
+ int FstHalfIdx = i-HalfSize;
+ if (Mask[FstHalfIdx] < 0)
+ continue;
+ if (!isUndefOrEqual(Mask[i], Mask[FstHalfIdx]+HalfSize))
return false;
+
}
+
return true;
}
-bool X86::isPALIGNRMask(ShuffleVectorSDNode *N) {
- SmallVector<int, 8> M;
- N->getMask(M);
- return ::isPALIGNRMask(M, N->getValueType(0), true);
+/// getShuffleVSHUFPSYImmediate - Return the appropriate immediate to shuffle
+/// the specified VECTOR_SHUFFLE mask with the VSHUFPSY instruction.
+static unsigned getShuffleVSHUFPSYImmediate(SDNode *N) {
+ ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
+ EVT VT = SVOp->getValueType(0);
+ int NumElems = VT.getVectorNumElements();
+
+ assert(NumElems == 8 && VT.getSizeInBits() == 256 &&
+ "Only supports v8i32 and v8f32 types");
+
+ int HalfSize = NumElems/2;
+ unsigned Mask = 0;
+ for (int i = 0; i != NumElems ; ++i) {
+ if (SVOp->getMaskElt(i) < 0)
+ continue;
+ // The mask of the first half must be equal to that of the second one.
+ unsigned Shamt = (i%HalfSize)*2;
+ unsigned Elt = SVOp->getMaskElt(i) % HalfSize;
+ Mask |= Elt << Shamt;
+ }
+
+ return Mask;
+}
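+// For illustration: the v8f32 mask <0, 1, 8, 9, 4, 5, 12, 13> encodes
+// Elt values 0, 1, 0, 1 at shift amounts 0, 2, 4, 6 (both halves of the
+// mask agree), giving an immediate of 0x44.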
+
+/// isVSHUFPDYMask - Return true if the specified VECTOR_SHUFFLE operand
+/// specifies a shuffle of elements that is suitable for input to 256-bit
+/// VSHUFPDY. This shuffle doesn't have the same restriction as the PS
+/// version: the mask of the second half isn't bound to the mask of the
+/// first one.
+static bool isVSHUFPDYMask(const SmallVectorImpl<int> &Mask, EVT VT,
+ const X86Subtarget *Subtarget) {
+ int NumElems = VT.getVectorNumElements();
+
+ if (!Subtarget->hasAVX() || VT.getSizeInBits() != 256)
+ return false;
+
+ if (NumElems != 4)
+ return false;
+
+ // VSHUFPDY divides the resulting vector into 4 chunks.
+ // The sources are also split into 4 chunks, and each destination
+ // chunk must come from a different source chunk.
+ //
+ // SRC1 => X3 X2 X1 X0
+ // SRC2 => Y3 Y2 Y1 Y0
+ //
+ // DST => Y2..Y3, X2..X3, Y1..Y0, X1..X0
+ //
+ int QuarterSize = NumElems/4;
+ int HalfSize = QuarterSize*2;
+ for (int i = 0; i < QuarterSize; ++i)
+ if (!isUndefOrInRange(Mask[i], 0, HalfSize))
+ return false;
+ for (int i = QuarterSize; i < QuarterSize*2; ++i)
+ if (!isUndefOrInRange(Mask[i], NumElems, NumElems+HalfSize))
+ return false;
+ for (int i = QuarterSize*2; i < QuarterSize*3; ++i)
+ if (!isUndefOrInRange(Mask[i], HalfSize, NumElems))
+ return false;
+ for (int i = QuarterSize*3; i < NumElems; ++i)
+ if (!isUndefOrInRange(Mask[i], NumElems+HalfSize, NumElems*2))
+ return false;
+
+ return true;
+}
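+// For illustration, on v4f64 (QuarterSize = 1, HalfSize = 2): the mask
+// <1, 5, 3, 7> is accepted since 1 falls in [0,2), 5 in [4,6), 3 in [2,4)
+// and 7 in [6,8); unlike the PS variant, the two halves are checked
+// independently.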
+
+/// getShuffleVSHUFPDYImmediate - Return the appropriate immediate to shuffle
+/// the specified VECTOR_SHUFFLE mask with the VSHUFPDY instruction.
+static unsigned getShuffleVSHUFPDYImmediate(SDNode *N) {
+ ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
+ EVT VT = SVOp->getValueType(0);
+ int NumElems = VT.getVectorNumElements();
+
+ assert(NumElems == 4 && VT.getSizeInBits() == 256 &&
+ "Only supports v4i64 and v4f64 types");
+
+ int HalfSize = NumElems/2;
+ unsigned Mask = 0;
+ for (int i = 0; i != NumElems ; ++i) {
+ if (SVOp->getMaskElt(i) < 0)
+ continue;
+ int Elt = SVOp->getMaskElt(i) % HalfSize;
+ Mask |= Elt << i;
+ }
+
+ return Mask;
}
/// isSHUFPMask - Return true if the specified VECTOR_SHUFFLE operand
-/// specifies a shuffle of elements that is suitable for input to SHUFP*.
+/// specifies a shuffle of elements that is suitable for input to 128-bit
+/// SHUFPS and SHUFPD.
static bool isSHUFPMask(const SmallVectorImpl<int> &Mask, EVT VT) {
int NumElems = VT.getVectorNumElements();
+
+ if (VT.getSizeInBits() != 128)
+ return false;
+
if (NumElems != 2 && NumElems != 4)
return false;
@@ -3204,7 +3428,13 @@ static bool isCommutedSHUFP(ShuffleVectorSDNode *N) {
/// isMOVHLPSMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a shuffle of elements that is suitable for input to MOVHLPS.
bool X86::isMOVHLPSMask(ShuffleVectorSDNode *N) {
- if (N->getValueType(0).getVectorNumElements() != 4)
+ EVT VT = N->getValueType(0);
+ unsigned NumElems = VT.getVectorNumElements();
+
+ if (VT.getSizeInBits() != 128)
+ return false;
+
+ if (NumElems != 4)
return false;
// Expect bit0 == 6, bit1 == 7, bit2 == 2, bit3 == 3
@@ -3218,15 +3448,19 @@ bool X86::isMOVHLPSMask(ShuffleVectorSDNode *N) {
/// of vector_shuffle v, v, <2, 3, 2, 3>, i.e. vector_shuffle v, undef,
/// <2, 3, 2, 3>
bool X86::isMOVHLPS_v_undef_Mask(ShuffleVectorSDNode *N) {
- unsigned NumElems = N->getValueType(0).getVectorNumElements();
+ EVT VT = N->getValueType(0);
+ unsigned NumElems = VT.getVectorNumElements();
+
+ if (VT.getSizeInBits() != 128)
+ return false;
if (NumElems != 4)
return false;
return isUndefOrEqual(N->getMaskElt(0), 2) &&
- isUndefOrEqual(N->getMaskElt(1), 3) &&
- isUndefOrEqual(N->getMaskElt(2), 2) &&
- isUndefOrEqual(N->getMaskElt(3), 3);
+ isUndefOrEqual(N->getMaskElt(1), 3) &&
+ isUndefOrEqual(N->getMaskElt(2), 2) &&
+ isUndefOrEqual(N->getMaskElt(3), 3);
}
/// isMOVLPMask - Return true if the specified VECTOR_SHUFFLE operand
@@ -3273,20 +3507,22 @@ bool X86::isMOVLHPSMask(ShuffleVectorSDNode *N) {
static bool isUNPCKLMask(const SmallVectorImpl<int> &Mask, EVT VT,
bool V2IsSplat = false) {
int NumElts = VT.getVectorNumElements();
- if (NumElts != 2 && NumElts != 4 && NumElts != 8 && NumElts != 16)
+
+ assert((VT.is128BitVector() || VT.is256BitVector()) &&
+ "Unsupported vector type for unpckh");
+
+ if (VT.getSizeInBits() == 256 && NumElts != 4 && NumElts != 8)
return false;
- // Handle vector lengths > 128 bits. Define a "section" as a set of
- // 128 bits. AVX defines UNPCK* to operate independently on 128-bit
- // sections.
- unsigned NumSections = VT.getSizeInBits() / 128;
- if (NumSections == 0 ) NumSections = 1; // Handle MMX
- unsigned NumSectionElts = NumElts / NumSections;
+ // Handle 128 and 256-bit vector lengths. AVX defines UNPCK* to operate
+ // independently on 128-bit lanes.
+ unsigned NumLanes = VT.getSizeInBits()/128;
+ unsigned NumLaneElts = NumElts/NumLanes;
unsigned Start = 0;
- unsigned End = NumSectionElts;
- for (unsigned s = 0; s < NumSections; ++s) {
- for (unsigned i = Start, j = s * NumSectionElts;
+ unsigned End = NumLaneElts;
+ for (unsigned s = 0; s < NumLanes; ++s) {
+ for (unsigned i = Start, j = s * NumLaneElts;
i != End;
i += 2, ++j) {
int BitI = Mask[i];
@@ -3302,8 +3538,8 @@ static bool isUNPCKLMask(const SmallVectorImpl<int> &Mask, EVT VT,
}
}
// Process the next 128 bits.
- Start += NumSectionElts;
- End += NumSectionElts;
+ Start += NumLaneElts;
+ End += NumLaneElts;
}
return true;
@@ -3320,21 +3556,38 @@ bool X86::isUNPCKLMask(ShuffleVectorSDNode *N, bool V2IsSplat) {
static bool isUNPCKHMask(const SmallVectorImpl<int> &Mask, EVT VT,
bool V2IsSplat = false) {
int NumElts = VT.getVectorNumElements();
- if (NumElts != 2 && NumElts != 4 && NumElts != 8 && NumElts != 16)
+
+ assert((VT.is128BitVector() || VT.is256BitVector()) &&
+ "Unsupported vector type for unpckh");
+
+ if (VT.getSizeInBits() == 256 && NumElts != 4 && NumElts != 8)
return false;
- for (int i = 0, j = 0; i != NumElts; i += 2, ++j) {
- int BitI = Mask[i];
- int BitI1 = Mask[i+1];
- if (!isUndefOrEqual(BitI, j + NumElts/2))
- return false;
- if (V2IsSplat) {
- if (isUndefOrEqual(BitI1, NumElts))
- return false;
- } else {
- if (!isUndefOrEqual(BitI1, j + NumElts/2 + NumElts))
+ // Handle 128 and 256-bit vector lengths. AVX defines UNPCK* to operate
+ // independently on 128-bit lanes.
+ unsigned NumLanes = VT.getSizeInBits()/128;
+ unsigned NumLaneElts = NumElts/NumLanes;
+
+ unsigned Start = 0;
+ unsigned End = NumLaneElts;
+ for (unsigned l = 0; l != NumLanes; ++l) {
+ for (unsigned i = Start, j = (l*NumLaneElts)+NumLaneElts/2;
+ i != End; i += 2, ++j) {
+ int BitI = Mask[i];
+ int BitI1 = Mask[i+1];
+ if (!isUndefOrEqual(BitI, j))
return false;
+ if (V2IsSplat) {
+ if (isUndefOrEqual(BitI1, NumElts))
+ return false;
+ } else {
+ if (!isUndefOrEqual(BitI1, j+NumElts))
+ return false;
+ }
}
+ // Process the next 128 bits.
+ Start += NumLaneElts;
+ End += NumLaneElts;
}
return true;
}
@@ -3353,16 +3606,21 @@ static bool isUNPCKL_v_undef_Mask(const SmallVectorImpl<int> &Mask, EVT VT) {
if (NumElems != 2 && NumElems != 4 && NumElems != 8 && NumElems != 16)
return false;
- // Handle vector lengths > 128 bits. Define a "section" as a set of
- // 128 bits. AVX defines UNPCK* to operate independently on 128-bit
- // sections.
- unsigned NumSections = VT.getSizeInBits() / 128;
- if (NumSections == 0 ) NumSections = 1; // Handle MMX
- unsigned NumSectionElts = NumElems / NumSections;
+ // For 256-bit i64/f64, use MOVDDUPY instead, so reject the matching pattern
+ // FIXME: Need a better way to get rid of this, there's no latency difference
+ // between UNPCKLPD and MOVDDUP, the latter should always be checked first and
+ // the former later. We should also remove the "_undef" special mask.
+ if (NumElems == 4 && VT.getSizeInBits() == 256)
+ return false;
+
+ // Handle 128 and 256-bit vector lengths. AVX defines UNPCK* to operate
+ // independently on 128-bit lanes.
+ unsigned NumLanes = VT.getSizeInBits() / 128;
+ unsigned NumLaneElts = NumElems / NumLanes;
- for (unsigned s = 0; s < NumSections; ++s) {
- for (unsigned i = s * NumSectionElts, j = s * NumSectionElts;
- i != NumSectionElts * (s + 1);
+ for (unsigned s = 0; s < NumLanes; ++s) {
+ for (unsigned i = s * NumLaneElts, j = s * NumLaneElts;
+ i != NumLaneElts * (s + 1);
i += 2, ++j) {
int BitI = Mask[i];
int BitI1 = Mask[i+1];
@@ -3433,6 +3691,189 @@ bool X86::isMOVLMask(ShuffleVectorSDNode *N) {
return ::isMOVLMask(M, N->getValueType(0));
}
+/// isVPERM2F128Mask - Match 256-bit shuffles where the elements are considered
+/// as permutations between 128-bit chunks or halves. As an example, take
+/// the shuffle below:
+/// vector_shuffle <4, 5, 6, 7, 12, 13, 14, 15>
+/// The first half comes from the second half of V1 and the second half
+/// from the second half of V2.
+static bool isVPERM2F128Mask(const SmallVectorImpl<int> &Mask, EVT VT,
+ const X86Subtarget *Subtarget) {
+ if (!Subtarget->hasAVX() || VT.getSizeInBits() != 256)
+ return false;
+
+ // The shuffle result is divided into half A and half B. In total the two
+ // sources have 4 halves, namely: C, D, E, F. The final values of A and
+ // B must come from C, D, E or F.
+ int HalfSize = VT.getVectorNumElements()/2;
+ bool MatchA = false, MatchB = false;
+
+ // Check if A comes from one of C, D, E, F.
+ for (int Half = 0; Half < 4; ++Half) {
+ if (isSequentialOrUndefInRange(Mask, 0, HalfSize, Half*HalfSize)) {
+ MatchA = true;
+ break;
+ }
+ }
+
+ // Check if B comes from one of C, D, E, F.
+ for (int Half = 0; Half < 4; ++Half) {
+ if (isSequentialOrUndefInRange(Mask, HalfSize, HalfSize, Half*HalfSize)) {
+ MatchB = true;
+ break;
+ }
+ }
+
+ return MatchA && MatchB;
+}
+
+/// getShuffleVPERM2F128Immediate - Return the appropriate immediate to shuffle
+/// the specified VECTOR_SHUFFLE mask with the VPERM2F128 instruction.
+static unsigned getShuffleVPERM2F128Immediate(SDNode *N) {
+ ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
+ EVT VT = SVOp->getValueType(0);
+
+ int HalfSize = VT.getVectorNumElements()/2;
+
+ int FstHalf = 0, SndHalf = 0;
+ for (int i = 0; i < HalfSize; ++i) {
+ if (SVOp->getMaskElt(i) > 0) {
+ FstHalf = SVOp->getMaskElt(i)/HalfSize;
+ break;
+ }
+ }
+ for (int i = HalfSize; i < HalfSize*2; ++i) {
+ if (SVOp->getMaskElt(i) > 0) {
+ SndHalf = SVOp->getMaskElt(i)/HalfSize;
+ break;
+ }
+ }
+
+ return (FstHalf | (SndHalf << 4));
+}
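+// For illustration: for the v8i32 mask <4, 5, 6, 7, 12, 13, 14, 15> from
+// the isVPERM2F128Mask comment above, HalfSize = 4, so FstHalf = 4/4 = 1
+// and SndHalf = 12/4 = 3, yielding the immediate 1 | (3 << 4) = 0x31.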
+
+/// isVPERMILPDMask - Return true if the specified VECTOR_SHUFFLE operand
+/// specifies a shuffle of elements that is suitable for input to VPERMILPD*.
+/// Note that VPERMIL mask matching differs depending on whether the
+/// underlying type is 32 or 64 bits wide. In VPERMILPS the high half of
+/// the mask should point to the same elements as the low half, but
+/// relative to the higher half of the source. In VPERMILPD the two lanes
+/// can be shuffled independently of each other, with the restriction that
+/// lanes can't be crossed.
+static bool isVPERMILPDMask(const SmallVectorImpl<int> &Mask, EVT VT,
+ const X86Subtarget *Subtarget) {
+ int NumElts = VT.getVectorNumElements();
+ int NumLanes = VT.getSizeInBits()/128;
+
+ if (!Subtarget->hasAVX())
+ return false;
+
+ // Only match 256-bit with 64-bit types
+ if (VT.getSizeInBits() != 256 || NumElts != 4)
+ return false;
+
+ // The mask on the high lane is independent of the low. Both can match
+ // any element in inside its own lane, but can't cross.
+ int LaneSize = NumElts/NumLanes;
+ for (int l = 0; l < NumLanes; ++l)
+ for (int i = l*LaneSize; i < LaneSize*(l+1); ++i) {
+ int LaneStart = l*LaneSize;
+ if (!isUndefOrInRange(Mask[i], LaneStart, LaneStart+LaneSize))
+ return false;
+ }
+
+ return true;
+}
+
+/// isVPERMILPSMask - Return true if the specified VECTOR_SHUFFLE operand
+/// specifies a shuffle of elements that is suitable for input to VPERMILPS*.
+/// Note that VPERMIL mask matching differs depending on whether the
+/// underlying type is 32 or 64 bits wide. In VPERMILPS the high half of
+/// the mask should point to the same elements as the low half, but
+/// relative to the higher half of the source. In VPERMILPD the two lanes
+/// can be shuffled independently of each other, with the restriction that
+/// lanes can't be crossed.
+static bool isVPERMILPSMask(const SmallVectorImpl<int> &Mask, EVT VT,
+ const X86Subtarget *Subtarget) {
+ unsigned NumElts = VT.getVectorNumElements();
+ unsigned NumLanes = VT.getSizeInBits()/128;
+
+ if (!Subtarget->hasAVX())
+ return false;
+
+ // Only match 256-bit with 32-bit types
+ if (VT.getSizeInBits() != 256 || NumElts != 8)
+ return false;
+
+ // The mask on the high lane should be the same as the low one. Actually,
+ // they can differ if the corresponding index in one lane is undef while
+ // the other stays in range.
+ int LaneSize = NumElts/NumLanes;
+ for (int i = 0; i < LaneSize; ++i) {
+ int HighElt = i+LaneSize;
+ bool HighValid = isUndefOrInRange(Mask[HighElt], LaneSize, NumElts);
+ bool LowValid = isUndefOrInRange(Mask[i], 0, LaneSize);
+
+ if (!HighValid || !LowValid)
+ return false;
+ if (Mask[i] < 0 || Mask[HighElt] < 0)
+ continue;
+ if (Mask[HighElt]-Mask[i] != LaneSize)
+ return false;
+ }
+
+ return true;
+}
+
+/// getShuffleVPERMILPSImmediate - Return the appropriate immediate to shuffle
+/// the specified VECTOR_SHUFFLE mask with VPERMILPS* instructions.
+static unsigned getShuffleVPERMILPSImmediate(SDNode *N) {
+ ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
+ EVT VT = SVOp->getValueType(0);
+
+ int NumElts = VT.getVectorNumElements();
+ int NumLanes = VT.getSizeInBits()/128;
+ int LaneSize = NumElts/NumLanes;
+
+ // Although the mask is equal for both lanes, scan both of them to handle
+ // the cases where a mask element is undef on the first half but valid on
+ // the second. This covers pathological cases such as:
+ // shuffle <u, 0, 1, 2, 4, 4, 5, 6>, which is completely valid.
+ unsigned Mask = 0;
+ for (int l = 0; l < NumLanes; ++l) {
+ for (int i = 0; i < LaneSize; ++i) {
+ int MaskElt = SVOp->getMaskElt(i+(l*LaneSize));
+ if (MaskElt < 0)
+ continue;
+ if (MaskElt >= LaneSize)
+ MaskElt -= LaneSize;
+ Mask |= MaskElt << (i*2);
+ }
+ }
+
+ return Mask;
+}
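+// For illustration: the v8f32 mask <1, 1, 1, 1, 5, 5, 5, 5> reduces to
+// element 1 in every slot of both lanes (5 - LaneSize = 1), so each of the
+// four 2-bit fields holds 1 and the immediate is 0x55.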
+
+/// getShuffleVPERMILPDImmediate - Return the appropriate immediate to shuffle
+/// the specified VECTOR_SHUFFLE mask with VPERMILPD* instructions.
+static unsigned getShuffleVPERMILPDImmediate(SDNode *N) {
+ ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
+ EVT VT = SVOp->getValueType(0);
+
+ int NumElts = VT.getVectorNumElements();
+ int NumLanes = VT.getSizeInBits()/128;
+
+ unsigned Mask = 0;
+ int LaneSize = NumElts/NumLanes;
+ for (int l = 0; l < NumLanes; ++l)
+ for (int i = l*LaneSize; i < LaneSize*(l+1); ++i) {
+ int MaskElt = SVOp->getMaskElt(i);
+ if (MaskElt < 0)
+ continue;
+ Mask |= (MaskElt-l*LaneSize) << i;
+ }
+
+ return Mask;
+}
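+// For illustration: the v4f64 mask <1, 0, 3, 2> swaps the elements within
+// each 128-bit lane; the in-lane offsets are 1, 0, 1, 0 at bit positions
+// 0..3, so the immediate is 0b0101 = 0x5.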
+
/// isCommutedMOVL - Returns true if the shuffle mask is except the reverse
/// of what x86 movss want. X86 movs requires the lowest element to be lowest
/// element of vector 2 and the other elements to come from vector 1 in order.
@@ -3463,58 +3904,92 @@ static bool isCommutedMOVL(ShuffleVectorSDNode *N, bool V2IsSplat = false,
/// isMOVSHDUPMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a shuffle of elements that is suitable for input to MOVSHDUP.
-bool X86::isMOVSHDUPMask(ShuffleVectorSDNode *N) {
- if (N->getValueType(0).getVectorNumElements() != 4)
+/// Masks to match: <1, 1, 3, 3> or <1, 1, 3, 3, 5, 5, 7, 7>
+bool X86::isMOVSHDUPMask(ShuffleVectorSDNode *N,
+ const X86Subtarget *Subtarget) {
+ if (!Subtarget->hasSSE3() && !Subtarget->hasAVX())
return false;
- // Expect 1, 1, 3, 3
- for (unsigned i = 0; i < 2; ++i) {
- int Elt = N->getMaskElt(i);
- if (Elt >= 0 && Elt != 1)
- return false;
- }
+ // The second vector must be undef
+ if (N->getOperand(1).getOpcode() != ISD::UNDEF)
+ return false;
- bool HasHi = false;
- for (unsigned i = 2; i < 4; ++i) {
- int Elt = N->getMaskElt(i);
- if (Elt >= 0 && Elt != 3)
+ EVT VT = N->getValueType(0);
+ unsigned NumElems = VT.getVectorNumElements();
+
+ if ((VT.getSizeInBits() == 128 && NumElems != 4) ||
+ (VT.getSizeInBits() == 256 && NumElems != 8))
+ return false;
+
+ // "i+1" is the value the indexed mask element must have
+ for (unsigned i = 0; i < NumElems; i += 2)
+ if (!isUndefOrEqual(N->getMaskElt(i), i+1) ||
+ !isUndefOrEqual(N->getMaskElt(i+1), i+1))
return false;
- if (Elt == 3)
- HasHi = true;
- }
- // Don't use movshdup if it can be done with a shufps.
- // FIXME: verify that matching u, u, 3, 3 is what we want.
- return HasHi;
+
+ return true;
}
/// isMOVSLDUPMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a shuffle of elements that is suitable for input to MOVSLDUP.
-bool X86::isMOVSLDUPMask(ShuffleVectorSDNode *N) {
- if (N->getValueType(0).getVectorNumElements() != 4)
+/// Masks to match: <0, 0, 2, 2> or <0, 0, 2, 2, 4, 4, 6, 6>
+bool X86::isMOVSLDUPMask(ShuffleVectorSDNode *N,
+ const X86Subtarget *Subtarget) {
+ if (!Subtarget->hasSSE3() && !Subtarget->hasAVX())
+ return false;
+
+ // The second vector must be undef
+ if (N->getOperand(1).getOpcode() != ISD::UNDEF)
+ return false;
+
+ EVT VT = N->getValueType(0);
+ unsigned NumElems = VT.getVectorNumElements();
+
+ if ((VT.getSizeInBits() == 128 && NumElems != 4) ||
+ (VT.getSizeInBits() == 256 && NumElems != 8))
return false;
- // Expect 0, 0, 2, 2
- for (unsigned i = 0; i < 2; ++i)
- if (N->getMaskElt(i) > 0)
+ // "i" is the value the indexed mask element must have
+ for (unsigned i = 0; i < NumElems; i += 2)
+ if (!isUndefOrEqual(N->getMaskElt(i), i) ||
+ !isUndefOrEqual(N->getMaskElt(i+1), i))
return false;
- bool HasHi = false;
- for (unsigned i = 2; i < 4; ++i) {
- int Elt = N->getMaskElt(i);
- if (Elt >= 0 && Elt != 2)
+ return true;
+}
+
+/// isMOVDDUPYMask - Return true if the specified VECTOR_SHUFFLE operand
+/// specifies a shuffle of elements that is suitable for input to 256-bit
+/// version of MOVDDUP.
+static bool isMOVDDUPYMask(ShuffleVectorSDNode *N,
+ const X86Subtarget *Subtarget) {
+ EVT VT = N->getValueType(0);
+ int NumElts = VT.getVectorNumElements();
+ bool V2IsUndef = N->getOperand(1).getOpcode() == ISD::UNDEF;
+
+ if (!Subtarget->hasAVX() || VT.getSizeInBits() != 256 ||
+ !V2IsUndef || NumElts != 4)
+ return false;
+
+ for (int i = 0; i != NumElts/2; ++i)
+ if (!isUndefOrEqual(N->getMaskElt(i), 0))
return false;
- if (Elt == 2)
- HasHi = true;
- }
- // Don't use movsldup if it can be done with a shufps.
- return HasHi;
+ for (int i = NumElts/2; i != NumElts; ++i)
+ if (!isUndefOrEqual(N->getMaskElt(i), NumElts/2))
+ return false;
+ return true;
}
/// isMOVDDUPMask - Return true if the specified VECTOR_SHUFFLE operand
-/// specifies a shuffle of elements that is suitable for input to MOVDDUP.
+/// specifies a shuffle of elements that is suitable for input to 128-bit
+/// version of MOVDDUP.
bool X86::isMOVDDUPMask(ShuffleVectorSDNode *N) {
- int e = N->getValueType(0).getVectorNumElements() / 2;
+ EVT VT = N->getValueType(0);
+
+ if (VT.getSizeInBits() != 128)
+ return false;
+ int e = VT.getVectorNumElements() / 2;
for (int i = 0; i < e; ++i)
if (!isUndefOrEqual(N->getMaskElt(i), i))
return false;
@@ -3627,6 +4102,7 @@ unsigned X86::getShufflePALIGNRImmediate(SDNode *N) {
if (Val >= 0)
break;
}
+ assert(Val - i > 0 && "PALIGNR imm should be positive");
return (Val - i) * EltSize;
}
@@ -3644,7 +4120,6 @@ unsigned X86::getExtractVEXTRACTF128Immediate(SDNode *N) {
EVT ElVT = VecVT.getVectorElementType();
unsigned NumElemsPerChunk = 128 / ElVT.getSizeInBits();
-
return Index / NumElemsPerChunk;
}
@@ -3662,7 +4137,6 @@ unsigned X86::getInsertVINSERTF128Immediate(SDNode *N) {
EVT ElVT = VecVT.getVectorElementType();
unsigned NumElemsPerChunk = 128 / ElVT.getSizeInBits();
-
return Index / NumElemsPerChunk;
}
@@ -3716,7 +4190,10 @@ static void CommuteVectorShuffleMask(SmallVectorImpl<int> &Mask, EVT VT) {
/// V1 (and in order), and the upper half elements should come from the upper
/// half of V2 (and in order).
static bool ShouldXformToMOVHLPS(ShuffleVectorSDNode *Op) {
- if (Op->getValueType(0).getVectorNumElements() != 4)
+ EVT VT = Op->getValueType(0);
+ if (VT.getSizeInBits() != 128)
+ return false;
+ if (VT.getVectorNumElements() != 4)
return false;
for (unsigned i = 0, e = 2; i != e; ++i)
if (!isUndefOrEqual(Op->getMaskElt(i), i+2))
@@ -3748,6 +4225,10 @@ static bool isScalarLoadToVector(SDNode *N, LoadSDNode **LD = NULL) {
/// MOVLP, it must be either a vector load or a scalar load to vector.
static bool ShouldXformToMOVLP(SDNode *V1, SDNode *V2,
ShuffleVectorSDNode *Op) {
+ EVT VT = Op->getValueType(0);
+ if (VT.getSizeInBits() != 128)
+ return false;
+
if (!ISD::isNON_EXTLoad(V1) && !isScalarLoadToVector(V1))
return false;
// Is V2 is a vector load, don't do this transformation. We will try to use
@@ -3755,7 +4236,7 @@ static bool ShouldXformToMOVLP(SDNode *V1, SDNode *V2,
if (ISD::isNON_EXTLoad(V2))
return false;
- unsigned NumElems = Op->getValueType(0).getVectorNumElements();
+ unsigned NumElems = VT.getVectorNumElements();
if (NumElems != 2 && NumElems != 4)
return false;
@@ -3811,7 +4292,7 @@ static bool isZeroShuffle(ShuffleVectorSDNode *N) {
/// getZeroVector - Returns a vector of specified type with all zero elements.
///
-static SDValue getZeroVector(EVT VT, bool HasSSE2, SelectionDAG &DAG,
+static SDValue getZeroVector(EVT VT, bool HasXMMInt, SelectionDAG &DAG,
DebugLoc dl) {
assert(VT.isVector() && "Expected a vector type");
@@ -3819,7 +4300,7 @@ static SDValue getZeroVector(EVT VT, bool HasSSE2, SelectionDAG &DAG,
// to their dest type. This ensures they get CSE'd.
SDValue Vec;
if (VT.getSizeInBits() == 128) { // SSE
- if (HasSSE2) { // SSE2
+ if (HasXMMInt) { // SSE2
SDValue Cst = DAG.getTargetConstant(0, MVT::i32);
Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, Cst, Cst, Cst, Cst);
} else { // SSE1
@@ -3838,21 +4319,25 @@ static SDValue getZeroVector(EVT VT, bool HasSSE2, SelectionDAG &DAG,
}
/// getOnesVector - Returns a vector of specified type with all bits set.
-/// Always build ones vectors as <4 x i32> or <8 x i32> bitcasted to
-/// their original type, ensuring they get CSE'd.
+/// Always build ones vectors as <4 x i32>. For 256-bit types, insert two
+/// <4 x i32> into an <8 x i32> appropriately, then bitcast to the
+/// original type, ensuring they get CSE'd.
static SDValue getOnesVector(EVT VT, SelectionDAG &DAG, DebugLoc dl) {
assert(VT.isVector() && "Expected a vector type");
assert((VT.is128BitVector() || VT.is256BitVector())
&& "Expected a 128-bit or 256-bit vector type");
SDValue Cst = DAG.getTargetConstant(~0U, MVT::i32);
+ SDValue Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
+ Cst, Cst, Cst, Cst);
- SDValue Vec;
if (VT.is256BitVector()) {
- SDValue Ops[] = { Cst, Cst, Cst, Cst, Cst, Cst, Cst, Cst };
- Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v8i32, Ops, 8);
- } else
- Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, Cst, Cst, Cst, Cst);
+ SDValue InsV = Insert128BitVector(DAG.getNode(ISD::UNDEF, dl, MVT::v8i32),
+ Vec, DAG.getConstant(0, MVT::i32), DAG, dl);
+ Vec = Insert128BitVector(InsV, Vec,
+ DAG.getConstant(4 /* NumElems/2 */, MVT::i32), DAG, dl);
+ }
+
return DAG.getNode(ISD::BITCAST, dl, VT, Vec);
}
@@ -3902,7 +4387,7 @@ static SDValue getUnpackl(SelectionDAG &DAG, DebugLoc dl, EVT VT, SDValue V1,
return DAG.getVectorShuffle(VT, dl, V1, V2, &Mask[0]);
}
-/// getUnpackhMask - Returns a vector_shuffle node for an unpackh operation.
+/// getUnpackh - Returns a vector_shuffle node for an unpackh operation.
static SDValue getUnpackh(SelectionDAG &DAG, DebugLoc dl, EVT VT, SDValue V1,
SDValue V2) {
unsigned NumElems = VT.getVectorNumElements();
@@ -3915,31 +4400,95 @@ static SDValue getUnpackh(SelectionDAG &DAG, DebugLoc dl, EVT VT, SDValue V1,
return DAG.getVectorShuffle(VT, dl, V1, V2, &Mask[0]);
}
-/// PromoteSplat - Promote a splat of v4i32, v8i16 or v16i8 to v4f32.
-static SDValue PromoteSplat(ShuffleVectorSDNode *SV, SelectionDAG &DAG) {
- EVT PVT = MVT::v4f32;
- EVT VT = SV->getValueType(0);
- DebugLoc dl = SV->getDebugLoc();
- SDValue V1 = SV->getOperand(0);
+// PromoteSplati8i16 - i16 and i8 vector types can't be used directly by
+// a generic shuffle instruction because the target has no such instructions.
+// Generate shuffles which repeat i16 and i8 several times until they can be
+// represented by v4f32 and then be manipulated by target supported shuffles.
+static SDValue PromoteSplati8i16(SDValue V, SelectionDAG &DAG, int &EltNo) {
+ EVT VT = V.getValueType();
int NumElems = VT.getVectorNumElements();
- int EltNo = SV->getSplatIndex();
+ DebugLoc dl = V.getDebugLoc();
- // unpack elements to the correct location
while (NumElems > 4) {
if (EltNo < NumElems/2) {
- V1 = getUnpackl(DAG, dl, VT, V1, V1);
+ V = getUnpackl(DAG, dl, VT, V, V);
} else {
- V1 = getUnpackh(DAG, dl, VT, V1, V1);
+ V = getUnpackh(DAG, dl, VT, V, V);
EltNo -= NumElems/2;
}
NumElems >>= 1;
}
+ return V;
+}
+
+/// getLegalSplat - Generate a legal splat with supported x86 shuffles
+static SDValue getLegalSplat(SelectionDAG &DAG, SDValue V, int EltNo) {
+ EVT VT = V.getValueType();
+ DebugLoc dl = V.getDebugLoc();
+ assert((VT.getSizeInBits() == 128 || VT.getSizeInBits() == 256)
+ && "Vector size not supported");
+
+ if (VT.getSizeInBits() == 128) {
+ V = DAG.getNode(ISD::BITCAST, dl, MVT::v4f32, V);
+ int SplatMask[4] = { EltNo, EltNo, EltNo, EltNo };
+ V = DAG.getVectorShuffle(MVT::v4f32, dl, V, DAG.getUNDEF(MVT::v4f32),
+ &SplatMask[0]);
+ } else {
+ // To use VPERMILPS to splat scalars, the second half of the indices must
+ // refer to the higher part, which is a duplication of the lower one,
+ // because VPERMILPS can only handle in-lane permutations.
+ int SplatMask[8] = { EltNo, EltNo, EltNo, EltNo,
+ EltNo+4, EltNo+4, EltNo+4, EltNo+4 };
+
+ V = DAG.getNode(ISD::BITCAST, dl, MVT::v8f32, V);
+ V = DAG.getVectorShuffle(MVT::v8f32, dl, V, DAG.getUNDEF(MVT::v8f32),
+ &SplatMask[0]);
+ }
+
+ return DAG.getNode(ISD::BITCAST, dl, VT, V);
+}
+
+/// PromoteSplat - Splat is promoted to target supported vector shuffles.
+static SDValue PromoteSplat(ShuffleVectorSDNode *SV, SelectionDAG &DAG) {
+ EVT SrcVT = SV->getValueType(0);
+ SDValue V1 = SV->getOperand(0);
+ DebugLoc dl = SV->getDebugLoc();
+
+ int EltNo = SV->getSplatIndex();
+ int NumElems = SrcVT.getVectorNumElements();
+ unsigned Size = SrcVT.getSizeInBits();
+
+ assert(((Size == 128 && NumElems > 4) || Size == 256) &&
+ "Unknown how to promote splat for type");
+
+ // Extract the 128-bit part containing the splat element and update
+ // the splat element index when it refers to the higher register.
+ if (Size == 256) {
+ unsigned Idx = (EltNo > NumElems/2) ? NumElems/2 : 0;
+ V1 = Extract128BitVector(V1, DAG.getConstant(Idx, MVT::i32), DAG, dl);
+ if (Idx > 0)
+ EltNo -= NumElems/2;
+ }
+
+ // All i16 and i8 vector types can't be used directly by a generic shuffle
+ // instruction because the target has no such instruction. Generate shuffles
+ // which repeat i16 and i8 several times until they fit in i32, and then can
+ // be manipulated by target supported shuffles.
+ EVT EltVT = SrcVT.getVectorElementType();
+ if (EltVT == MVT::i8 || EltVT == MVT::i16)
+ V1 = PromoteSplati8i16(V1, DAG, EltNo);
+
+ // Recreate the 256-bit vector and place the same 128-bit vector
+ // into the low and high part. This is necessary because we want
+ // to use VPERM* to shuffle the vectors.
+ if (Size == 256) {
+ SDValue InsV = Insert128BitVector(DAG.getUNDEF(SrcVT), V1,
+ DAG.getConstant(0, MVT::i32), DAG, dl);
+ V1 = Insert128BitVector(InsV, V1,
+ DAG.getConstant(NumElems/2, MVT::i32), DAG, dl);
+ }
- // Perform the splat.
- int SplatMask[4] = { EltNo, EltNo, EltNo, EltNo };
- V1 = DAG.getNode(ISD::BITCAST, dl, PVT, V1);
- V1 = DAG.getVectorShuffle(PVT, dl, V1, DAG.getUNDEF(PVT), &SplatMask[0]);
- return DAG.getNode(ISD::BITCAST, dl, VT, V1);
+ return getLegalSplat(DAG, V1, EltNo);
}
/// getShuffleVectorZeroOrUndef - Return a vector_shuffle of the specified
@@ -3947,11 +4496,11 @@ static SDValue PromoteSplat(ShuffleVectorSDNode *SV, SelectionDAG &DAG) {
/// element of V2 is swizzled into the zero/undef vector, landing at element
/// Idx. This produces a shuffle mask like 4,1,2,3 (idx=0) or 0,1,2,4 (idx=3).
static SDValue getShuffleVectorZeroOrUndef(SDValue V2, unsigned Idx,
- bool isZero, bool HasSSE2,
- SelectionDAG &DAG) {
+ bool isZero, bool HasXMMInt,
+ SelectionDAG &DAG) {
EVT VT = V2.getValueType();
SDValue V1 = isZero
- ? getZeroVector(VT, HasSSE2, DAG, V2.getDebugLoc()) : DAG.getUNDEF(VT);
+ ? getZeroVector(VT, HasXMMInt, DAG, V2.getDebugLoc()) : DAG.getUNDEF(VT);
unsigned NumElems = VT.getVectorNumElements();
SmallVector<int, 16> MaskVec;
for (unsigned i = 0; i != NumElems; ++i)
@@ -4005,6 +4554,8 @@ static SDValue getShuffleScalarElt(SDNode *N, int Index, SelectionDAG &DAG,
break;
case X86ISD::UNPCKHPS:
case X86ISD::UNPCKHPD:
+ case X86ISD::VUNPCKHPSY:
+ case X86ISD::VUNPCKHPDY:
DecodeUNPCKHPMask(NumElems, ShuffleMask);
break;
case X86ISD::PUNPCKLBW:
@@ -4015,8 +4566,6 @@ static SDValue getShuffleScalarElt(SDNode *N, int Index, SelectionDAG &DAG,
break;
case X86ISD::UNPCKLPS:
case X86ISD::UNPCKLPD:
- case X86ISD::VUNPCKLPS:
- case X86ISD::VUNPCKLPD:
case X86ISD::VUNPCKLPSY:
case X86ISD::VUNPCKLPDY:
DecodeUNPCKLPMask(VT, ShuffleMask);
@@ -4052,8 +4601,41 @@ static SDValue getShuffleScalarElt(SDNode *N, int Index, SelectionDAG &DAG,
return getShuffleScalarElt(V.getOperand(OpNum).getNode(), Index, DAG,
Depth+1);
}
+ case X86ISD::VPERMILPS:
+ ImmN = N->getOperand(N->getNumOperands()-1);
+ DecodeVPERMILPSMask(4, cast<ConstantSDNode>(ImmN)->getZExtValue(),
+ ShuffleMask);
+ break;
+ case X86ISD::VPERMILPSY:
+ ImmN = N->getOperand(N->getNumOperands()-1);
+ DecodeVPERMILPSMask(8, cast<ConstantSDNode>(ImmN)->getZExtValue(),
+ ShuffleMask);
+ break;
+ case X86ISD::VPERMILPD:
+ ImmN = N->getOperand(N->getNumOperands()-1);
+ DecodeVPERMILPDMask(2, cast<ConstantSDNode>(ImmN)->getZExtValue(),
+ ShuffleMask);
+ break;
+ case X86ISD::VPERMILPDY:
+ ImmN = N->getOperand(N->getNumOperands()-1);
+ DecodeVPERMILPDMask(4, cast<ConstantSDNode>(ImmN)->getZExtValue(),
+ ShuffleMask);
+ break;
+ case X86ISD::VPERM2F128:
+ ImmN = N->getOperand(N->getNumOperands()-1);
+ DecodeVPERM2F128Mask(VT, cast<ConstantSDNode>(ImmN)->getZExtValue(),
+ ShuffleMask);
+ break;
+ case X86ISD::MOVDDUP:
+ case X86ISD::MOVLHPD:
+ case X86ISD::MOVLPD:
+ case X86ISD::MOVLPS:
+ case X86ISD::MOVSHDUP:
+ case X86ISD::MOVSLDUP:
+ case X86ISD::PALIGN:
+ return SDValue(); // Not yet implemented.
default:
- assert("not implemented for target shuffle node");
+ assert(0 && "unknown target shuffle node");
return SDValue();
}
@@ -4205,6 +4787,11 @@ static bool isVectorShiftLeft(ShuffleVectorSDNode *SVOp, SelectionDAG &DAG,
/// logical left or right shift of a vector.
static bool isVectorShift(ShuffleVectorSDNode *SVOp, SelectionDAG &DAG,
bool &isLeft, SDValue &ShVal, unsigned &ShAmt) {
+ // Although the logic below support any bitwidth size, there are no
+ // shift instructions which handle more than 128-bit vectors.
+ if (SVOp->getValueType(0).getSizeInBits() > 128)
+ return false;
+
if (isVectorShiftLeft(SVOp, DAG, isLeft, ShVal, ShAmt) ||
isVectorShiftRight(SVOp, DAG, isLeft, ShVal, ShAmt))
return true;
@@ -4295,6 +4882,7 @@ static SDValue LowerBuildVectorv8i16(SDValue Op, unsigned NonZeros,
static SDValue getVShift(bool isLeft, EVT VT, SDValue SrcOp,
unsigned NumBits, SelectionDAG &DAG,
const TargetLowering &TLI, DebugLoc dl) {
+ assert(VT.getSizeInBits() == 128 && "Unknown type for VShift");
EVT ShVT = MVT::v2i64;
unsigned Opc = isLeft ? X86ISD::VSHL : X86ISD::VSRL;
SrcOp = DAG.getNode(ISD::BITCAST, dl, ShVT, SrcOp);
@@ -4333,42 +4921,52 @@ X86TargetLowering::LowerAsSplatVectorLoad(SDValue SrcOp, EVT VT, DebugLoc dl,
return SDValue();
}
+ // FIXME: 256-bit vector instructions don't require a strict alignment,
+ // improve this code to support it better.
+ unsigned RequiredAlign = VT.getSizeInBits()/8;
SDValue Chain = LD->getChain();
- // Make sure the stack object alignment is at least 16.
+ // Make sure the stack object alignment is at least 16 or 32.
MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
- if (DAG.InferPtrAlignment(Ptr) < 16) {
+ if (DAG.InferPtrAlignment(Ptr) < RequiredAlign) {
if (MFI->isFixedObjectIndex(FI)) {
// Can't change the alignment. FIXME: It's possible to compute
// the exact stack offset and reference FI + adjust offset instead.
// If someone *really* cares about this. That's the way to implement it.
return SDValue();
} else {
- MFI->setObjectAlignment(FI, 16);
+ MFI->setObjectAlignment(FI, RequiredAlign);
}
}
- // (Offset % 16) must be multiple of 4. Then address is then
+ // (Offset % 16 or 32) must be a multiple of 4. The address is then
// Ptr + (Offset & ~15).
if (Offset < 0)
return SDValue();
- if ((Offset % 16) & 3)
+ if ((Offset % RequiredAlign) & 3)
return SDValue();
- int64_t StartOffset = Offset & ~15;
+ int64_t StartOffset = Offset & ~(RequiredAlign-1);
if (StartOffset)
Ptr = DAG.getNode(ISD::ADD, Ptr.getDebugLoc(), Ptr.getValueType(),
Ptr,DAG.getConstant(StartOffset, Ptr.getValueType()));
int EltNo = (Offset - StartOffset) >> 2;
- int Mask[4] = { EltNo, EltNo, EltNo, EltNo };
- EVT VT = (PVT == MVT::i32) ? MVT::v4i32 : MVT::v4f32;
- SDValue V1 = DAG.getLoad(VT, dl, Chain, Ptr,
+ int NumElems = VT.getVectorNumElements();
+
+ EVT CanonVT = VT.getSizeInBits() == 128 ? MVT::v4i32 : MVT::v8i32;
+ EVT NVT = EVT::getVectorVT(*DAG.getContext(), PVT, NumElems);
+ SDValue V1 = DAG.getLoad(NVT, dl, Chain, Ptr,
LD->getPointerInfo().getWithOffset(StartOffset),
false, false, 0);
- // Canonicalize it to a v4i32 shuffle.
- V1 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1);
- return DAG.getNode(ISD::BITCAST, dl, VT,
- DAG.getVectorShuffle(MVT::v4i32, dl, V1,
- DAG.getUNDEF(MVT::v4i32),&Mask[0]));
+
+ // Canonicalize it to a v4i32 or v8i32 shuffle.
+ SmallVector<int, 8> Mask;
+ for (int i = 0; i < NumElems; ++i)
+ Mask.push_back(EltNo);
+
+ V1 = DAG.getNode(ISD::BITCAST, dl, CanonVT, V1);
+ return DAG.getNode(ISD::BITCAST, dl, NVT,
+ DAG.getVectorShuffle(CanonVT, dl, V1,
+ DAG.getUNDEF(CanonVT),&Mask[0]));
}
return SDValue();
@@ -4428,12 +5026,16 @@ static SDValue EltsFromConsecutiveLoads(EVT VT, SmallVectorImpl<SDValue> &Elts,
LDBase->getPointerInfo(),
LDBase->isVolatile(), LDBase->isNonTemporal(),
LDBase->getAlignment());
- } else if (NumElems == 4 && LastLoadedElt == 1) {
+ } else if (NumElems == 4 && LastLoadedElt == 1 &&
+ DAG.getTargetLoweringInfo().isTypeLegal(MVT::v2i64)) {
SDVTList Tys = DAG.getVTList(MVT::v2i64, MVT::Other);
SDValue Ops[] = { LDBase->getChain(), LDBase->getBasePtr() };
- SDValue ResNode = DAG.getMemIntrinsicNode(X86ISD::VZEXT_LOAD, DL, Tys,
- Ops, 2, MVT::i32,
- LDBase->getMemOperand());
+ SDValue ResNode =
+ DAG.getMemIntrinsicNode(X86ISD::VZEXT_LOAD, DL, Tys, Ops, 2, MVT::i64,
+ LDBase->getPointerInfo(),
+ LDBase->getAlignment(),
+ false/*isVolatile*/, true/*ReadMem*/,
+ false/*WriteMem*/);
return DAG.getNode(ISD::BITCAST, DL, VT, ResNode);
}
return SDValue();
@@ -4445,47 +5047,26 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
EVT VT = Op.getValueType();
EVT ExtVT = VT.getVectorElementType();
-
unsigned NumElems = Op.getNumOperands();
- // For AVX-length vectors, build the individual 128-bit pieces and
- // use shuffles to put them in place.
- if (VT.getSizeInBits() > 256 &&
- Subtarget->hasAVX() &&
- !ISD::isBuildVectorAllZeros(Op.getNode())) {
- SmallVector<SDValue, 8> V;
- V.resize(NumElems);
- for (unsigned i = 0; i < NumElems; ++i) {
- V[i] = Op.getOperand(i);
- }
-
- EVT HVT = EVT::getVectorVT(*DAG.getContext(), ExtVT, NumElems/2);
-
- // Build the lower subvector.
- SDValue Lower = DAG.getNode(ISD::BUILD_VECTOR, dl, HVT, &V[0], NumElems/2);
- // Build the upper subvector.
- SDValue Upper = DAG.getNode(ISD::BUILD_VECTOR, dl, HVT, &V[NumElems / 2],
- NumElems/2);
+ // Vectors containing all zeros can be matched by pxor and xorps later
+ if (ISD::isBuildVectorAllZeros(Op.getNode())) {
+ // Canonicalize this to <4 x i32> to 1) ensure the zero vectors are CSE'd
+ // and 2) ensure that i64 scalars are eliminated on x86-32 hosts.
+ if (Op.getValueType() == MVT::v4i32 ||
+ Op.getValueType() == MVT::v8i32)
+ return Op;
- return ConcatVectors(Lower, Upper, DAG);
+ return getZeroVector(Op.getValueType(), Subtarget->hasXMMInt(), DAG, dl);
}
- // All zero's:
- // - pxor (SSE2), xorps (SSE1), vpxor (128 AVX), xorp[s|d] (256 AVX)
- // All one's:
- // - pcmpeqd (SSE2 and 128 AVX), fallback to constant pools (256 AVX)
- if (ISD::isBuildVectorAllZeros(Op.getNode()) ||
- ISD::isBuildVectorAllOnes(Op.getNode())) {
- // Canonicalize this to <4 x i32> or <8 x 32> (SSE) to
- // 1) ensure the zero vectors are CSE'd, and 2) ensure that i64 scalars are
- // eliminated on x86-32 hosts.
- if (Op.getValueType() == MVT::v4i32 ||
- Op.getValueType() == MVT::v8i32)
+ // Vectors containing all ones can be matched by pcmpeqd on 128-bit width
+ // vectors or broken into v4i32 operations on 256-bit vectors.
+ if (ISD::isBuildVectorAllOnes(Op.getNode())) {
+ if (Op.getValueType() == MVT::v4i32)
return Op;
- if (ISD::isBuildVectorAllOnes(Op.getNode()))
- return getOnesVector(Op.getValueType(), DAG, dl);
- return getZeroVector(Op.getValueType(), Subtarget->hasSSE2(), DAG, dl);
+ return getOnesVector(Op.getValueType(), DAG, dl);
}
unsigned EVTBits = ExtVT.getSizeInBits();
@@ -4538,7 +5119,7 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
Item = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Item);
Item = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VecVT, Item);
Item = getShuffleVectorZeroOrUndef(Item, 0, true,
- Subtarget->hasSSE2(), DAG);
+ Subtarget->hasXMMInt(), DAG);
// Now we have our 32-bit value zero extended in the low element of
// a vector. If Idx != 0, swizzle it into place.
@@ -4566,7 +5147,7 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
(ExtVT == MVT::i64 && Subtarget->is64Bit())) {
Item = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Item);
// Turn it into a MOVL (i.e. movss, movsd, or movd) to a zero vector.
- return getShuffleVectorZeroOrUndef(Item, 0, true, Subtarget->hasSSE2(),
+ return getShuffleVectorZeroOrUndef(Item, 0, true,Subtarget->hasXMMInt(),
DAG);
} else if (ExtVT == MVT::i16 || ExtVT == MVT::i8) {
Item = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, Item);
@@ -4574,7 +5155,7 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
EVT MiddleVT = MVT::v4i32;
Item = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MiddleVT, Item);
Item = getShuffleVectorZeroOrUndef(Item, 0, true,
- Subtarget->hasSSE2(), DAG);
+ Subtarget->hasXMMInt(), DAG);
return DAG.getNode(ISD::BITCAST, dl, VT, Item);
}
}
@@ -4603,7 +5184,7 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
// Turn it into a shuffle of zero and zero-extended scalar to vector.
Item = getShuffleVectorZeroOrUndef(Item, 0, NumZero > 0,
- Subtarget->hasSSE2(), DAG);
+ Subtarget->hasXMMInt(), DAG);
SmallVector<int, 8> MaskVec;
for (unsigned i = 0; i < NumElems; i++)
MaskVec.push_back(i == Idx ? 0 : 1);
@@ -4631,6 +5212,27 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
if (IsAllConstants)
return SDValue();
+ // For AVX-length vectors, build the individual 128-bit pieces and use
+ // shuffles to put them in place.
+ if (VT.getSizeInBits() == 256 && !ISD::isBuildVectorAllZeros(Op.getNode())) {
+ SmallVector<SDValue, 32> V;
+ for (unsigned i = 0; i < NumElems; ++i)
+ V.push_back(Op.getOperand(i));
+
+ EVT HVT = EVT::getVectorVT(*DAG.getContext(), ExtVT, NumElems/2);
+
+ // Build both the lower and upper subvector.
+ SDValue Lower = DAG.getNode(ISD::BUILD_VECTOR, dl, HVT, &V[0], NumElems/2);
+ SDValue Upper = DAG.getNode(ISD::BUILD_VECTOR, dl, HVT, &V[NumElems / 2],
+ NumElems/2);
+
+ // Recreate the wider vector with the lower and upper part.
+ SDValue Vec = Insert128BitVector(DAG.getNode(ISD::UNDEF, dl, VT), Lower,
+ DAG.getConstant(0, MVT::i32), DAG, dl);
+ return Insert128BitVector(Vec, Upper, DAG.getConstant(NumElems/2, MVT::i32),
+ DAG, dl);
+ }
+
// Let legalizer expand 2-wide build_vectors.
if (EVTBits == 64) {
if (NumNonZero == 1) {
@@ -4639,7 +5241,7 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
SDValue V2 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT,
Op.getOperand(Idx));
return getShuffleVectorZeroOrUndef(V2, Idx, true,
- Subtarget->hasSSE2(), DAG);
+ Subtarget->hasXMMInt(), DAG);
}
return SDValue();
}
@@ -4664,7 +5266,7 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
for (unsigned i = 0; i < 4; ++i) {
bool isZero = !(NonZeros & (1 << i));
if (isZero)
- V[i] = getZeroVector(VT, Subtarget->hasSSE2(), DAG, dl);
+ V[i] = getZeroVector(VT, Subtarget->hasXMMInt(), DAG, dl);
else
V[i] = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Op.getOperand(i));
}
@@ -4708,7 +5310,7 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
return LD;
// For SSE 4.1, use insertps to put the high elements into the low element.
- if (getSubtarget()->hasSSE41()) {
+ if (getSubtarget()->hasSSE41() || getSubtarget()->hasAVX()) {
SDValue Result;
if (Op.getOperand(0).getOpcode() != ISD::UNDEF)
Result = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Op.getOperand(0));
@@ -4758,13 +5360,12 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
return SDValue();
}
-SDValue
-X86TargetLowering::LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const {
- // We support concatenate two MMX registers and place them in a MMX
- // register. This is better than doing a stack convert.
+// LowerMMXCONCAT_VECTORS - We support concatenate two MMX registers and place
+// them in a MMX register. This is better than doing a stack convert.
+static SDValue LowerMMXCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) {
DebugLoc dl = Op.getDebugLoc();
EVT ResVT = Op.getValueType();
- assert(Op.getNumOperands() == 2);
+
assert(ResVT == MVT::v2i64 || ResVT == MVT::v4i32 ||
ResVT == MVT::v8i16 || ResVT == MVT::v16i8);
int Mask[2];
@@ -4785,6 +5386,42 @@ X86TargetLowering::LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const {
return DAG.getNode(ISD::BITCAST, dl, ResVT, VecOp);
}
+// LowerAVXCONCAT_VECTORS - 256-bit AVX can use the vinsertf128 instruction
+// to create 256-bit vectors from two other 128-bit ones.
+static SDValue LowerAVXCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) {
+ DebugLoc dl = Op.getDebugLoc();
+ EVT ResVT = Op.getValueType();
+
+ assert(ResVT.getSizeInBits() == 256 && "Value type must be 256-bit wide");
+
+ SDValue V1 = Op.getOperand(0);
+ SDValue V2 = Op.getOperand(1);
+ unsigned NumElems = ResVT.getVectorNumElements();
+
+ SDValue V = Insert128BitVector(DAG.getNode(ISD::UNDEF, dl, ResVT), V1,
+ DAG.getConstant(0, MVT::i32), DAG, dl);
+ return Insert128BitVector(V, V2, DAG.getConstant(NumElems/2, MVT::i32),
+ DAG, dl);
+}
+
+SDValue
+X86TargetLowering::LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const {
+ EVT ResVT = Op.getValueType();
+
+ assert(Op.getNumOperands() == 2);
+ assert((ResVT.getSizeInBits() == 128 || ResVT.getSizeInBits() == 256) &&
+ "Unsupported CONCAT_VECTORS for value type");
+
+ // We support concatenate two MMX registers and place them in a MMX register.
+ // This is better than doing a stack convert.
+ if (ResVT.is128BitVector())
+ return LowerMMXCONCAT_VECTORS(Op, DAG);
+
+ // 256-bit AVX can use the vinsertf128 instruction to create 256-bit vectors
+ // from two other 128-bit ones.
+ return LowerAVXCONCAT_VECTORS(Op, DAG);
+}
+
// v8i16 shuffles - Prefer shuffles in the following order:
// 1. [all] pshuflw, pshufhw, optional move
// 2. [ssse3] 1 x pshufb
@@ -4844,7 +5481,7 @@ X86TargetLowering::LowerVECTOR_SHUFFLEv8i16(SDValue Op,
// quads, disable the next transformation since it does not help SSSE3.
bool V1Used = InputQuads[0] || InputQuads[1];
bool V2Used = InputQuads[2] || InputQuads[3];
- if (Subtarget->hasSSSE3()) {
+ if (Subtarget->hasSSSE3() || Subtarget->hasAVX()) {
if (InputQuads.count() == 2 && V1Used && V2Used) {
BestLoQuad = InputQuads.find_first();
BestHiQuad = InputQuads.find_next(BestLoQuad);
@@ -4917,7 +5554,7 @@ X86TargetLowering::LowerVECTOR_SHUFFLEv8i16(SDValue Op,
// If we have SSSE3, and all words of the result are from 1 input vector,
// case 2 is generated, otherwise case 3 is generated. If no SSSE3
// is present, fall back to case 4.
- if (Subtarget->hasSSSE3()) {
+ if (Subtarget->hasSSSE3() || Subtarget->hasAVX()) {
SmallVector<SDValue,16> pshufbMask;
// If we have elements from both input vectors, set the high bit of the
@@ -4985,7 +5622,8 @@ X86TargetLowering::LowerVECTOR_SHUFFLEv8i16(SDValue Op,
NewV = DAG.getVectorShuffle(MVT::v8i16, dl, NewV, DAG.getUNDEF(MVT::v8i16),
&MaskV[0]);
- if (NewV.getOpcode() == ISD::VECTOR_SHUFFLE && Subtarget->hasSSSE3())
+ if (NewV.getOpcode() == ISD::VECTOR_SHUFFLE &&
+ (Subtarget->hasSSSE3() || Subtarget->hasAVX()))
NewV = getTargetShuffleNode(X86ISD::PSHUFLW, dl, MVT::v8i16,
NewV.getOperand(0),
X86::getShufflePSHUFLWImmediate(NewV.getNode()),
@@ -5013,7 +5651,8 @@ X86TargetLowering::LowerVECTOR_SHUFFLEv8i16(SDValue Op,
NewV = DAG.getVectorShuffle(MVT::v8i16, dl, NewV, DAG.getUNDEF(MVT::v8i16),
&MaskV[0]);
- if (NewV.getOpcode() == ISD::VECTOR_SHUFFLE && Subtarget->hasSSSE3())
+ if (NewV.getOpcode() == ISD::VECTOR_SHUFFLE &&
+ (Subtarget->hasSSSE3() || Subtarget->hasAVX()))
NewV = getTargetShuffleNode(X86ISD::PSHUFHW, dl, MVT::v8i16,
NewV.getOperand(0),
X86::getShufflePSHUFHWImmediate(NewV.getNode()),
@@ -5079,7 +5718,7 @@ SDValue LowerVECTOR_SHUFFLEv16i8(ShuffleVectorSDNode *SVOp,
}
// If SSSE3, use 1 pshufb instruction per vector with elements in the result.
- if (TLI.getSubtarget()->hasSSSE3()) {
+ if (TLI.getSubtarget()->hasSSSE3() || TLI.getSubtarget()->hasAVX()) {
SmallVector<SDValue,16> pshufbMask;
// If all result elements are from one input vector, then only translate
@@ -5276,15 +5915,109 @@ static SDValue getVZextMovL(EVT VT, EVT OpVT,
OpVT, SrcOp)));
}
-/// LowerVECTOR_SHUFFLE_4wide - Handle all 4 wide cases with a number of
-/// shuffles.
+/// areShuffleHalvesWithinDisjointLanes - Check whether each half of a vector
+/// shuffle node refers to only one lane in the sources.
+static bool areShuffleHalvesWithinDisjointLanes(ShuffleVectorSDNode *SVOp) {
+ EVT VT = SVOp->getValueType(0);
+ int NumElems = VT.getVectorNumElements();
+ int HalfSize = NumElems/2;
+ SmallVector<int, 16> M;
+ SVOp->getMask(M);
+ bool MatchA = false, MatchB = false;
+
+ for (int l = 0; l < NumElems*2; l += HalfSize) {
+ if (isUndefOrInRange(M, 0, HalfSize, l, l+HalfSize)) {
+ MatchA = true;
+ break;
+ }
+ }
+
+ for (int l = 0; l < NumElems*2; l += HalfSize) {
+ if (isUndefOrInRange(M, HalfSize, HalfSize, l, l+HalfSize)) {
+ MatchB = true;
+ break;
+ }
+ }
+
+ return MatchA && MatchB;
+}
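+// For illustration, on a v8i32 shuffle: the mask <u, 1, 2, 3, 12, 13, u, 15>
+// keeps the low half of the result within the first lane of the sources
+// (indices 0..3) and the high half within the last lane (indices 12..15),
+// so both halves match and the function returns true.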
+
+/// LowerVECTOR_SHUFFLE_256 - Handle all 256-bit wide vector shuffles
+/// which could not be matched by any known target specific shuffle.
+static SDValue
+LowerVECTOR_SHUFFLE_256(ShuffleVectorSDNode *SVOp, SelectionDAG &DAG) {
+ if (areShuffleHalvesWithinDisjointLanes(SVOp)) {
+ // If each half of a vector shuffle node refers to only one lane in the
+ // source vectors, extract each used 128-bit lane and shuffle them using
+ // 128-bit shuffles. Then, concatenate the results. Otherwise leave
+ // the work to the legalizer.
+ DebugLoc dl = SVOp->getDebugLoc();
+ EVT VT = SVOp->getValueType(0);
+ int NumElems = VT.getVectorNumElements();
+ int HalfSize = NumElems/2;
+
+ // Extract the reference for each half
+ int FstVecExtractIdx = 0, SndVecExtractIdx = 0;
+ int FstVecOpNum = 0, SndVecOpNum = 0;
+ for (int i = 0; i < HalfSize; ++i) {
+ int Elt = SVOp->getMaskElt(i);
+ if (SVOp->getMaskElt(i) < 0)
+ continue;
+ FstVecOpNum = Elt/NumElems;
+ FstVecExtractIdx = Elt % NumElems < HalfSize ? 0 : HalfSize;
+ break;
+ }
+ for (int i = HalfSize; i < NumElems; ++i) {
+ int Elt = SVOp->getMaskElt(i);
+ if (SVOp->getMaskElt(i) < 0)
+ continue;
+ SndVecOpNum = Elt/NumElems;
+ SndVecExtractIdx = Elt % NumElems < HalfSize ? 0 : HalfSize;
+ break;
+ }
+
+ // Extract the subvectors
+ SDValue V1 = Extract128BitVector(SVOp->getOperand(FstVecOpNum),
+ DAG.getConstant(FstVecExtractIdx, MVT::i32), DAG, dl);
+ SDValue V2 = Extract128BitVector(SVOp->getOperand(SndVecOpNum),
+ DAG.getConstant(SndVecExtractIdx, MVT::i32), DAG, dl);
+
+ // Generate 128-bit shuffles
+ SmallVector<int, 16> MaskV1, MaskV2;
+ for (int i = 0; i < HalfSize; ++i) {
+ int Elt = SVOp->getMaskElt(i);
+ MaskV1.push_back(Elt < 0 ? Elt : Elt % HalfSize);
+ }
+ for (int i = HalfSize; i < NumElems; ++i) {
+ int Elt = SVOp->getMaskElt(i);
+ MaskV2.push_back(Elt < 0 ? Elt : Elt % HalfSize);
+ }
+
+ EVT NVT = V1.getValueType();
+ V1 = DAG.getVectorShuffle(NVT, dl, V1, DAG.getUNDEF(NVT), &MaskV1[0]);
+ V2 = DAG.getVectorShuffle(NVT, dl, V2, DAG.getUNDEF(NVT), &MaskV2[0]);
+
+ // Concatenate the result back
+ SDValue V = Insert128BitVector(DAG.getNode(ISD::UNDEF, dl, VT), V1,
+ DAG.getConstant(0, MVT::i32), DAG, dl);
+ return Insert128BitVector(V, V2, DAG.getConstant(NumElems/2, MVT::i32),
+ DAG, dl);
+ }
+
+ return SDValue();
+}
+
+/// LowerVECTOR_SHUFFLE_128v4 - Handle all 128-bit wide vectors with
+/// 4 elements, and match them with several different shuffle types.
static SDValue
-LowerVECTOR_SHUFFLE_4wide(ShuffleVectorSDNode *SVOp, SelectionDAG &DAG) {
+LowerVECTOR_SHUFFLE_128v4(ShuffleVectorSDNode *SVOp, SelectionDAG &DAG) {
SDValue V1 = SVOp->getOperand(0);
SDValue V2 = SVOp->getOperand(1);
DebugLoc dl = SVOp->getDebugLoc();
EVT VT = SVOp->getValueType(0);
+ assert(VT.getSizeInBits() == 128 && "Unsupported vector size");
+
SmallVector<std::pair<int, int>, 8> Locs;
Locs.resize(4);
SmallVector<int, 8> Mask1(4U, -1);
@@ -5542,18 +6275,21 @@ SDValue getMOVDDup(SDValue &Op, DebugLoc &dl, SDValue V1, SelectionDAG &DAG) {
static
SDValue getMOVLowToHigh(SDValue &Op, DebugLoc &dl, SelectionDAG &DAG,
- bool HasSSE2) {
+ bool HasXMMInt) {
SDValue V1 = Op.getOperand(0);
SDValue V2 = Op.getOperand(1);
EVT VT = Op.getValueType();
assert(VT != MVT::v2i64 && "unsupported shuffle type");
- if (HasSSE2 && VT == MVT::v2f64)
+ if (HasXMMInt && VT == MVT::v2f64)
return getTargetShuffleNode(X86ISD::MOVLHPD, dl, VT, V1, V2, DAG);
- // v4f32 or v4i32
- return getTargetShuffleNode(X86ISD::MOVLHPS, dl, VT, V1, V2, DAG);
+ // v4f32 or v4i32: canonicalized to v4f32 (which is legal for SSE1)
+ return DAG.getNode(ISD::BITCAST, dl, VT,
+ getTargetShuffleNode(X86ISD::MOVLHPS, dl, MVT::v4f32,
+ DAG.getNode(ISD::BITCAST, dl, MVT::v4f32, V1),
+ DAG.getNode(ISD::BITCAST, dl, MVT::v4f32, V2), DAG));
}
static
@@ -5572,8 +6308,24 @@ SDValue getMOVHighToLow(SDValue &Op, DebugLoc &dl, SelectionDAG &DAG) {
return getTargetShuffleNode(X86ISD::MOVHLPS, dl, VT, V1, V2, DAG);
}
+static inline unsigned getSHUFPOpcode(EVT VT) {
+ switch(VT.getSimpleVT().SimpleTy) {
+ case MVT::v8i32: // Use fp unit for int unpack.
+ case MVT::v8f32:
+ case MVT::v4i32: // Use fp unit for int unpack.
+ case MVT::v4f32: return X86ISD::SHUFPS;
+ case MVT::v4i64: // Use fp unit for int unpack.
+ case MVT::v4f64:
+ case MVT::v2i64: // Use fp unit for int unpack.
+ case MVT::v2f64: return X86ISD::SHUFPD;
+ default:
+ llvm_unreachable("Unknown type for shufp*");
+ }
+ return 0;
+}
+
static
-SDValue getMOVLP(SDValue &Op, DebugLoc &dl, SelectionDAG &DAG, bool HasSSE2) {
+SDValue getMOVLP(SDValue &Op, DebugLoc &dl, SelectionDAG &DAG, bool HasXMMInt) {
SDValue V1 = Op.getOperand(0);
SDValue V2 = Op.getOperand(1);
EVT VT = Op.getValueType();
@@ -5602,7 +6354,7 @@ SDValue getMOVLP(SDValue &Op, DebugLoc &dl, SelectionDAG &DAG, bool HasSSE2) {
CanFoldLoad = false;
if (CanFoldLoad) {
- if (HasSSE2 && NumElems == 2)
+ if (HasXMMInt && NumElems == 2)
return getTargetShuffleNode(X86ISD::MOVLPD, dl, VT, V1, V2, DAG);
if (NumElems == 4)
@@ -5616,28 +6368,30 @@ SDValue getMOVLP(SDValue &Op, DebugLoc &dl, SelectionDAG &DAG, bool HasSSE2) {
// this is horrible, but will stay like this until we move all shuffle
// matching to x86 specific nodes. Note that for the 1st condition all
// types are matched with movsd.
- if ((HasSSE2 && NumElems == 2) || !X86::isMOVLMask(SVOp))
- return getTargetShuffleNode(X86ISD::MOVSD, dl, VT, V1, V2, DAG);
- else if (HasSSE2)
+ if (HasXMMInt) {
+ // FIXME: isMOVLMask should be checked and matched before getMOVLP,
+ // as to remove this logic from here, as much as possible
+ if (NumElems == 2 || !X86::isMOVLMask(SVOp))
+ return getTargetShuffleNode(X86ISD::MOVSD, dl, VT, V1, V2, DAG);
return getTargetShuffleNode(X86ISD::MOVSS, dl, VT, V1, V2, DAG);
-
+ }
assert(VT != MVT::v4i32 && "unsupported shuffle type");
// Invert the operand order and use SHUFPS to match it.
- return getTargetShuffleNode(X86ISD::SHUFPS, dl, VT, V2, V1,
+ return getTargetShuffleNode(getSHUFPOpcode(VT), dl, VT, V2, V1,
X86::getShuffleSHUFImmediate(SVOp), DAG);
}
-static inline unsigned getUNPCKLOpcode(EVT VT, const X86Subtarget *Subtarget) {
+static inline unsigned getUNPCKLOpcode(EVT VT) {
switch(VT.getSimpleVT().SimpleTy) {
case MVT::v4i32: return X86ISD::PUNPCKLDQ;
case MVT::v2i64: return X86ISD::PUNPCKLQDQ;
- case MVT::v4f32:
- return Subtarget->hasAVX() ? X86ISD::VUNPCKLPS : X86ISD::UNPCKLPS;
- case MVT::v2f64:
- return Subtarget->hasAVX() ? X86ISD::VUNPCKLPD : X86ISD::UNPCKLPD;
+ case MVT::v4f32: return X86ISD::UNPCKLPS;
+ case MVT::v2f64: return X86ISD::UNPCKLPD;
+ case MVT::v8i32: // Use fp unit for int unpack.
case MVT::v8f32: return X86ISD::VUNPCKLPSY;
+ case MVT::v4i64: // Use fp unit for int unpack.
case MVT::v4f64: return X86ISD::VUNPCKLPDY;
case MVT::v16i8: return X86ISD::PUNPCKLBW;
case MVT::v8i16: return X86ISD::PUNPCKLWD;
@@ -5653,6 +6407,10 @@ static inline unsigned getUNPCKHOpcode(EVT VT) {
case MVT::v2i64: return X86ISD::PUNPCKHQDQ;
case MVT::v4f32: return X86ISD::UNPCKHPS;
case MVT::v2f64: return X86ISD::UNPCKHPD;
+ case MVT::v8i32: // Use fp unit for int unpack.
+ case MVT::v8f32: return X86ISD::VUNPCKHPSY;
+ case MVT::v4i64: // Use fp unit for int unpack.
+ case MVT::v4f64: return X86ISD::VUNPCKHPDY;
case MVT::v16i8: return X86ISD::PUNPCKHBW;
case MVT::v8i16: return X86ISD::PUNPCKHWD;
default:
@@ -5661,6 +6419,68 @@ static inline unsigned getUNPCKHOpcode(EVT VT) {
return 0;
}
+static inline unsigned getVPERMILOpcode(EVT VT) {
+ switch(VT.getSimpleVT().SimpleTy) {
+ case MVT::v4i32:
+ case MVT::v4f32: return X86ISD::VPERMILPS;
+ case MVT::v2i64:
+ case MVT::v2f64: return X86ISD::VPERMILPD;
+ case MVT::v8i32:
+ case MVT::v8f32: return X86ISD::VPERMILPSY;
+ case MVT::v4i64:
+ case MVT::v4f64: return X86ISD::VPERMILPDY;
+ default:
+ llvm_unreachable("Unknown type for vpermil");
+ }
+ return 0;
+}
+
+/// isVectorBroadcast - Check if the node chain is suitable to be xformed to
+/// a vbroadcast node. The nodes are suitable whenever we can fold a load coming
+/// from a 32- or 64-bit scalar. Update Op to the desired load to be folded.
+static bool isVectorBroadcast(SDValue &Op) {
+ EVT VT = Op.getValueType();
+ bool Is256 = VT.getSizeInBits() == 256;
+
+ assert((VT.getSizeInBits() == 128 || Is256) &&
+ "Unsupported type for vbroadcast node");
+
+ SDValue V = Op;
+ if (V.hasOneUse() && V.getOpcode() == ISD::BITCAST)
+ V = V.getOperand(0);
+
+ if (Is256 && !(V.hasOneUse() &&
+ V.getOpcode() == ISD::INSERT_SUBVECTOR &&
+ V.getOperand(0).getOpcode() == ISD::UNDEF))
+ return false;
+
+ if (Is256)
+ V = V.getOperand(1);
+
+ if (!V.hasOneUse())
+ return false;
+
+ // Check the source scalar_to_vector type. 256-bit broadcasts are
+ // supported for 32/64-bit sizes, while 128-bit ones are only supported
+ // for 32-bit scalars.
+ if (V.getOpcode() != ISD::SCALAR_TO_VECTOR)
+ return false;
+
+ unsigned ScalarSize = V.getOperand(0).getValueType().getSizeInBits();
+ if (ScalarSize != 32 && ScalarSize != 64)
+ return false;
+ if (!Is256 && ScalarSize == 64)
+ return false;
+
+ V = V.getOperand(0);
+ if (!MayFoldLoad(V))
+ return false;
+
+ // Return the load node
+ Op = V;
+ return true;
+}
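
A minimal standalone sketch of the splat semantics VBROADCAST provides, under the assumption of a 32-bit scalar and an 8-lane destination; this is plain C++ for illustration, not the SelectionDAG code above.

    #include <array>
    #include <cstdint>

    // Broadcast: one scalar loaded from memory is replicated into every
    // lane of the destination vector, which is what folding the load into
    // a vbroadcast buys over a separate load + shuffle.
    static std::array<uint32_t, 8> broadcast32x8(const uint32_t *mem) {
      std::array<uint32_t, 8> v;
      for (uint32_t &lane : v)
        lane = *mem; // every lane observes the same loaded scalar
      return v;
    }
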
+
static
SDValue NormalizeVectorShuffle(SDValue Op, SelectionDAG &DAG,
const TargetLowering &TLI,
@@ -5672,23 +6492,29 @@ SDValue NormalizeVectorShuffle(SDValue Op, SelectionDAG &DAG,
SDValue V2 = Op.getOperand(1);
if (isZeroShuffle(SVOp))
- return getZeroVector(VT, Subtarget->hasSSE2(), DAG, dl);
+ return getZeroVector(VT, Subtarget->hasXMMInt(), DAG, dl);
// Handle splat operations
if (SVOp->isSplat()) {
- // Special case, this is the only place now where it's
- // allowed to return a vector_shuffle operation without
- // using a target specific node, because *hopefully* it
- // will be optimized away by the dag combiner.
- if (VT.getVectorNumElements() <= 4 &&
- CanXFormVExtractWithShuffleIntoLoad(Op, DAG, TLI))
+ unsigned NumElem = VT.getVectorNumElements();
+ int Size = VT.getSizeInBits();
+ // Special case, this is the only place now where it's allowed to return
+ // a vector_shuffle operation without using a target specific node, because
+ // *hopefully* it will be optimized away by the dag combiner. FIXME: should
+ // this be moved to DAGCombine instead?
+ if (NumElem <= 4 && CanXFormVExtractWithShuffleIntoLoad(Op, DAG, TLI))
return Op;
- // Handle splats by matching through known masks
- if (VT.getVectorNumElements() <= 4)
+ // Use vbroadcast whenever the splat comes from a foldable load
+ if (Subtarget->hasAVX() && isVectorBroadcast(V1))
+ return DAG.getNode(X86ISD::VBROADCAST, dl, VT, V1);
+
+ // Handle splats by matching through known shuffle masks
+ if ((Size == 128 && NumElem <= 4) ||
+ (Size == 256 && NumElem < 8))
return SDValue();
- // Canonicalize all of the remaining to v4f32.
+ // All remaining splats are promoted to target-supported vector shuffles.
return PromoteSplat(SVOp, DAG);
}
@@ -5698,7 +6524,8 @@ SDValue NormalizeVectorShuffle(SDValue Op, SelectionDAG &DAG,
SDValue NewOp = RewriteAsNarrowerShuffle(SVOp, DAG, dl);
if (NewOp.getNode())
return DAG.getNode(ISD::BITCAST, dl, VT, NewOp);
- } else if ((VT == MVT::v4i32 || (VT == MVT::v4f32 && Subtarget->hasSSE2()))) {
+ } else if ((VT == MVT::v4i32 ||
+ (VT == MVT::v4f32 && Subtarget->hasXMMInt()))) {
// FIXME: Figure out a cleaner way to do this.
// Try to make use of movq to zero out the top part.
if (ISD::isBuildVectorAllZeros(V2.getNode())) {
@@ -5731,9 +6558,7 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const {
bool V2IsUndef = V2.getOpcode() == ISD::UNDEF;
bool V1IsSplat = false;
bool V2IsSplat = false;
- bool HasSSE2 = Subtarget->hasSSE2() || Subtarget->hasAVX();
- bool HasSSE3 = Subtarget->hasSSE3() || Subtarget->hasAVX();
- bool HasSSSE3 = Subtarget->hasSSSE3() || Subtarget->hasAVX();
+ bool HasXMMInt = Subtarget->hasXMMInt();
MachineFunction &MF = DAG.getMachineFunction();
bool OptForSize = MF.getFunction()->hasFnAttr(Attribute::OptimizeForSize);
@@ -5765,21 +6590,20 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const {
// NOTE: isPSHUFDMask can also match both masks below (unpckl_undef and
// unpckh_undef). Only use pshufd if speed is more important than size.
if (OptForSize && X86::isUNPCKL_v_undef_Mask(SVOp))
- if (VT != MVT::v2i64 && VT != MVT::v2f64)
- return getTargetShuffleNode(getUNPCKLOpcode(VT, getSubtarget()), dl, VT, V1, V1, DAG);
+ return getTargetShuffleNode(getUNPCKLOpcode(VT), dl, VT, V1, V1, DAG);
if (OptForSize && X86::isUNPCKH_v_undef_Mask(SVOp))
- if (VT != MVT::v2i64 && VT != MVT::v2f64)
- return getTargetShuffleNode(getUNPCKHOpcode(VT), dl, VT, V1, V1, DAG);
+ return getTargetShuffleNode(getUNPCKHOpcode(VT), dl, VT, V1, V1, DAG);
- if (X86::isMOVDDUPMask(SVOp) && HasSSE3 && V2IsUndef &&
- RelaxedMayFoldVectorLoad(V1))
+ if (X86::isMOVDDUPMask(SVOp) &&
+ (Subtarget->hasSSE3() || Subtarget->hasAVX()) &&
+ V2IsUndef && RelaxedMayFoldVectorLoad(V1))
return getMOVDDup(Op, dl, V1, DAG);
if (X86::isMOVHLPS_v_undef_Mask(SVOp))
return getMOVHighToLow(Op, dl, DAG);
// Used to match splats
- if (HasSSE2 && X86::isUNPCKHMask(SVOp) && V2IsUndef &&
+ if (HasXMMInt && X86::isUNPCKHMask(SVOp) && V2IsUndef &&
(VT == MVT::v2f64 || VT == MVT::v2i64))
return getTargetShuffleNode(getUNPCKHOpcode(VT), dl, VT, V1, V1, DAG);
@@ -5792,24 +6616,19 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const {
unsigned TargetMask = X86::getShuffleSHUFImmediate(SVOp);
- if (HasSSE2 && (VT == MVT::v4f32 || VT == MVT::v4i32))
+ if (HasXMMInt && (VT == MVT::v4f32 || VT == MVT::v4i32))
return getTargetShuffleNode(X86ISD::PSHUFD, dl, VT, V1, TargetMask, DAG);
- if (HasSSE2 && (VT == MVT::v2i64 || VT == MVT::v2f64))
- return getTargetShuffleNode(X86ISD::SHUFPD, dl, VT, V1, V1,
- TargetMask, DAG);
-
- if (VT == MVT::v4f32)
- return getTargetShuffleNode(X86ISD::SHUFPS, dl, VT, V1, V1,
- TargetMask, DAG);
+ return getTargetShuffleNode(getSHUFPOpcode(VT), dl, VT, V1, V1,
+ TargetMask, DAG);
}
// Check if this can be converted into a logical shift.
bool isLeft = false;
unsigned ShAmt = 0;
SDValue ShVal;
- bool isShift = getSubtarget()->hasSSE2() &&
- isVectorShift(SVOp, DAG, isLeft, ShVal, ShAmt);
+ bool isShift = getSubtarget()->hasXMMInt() &&
+ isVectorShift(SVOp, DAG, isLeft, ShVal, ShAmt);
if (isShift && ShVal.hasOneUse()) {
// If the shifted value has multiple uses, it may be cheaper to use
// v_set0 + movlhps or movhlps, etc.
@@ -5824,7 +6643,7 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const {
if (ISD::isBuildVectorAllZeros(V1.getNode()))
return getVZextMovL(VT, VT, V2, DAG, Subtarget, dl);
if (!X86::isMOVLPMask(SVOp)) {
- if (HasSSE2 && (VT == MVT::v2i64 || VT == MVT::v2f64))
+ if (HasXMMInt && (VT == MVT::v2i64 || VT == MVT::v2f64))
return getTargetShuffleNode(X86ISD::MOVSD, dl, VT, V1, V2, DAG);
if (VT == MVT::v4i32 || VT == MVT::v4f32)
@@ -5834,19 +6653,19 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const {
// FIXME: fold these into legal mask.
if (X86::isMOVLHPSMask(SVOp) && !X86::isUNPCKLMask(SVOp))
- return getMOVLowToHigh(Op, dl, DAG, HasSSE2);
+ return getMOVLowToHigh(Op, dl, DAG, HasXMMInt);
if (X86::isMOVHLPSMask(SVOp))
return getMOVHighToLow(Op, dl, DAG);
- if (X86::isMOVSHDUPMask(SVOp) && HasSSE3 && V2IsUndef && NumElems == 4)
+ if (X86::isMOVSHDUPMask(SVOp, Subtarget))
return getTargetShuffleNode(X86ISD::MOVSHDUP, dl, VT, V1, DAG);
- if (X86::isMOVSLDUPMask(SVOp) && HasSSE3 && V2IsUndef && NumElems == 4)
+ if (X86::isMOVSLDUPMask(SVOp, Subtarget))
return getTargetShuffleNode(X86ISD::MOVSLDUP, dl, VT, V1, DAG);
if (X86::isMOVLPMask(SVOp))
- return getMOVLP(Op, dl, DAG, HasSSE2);
+ return getMOVLP(Op, dl, DAG, HasXMMInt);
if (ShouldXformToMOVHLPS(SVOp) ||
ShouldXformToMOVLP(V1.getNode(), V2.getNode(), SVOp))
@@ -5887,8 +6706,7 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const {
}
if (X86::isUNPCKLMask(SVOp))
- return getTargetShuffleNode(getUNPCKLOpcode(VT, getSubtarget()),
- dl, VT, V1, V2, DAG);
+ return getTargetShuffleNode(getUNPCKLOpcode(VT), dl, VT, V1, V2, DAG);
if (X86::isUNPCKHMask(SVOp))
return getTargetShuffleNode(getUNPCKHOpcode(VT), dl, VT, V1, V2, DAG);
@@ -5915,8 +6733,7 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const {
ShuffleVectorSDNode *NewSVOp = cast<ShuffleVectorSDNode>(NewOp);
if (X86::isUNPCKLMask(NewSVOp))
- return getTargetShuffleNode(getUNPCKLOpcode(VT, getSubtarget()),
- dl, VT, V2, V1, DAG);
+ return getTargetShuffleNode(getUNPCKLOpcode(VT), dl, VT, V2, V1, DAG);
if (X86::isUNPCKHMask(NewSVOp))
return getTargetShuffleNode(getUNPCKHOpcode(VT), dl, VT, V2, V1, DAG);
@@ -5932,18 +6749,15 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const {
SmallVector<int, 16> M;
SVOp->getMask(M);
- if (isPALIGNRMask(M, VT, HasSSSE3))
+ if (isPALIGNRMask(M, VT, Subtarget->hasSSSE3() || Subtarget->hasAVX()))
return getTargetShuffleNode(X86ISD::PALIGN, dl, VT, V1, V2,
X86::getShufflePALIGNRImmediate(SVOp),
DAG);
if (ShuffleVectorSDNode::isSplatMask(&M[0], VT) &&
SVOp->getSplatIndex() == 0 && V2IsUndef) {
- if (VT == MVT::v2f64) {
- X86ISD::NodeType Opcode =
- getSubtarget()->hasAVX() ? X86ISD::VUNPCKLPD : X86ISD::UNPCKLPD;
- return getTargetShuffleNode(Opcode, dl, VT, V1, V1, DAG);
- }
+ if (VT == MVT::v2f64)
+ return getTargetShuffleNode(X86ISD::UNPCKLPD, dl, VT, V1, V1, DAG);
if (VT == MVT::v2i64)
return getTargetShuffleNode(X86ISD::PUNPCKLQDQ, dl, VT, V1, V1, DAG);
}
@@ -5958,23 +6772,54 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const {
X86::getShufflePSHUFLWImmediate(SVOp),
DAG);
- if (isSHUFPMask(M, VT)) {
- unsigned TargetMask = X86::getShuffleSHUFImmediate(SVOp);
- if (VT == MVT::v4f32 || VT == MVT::v4i32)
- return getTargetShuffleNode(X86ISD::SHUFPS, dl, VT, V1, V2,
- TargetMask, DAG);
- if (VT == MVT::v2f64 || VT == MVT::v2i64)
- return getTargetShuffleNode(X86ISD::SHUFPD, dl, VT, V1, V2,
- TargetMask, DAG);
- }
+ if (isSHUFPMask(M, VT))
+ return getTargetShuffleNode(getSHUFPOpcode(VT), dl, VT, V1, V2,
+ X86::getShuffleSHUFImmediate(SVOp), DAG);
if (X86::isUNPCKL_v_undef_Mask(SVOp))
- if (VT != MVT::v2i64 && VT != MVT::v2f64)
- return getTargetShuffleNode(getUNPCKLOpcode(VT, getSubtarget()),
- dl, VT, V1, V1, DAG);
+ return getTargetShuffleNode(getUNPCKLOpcode(VT), dl, VT, V1, V1, DAG);
if (X86::isUNPCKH_v_undef_Mask(SVOp))
- if (VT != MVT::v2i64 && VT != MVT::v2f64)
- return getTargetShuffleNode(getUNPCKHOpcode(VT), dl, VT, V1, V1, DAG);
+ return getTargetShuffleNode(getUNPCKHOpcode(VT), dl, VT, V1, V1, DAG);
+
+ //===--------------------------------------------------------------------===//
+ // Generate target-specific nodes for 128- or 256-bit shuffles only
+ // supported in the AVX instruction set.
+ //
+
+ // Handle VMOVDDUPY permutations
+ if (isMOVDDUPYMask(SVOp, Subtarget))
+ return getTargetShuffleNode(X86ISD::MOVDDUP, dl, VT, V1, DAG);
+
+ // Handle VPERMILPS* permutations
+ if (isVPERMILPSMask(M, VT, Subtarget))
+ return getTargetShuffleNode(getVPERMILOpcode(VT), dl, VT, V1,
+ getShuffleVPERMILPSImmediate(SVOp), DAG);
+
+ // Handle VPERMILPD* permutations
+ if (isVPERMILPDMask(M, VT, Subtarget))
+ return getTargetShuffleNode(getVPERMILOpcode(VT), dl, VT, V1,
+ getShuffleVPERMILPDImmediate(SVOp), DAG);
+
+ // Handle VPERM2F128 permutations
+ if (isVPERM2F128Mask(M, VT, Subtarget))
+ return getTargetShuffleNode(X86ISD::VPERM2F128, dl, VT, V1, V2,
+ getShuffleVPERM2F128Immediate(SVOp), DAG);
+
+ // Handle VSHUFPSY permutations
+ if (isVSHUFPSYMask(M, VT, Subtarget))
+ return getTargetShuffleNode(getSHUFPOpcode(VT), dl, VT, V1, V2,
+ getShuffleVSHUFPSYImmediate(SVOp), DAG);
+
+ // Handle VSHUFPDY permutations
+ if (isVSHUFPDYMask(M, VT, Subtarget))
+ return getTargetShuffleNode(getSHUFPOpcode(VT), dl, VT, V1, V2,
+ getShuffleVSHUFPDYImmediate(SVOp), DAG);
+
+ //===--------------------------------------------------------------------===//
+ // Since no target-specific shuffle was selected for this generic one,
+ // lower it into other known shuffles. FIXME: this isn't true yet, but
+ // this is the plan.
+ //
// Handle v8i16 specifically since SSE can do byte extraction and insertion.
if (VT == MVT::v8i16) {
@@ -5989,9 +6834,14 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const {
return NewOp;
}
- // Handle all 4 wide cases with a number of shuffles.
- if (NumElems == 4)
- return LowerVECTOR_SHUFFLE_4wide(SVOp, DAG);
+ // Handle all 128-bit wide vectors with 4 elements, and match them with
+ // several different shuffle types.
+ if (NumElems == 4 && VT.getSizeInBits() == 128)
+ return LowerVECTOR_SHUFFLE_128v4(SVOp, DAG);
+
+ // Handle general 256-bit shuffles
+ if (VT.is256BitVector())
+ return LowerVECTOR_SHUFFLE_256(SVOp, DAG);
return SDValue();
}
@@ -6001,6 +6851,10 @@ X86TargetLowering::LowerEXTRACT_VECTOR_ELT_SSE4(SDValue Op,
SelectionDAG &DAG) const {
EVT VT = Op.getValueType();
DebugLoc dl = Op.getDebugLoc();
+
+ if (Op.getOperand(0).getValueType().getSizeInBits() != 128)
+ return SDValue();
+
if (VT.getSizeInBits() == 8) {
SDValue Extract = DAG.getNode(X86ISD::PEXTRB, dl, MVT::i32,
Op.getOperand(0), Op.getOperand(1));
@@ -6060,36 +6914,26 @@ X86TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op,
SDValue Vec = Op.getOperand(0);
EVT VecVT = Vec.getValueType();
- // If this is a 256-bit vector result, first extract the 128-bit
- // vector and then extract from the 128-bit vector.
- if (VecVT.getSizeInBits() > 128) {
+ // If this is a 256-bit vector result, first extract the 128-bit vector and
+ // then extract the element from the 128-bit vector.
+ if (VecVT.getSizeInBits() == 256) {
DebugLoc dl = Op.getNode()->getDebugLoc();
unsigned NumElems = VecVT.getVectorNumElements();
SDValue Idx = Op.getOperand(1);
-
- if (!isa<ConstantSDNode>(Idx))
- return SDValue();
-
- unsigned ExtractNumElems = NumElems / (VecVT.getSizeInBits() / 128);
unsigned IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue();
// Get the 128-bit vector.
- bool Upper = IdxVal >= ExtractNumElems;
- Vec = Extract128BitVector(Vec, Idx, DAG, dl);
-
- // Extract from it.
- SDValue ScaledIdx = Idx;
- if (Upper)
- ScaledIdx = DAG.getNode(ISD::SUB, dl, Idx.getValueType(), Idx,
- DAG.getConstant(ExtractNumElems,
- Idx.getValueType()));
+ bool Upper = IdxVal >= NumElems/2;
+ Vec = Extract128BitVector(Vec,
+ DAG.getConstant(Upper ? NumElems/2 : 0, MVT::i32), DAG, dl);
+
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, Op.getValueType(), Vec,
- ScaledIdx);
+ Upper ? DAG.getConstant(IdxVal-NumElems/2, MVT::i32) : Idx);
}
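
The hunk above reduces a 256-bit element extract to a 128-bit one: pick the half containing the index, then rebase the index against that half. A small standalone C++ sketch of the same arithmetic (the 8 x i32 shape is an assumption for illustration):

    #include <array>
    #include <cassert>
    #include <cstdint>

    static uint32_t extractElt256(const std::array<uint32_t, 8> &vec,
                                  unsigned idx) {
      const unsigned NumElems = 8;
      assert(idx < NumElems && "index out of range");
      bool upper = idx >= NumElems / 2;         // which 128-bit half
      unsigned base = upper ? NumElems / 2 : 0; // Extract128BitVector offset
      std::array<uint32_t, 4> half;
      for (unsigned i = 0; i != 4; ++i)
        half[i] = vec[base + i];
      return half[idx - base];                  // idx rebased into the half
    }
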
assert(Vec.getValueSizeInBits() <= 128 && "Unexpected vector length");
- if (Subtarget->hasSSE41()) {
+ if (Subtarget->hasSSE41() || Subtarget->hasAVX()) {
SDValue Res = LowerEXTRACT_VECTOR_ELT_SSE4(Op, DAG);
if (Res.getNode())
return Res;
@@ -6120,7 +6964,7 @@ X86TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op,
return Op;
// SHUFPS the element to the lowest double word, then movss.
- int Mask[4] = { Idx, -1, -1, -1 };
+ int Mask[4] = { static_cast<int>(Idx), -1, -1, -1 };
EVT VVT = Op.getOperand(0).getValueType();
SDValue Vec = DAG.getVectorShuffle(VVT, dl, Op.getOperand(0),
DAG.getUNDEF(VVT), Mask);
@@ -6159,6 +7003,9 @@ X86TargetLowering::LowerINSERT_VECTOR_ELT_SSE4(SDValue Op,
SDValue N1 = Op.getOperand(1);
SDValue N2 = Op.getOperand(2);
+ if (VT.getSizeInBits() == 256)
+ return SDValue();
+
if ((EltVT.getSizeInBits() == 8 || EltVT.getSizeInBits() == 16) &&
isa<ConstantSDNode>(N2)) {
unsigned Opc;
@@ -6206,35 +7053,28 @@ X86TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const {
SDValue N1 = Op.getOperand(1);
SDValue N2 = Op.getOperand(2);
- // If this is a 256-bit vector result, first insert into a 128-bit
- // vector and then insert into the 256-bit vector.
- if (VT.getSizeInBits() > 128) {
+ // If this is a 256-bit vector result, first extract the 128-bit vector,
+ // insert the element into the extracted half and then place it back.
+ if (VT.getSizeInBits() == 256) {
if (!isa<ConstantSDNode>(N2))
return SDValue();
- // Get the 128-bit vector.
+ // Get the desired 128-bit vector half.
unsigned NumElems = VT.getVectorNumElements();
unsigned IdxVal = cast<ConstantSDNode>(N2)->getZExtValue();
- bool Upper = IdxVal >= NumElems / 2;
-
- SDValue SubN0 = Extract128BitVector(N0, N2, DAG, dl);
+ bool Upper = IdxVal >= NumElems/2;
+ SDValue Ins128Idx = DAG.getConstant(Upper ? NumElems/2 : 0, MVT::i32);
+ SDValue V = Extract128BitVector(N0, Ins128Idx, DAG, dl);
- // Insert into it.
- SDValue ScaledN2 = N2;
- if (Upper)
- ScaledN2 = DAG.getNode(ISD::SUB, dl, N2.getValueType(), N2,
- DAG.getConstant(NumElems /
- (VT.getSizeInBits() / 128),
- N2.getValueType()));
- Op = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, SubN0.getValueType(), SubN0,
- N1, ScaledN2);
+ // Insert the element into the desired half.
+ V = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, V.getValueType(), V,
+ N1, Upper ? DAG.getConstant(IdxVal-NumElems/2, MVT::i32) : N2);
- // Insert the 128-bit vector
- // FIXME: Why UNDEF?
- return Insert128BitVector(N0, Op, N2, DAG, dl);
+ // Insert the changed part back to the 256-bit vector
+ return Insert128BitVector(N0, V, Ins128Idx, DAG, dl);
}
- if (Subtarget->hasSSE41())
+ if (Subtarget->hasSSE41() || Subtarget->hasAVX())
return LowerINSERT_VECTOR_ELT_SSE4(Op, DAG);
if (EltVT == MVT::i8)
@@ -6405,12 +7245,17 @@ X86TargetLowering::LowerExternalSymbol(SDValue Op, SelectionDAG &DAG) const {
CodeModel::Model M = getTargetMachine().getCodeModel();
if (Subtarget->isPICStyleRIPRel() &&
- (M == CodeModel::Small || M == CodeModel::Kernel))
+ (M == CodeModel::Small || M == CodeModel::Kernel)) {
+ if (Subtarget->isTargetDarwin() || Subtarget->isTargetELF())
+ OpFlag = X86II::MO_GOTPCREL;
WrapperKind = X86ISD::WrapperRIP;
- else if (Subtarget->isPICStyleGOT())
- OpFlag = X86II::MO_GOTOFF;
- else if (Subtarget->isPICStyleStubPIC())
- OpFlag = X86II::MO_PIC_BASE_OFFSET;
+ } else if (Subtarget->isPICStyleGOT()) {
+ OpFlag = X86II::MO_GOT;
+ } else if (Subtarget->isPICStyleStubPIC()) {
+ OpFlag = X86II::MO_DARWIN_NONLAZY_PIC_BASE;
+ } else if (Subtarget->isPICStyleStubNoDynamic()) {
+ OpFlag = X86II::MO_DARWIN_NONLAZY;
+ }
SDValue Result = DAG.getTargetExternalSymbol(Sym, getPointerTy(), OpFlag);
@@ -6427,6 +7272,12 @@ X86TargetLowering::LowerExternalSymbol(SDValue Op, SelectionDAG &DAG) const {
Result);
}
+ // For symbols that require a load from a stub to get the address, emit the
+ // load.
+ if (isGlobalStubReference(OpFlag))
+ Result = DAG.getLoad(getPointerTy(), DL, DAG.getEntryNode(), Result,
+ MachinePointerInfo::getGOT(), false, false, 0);
+
return Result;
}
@@ -6676,7 +7527,8 @@ X86TargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const {
// And our return value (tls address) is in the standard call return value
// location.
unsigned Reg = Subtarget->is64Bit() ? X86::RAX : X86::EAX;
- return DAG.getCopyFromReg(Chain, DL, Reg, getPointerTy());
+ return DAG.getCopyFromReg(Chain, DL, Reg, getPointerTy(),
+ Chain.getValue(1));
}
assert(false &&
@@ -6922,9 +7774,11 @@ SDValue X86TargetLowering::LowerUINT_TO_FP_i32(SDValue Op,
// Load the 32-bit value into an XMM register.
SDValue Load = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v4i32,
- DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32,
- Op.getOperand(0),
- DAG.getIntPtrConstant(0)));
+ Op.getOperand(0));
+
+ // Zero out the upper parts of the register.
+ Load = getShuffleVectorZeroOrUndef(Load, 0, true, Subtarget->hasXMMInt(),
+ DAG);
Load = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64,
DAG.getNode(ISD::BITCAST, dl, MVT::v2f64, Load),
@@ -7513,6 +8367,9 @@ SDValue X86TargetLowering::LowerToBT(SDValue And, ISD::CondCode CC,
}
SDValue X86TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
+
+ if (Op.getValueType().isVector()) return LowerVSETCC(Op, DAG);
+
assert(Op.getValueType() == MVT::i8 && "SetCC type must be 8-bit integer");
SDValue Op0 = Op.getOperand(0);
SDValue Op1 = Op.getOperand(1);
@@ -7563,6 +8420,39 @@ SDValue X86TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
DAG.getConstant(X86CC, MVT::i8), EFLAGS);
}
+// Lower256IntVSETCC - Break a 256-bit integer VSETCC into two new 128-bit
+// ones, and then concatenate the result back.
+static SDValue Lower256IntVSETCC(SDValue Op, SelectionDAG &DAG) {
+ EVT VT = Op.getValueType();
+
+ assert(VT.getSizeInBits() == 256 && Op.getOpcode() == ISD::SETCC &&
+ "Unsupported value type for operation");
+
+ int NumElems = VT.getVectorNumElements();
+ DebugLoc dl = Op.getDebugLoc();
+ SDValue CC = Op.getOperand(2);
+ SDValue Idx0 = DAG.getConstant(0, MVT::i32);
+ SDValue Idx1 = DAG.getConstant(NumElems/2, MVT::i32);
+
+ // Extract the LHS vectors
+ SDValue LHS = Op.getOperand(0);
+ SDValue LHS1 = Extract128BitVector(LHS, Idx0, DAG, dl);
+ SDValue LHS2 = Extract128BitVector(LHS, Idx1, DAG, dl);
+
+ // Extract the RHS vectors
+ SDValue RHS = Op.getOperand(1);
+ SDValue RHS1 = Extract128BitVector(RHS, Idx0, DAG, dl);
+ SDValue RHS2 = Extract128BitVector(RHS, Idx1, DAG, dl);
+
+ // Issue the operation on the smaller types and concatenate the result back
+ MVT EltVT = VT.getVectorElementType().getSimpleVT();
+ EVT NewVT = MVT::getVectorVT(EltVT, NumElems/2);
+ return DAG.getNode(ISD::CONCAT_VECTORS, dl, VT,
+ DAG.getNode(Op.getOpcode(), dl, NewVT, LHS1, RHS1, CC),
+ DAG.getNode(Op.getOpcode(), dl, NewVT, LHS2, RHS2, CC));
+}
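
A standalone sketch of the split-and-concatenate strategy Lower256IntVSETCC uses (and which Lower256IntArith further down reuses): run the operation on each 128-bit half and glue the results back together. The v8i32 greater-than compare is an illustrative stand-in for the general case.

    #include <array>
    #include <cstdint>

    using V8 = std::array<uint32_t, 8>;
    using V4 = std::array<uint32_t, 4>;

    static V4 cmpGT128(const V4 &a, const V4 &b) { // 128-bit PCMPGTD stand-in
      V4 r;
      for (unsigned i = 0; i != 4; ++i)
        r[i] = (int32_t)a[i] > (int32_t)b[i] ? ~0u : 0u;
      return r;
    }

    static V8 cmpGT256(const V8 &a, const V8 &b) {
      V4 lo1, hi1, lo2, hi2;
      for (unsigned i = 0; i != 4; ++i) {        // Extract128BitVector twice
        lo1[i] = a[i]; hi1[i] = a[i + 4];
        lo2[i] = b[i]; hi2[i] = b[i + 4];
      }
      V4 rlo = cmpGT128(lo1, lo2), rhi = cmpGT128(hi1, hi2);
      V8 r;                                      // CONCAT_VECTORS
      for (unsigned i = 0; i != 4; ++i) { r[i] = rlo[i]; r[i + 4] = rhi[i]; }
      return r;
    }
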
+
+
SDValue X86TargetLowering::LowerVSETCC(SDValue Op, SelectionDAG &DAG) const {
SDValue Cond;
SDValue Op0 = Op.getOperand(0);
@@ -7575,11 +8465,21 @@ SDValue X86TargetLowering::LowerVSETCC(SDValue Op, SelectionDAG &DAG) const {
if (isFP) {
unsigned SSECC = 8;
- EVT VT0 = Op0.getValueType();
- assert(VT0 == MVT::v4f32 || VT0 == MVT::v2f64);
- unsigned Opc = VT0 == MVT::v4f32 ? X86ISD::CMPPS : X86ISD::CMPPD;
+ EVT EltVT = Op0.getValueType().getVectorElementType();
+ assert(EltVT == MVT::f32 || EltVT == MVT::f64);
+
+ unsigned Opc = EltVT == MVT::f32 ? X86ISD::CMPPS : X86ISD::CMPPD;
bool Swap = false;
+ // SSE Condition code mapping:
+ // 0 - EQ
+ // 1 - LT
+ // 2 - LE
+ // 3 - UNORD
+ // 4 - NEQ
+ // 5 - NLT
+ // 6 - NLE
+ // 7 - ORD
switch (SetCCOpcode) {
default: break;
case ISD::SETOEQ:
@@ -7624,6 +8524,10 @@ SDValue X86TargetLowering::LowerVSETCC(SDValue Op, SelectionDAG &DAG) const {
return DAG.getNode(Opc, dl, VT, Op0, Op1, DAG.getConstant(SSECC, MVT::i8));
}
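
The SSE condition-code table in the comment above maps directly onto the cmpps/cmppd immediate. A tiny lookup sketch; the enum is an illustrative stand-in for the ISD predicates handled by the switch, not an LLVM type.

    enum FCmpPred { FCMP_OEQ, FCMP_OLT, FCMP_OLE, FCMP_UNO,
                    FCMP_UNE, FCMP_UGE, FCMP_UGT, FCMP_ORD };

    static unsigned cmppsImmediate(FCmpPred P) {
      switch (P) {
      case FCMP_OEQ: return 0; // EQ
      case FCMP_OLT: return 1; // LT
      case FCMP_OLE: return 2; // LE
      case FCMP_UNO: return 3; // UNORD
      case FCMP_UNE: return 4; // NEQ
      case FCMP_UGE: return 5; // NLT: !(a < b), also true on unordered
      case FCMP_UGT: return 6; // NLE: !(a <= b), also true on unordered
      case FCMP_ORD: return 7; // ORD
      }
      return ~0u; // unreachable
    }
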
+ // Break 256-bit integer vector compare into smaller ones.
+ if (!isFP && VT.getSizeInBits() == 256)
+ return Lower256IntVSETCC(Op, DAG);
+
// We are handling one of the integer comparisons here. Since SSE only has
// GT and EQ comparisons for integer, swapping operands and multiple
// operations may be required for some comparisons.
@@ -7654,6 +8558,13 @@ SDValue X86TargetLowering::LowerVSETCC(SDValue Op, SelectionDAG &DAG) const {
if (Swap)
std::swap(Op0, Op1);
+ // Check that the operation in question is available (most are plain SSE2,
+ // but PCMPGTQ and PCMPEQQ have different requirements).
+ if (Opc == X86ISD::PCMPGTQ && !Subtarget->hasSSE42() && !Subtarget->hasAVX())
+ return SDValue();
+ if (Opc == X86ISD::PCMPEQQ && !Subtarget->hasSSE41() && !Subtarget->hasAVX())
+ return SDValue();
+
// Since SSE has no unsigned integer comparisons, we need to flip the sign
// bits of the inputs before performing those operations.
if (FlipSigns) {
@@ -8014,9 +8925,11 @@ SDValue X86TargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const {
SDValue
X86TargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
SelectionDAG &DAG) const {
- assert((Subtarget->isTargetCygMing() || Subtarget->isTargetWindows()) &&
- "This should be used only on Windows targets");
- assert(!Subtarget->isTargetEnvMacho());
+ assert((Subtarget->isTargetCygMing() || Subtarget->isTargetWindows() ||
+ EnableSegmentedStacks) &&
+ "This should be used only on Windows targets or when segmented stacks "
+ "are being used");
+ assert(!Subtarget->isTargetEnvMacho() && "Not implemented");
DebugLoc dl = Op.getDebugLoc();
// Get the inputs.
@@ -8024,23 +8937,49 @@ X86TargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
SDValue Size = Op.getOperand(1);
// FIXME: Ensure alignment here
- SDValue Flag;
+ bool Is64Bit = Subtarget->is64Bit();
+ EVT SPTy = Is64Bit ? MVT::i64 : MVT::i32;
- EVT SPTy = Subtarget->is64Bit() ? MVT::i64 : MVT::i32;
- unsigned Reg = (Subtarget->is64Bit() ? X86::RAX : X86::EAX);
+ if (EnableSegmentedStacks) {
+ MachineFunction &MF = DAG.getMachineFunction();
+ MachineRegisterInfo &MRI = MF.getRegInfo();
- Chain = DAG.getCopyToReg(Chain, dl, Reg, Size, Flag);
- Flag = Chain.getValue(1);
+ if (Is64Bit) {
+ // The 64-bit implementation of segmented stacks needs to clobber both r10
+ // and r11. This makes it impossible to use it along with nested parameters.
+ const Function *F = MF.getFunction();
+
+ for (Function::const_arg_iterator I = F->arg_begin(), E = F->arg_end();
+ I != E; I++)
+ if (I->hasNestAttr())
+ report_fatal_error("Cannot use segmented stacks with functions that "
+ "have nested arguments.");
+ }
+
+ const TargetRegisterClass *AddrRegClass =
+ getRegClassFor(Subtarget->is64Bit() ? MVT::i64:MVT::i32);
+ unsigned Vreg = MRI.createVirtualRegister(AddrRegClass);
+ Chain = DAG.getCopyToReg(Chain, dl, Vreg, Size);
+ SDValue Value = DAG.getNode(X86ISD::SEG_ALLOCA, dl, SPTy, Chain,
+ DAG.getRegister(Vreg, SPTy));
+ SDValue Ops1[2] = { Value, Chain };
+ return DAG.getMergeValues(Ops1, 2, dl);
+ } else {
+ SDValue Flag;
+ unsigned Reg = (Subtarget->is64Bit() ? X86::RAX : X86::EAX);
- SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
+ Chain = DAG.getCopyToReg(Chain, dl, Reg, Size, Flag);
+ Flag = Chain.getValue(1);
+ SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
- Chain = DAG.getNode(X86ISD::WIN_ALLOCA, dl, NodeTys, Chain, Flag);
- Flag = Chain.getValue(1);
+ Chain = DAG.getNode(X86ISD::WIN_ALLOCA, dl, NodeTys, Chain, Flag);
+ Flag = Chain.getValue(1);
- Chain = DAG.getCopyFromReg(Chain, dl, X86StackPtr, SPTy).getValue(1);
+ Chain = DAG.getCopyFromReg(Chain, dl, X86StackPtr, SPTy).getValue(1);
- SDValue Ops1[2] = { Chain.getValue(0), Chain };
- return DAG.getMergeValues(Ops1, 2, dl);
+ SDValue Ops1[2] = { Chain.getValue(0), Chain };
+ return DAG.getMergeValues(Ops1, 2, dl);
+ }
}
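
Under segmented stacks, the allocation above becomes a limit check plus a runtime fallback. A scalar sketch of that decision; the TLS slot and libgcc routine are passed in as parameters because both names here are hypothetical stand-ins (the real limit lives at fs:0x70 / gs:0x30 and the fallback is __morestack_allocate_stack_space):

    #include <cstddef>
    #include <cstdint>

    static void *segAlloca(uintptr_t sp, size_t size, uintptr_t stackLimit,
                           void *(*morestackAllocate)(size_t)) {
      uintptr_t bumped = sp - size;    // SUB32rr/SUB64rr on a copy of SP
      if (bumped > stackLimit)         // CMP against the TLS stack limit
        return (void *)bumped;         // bumpMBB: just move the stack pointer
      return morestackAllocate(size);  // mallocMBB: grow through the runtime
    }
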
SDValue X86TargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const {
@@ -8118,7 +9057,7 @@ SDValue X86TargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG) const {
DebugLoc dl = Op.getDebugLoc();
EVT ArgVT = Op.getNode()->getValueType(0);
- const Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());
+ Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());
uint32_t ArgSize = getTargetData()->getTypeAllocSize(ArgTy);
uint8_t ArgMode;
@@ -8292,6 +9231,19 @@ X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const
DAG.getConstant(X86CC, MVT::i8), Cond);
return DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, SetCC);
}
+ // Arithmetic intrinsics.
+ case Intrinsic::x86_sse3_hadd_ps:
+ case Intrinsic::x86_sse3_hadd_pd:
+ case Intrinsic::x86_avx_hadd_ps_256:
+ case Intrinsic::x86_avx_hadd_pd_256:
+ return DAG.getNode(X86ISD::FHADD, dl, Op.getValueType(),
+ Op.getOperand(1), Op.getOperand(2));
+ case Intrinsic::x86_sse3_hsub_ps:
+ case Intrinsic::x86_sse3_hsub_pd:
+ case Intrinsic::x86_avx_hsub_ps_256:
+ case Intrinsic::x86_avx_hsub_pd_256:
+ return DAG.getNode(X86ISD::FHSUB, dl, Op.getValueType(),
+ Op.getOperand(1), Op.getOperand(2));
// ptest and testp intrinsics. The intrinsic these come from are designed to
// return an integer value, not just an instruction so lower it to the ptest
// or testp pattern and a setcc for the result.
@@ -8535,8 +9487,13 @@ SDValue X86TargetLowering::LowerEH_RETURN(SDValue Op, SelectionDAG &DAG) const {
Chain, DAG.getRegister(StoreAddrReg, getPointerTy()));
}
-SDValue X86TargetLowering::LowerTRAMPOLINE(SDValue Op,
- SelectionDAG &DAG) const {
+SDValue X86TargetLowering::LowerADJUST_TRAMPOLINE(SDValue Op,
+ SelectionDAG &DAG) const {
+ return Op.getOperand(0);
+}
+
+SDValue X86TargetLowering::LowerINIT_TRAMPOLINE(SDValue Op,
+ SelectionDAG &DAG) const {
SDValue Root = Op.getOperand(0);
SDValue Trmp = Op.getOperand(1); // trampoline
SDValue FPtr = Op.getOperand(2); // nested function
@@ -8552,8 +9509,8 @@ SDValue X86TargetLowering::LowerTRAMPOLINE(SDValue Op,
const unsigned char JMP64r = 0xFF; // 64-bit jmp through register opcode.
const unsigned char MOV64ri = 0xB8; // X86::MOV64ri opcode.
- const unsigned char N86R10 = RegInfo->getX86RegNum(X86::R10);
- const unsigned char N86R11 = RegInfo->getX86RegNum(X86::R11);
+ const unsigned char N86R10 = X86_MC::getX86RegNum(X86::R10);
+ const unsigned char N86R11 = X86_MC::getX86RegNum(X86::R11);
const unsigned char REX_WB = 0x40 | 0x08 | 0x01; // REX prefix
@@ -8600,9 +9557,7 @@ SDValue X86TargetLowering::LowerTRAMPOLINE(SDValue Op,
MachinePointerInfo(TrmpAddr, 22),
false, false, 0);
- SDValue Ops[] =
- { Trmp, DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OutChains, 6) };
- return DAG.getMergeValues(Ops, 2, dl);
+ return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OutChains, 6);
} else {
const Function *Func =
cast<Function>(cast<SrcValueSDNode>(Op.getOperand(5))->getValue());
@@ -8619,7 +9574,7 @@ SDValue X86TargetLowering::LowerTRAMPOLINE(SDValue Op,
NestReg = X86::ECX;
// Check that ECX wasn't needed by an 'inreg' parameter.
- const FunctionType *FTy = Func->getFunctionType();
+ FunctionType *FTy = Func->getFunctionType();
const AttrListPtr &Attrs = Func->getAttributes();
if (!Attrs.isEmpty() && !Func->isVarArg()) {
@@ -8657,7 +9612,7 @@ SDValue X86TargetLowering::LowerTRAMPOLINE(SDValue Op,
// This is storing the opcode for MOV32ri.
const unsigned char MOV32ri = 0xB8; // X86::MOV32ri's opcode byte.
- const unsigned char N86Reg = RegInfo->getX86RegNum(NestReg);
+ const unsigned char N86Reg = X86_MC::getX86RegNum(NestReg);
OutChains[0] = DAG.getStore(Root, dl,
DAG.getConstant(MOV32ri|N86Reg, MVT::i8),
Trmp, MachinePointerInfo(TrmpAddr),
@@ -8682,9 +9637,7 @@ SDValue X86TargetLowering::LowerTRAMPOLINE(SDValue Op,
MachinePointerInfo(TrmpAddr, 6),
false, false, 1);
- SDValue Ops[] =
- { Trmp, DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OutChains, 4) };
- return DAG.getMergeValues(Ops, 2, dl);
+ return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OutChains, 4);
}
}
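
For reference, the 64-bit branch of LowerINIT_TRAMPOLINE above emits a short stub whose byte layout can be reconstructed from the MOV64ri/JMP64r/REX_WB constants and the store offsets; the helper below is a sketch of that inferred layout, not code from this file.

    #include <cstdint>
    #include <cstring>

    // 49 BB <imm64>  movabs r11, <nested function>
    // 49 BA <imm64>  movabs r10, <nest argument>
    // 49 FF E3       jmp    r11
    static void writeTrampoline64(uint8_t *trmp, uint64_t fptr,
                                  uint64_t nest) {
      trmp[0] = 0x49; trmp[1] = 0xBB;   // REX_WB, MOV64ri | N86R11
      std::memcpy(trmp + 2, &fptr, 8);
      trmp[10] = 0x49; trmp[11] = 0xBA; // REX_WB, MOV64ri | N86R10
      std::memcpy(trmp + 12, &nest, 8);
      trmp[20] = 0x49; trmp[21] = 0xFF; // REX_WB, JMP64r
      trmp[22] = 0xE3;                  // ModRM for jmp through r11
    }
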
@@ -8822,8 +9775,58 @@ SDValue X86TargetLowering::LowerCTTZ(SDValue Op, SelectionDAG &DAG) const {
return Op;
}
-SDValue X86TargetLowering::LowerMUL_V2I64(SDValue Op, SelectionDAG &DAG) const {
+// Lower256IntArith - Break a 256-bit integer operation into two new 128-bit
+// ones, and then concatenate the result back.
+static SDValue Lower256IntArith(SDValue Op, SelectionDAG &DAG) {
+ EVT VT = Op.getValueType();
+
+ assert(VT.getSizeInBits() == 256 && VT.isInteger() &&
+ "Unsupported value type for operation");
+
+ int NumElems = VT.getVectorNumElements();
+ DebugLoc dl = Op.getDebugLoc();
+ SDValue Idx0 = DAG.getConstant(0, MVT::i32);
+ SDValue Idx1 = DAG.getConstant(NumElems/2, MVT::i32);
+
+ // Extract the LHS vectors
+ SDValue LHS = Op.getOperand(0);
+ SDValue LHS1 = Extract128BitVector(LHS, Idx0, DAG, dl);
+ SDValue LHS2 = Extract128BitVector(LHS, Idx1, DAG, dl);
+
+ // Extract the RHS vectors
+ SDValue RHS = Op.getOperand(1);
+ SDValue RHS1 = Extract128BitVector(RHS, Idx0, DAG, dl);
+ SDValue RHS2 = Extract128BitVector(RHS, Idx1, DAG, dl);
+
+ MVT EltVT = VT.getVectorElementType().getSimpleVT();
+ EVT NewVT = MVT::getVectorVT(EltVT, NumElems/2);
+
+ return DAG.getNode(ISD::CONCAT_VECTORS, dl, VT,
+ DAG.getNode(Op.getOpcode(), dl, NewVT, LHS1, RHS1),
+ DAG.getNode(Op.getOpcode(), dl, NewVT, LHS2, RHS2));
+}
+
+SDValue X86TargetLowering::LowerADD(SDValue Op, SelectionDAG &DAG) const {
+ assert(Op.getValueType().getSizeInBits() == 256 &&
+ Op.getValueType().isInteger() &&
+ "Only handle AVX 256-bit vector integer operation");
+ return Lower256IntArith(Op, DAG);
+}
+
+SDValue X86TargetLowering::LowerSUB(SDValue Op, SelectionDAG &DAG) const {
+ assert(Op.getValueType().getSizeInBits() == 256 &&
+ Op.getValueType().isInteger() &&
+ "Only handle AVX 256-bit vector integer operation");
+ return Lower256IntArith(Op, DAG);
+}
+
+SDValue X86TargetLowering::LowerMUL(SDValue Op, SelectionDAG &DAG) const {
EVT VT = Op.getValueType();
+
+ // Decompose 256-bit ops into smaller 128-bit ops.
+ if (VT.getSizeInBits() == 256)
+ return Lower256IntArith(Op, DAG);
+
assert(VT == MVT::v2i64 && "Only know how to lower V2I64 multiply");
DebugLoc dl = Op.getDebugLoc();
@@ -8872,11 +9875,51 @@ SDValue X86TargetLowering::LowerShift(SDValue Op, SelectionDAG &DAG) const {
DebugLoc dl = Op.getDebugLoc();
SDValue R = Op.getOperand(0);
SDValue Amt = Op.getOperand(1);
-
LLVMContext *Context = DAG.getContext();
- // Must have SSE2.
- if (!Subtarget->hasSSE2()) return SDValue();
+ if (!Subtarget->hasXMMInt())
+ return SDValue();
+
+ // Decompose 256-bit shifts into smaller 128-bit shifts.
+ if (VT.getSizeInBits() == 256) {
+ int NumElems = VT.getVectorNumElements();
+ MVT EltVT = VT.getVectorElementType().getSimpleVT();
+ EVT NewVT = MVT::getVectorVT(EltVT, NumElems/2);
+
+ // Extract the two vectors
+ SDValue V1 = Extract128BitVector(R, DAG.getConstant(0, MVT::i32), DAG, dl);
+ SDValue V2 = Extract128BitVector(R, DAG.getConstant(NumElems/2, MVT::i32),
+ DAG, dl);
+
+ // Recreate the shift amount vectors
+ SDValue Amt1, Amt2;
+ if (Amt.getOpcode() == ISD::BUILD_VECTOR) {
+ // Constant shift amount
+ SmallVector<SDValue, 4> Amt1Csts;
+ SmallVector<SDValue, 4> Amt2Csts;
+ for (int i = 0; i < NumElems/2; ++i)
+ Amt1Csts.push_back(Amt->getOperand(i));
+ for (int i = NumElems/2; i < NumElems; ++i)
+ Amt2Csts.push_back(Amt->getOperand(i));
+
+ Amt1 = DAG.getNode(ISD::BUILD_VECTOR, dl, NewVT,
+ &Amt1Csts[0], NumElems/2);
+ Amt2 = DAG.getNode(ISD::BUILD_VECTOR, dl, NewVT,
+ &Amt2Csts[0], NumElems/2);
+ } else {
+ // Variable shift amount
+ Amt1 = Extract128BitVector(Amt, DAG.getConstant(0, MVT::i32), DAG, dl);
+ Amt2 = Extract128BitVector(Amt, DAG.getConstant(NumElems/2, MVT::i32),
+ DAG, dl);
+ }
+
+ // Issue new vector shifts for the smaller types
+ V1 = DAG.getNode(Op.getOpcode(), dl, NewVT, V1, Amt1);
+ V2 = DAG.getNode(Op.getOpcode(), dl, NewVT, V2, Amt2);
+
+ // Concatenate the result back
+ return DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, V1, V2);
+ }
// Optimize shl/srl/sra with constant shift amount.
if (isSplatVector(Amt.getNode())) {
@@ -8927,9 +9970,6 @@ SDValue X86TargetLowering::LowerShift(SDValue Op, SelectionDAG &DAG) const {
}
// Lower SHL with variable shift amount.
- // Cannot lower SHL without SSE2 or later.
- if (!Subtarget->hasSSE2()) return SDValue();
-
if (VT == MVT::v4i32 && Op->getOpcode() == ISD::SHL) {
Op = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
DAG.getConstant(Intrinsic::x86_sse2_pslli_d, MVT::i32),
@@ -8971,7 +10011,7 @@ SDValue X86TargetLowering::LowerShift(SDValue Op, SelectionDAG &DAG) const {
M = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
DAG.getConstant(Intrinsic::x86_sse2_pslli_w, MVT::i32), M,
DAG.getConstant(4, MVT::i32));
- R = DAG.getNode(X86ISD::PBLENDVB, dl, VT, R, M, Op);
+ R = DAG.getNode(ISD::VSELECT, dl, VT, Op, R, M);
// a += a
Op = DAG.getNode(ISD::ADD, dl, VT, Op, Op);
@@ -8986,13 +10026,13 @@ SDValue X86TargetLowering::LowerShift(SDValue Op, SelectionDAG &DAG) const {
M = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
DAG.getConstant(Intrinsic::x86_sse2_pslli_w, MVT::i32), M,
DAG.getConstant(2, MVT::i32));
- R = DAG.getNode(X86ISD::PBLENDVB, dl, VT, R, M, Op);
+ R = DAG.getNode(ISD::VSELECT, dl, VT, Op, R, M);
// a += a
Op = DAG.getNode(ISD::ADD, dl, VT, Op, Op);
// return pblendv(r, r+r, a);
- R = DAG.getNode(X86ISD::PBLENDVB, dl, VT,
- R, DAG.getNode(ISD::ADD, dl, VT, R, R), Op);
+ R = DAG.getNode(ISD::VSELECT, dl, VT, Op,
+ R, DAG.getNode(ISD::ADD, dl, VT, R, R));
return R;
}
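
The VSELECT ladder above implements a variable per-byte left shift: apply shifts of 4, 2, and 1 in turn, keeping each partial result only in lanes whose corresponding shift-amount bit is set (the vector code moves that bit into the lane's sign bit so the blend can test it). A scalar sketch of one lane:

    #include <cstdint>

    static uint8_t shlByteVar(uint8_t r, uint8_t amt) {
      uint8_t a = (uint8_t)(amt << 5);      // amount bit 2 -> sign bit
      if (a & 0x80) r = (uint8_t)(r << 4);  // first blend step
      a = (uint8_t)(a + a);                 // a += a
      if (a & 0x80) r = (uint8_t)(r << 2);  // second blend step
      a = (uint8_t)(a + a);
      if (a & 0x80) r = (uint8_t)(r + r);   // final step: r + r == r << 1
      return r;
    }
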
return SDValue();
@@ -9057,8 +10097,7 @@ SDValue X86TargetLowering::LowerXALUO(SDValue Op, SelectionDAG &DAG) const {
DAG.getConstant(X86::COND_O, MVT::i32),
SDValue(Sum.getNode(), 2));
- DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), SetCC);
- return Sum;
+ return DAG.getNode(ISD::MERGE_VALUES, DL, N->getVTList(), Sum, SetCC);
}
}
@@ -9071,8 +10110,7 @@ SDValue X86TargetLowering::LowerXALUO(SDValue Op, SelectionDAG &DAG) const {
DAG.getConstant(Cond, MVT::i32),
SDValue(Sum.getNode(), 1));
- DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), SetCC);
- return Sum;
+ return DAG.getNode(ISD::MERGE_VALUES, DL, N->getVTList(), Sum, SetCC);
}
SDValue X86TargetLowering::LowerSIGN_EXTEND_INREG(SDValue Op, SelectionDAG &DAG) const{
@@ -9080,8 +10118,7 @@ SDValue X86TargetLowering::LowerSIGN_EXTEND_INREG(SDValue Op, SelectionDAG &DAG)
SDNode* Node = Op.getNode();
EVT ExtraVT = cast<VTSDNode>(Node->getOperand(1))->getVT();
EVT VT = Node->getValueType(0);
-
- if (Subtarget->hasSSE2() && VT.isVector()) {
+ if (Subtarget->hasXMMInt() && VT.isVector()) {
unsigned BitsDiff = VT.getScalarType().getSizeInBits() -
ExtraVT.getScalarType().getSizeInBits();
SDValue ShAmt = DAG.getConstant(BitsDiff, MVT::i32);
@@ -9091,11 +10128,6 @@ SDValue X86TargetLowering::LowerSIGN_EXTEND_INREG(SDValue Op, SelectionDAG &DAG)
switch (VT.getSimpleVT().SimpleTy) {
default:
return SDValue();
- case MVT::v2i64: {
- SHLIntrinsicsID = Intrinsic::x86_sse2_pslli_q;
- SRAIntrinsicsID = 0;
- break;
- }
case MVT::v4i32: {
SHLIntrinsicsID = Intrinsic::x86_sse2_pslli_d;
SRAIntrinsicsID = Intrinsic::x86_sse2_psrai_d;
@@ -9115,12 +10147,9 @@ SDValue X86TargetLowering::LowerSIGN_EXTEND_INREG(SDValue Op, SelectionDAG &DAG)
// In case of 1 bit sext, no need to shr
if (ExtraVT.getScalarType().getSizeInBits() == 1) return Tmp1;
- if (SRAIntrinsicsID) {
- Tmp1 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
- DAG.getConstant(SRAIntrinsicsID, MVT::i32),
- Tmp1, ShAmt);
- }
- return Tmp1;
+ return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
+ DAG.getConstant(SRAIntrinsicsID, MVT::i32),
+ Tmp1, ShAmt);
}
return SDValue();
@@ -9132,7 +10161,7 @@ SDValue X86TargetLowering::LowerMEMBARRIER(SDValue Op, SelectionDAG &DAG) const{
// Go ahead and emit the fence on x86-64 even if we asked for no-sse2.
// There isn't any reason to disable it if the target processor supports it.
- if (!Subtarget->hasSSE2() && !Subtarget->is64Bit()) {
+ if (!Subtarget->hasXMMInt() && !Subtarget->is64Bit()) {
SDValue Chain = Op.getOperand(0);
SDValue Zero = DAG.getConstant(0, MVT::i32);
SDValue Ops[] = {
@@ -9172,6 +10201,45 @@ SDValue X86TargetLowering::LowerMEMBARRIER(SDValue Op, SelectionDAG &DAG) const{
return DAG.getNode(X86ISD::MFENCE, dl, MVT::Other, Op.getOperand(0));
}
+SDValue X86TargetLowering::LowerATOMIC_FENCE(SDValue Op,
+ SelectionDAG &DAG) const {
+ DebugLoc dl = Op.getDebugLoc();
+ AtomicOrdering FenceOrdering = static_cast<AtomicOrdering>(
+ cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue());
+ SynchronizationScope FenceScope = static_cast<SynchronizationScope>(
+ cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue());
+
+ // The only fence that needs an instruction is a sequentially-consistent
+ // cross-thread fence.
+ if (FenceOrdering == SequentiallyConsistent && FenceScope == CrossThread) {
+ // Use mfence if we have SSE2 or we're on x86-64 (even if we asked for
+ // no-sse2). There isn't any reason to disable it if the target processor
+ // supports it.
+ if (Subtarget->hasXMMInt() || Subtarget->is64Bit())
+ return DAG.getNode(X86ISD::MFENCE, dl, MVT::Other, Op.getOperand(0));
+
+ SDValue Chain = Op.getOperand(0);
+ SDValue Zero = DAG.getConstant(0, MVT::i32);
+ SDValue Ops[] = {
+ DAG.getRegister(X86::ESP, MVT::i32), // Base
+ DAG.getTargetConstant(1, MVT::i8), // Scale
+ DAG.getRegister(0, MVT::i32), // Index
+ DAG.getTargetConstant(0, MVT::i32), // Disp
+ DAG.getRegister(0, MVT::i32), // Segment.
+ Zero,
+ Chain
+ };
+ SDNode *Res =
+ DAG.getMachineNode(X86::OR32mrLocked, dl, MVT::Other, Ops,
+ array_lengthof(Ops));
+ return SDValue(Res, 0);
+ }
+
+ // MEMBARRIER is a compiler barrier; it codegens to a no-op.
+ return DAG.getNode(X86ISD::MEMBARRIER, dl, MVT::Other, Op.getOperand(0));
+}
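
At the source level (assuming the usual C++11 atomics mapping), only a sequentially-consistent cross-thread fence reaches the mfence path above; weaker fences are compiler-only barriers on x86. A small illustrative example:

    #include <atomic>

    void publish(std::atomic<int> &flag, int &data) {
      data = 42;
      std::atomic_thread_fence(std::memory_order_seq_cst); // lowers to mfence
      flag.store(1, std::memory_order_relaxed);
    }
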
+
+
SDValue X86TargetLowering::LowerCMP_SWAP(SDValue Op, SelectionDAG &DAG) const {
EVT T = Op.getValueType();
DebugLoc DL = Op.getDebugLoc();
@@ -9227,7 +10295,7 @@ SDValue X86TargetLowering::LowerBITCAST(SDValue Op,
SelectionDAG &DAG) const {
EVT SrcVT = Op.getOperand(0).getValueType();
EVT DstVT = Op.getValueType();
- assert(Subtarget->is64Bit() && !Subtarget->hasSSE2() &&
+ assert(Subtarget->is64Bit() && !Subtarget->hasXMMInt() &&
Subtarget->hasMMX() && "Unexpected custom BITCAST");
assert((DstVT == MVT::i64 ||
(DstVT.isVector() && DstVT.getSizeInBits()==64)) &&
@@ -9255,7 +10323,34 @@ SDValue X86TargetLowering::LowerLOAD_SUB(SDValue Op, SelectionDAG &DAG) const {
Node->getOperand(0),
Node->getOperand(1), negOp,
cast<AtomicSDNode>(Node)->getSrcValue(),
- cast<AtomicSDNode>(Node)->getAlignment());
+ cast<AtomicSDNode>(Node)->getAlignment(),
+ cast<AtomicSDNode>(Node)->getOrdering(),
+ cast<AtomicSDNode>(Node)->getSynchScope());
+}
+
+static SDValue LowerATOMIC_STORE(SDValue Op, SelectionDAG &DAG) {
+ SDNode *Node = Op.getNode();
+ DebugLoc dl = Node->getDebugLoc();
+ EVT VT = cast<AtomicSDNode>(Node)->getMemoryVT();
+
+ // Convert seq_cst store -> xchg
+ // Convert wide store -> swap (-> cmpxchg8b/cmpxchg16b)
+ // FIXME: On 32-bit, store -> fist or movq would be more efficient
+ // (The only way to get a 16-byte store is cmpxchg16b)
+ // FIXME: 16-byte ATOMIC_SWAP isn't actually hooked up at the moment.
+ if (cast<AtomicSDNode>(Node)->getOrdering() == SequentiallyConsistent ||
+ !DAG.getTargetLoweringInfo().isTypeLegal(VT)) {
+ SDValue Swap = DAG.getAtomic(ISD::ATOMIC_SWAP, dl,
+ cast<AtomicSDNode>(Node)->getMemoryVT(),
+ Node->getOperand(0),
+ Node->getOperand(1), Node->getOperand(2),
+ cast<AtomicSDNode>(Node)->getMemOperand(),
+ cast<AtomicSDNode>(Node)->getOrdering(),
+ cast<AtomicSDNode>(Node)->getSynchScope());
+ return Swap.getValue(1);
+ }
+ // Other atomic stores have a simple pattern.
+ return Op;
}
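
An illustrative view of the cases LowerATOMIC_STORE distinguishes, again assuming the standard C++11 mapping: a seq_cst store becomes an xchg, and a store wider than the legal integer width (i64 on 32-bit x86) is routed through ATOMIC_SWAP so it can be expanded to a cmpxchg8b loop.

    #include <atomic>

    void stores(std::atomic<int> &a, std::atomic<long long> &w) {
      a.store(1, std::memory_order_seq_cst); // seq_cst -> xchg
      a.store(2, std::memory_order_release); // plain store pattern
      w.store(3, std::memory_order_relaxed); // wide on 32-bit x86 -> swap
    }
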
static SDValue LowerADDC_ADDE_SUBC_SUBE(SDValue Op, SelectionDAG &DAG) {
@@ -9291,8 +10386,10 @@ SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
default: llvm_unreachable("Should not custom lower this!");
case ISD::SIGN_EXTEND_INREG: return LowerSIGN_EXTEND_INREG(Op,DAG);
case ISD::MEMBARRIER: return LowerMEMBARRIER(Op,DAG);
+ case ISD::ATOMIC_FENCE: return LowerATOMIC_FENCE(Op,DAG);
case ISD::ATOMIC_CMP_SWAP: return LowerCMP_SWAP(Op,DAG);
case ISD::ATOMIC_LOAD_SUB: return LowerLOAD_SUB(Op,DAG);
+ case ISD::ATOMIC_STORE: return LowerATOMIC_STORE(Op,DAG);
case ISD::BUILD_VECTOR: return LowerBUILD_VECTOR(Op, DAG);
case ISD::CONCAT_VECTORS: return LowerCONCAT_VECTORS(Op, DAG);
case ISD::VECTOR_SHUFFLE: return LowerVECTOR_SHUFFLE(Op, DAG);
@@ -9318,7 +10415,6 @@ SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::FCOPYSIGN: return LowerFCOPYSIGN(Op, DAG);
case ISD::FGETSIGN: return LowerFGETSIGN(Op, DAG);
case ISD::SETCC: return LowerSETCC(Op, DAG);
- case ISD::VSETCC: return LowerVSETCC(Op, DAG);
case ISD::SELECT: return LowerSELECT(Op, DAG);
case ISD::BRCOND: return LowerBRCOND(Op, DAG);
case ISD::JumpTable: return LowerJumpTable(Op, DAG);
@@ -9332,11 +10428,12 @@ SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
return LowerFRAME_TO_ARGS_OFFSET(Op, DAG);
case ISD::DYNAMIC_STACKALLOC: return LowerDYNAMIC_STACKALLOC(Op, DAG);
case ISD::EH_RETURN: return LowerEH_RETURN(Op, DAG);
- case ISD::TRAMPOLINE: return LowerTRAMPOLINE(Op, DAG);
+ case ISD::INIT_TRAMPOLINE: return LowerINIT_TRAMPOLINE(Op, DAG);
+ case ISD::ADJUST_TRAMPOLINE: return LowerADJUST_TRAMPOLINE(Op, DAG);
case ISD::FLT_ROUNDS_: return LowerFLT_ROUNDS_(Op, DAG);
case ISD::CTLZ: return LowerCTLZ(Op, DAG);
case ISD::CTTZ: return LowerCTTZ(Op, DAG);
- case ISD::MUL: return LowerMUL_V2I64(Op, DAG);
+ case ISD::MUL: return LowerMUL(Op, DAG);
case ISD::SRA:
case ISD::SRL:
case ISD::SHL: return LowerShift(Op, DAG);
@@ -9352,15 +10449,38 @@ SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::ADDE:
case ISD::SUBC:
case ISD::SUBE: return LowerADDC_ADDE_SUBC_SUBE(Op, DAG);
+ case ISD::ADD: return LowerADD(Op, DAG);
+ case ISD::SUB: return LowerSUB(Op, DAG);
}
}
+static void ReplaceATOMIC_LOAD(SDNode *Node,
+ SmallVectorImpl<SDValue> &Results,
+ SelectionDAG &DAG) {
+ DebugLoc dl = Node->getDebugLoc();
+ EVT VT = cast<AtomicSDNode>(Node)->getMemoryVT();
+
+ // Convert wide load -> cmpxchg8b/cmpxchg16b
+ // FIXME: On 32-bit, load -> fild or movq would be more efficient
+ // (The only way to get a 16-byte load is cmpxchg16b)
+ // FIXME: 16-byte ATOMIC_CMP_SWAP isn't actually hooked up at the moment.
+ SDValue Zero = DAG.getConstant(0, VT);
+ SDValue Swap = DAG.getAtomic(ISD::ATOMIC_CMP_SWAP, dl, VT,
+ Node->getOperand(0),
+ Node->getOperand(1), Zero, Zero,
+ cast<AtomicSDNode>(Node)->getMemOperand(),
+ cast<AtomicSDNode>(Node)->getOrdering(),
+ cast<AtomicSDNode>(Node)->getSynchScope());
+ Results.push_back(Swap.getValue(0));
+ Results.push_back(Swap.getValue(1));
+}
+
void X86TargetLowering::
ReplaceATOMIC_BINARY_64(SDNode *Node, SmallVectorImpl<SDValue>&Results,
SelectionDAG &DAG, unsigned NewOp) const {
- EVT T = Node->getValueType(0);
DebugLoc dl = Node->getDebugLoc();
- assert (T == MVT::i64 && "Only know how to expand i64 atomics");
+ assert (Node->getValueType(0) == MVT::i64 &&
+ "Only know how to expand i64 atomics");
SDValue Chain = Node->getOperand(0);
SDValue In1 = Node->getOperand(1);
@@ -9423,37 +10543,48 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
}
case ISD::ATOMIC_CMP_SWAP: {
EVT T = N->getValueType(0);
- assert (T == MVT::i64 && "Only know how to expand i64 Cmp and Swap");
+ assert((T == MVT::i64 || T == MVT::i128) && "can only expand cmpxchg pair");
+ bool Regs64bit = T == MVT::i128;
+ EVT HalfT = Regs64bit ? MVT::i64 : MVT::i32;
SDValue cpInL, cpInH;
- cpInL = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, N->getOperand(2),
- DAG.getConstant(0, MVT::i32));
- cpInH = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, N->getOperand(2),
- DAG.getConstant(1, MVT::i32));
- cpInL = DAG.getCopyToReg(N->getOperand(0), dl, X86::EAX, cpInL, SDValue());
- cpInH = DAG.getCopyToReg(cpInL.getValue(0), dl, X86::EDX, cpInH,
- cpInL.getValue(1));
+ cpInL = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, HalfT, N->getOperand(2),
+ DAG.getConstant(0, HalfT));
+ cpInH = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, HalfT, N->getOperand(2),
+ DAG.getConstant(1, HalfT));
+ cpInL = DAG.getCopyToReg(N->getOperand(0), dl,
+ Regs64bit ? X86::RAX : X86::EAX,
+ cpInL, SDValue());
+ cpInH = DAG.getCopyToReg(cpInL.getValue(0), dl,
+ Regs64bit ? X86::RDX : X86::EDX,
+ cpInH, cpInL.getValue(1));
SDValue swapInL, swapInH;
- swapInL = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, N->getOperand(3),
- DAG.getConstant(0, MVT::i32));
- swapInH = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, N->getOperand(3),
- DAG.getConstant(1, MVT::i32));
- swapInL = DAG.getCopyToReg(cpInH.getValue(0), dl, X86::EBX, swapInL,
- cpInH.getValue(1));
- swapInH = DAG.getCopyToReg(swapInL.getValue(0), dl, X86::ECX, swapInH,
- swapInL.getValue(1));
+ swapInL = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, HalfT, N->getOperand(3),
+ DAG.getConstant(0, HalfT));
+ swapInH = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, HalfT, N->getOperand(3),
+ DAG.getConstant(1, HalfT));
+ swapInL = DAG.getCopyToReg(cpInH.getValue(0), dl,
+ Regs64bit ? X86::RBX : X86::EBX,
+ swapInL, cpInH.getValue(1));
+ swapInH = DAG.getCopyToReg(swapInL.getValue(0), dl,
+ Regs64bit ? X86::RCX : X86::ECX,
+ swapInH, swapInL.getValue(1));
SDValue Ops[] = { swapInH.getValue(0),
N->getOperand(1),
swapInH.getValue(1) };
SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Glue);
MachineMemOperand *MMO = cast<AtomicSDNode>(N)->getMemOperand();
- SDValue Result = DAG.getMemIntrinsicNode(X86ISD::LCMPXCHG8_DAG, dl, Tys,
+ unsigned Opcode = Regs64bit ? X86ISD::LCMPXCHG16_DAG :
+ X86ISD::LCMPXCHG8_DAG;
+ SDValue Result = DAG.getMemIntrinsicNode(Opcode, dl, Tys,
Ops, 3, T, MMO);
- SDValue cpOutL = DAG.getCopyFromReg(Result.getValue(0), dl, X86::EAX,
- MVT::i32, Result.getValue(1));
- SDValue cpOutH = DAG.getCopyFromReg(cpOutL.getValue(1), dl, X86::EDX,
- MVT::i32, cpOutL.getValue(2));
+ SDValue cpOutL = DAG.getCopyFromReg(Result.getValue(0), dl,
+ Regs64bit ? X86::RAX : X86::EAX,
+ HalfT, Result.getValue(1));
+ SDValue cpOutH = DAG.getCopyFromReg(cpOutL.getValue(1), dl,
+ Regs64bit ? X86::RDX : X86::EDX,
+ HalfT, cpOutL.getValue(2));
SDValue OpsF[] = { cpOutL.getValue(0), cpOutH.getValue(0)};
- Results.push_back(DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, OpsF, 2));
+ Results.push_back(DAG.getNode(ISD::BUILD_PAIR, dl, T, OpsF, 2));
Results.push_back(cpOutH.getValue(1));
return;
}
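
The cmpxchg-pair expansion above feeds the value through register pairs (EDX:EAX/ECX:EBX, or RDX:RAX/RCX:RBX for cmpxchg16b) by splitting it with EXTRACT_ELEMENT 0 and 1. A sketch of that split for the i128 case; unsigned __int128 is a GCC/Clang extension used purely for illustration.

    #include <cstdint>

    struct Halves { uint64_t lo, hi; }; // lo -> RAX/RBX, hi -> RDX/RCX

    static Halves split128(unsigned __int128 v) {
      Halves h;
      h.lo = (uint64_t)v;           // EXTRACT_ELEMENT ..., 0
      h.hi = (uint64_t)(v >> 64);   // EXTRACT_ELEMENT ..., 1
      return h;
    }
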
@@ -9478,6 +10609,8 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
case ISD::ATOMIC_SWAP:
ReplaceATOMIC_BINARY_64(N, Results, DAG, X86ISD::ATOMSWAP64_DAG);
return;
+ case ISD::ATOMIC_LOAD:
+ ReplaceATOMIC_LOAD(N, Results, DAG);
}
}
@@ -9527,11 +10660,12 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
case X86ISD::PSIGNB: return "X86ISD::PSIGNB";
case X86ISD::PSIGNW: return "X86ISD::PSIGNW";
case X86ISD::PSIGND: return "X86ISD::PSIGND";
- case X86ISD::PBLENDVB: return "X86ISD::PBLENDVB";
case X86ISD::FMAX: return "X86ISD::FMAX";
case X86ISD::FMIN: return "X86ISD::FMIN";
case X86ISD::FRSQRT: return "X86ISD::FRSQRT";
case X86ISD::FRCP: return "X86ISD::FRCP";
+ case X86ISD::FHADD: return "X86ISD::FHADD";
+ case X86ISD::FHSUB: return "X86ISD::FHSUB";
case X86ISD::TLSADDR: return "X86ISD::TLSADDR";
case X86ISD::TLSCALL: return "X86ISD::TLSCALL";
case X86ISD::EH_RETURN: return "X86ISD::EH_RETURN";
@@ -9570,6 +10704,7 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
case X86ISD::OR: return "X86ISD::OR";
case X86ISD::XOR: return "X86ISD::XOR";
case X86ISD::AND: return "X86ISD::AND";
+ case X86ISD::ANDN: return "X86ISD::ANDN";
case X86ISD::MUL_IMM: return "X86ISD::MUL_IMM";
case X86ISD::PTEST: return "X86ISD::PTEST";
case X86ISD::TESTP: return "X86ISD::TESTP";
@@ -9596,9 +10731,6 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
case X86ISD::MOVSS: return "X86ISD::MOVSS";
case X86ISD::UNPCKLPS: return "X86ISD::UNPCKLPS";
case X86ISD::UNPCKLPD: return "X86ISD::UNPCKLPD";
- case X86ISD::VUNPCKLPS: return "X86ISD::VUNPCKLPS";
- case X86ISD::VUNPCKLPD: return "X86ISD::VUNPCKLPD";
- case X86ISD::VUNPCKLPSY: return "X86ISD::VUNPCKLPSY";
case X86ISD::VUNPCKLPDY: return "X86ISD::VUNPCKLPDY";
case X86ISD::UNPCKHPS: return "X86ISD::UNPCKHPS";
case X86ISD::UNPCKHPD: return "X86ISD::UNPCKHPD";
@@ -9610,16 +10742,24 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
case X86ISD::PUNPCKHWD: return "X86ISD::PUNPCKHWD";
case X86ISD::PUNPCKHDQ: return "X86ISD::PUNPCKHDQ";
case X86ISD::PUNPCKHQDQ: return "X86ISD::PUNPCKHQDQ";
+ case X86ISD::VBROADCAST: return "X86ISD::VBROADCAST";
+ case X86ISD::VPERMILPS: return "X86ISD::VPERMILPS";
+ case X86ISD::VPERMILPSY: return "X86ISD::VPERMILPSY";
+ case X86ISD::VPERMILPD: return "X86ISD::VPERMILPD";
+ case X86ISD::VPERMILPDY: return "X86ISD::VPERMILPDY";
+ case X86ISD::VPERM2F128: return "X86ISD::VPERM2F128";
case X86ISD::VASTART_SAVE_XMM_REGS: return "X86ISD::VASTART_SAVE_XMM_REGS";
case X86ISD::VAARG_64: return "X86ISD::VAARG_64";
case X86ISD::WIN_ALLOCA: return "X86ISD::WIN_ALLOCA";
+ case X86ISD::MEMBARRIER: return "X86ISD::MEMBARRIER";
+ case X86ISD::SEG_ALLOCA: return "X86ISD::SEG_ALLOCA";
}
}
// isLegalAddressingMode - Return true if the addressing mode represented
// by AM is legal for this target, for a load/store of the specified type.
bool X86TargetLowering::isLegalAddressingMode(const AddrMode &AM,
- const Type *Ty) const {
+ Type *Ty) const {
// X86 supports extremely general addressing modes.
CodeModel::Model M = getTargetMachine().getCodeModel();
Reloc::Model R = getTargetMachine().getRelocationModel();
@@ -9671,7 +10811,7 @@ bool X86TargetLowering::isLegalAddressingMode(const AddrMode &AM,
}
-bool X86TargetLowering::isTruncateFree(const Type *Ty1, const Type *Ty2) const {
+bool X86TargetLowering::isTruncateFree(Type *Ty1, Type *Ty2) const {
if (!Ty1->isIntegerTy() || !Ty2->isIntegerTy())
return false;
unsigned NumBits1 = Ty1->getPrimitiveSizeInBits();
@@ -9691,7 +10831,7 @@ bool X86TargetLowering::isTruncateFree(EVT VT1, EVT VT2) const {
return true;
}
-bool X86TargetLowering::isZExtFree(const Type *Ty1, const Type *Ty2) const {
+bool X86TargetLowering::isZExtFree(Type *Ty1, Type *Ty2) const {
// x86-64 implicitly zero-extends 32-bit results in 64-bit registers.
return Ty1->isIntegerTy(32) && Ty2->isIntegerTy(64) && Subtarget->is64Bit();
}
@@ -9715,7 +10855,7 @@ X86TargetLowering::isShuffleMaskLegal(const SmallVectorImpl<int> &M,
EVT VT) const {
// Very little shuffling can be done for 64-bit vectors right now.
if (VT.getSizeInBits() == 64)
- return isPALIGNRMask(M, VT, Subtarget->hasSSSE3());
+ return isPALIGNRMask(M, VT, Subtarget->hasSSSE3() || Subtarget->hasAVX());
// FIXME: pshufb, blends, shifts.
return (VT.getVectorNumElements() == 2 ||
@@ -9725,7 +10865,7 @@ X86TargetLowering::isShuffleMaskLegal(const SmallVectorImpl<int> &M,
isPSHUFDMask(M, VT) ||
isPSHUFHWMask(M, VT) ||
isPSHUFLWMask(M, VT) ||
- isPALIGNRMask(M, VT, Subtarget->hasSSSE3()) ||
+ isPALIGNRMask(M, VT, Subtarget->hasSSSE3() || Subtarget->hasAVX()) ||
isUNPCKLMask(M, VT) ||
isUNPCKHMask(M, VT) ||
isUNPCKL_v_undef_Mask(M, VT) ||
@@ -10158,7 +11298,9 @@ X86TargetLowering::EmitPCMP(MachineInstr *MI, MachineBasicBlock *BB,
if (!(Op.isReg() && Op.isImplicit()))
MIB.addOperand(Op);
}
- BuildMI(*BB, MI, dl, TII->get(X86::MOVAPSrr), MI->getOperand(0).getReg())
+ BuildMI(*BB, MI, dl,
+ TII->get(Subtarget->hasAVX() ? X86::VMOVAPSrr : X86::MOVAPSrr),
+ MI->getOperand(0).getReg())
.addReg(X86::XMM0);
MI->eraseFromParent();
@@ -10513,6 +11655,7 @@ X86TargetLowering::EmitVAStartSaveXMMRegsWithCustomInserter(
MBB->addSuccessor(EndMBB);
}
+ unsigned MOVOpc = Subtarget->hasAVX() ? X86::VMOVAPSmr : X86::MOVAPSmr;
// In the XMM save block, save all the XMM argument registers.
for (int i = 3, e = MI->getNumOperands(); i != e; ++i) {
int64_t Offset = (i - 3) * 16 + VarArgsFPOffset;
@@ -10521,7 +11664,7 @@ X86TargetLowering::EmitVAStartSaveXMMRegsWithCustomInserter(
MachinePointerInfo::getFixedStack(RegSaveFrameIndex, Offset),
MachineMemOperand::MOStore,
/*Size=*/16, /*Align=*/16);
- BuildMI(XMMSaveMBB, DL, TII->get(X86::MOVAPSmr))
+ BuildMI(XMMSaveMBB, DL, TII->get(MOVOpc))
.addFrameIndex(RegSaveFrameIndex)
.addImm(/*Scale=*/1)
.addReg(/*IndexReg=*/0)
@@ -10565,17 +11708,9 @@ X86TargetLowering::EmitLoweredSelect(MachineInstr *MI,
// If the EFLAGS register isn't dead in the terminator, then claim that it's
// live into the sink and copy blocks.
- const MachineFunction *MF = BB->getParent();
- const TargetRegisterInfo *TRI = MF->getTarget().getRegisterInfo();
- BitVector ReservedRegs = TRI->getReservedRegs(*MF);
-
- for (unsigned I = 0, E = MI->getNumOperands(); I != E; ++I) {
- const MachineOperand &MO = MI->getOperand(I);
- if (!MO.isReg() || !MO.isUse() || MO.isKill()) continue;
- unsigned Reg = MO.getReg();
- if (Reg != X86::EFLAGS) continue;
- copy0MBB->addLiveIn(Reg);
- sinkMBB->addLiveIn(Reg);
+ if (!MI->killsRegister(X86::EFLAGS)) {
+ copy0MBB->addLiveIn(X86::EFLAGS);
+ sinkMBB->addLiveIn(X86::EFLAGS);
}
// Transfer the remainder of BB and its successor edges to sinkMBB.
@@ -10611,6 +11746,119 @@ X86TargetLowering::EmitLoweredSelect(MachineInstr *MI,
}
MachineBasicBlock *
+X86TargetLowering::EmitLoweredSegAlloca(MachineInstr *MI, MachineBasicBlock *BB,
+ bool Is64Bit) const {
+ const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+ DebugLoc DL = MI->getDebugLoc();
+ MachineFunction *MF = BB->getParent();
+ const BasicBlock *LLVM_BB = BB->getBasicBlock();
+
+ assert(EnableSegmentedStacks);
+
+ unsigned TlsReg = Is64Bit ? X86::FS : X86::GS;
+ unsigned TlsOffset = Is64Bit ? 0x70 : 0x30;
+
+ // BB:
+ // ... [Till the alloca]
+ // If stacklet is not large enough, jump to mallocMBB
+ //
+ // bumpMBB:
+ // Allocate by subtracting from RSP
+ // Jump to continueMBB
+ //
+ // mallocMBB:
+ // Allocate by call to runtime
+ //
+ // continueMBB:
+ // ...
+ // [rest of original BB]
+ //
+
+ MachineBasicBlock *mallocMBB = MF->CreateMachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *bumpMBB = MF->CreateMachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *continueMBB = MF->CreateMachineBasicBlock(LLVM_BB);
+
+ MachineRegisterInfo &MRI = MF->getRegInfo();
+ const TargetRegisterClass *AddrRegClass =
+ getRegClassFor(Is64Bit ? MVT::i64:MVT::i32);
+
+ unsigned mallocPtrVReg = MRI.createVirtualRegister(AddrRegClass),
+ bumpSPPtrVReg = MRI.createVirtualRegister(AddrRegClass),
+ tmpSPVReg = MRI.createVirtualRegister(AddrRegClass),
+ sizeVReg = MI->getOperand(1).getReg(),
+ physSPReg = Is64Bit ? X86::RSP : X86::ESP;
+
+ MachineFunction::iterator MBBIter = BB;
+ ++MBBIter;
+
+ MF->insert(MBBIter, bumpMBB);
+ MF->insert(MBBIter, mallocMBB);
+ MF->insert(MBBIter, continueMBB);
+
+ continueMBB->splice(continueMBB->begin(), BB, llvm::next
+ (MachineBasicBlock::iterator(MI)), BB->end());
+ continueMBB->transferSuccessorsAndUpdatePHIs(BB);
+
+ // Add code to the main basic block to check if the stack limit has been hit,
+ // and if so, jump to mallocMBB otherwise to bumpMBB.
+ BuildMI(BB, DL, TII->get(TargetOpcode::COPY), tmpSPVReg).addReg(physSPReg);
+ BuildMI(BB, DL, TII->get(Is64Bit ? X86::SUB64rr:X86::SUB32rr), tmpSPVReg)
+ .addReg(tmpSPVReg).addReg(sizeVReg);
+ BuildMI(BB, DL, TII->get(Is64Bit ? X86::CMP64mr:X86::CMP32mr))
+ .addReg(0).addImm(0).addReg(0).addImm(TlsOffset).addReg(TlsReg)
+ .addReg(tmpSPVReg);
+ BuildMI(BB, DL, TII->get(X86::JG_4)).addMBB(mallocMBB);
+
+ // bumpMBB simply decreases the stack pointer, since we know the current
+ // stacklet has enough space.
+ BuildMI(bumpMBB, DL, TII->get(TargetOpcode::COPY), physSPReg)
+ .addReg(tmpSPVReg);
+ BuildMI(bumpMBB, DL, TII->get(TargetOpcode::COPY), bumpSPPtrVReg)
+ .addReg(tmpSPVReg);
+ BuildMI(bumpMBB, DL, TII->get(X86::JMP_4)).addMBB(continueMBB);
+
+ // Calls into a routine in libgcc to allocate more space from the heap.
+ if (Is64Bit) {
+ BuildMI(mallocMBB, DL, TII->get(X86::MOV64rr), X86::RDI)
+ .addReg(sizeVReg);
+ BuildMI(mallocMBB, DL, TII->get(X86::CALL64pcrel32))
+ .addExternalSymbol("__morestack_allocate_stack_space").addReg(X86::RDI);
+ } else {
+ BuildMI(mallocMBB, DL, TII->get(X86::SUB32ri), physSPReg).addReg(physSPReg)
+ .addImm(12);
+ BuildMI(mallocMBB, DL, TII->get(X86::PUSH32r)).addReg(sizeVReg);
+ BuildMI(mallocMBB, DL, TII->get(X86::CALLpcrel32))
+ .addExternalSymbol("__morestack_allocate_stack_space");
+ }
+
+ if (!Is64Bit)
+ BuildMI(mallocMBB, DL, TII->get(X86::ADD32ri), physSPReg).addReg(physSPReg)
+ .addImm(16);
+
+ BuildMI(mallocMBB, DL, TII->get(TargetOpcode::COPY), mallocPtrVReg)
+ .addReg(Is64Bit ? X86::RAX : X86::EAX);
+ BuildMI(mallocMBB, DL, TII->get(X86::JMP_4)).addMBB(continueMBB);
+
+ // Set up the CFG correctly.
+ BB->addSuccessor(bumpMBB);
+ BB->addSuccessor(mallocMBB);
+ mallocMBB->addSuccessor(continueMBB);
+ bumpMBB->addSuccessor(continueMBB);
+
+ // Take care of the PHI nodes.
+ BuildMI(*continueMBB, continueMBB->begin(), DL, TII->get(X86::PHI),
+ MI->getOperand(0).getReg())
+ .addReg(mallocPtrVReg).addMBB(mallocMBB)
+ .addReg(bumpSPPtrVReg).addMBB(bumpMBB);
+
+ // Delete the original pseudo instruction.
+ MI->eraseFromParent();
+
+ // And we're done.
+ return continueMBB;
+}
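
Taken together, the blocks built above amount to the following check. This is a minimal C-style sketch of the lowered control flow, not the compiler code itself; read_sp, write_sp, and stacklet_limit are hypothetical helpers, with stacklet_limit standing for the TLS slot at %fs:0x70 (64-bit) or %gs:0x30 (32-bit):

    // Scalar model of the SEG_ALLOCA lowering (signed compare, as in JG above).
    char *seg_alloca(size_t Size) {
      char *NewSP = read_sp() - Size;       // tmpSPVReg = SP - sizeVReg
      if (NewSP >= stacklet_limit()) {      // CMP tls[TlsOffset], NewSP
        write_sp(NewSP);                    // bumpMBB: just bump the SP
        return NewSP;
      }
      return __morestack_allocate_stack_space(Size);  // mallocMBB
    }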
+
+MachineBasicBlock *
X86TargetLowering::EmitLoweredWinAlloca(MachineInstr *MI,
MachineBasicBlock *BB) const {
const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
@@ -10718,11 +11966,11 @@ MachineBasicBlock *
X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
MachineBasicBlock *BB) const {
switch (MI->getOpcode()) {
- default: assert(false && "Unexpected instr type to insert");
+ default: assert(0 && "Unexpected instr type to insert");
case X86::TAILJMPd64:
case X86::TAILJMPr64:
case X86::TAILJMPm64:
- assert(!"TAILJMP64 would not be touched here.");
+ assert(0 && "TAILJMP64 would not be touched here.");
case X86::TCRETURNdi64:
case X86::TCRETURNri64:
case X86::TCRETURNmi64:
@@ -10745,6 +11993,10 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
return BB;
case X86::WIN_ALLOCA:
return EmitLoweredWinAlloca(MI, BB);
+ case X86::SEG_ALLOCA_32:
+ return EmitLoweredSegAlloca(MI, BB, false);
+ case X86::SEG_ALLOCA_64:
+ return EmitLoweredSegAlloca(MI, BB, true);
case X86::TLSCall_32:
case X86::TLSCall_64:
return EmitLoweredTLSCall(MI, BB);
@@ -10754,6 +12006,9 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
case X86::CMOV_V4F32:
case X86::CMOV_V2F64:
case X86::CMOV_V2I64:
+ case X86::CMOV_V8F32:
+ case X86::CMOV_V4F64:
+ case X86::CMOV_V4I64:
case X86::CMOV_GR16:
case X86::CMOV_GR32:
case X86::CMOV_RFP32:
@@ -11074,6 +12329,33 @@ void X86TargetLowering::computeMaskedBitsForTargetNode(const SDValue Op,
KnownZero |= APInt::getHighBitsSet(Mask.getBitWidth(),
Mask.getBitWidth() - 1);
break;
+ case ISD::INTRINSIC_WO_CHAIN: {
+ unsigned IntId = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
+ unsigned NumLoBits = 0;
+ switch (IntId) {
+ default: break;
+ case Intrinsic::x86_sse_movmsk_ps:
+ case Intrinsic::x86_avx_movmsk_ps_256:
+ case Intrinsic::x86_sse2_movmsk_pd:
+ case Intrinsic::x86_avx_movmsk_pd_256:
+ case Intrinsic::x86_mmx_pmovmskb:
+ case Intrinsic::x86_sse2_pmovmskb_128: {
+ // High bits of movmskp{s|d}, pmovmskb are known zero.
+ switch (IntId) {
+ case Intrinsic::x86_sse_movmsk_ps: NumLoBits = 4; break;
+ case Intrinsic::x86_avx_movmsk_ps_256: NumLoBits = 8; break;
+ case Intrinsic::x86_sse2_movmsk_pd: NumLoBits = 2; break;
+ case Intrinsic::x86_avx_movmsk_pd_256: NumLoBits = 4; break;
+ case Intrinsic::x86_mmx_pmovmskb: NumLoBits = 8; break;
+ case Intrinsic::x86_sse2_pmovmskb_128: NumLoBits = 16; break;
+ }
+ KnownZero = APInt::getHighBitsSet(Mask.getBitWidth(),
+ Mask.getBitWidth() - NumLoBits);
+ break;
+ }
+ }
+ break;
+ }
}
}
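
For instance, movmskps on a 128-bit vector yields only a 4-bit mask, so with a 32-bit result the upper 28 bits are provably zero. A sketch of what the case above computes for that intrinsic:

    // x86_sse_movmsk_ps: NumLoBits == 4, so for a 32-bit mask width
    APInt Zero = APInt::getHighBitsSet(/*numBits=*/32, /*hiBitsSet=*/32 - 4);
    // Zero == 0xFFFFFFF0: only the low four result bits can be nonzero.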
@@ -11102,23 +12384,132 @@ bool X86TargetLowering::isGAPlusOffset(SDNode *N,
return TargetLowering::isGAPlusOffset(N, GA, Offset);
}
-/// PerformShuffleCombine - Combine a vector_shuffle that is equal to
-/// build_vector load1, load2, load3, load4, <0, 1, 2, 3> into a 128-bit load
-/// if the load addresses are consecutive, non-overlapping, and in the right
-/// order.
+/// isShuffleHigh128VectorInsertLow - Checks whether the shuffle node is the
+/// same as extracting the high 128-bit part of a 256-bit vector and then
+/// inserting the result into the low part of a new 256-bit vector.
+static bool isShuffleHigh128VectorInsertLow(ShuffleVectorSDNode *SVOp) {
+ EVT VT = SVOp->getValueType(0);
+ int NumElems = VT.getVectorNumElements();
+
+ // vector_shuffle <4, 5, 6, 7, u, u, u, u> or <2, 3, u, u>
+ for (int i = 0, j = NumElems/2; i < NumElems/2; ++i, ++j)
+ if (!isUndefOrEqual(SVOp->getMaskElt(i), j) ||
+ SVOp->getMaskElt(j) >= 0)
+ return false;
+
+ return true;
+}
+
+/// isShuffleLow128VectorInsertHigh - Checks whether the shuffle node is the
+/// same as extracting the low 128-bit part of a 256-bit vector and then
+/// inserting the result into the high part of a new 256-bit vector.
+static bool isShuffleLow128VectorInsertHigh(ShuffleVectorSDNode *SVOp) {
+ EVT VT = SVOp->getValueType(0);
+ int NumElems = VT.getVectorNumElements();
+
+ // vector_shuffle <u, u, u, u, 0, 1, 2, 3> or <u, u, 0, 1>
+ for (int i = NumElems/2, j = 0; i < NumElems; ++i, ++j)
+ if (!isUndefOrEqual(SVOp->getMaskElt(i), j) ||
+ SVOp->getMaskElt(j) >= 0)
+ return false;
+
+ return true;
+}
+
+/// PerformShuffleCombine256 - Performs shuffle combines for 256-bit vectors.
+static SDValue PerformShuffleCombine256(SDNode *N, SelectionDAG &DAG,
+ TargetLowering::DAGCombinerInfo &DCI) {
+ DebugLoc dl = N->getDebugLoc();
+ ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
+ SDValue V1 = SVOp->getOperand(0);
+ SDValue V2 = SVOp->getOperand(1);
+ EVT VT = SVOp->getValueType(0);
+ int NumElems = VT.getVectorNumElements();
+
+ if (V1.getOpcode() == ISD::CONCAT_VECTORS &&
+ V2.getOpcode() == ISD::CONCAT_VECTORS) {
+ //
+ //                  0,0,0,...
+ //                     |
+ //    V      UNDEF    BUILD_VECTOR    UNDEF
+ //     \      /           \           /
+ //  CONCAT_VECTOR      CONCAT_VECTOR
+ //          \                /
+ //           \              /
+ //            RESULT: V + zero extended
+ //
+ if (V2.getOperand(0).getOpcode() != ISD::BUILD_VECTOR ||
+ V2.getOperand(1).getOpcode() != ISD::UNDEF ||
+ V1.getOperand(1).getOpcode() != ISD::UNDEF)
+ return SDValue();
+
+ if (!ISD::isBuildVectorAllZeros(V2.getOperand(0).getNode()))
+ return SDValue();
+
+ // To match the shuffle mask, the first half of the mask should
+ // be exactly the first vector, and all the rest a splat with the
+ // first element of the second one.
+ for (int i = 0; i < NumElems/2; ++i)
+ if (!isUndefOrEqual(SVOp->getMaskElt(i), i) ||
+ !isUndefOrEqual(SVOp->getMaskElt(i+NumElems/2), NumElems))
+ return SDValue();
+
+ // Emit a zeroed vector and insert the desired subvector on its
+ // first half.
+ SDValue Zeros = getZeroVector(VT, true /* HasXMMInt */, DAG, dl);
+ SDValue InsV = Insert128BitVector(Zeros, V1.getOperand(0),
+ DAG.getConstant(0, MVT::i32), DAG, dl);
+ return DCI.CombineTo(N, InsV);
+ }
+
+ //===--------------------------------------------------------------------===//
+ // Combine some shuffles into subvector extracts and inserts:
+ //
+
+ // vector_shuffle <4, 5, 6, 7, u, u, u, u> or <2, 3, u, u>
+ if (isShuffleHigh128VectorInsertLow(SVOp)) {
+ SDValue V = Extract128BitVector(V1, DAG.getConstant(NumElems/2, MVT::i32),
+ DAG, dl);
+ SDValue InsV = Insert128BitVector(DAG.getNode(ISD::UNDEF, dl, VT),
+ V, DAG.getConstant(0, MVT::i32), DAG, dl);
+ return DCI.CombineTo(N, InsV);
+ }
+
+ // vector_shuffle <u, u, u, u, 0, 1, 2, 3> or <u, u, 0, 1>
+ if (isShuffleLow128VectorInsertHigh(SVOp)) {
+ SDValue V = Extract128BitVector(V1, DAG.getConstant(0, MVT::i32), DAG, dl);
+ SDValue InsV = Insert128BitVector(DAG.getNode(ISD::UNDEF, dl, VT),
+ V, DAG.getConstant(NumElems/2, MVT::i32), DAG, dl);
+ return DCI.CombineTo(N, InsV);
+ }
+
+ return SDValue();
+}
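
As a concrete reading of the two mask checks used above, here is a scalar model for v8f32 (NumElems == 8): mask <4, 5, 6, 7, u, u, u, u> moves the high 128-bit half into the low half, which the combine re-expresses as an extract followed by an insert into UNDEF:

    // Scalar model of the isShuffleHigh128VectorInsertLow pattern on v8f32.
    void high_into_low(float Out[8], const float In[8]) {
      for (int i = 0; i != 4; ++i)
        Out[i] = In[4 + i];   // low 128 bits <- high 128 bits
      // Out[4..7] stay undef; the rewrite inserts into an UNDEF vector.
    }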
+
+/// PerformShuffleCombine - Performs several different shuffle combines.
static SDValue PerformShuffleCombine(SDNode *N, SelectionDAG &DAG,
- TargetLowering::DAGCombinerInfo &DCI) {
+ TargetLowering::DAGCombinerInfo &DCI,
+ const X86Subtarget *Subtarget) {
DebugLoc dl = N->getDebugLoc();
EVT VT = N->getValueType(0);
- if (VT.getSizeInBits() != 128)
- return SDValue();
-
// Don't create instructions with illegal types after legalize types has run.
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
if (!DCI.isBeforeLegalize() && !TLI.isTypeLegal(VT.getVectorElementType()))
return SDValue();
+ // Combine 256-bit vector shuffles. This is only profitable when in AVX mode
+ if (Subtarget->hasAVX() && VT.getSizeInBits() == 256 &&
+ N->getOpcode() == ISD::VECTOR_SHUFFLE)
+ return PerformShuffleCombine256(N, DAG, DCI);
+
+ // Only handle 128-bit wide vectors from here on.
+ if (VT.getSizeInBits() != 128)
+ return SDValue();
+
+ // Combine a vector_shuffle that is equal to build_vector load1, load2, load3,
+ // load4, <0, 1, 2, 3> into a 128-bit load if the load addresses are
+ // consecutive, non-overlapping, and in the right order.
SmallVector<SDValue, 16> Elts;
for (unsigned i = 0, e = VT.getVectorNumElements(); i != e; ++i)
Elts.push_back(getShuffleScalarElt(N, i, DAG, 0));
@@ -11209,7 +12600,8 @@ static SDValue PerformEXTRACT_VECTOR_ELTCombine(SDNode *N, SelectionDAG &DAG,
return SDValue();
}
-/// PerformSELECTCombine - Do target-specific dag combines on SELECT nodes.
+/// PerformSELECTCombine - Do target-specific dag combines on SELECT and VSELECT
+/// nodes.
static SDValue PerformSELECTCombine(SDNode *N, SelectionDAG &DAG,
const X86Subtarget *Subtarget) {
DebugLoc DL = N->getDebugLoc();
@@ -11217,14 +12609,16 @@ static SDValue PerformSELECTCombine(SDNode *N, SelectionDAG &DAG,
// Get the LHS/RHS of the select.
SDValue LHS = N->getOperand(1);
SDValue RHS = N->getOperand(2);
+ EVT VT = LHS.getValueType();
// If we have SSE[12] support, try to form min/max nodes. SSE min/max
// instructions match the semantics of the common C idiom x<y?x:y but not
// x<=y?x:y, because of how they handle negative zero (which can be
// ignored in unsafe-math mode).
- if (Subtarget->hasSSE2() &&
- (LHS.getValueType() == MVT::f32 || LHS.getValueType() == MVT::f64) &&
- Cond.getOpcode() == ISD::SETCC) {
+ if (Cond.getOpcode() == ISD::SETCC && VT.isFloatingPoint() &&
+ VT != MVT::f80 && DAG.getTargetLoweringInfo().isTypeLegal(VT) &&
+ (Subtarget->hasXMMInt() ||
+ (Subtarget->hasSSE1() && VT.getScalarType() == MVT::f32))) {
ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
unsigned Opcode = 0;
@@ -11267,7 +12661,7 @@ static SDValue PerformSELECTCombine(SDNode *N, SelectionDAG &DAG,
// Converting this to a max would handle comparisons between positive
// and negative zero incorrectly.
if (!UnsafeFPMath &&
- !DAG.isKnownNeverZero(LHS) && !DAG.isKnownNeverZero(LHS))
+ !DAG.isKnownNeverZero(LHS) && !DAG.isKnownNeverZero(RHS))
break;
Opcode = X86ISD::FMAX;
break;
@@ -11680,7 +13074,7 @@ static SDValue PerformShiftCombine(SDNode* N, SelectionDAG &DAG,
// all elements are shifted by the same amount. We can't do this in legalize
// because the constant vector is typically transformed to a constant pool
// so we have no knowledge of the shift amount.
- if (!Subtarget->hasSSE2())
+ if (!Subtarget->hasXMMInt())
return SDValue();
if (VT != MVT::v2i64 && VT != MVT::v4i32 && VT != MVT::v8i16)
@@ -11796,7 +13190,7 @@ static SDValue CMPEQCombine(SDNode *N, SelectionDAG &DAG,
// SSE1 supports CMP{eq|ne}SS, and SSE2 added CMP{eq|ne}SD, but
// we're requiring SSE2 for both.
- if (Subtarget->hasSSE2() && isAndOrOfSetCCs(SDValue(N, 0U), opcode)) {
+ if (Subtarget->hasXMMInt() && isAndOrOfSetCCs(SDValue(N, 0U), opcode)) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
SDValue CMP0 = N0->getOperand(1);
@@ -11864,6 +13258,36 @@ static SDValue CMPEQCombine(SDNode *N, SelectionDAG &DAG,
return SDValue();
}
+/// CanFoldXORWithAllOnes - Test whether the XOR operand is an AllOnes vector
+/// so it can be folded inside ANDNP.
+static bool CanFoldXORWithAllOnes(const SDNode *N) {
+ EVT VT = N->getValueType(0);
+
+ // Match direct AllOnes for 128 and 256-bit vectors
+ if (ISD::isBuildVectorAllOnes(N))
+ return true;
+
+ // Look through a bit convert.
+ if (N->getOpcode() == ISD::BITCAST)
+ N = N->getOperand(0).getNode();
+
+ // Sometimes the operand may come from an insert_subvector building a 256-bit
+ // allones vector.
+ if (VT.getSizeInBits() == 256 &&
+ N->getOpcode() == ISD::INSERT_SUBVECTOR) {
+ SDValue V1 = N->getOperand(0);
+ SDValue V2 = N->getOperand(1);
+
+ if (V1.getOpcode() == ISD::INSERT_SUBVECTOR &&
+ V1.getOperand(0).getOpcode() == ISD::UNDEF &&
+ ISD::isBuildVectorAllOnes(V1.getOperand(1).getNode()) &&
+ ISD::isBuildVectorAllOnes(V2.getNode()))
+ return true;
+ }
+
+ return false;
+}
+
static SDValue PerformAndCombine(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
const X86Subtarget *Subtarget) {
@@ -11874,11 +13298,28 @@ static SDValue PerformAndCombine(SDNode *N, SelectionDAG &DAG,
if (R.getNode())
return R;
+ EVT VT = N->getValueType(0);
+
+ // Create ANDN instructions
+ if (Subtarget->hasBMI() && (VT == MVT::i32 || VT == MVT::i64)) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ DebugLoc DL = N->getDebugLoc();
+
+ // Check LHS for not
+ if (N0.getOpcode() == ISD::XOR && isAllOnes(N0.getOperand(1)))
+ return DAG.getNode(X86ISD::ANDN, DL, VT, N0.getOperand(0), N1);
+ // Check RHS for not
+ if (N1.getOpcode() == ISD::XOR && isAllOnes(N1.getOperand(1)))
+ return DAG.getNode(X86ISD::ANDN, DL, VT, N1.getOperand(0), N0);
+
+ return SDValue();
+ }
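
The BMI path above targets the usual C idiom for clearing bits through a complement; a small illustrative example (assuming a BMI-capable target):

    #include <cstdint>
    // ~x & y appears in the DAG as (and (xor x, -1), y); the combine
    // folds it into a single X86ISD::ANDN node, i.e. one ANDN instruction.
    uint32_t clear_masked(uint32_t x, uint32_t y) { return ~x & y; }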
+
// Want to form ANDNP nodes:
// 1) In the hopes of then easily combining them with OR and AND nodes
// to form PBLEND/PSIGN.
// 2) To match ANDN packed intrinsics
- EVT VT = N->getValueType(0);
if (VT != MVT::v2i64 && VT != MVT::v4i64)
return SDValue();
@@ -11888,12 +13329,14 @@ static SDValue PerformAndCombine(SDNode *N, SelectionDAG &DAG,
// Check LHS for vnot
if (N0.getOpcode() == ISD::XOR &&
- ISD::isBuildVectorAllOnes(N0.getOperand(1).getNode()))
+ CanFoldXORWithAllOnes(N0.getOperand(1).getNode()))
return DAG.getNode(X86ISD::ANDNP, DL, VT, N0.getOperand(0), N1);
// Check RHS for vnot
if (N1.getOpcode() == ISD::XOR &&
- ISD::isBuildVectorAllOnes(N1.getOperand(1).getNode()))
+ CanFoldXORWithAllOnes(N1.getOperand(1).getNode()))
return DAG.getNode(X86ISD::ANDNP, DL, VT, N1.getOperand(0), N0);
return SDValue();
@@ -11917,7 +13360,7 @@ static SDValue PerformOrCombine(SDNode *N, SelectionDAG &DAG,
SDValue N1 = N->getOperand(1);
// look for psign/blend
- if (Subtarget->hasSSSE3()) {
+ if (Subtarget->hasSSSE3() || Subtarget->hasAVX()) {
if (VT == MVT::v2i64) {
// Canonicalize pandn to RHS
if (N0.getOpcode() == X86ISD::ANDNP)
@@ -11990,13 +13433,13 @@ static SDValue PerformOrCombine(SDNode *N, SelectionDAG &DAG,
}
}
// PBLENDVB only available on SSE 4.1
- if (!Subtarget->hasSSE41())
+ if (!(Subtarget->hasSSE41() || Subtarget->hasAVX()))
return SDValue();
X = DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, X);
Y = DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, Y);
Mask = DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, Mask);
- Mask = DAG.getNode(X86ISD::PBLENDVB, DL, MVT::v16i8, X, Y, Mask);
+ Mask = DAG.getNode(ISD::VSELECT, DL, MVT::v16i8, Mask, X, Y);
return DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, Mask);
}
}
@@ -12057,24 +13500,211 @@ static SDValue PerformOrCombine(SDNode *N, SelectionDAG &DAG,
return SDValue();
}
+/// PerformLOADCombine - Do target-specific dag combines on LOAD nodes.
+static SDValue PerformLOADCombine(SDNode *N, SelectionDAG &DAG,
+ const X86Subtarget *Subtarget) {
+ LoadSDNode *Ld = cast<LoadSDNode>(N);
+ EVT RegVT = Ld->getValueType(0);
+ EVT MemVT = Ld->getMemoryVT();
+ DebugLoc dl = Ld->getDebugLoc();
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+
+ ISD::LoadExtType Ext = Ld->getExtensionType();
+
+ // If this is a vector EXT Load then attempt to optimize it using a
+ // shuffle. We need SSE4 for the shuffles.
+ // TODO: It is possible to support ZExt by zeroing the undef values
+ // during the shuffle phase or after the shuffle.
+ if (RegVT.isVector() && Ext == ISD::EXTLOAD && Subtarget->hasSSE41()) {
+ assert(MemVT != RegVT && "Cannot extend to the same type");
+ assert(MemVT.isVector() && "Must load a vector from memory");
+
+ unsigned NumElems = RegVT.getVectorNumElements();
+ unsigned RegSz = RegVT.getSizeInBits();
+ unsigned MemSz = MemVT.getSizeInBits();
+ assert(RegSz > MemSz && "Register size must be greater than the mem size");
+ // All sizes must be a power of two
+ if (!isPowerOf2_32(RegSz * MemSz * NumElems)) return SDValue();
+
+ // Attempt to load the original value using a single load op.
+ // Find a scalar type which is equal to the loaded word size.
+ MVT SclrLoadTy = MVT::i8;
+ for (unsigned tp = MVT::FIRST_INTEGER_VALUETYPE;
+ tp < MVT::LAST_INTEGER_VALUETYPE; ++tp) {
+ MVT Tp = (MVT::SimpleValueType)tp;
+ if (TLI.isTypeLegal(Tp) && Tp.getSizeInBits() == MemSz) {
+ SclrLoadTy = Tp;
+ break;
+ }
+ }
+
+ // Proceed if a load word is found.
+ if (SclrLoadTy.getSizeInBits() != MemSz) return SDValue();
+
+ EVT LoadUnitVecVT = EVT::getVectorVT(*DAG.getContext(), SclrLoadTy,
+ RegSz/SclrLoadTy.getSizeInBits());
+
+ EVT WideVecVT = EVT::getVectorVT(*DAG.getContext(), MemVT.getScalarType(),
+ RegSz/MemVT.getScalarType().getSizeInBits());
+ // Can't shuffle using an illegal type.
+ if (!TLI.isTypeLegal(WideVecVT)) return SDValue();
+
+ // Perform a single load.
+ SDValue ScalarLoad = DAG.getLoad(SclrLoadTy, dl, Ld->getChain(),
+ Ld->getBasePtr(),
+ Ld->getPointerInfo(), Ld->isVolatile(),
+ Ld->isNonTemporal(), Ld->getAlignment());
+
+ // Insert the word loaded into a vector.
+ SDValue ScalarInVector = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl,
+ LoadUnitVecVT, ScalarLoad);
+
+ // Bitcast the loaded value to a vector of the original element type, in
+ // the size of the target vector type.
+ SDValue SlicedVec = DAG.getNode(ISD::BITCAST, dl, WideVecVT, ScalarInVector);
+ unsigned SizeRatio = RegSz/MemSz;
+
+ // Redistribute the loaded elements into the different locations.
+ SmallVector<int, 8> ShuffleVec(NumElems * SizeRatio, -1);
+ for (unsigned i = 0; i < NumElems; i++) ShuffleVec[i*SizeRatio] = i;
+
+ SDValue Shuff = DAG.getVectorShuffle(WideVecVT, dl, SlicedVec,
+ DAG.getUNDEF(SlicedVec.getValueType()),
+ ShuffleVec.data());
+
+ // Bitcast to the requested type.
+ Shuff = DAG.getNode(ISD::BITCAST, dl, RegVT, Shuff);
+ // Replace the original load with the new sequence
+ // and return the new chain.
+ DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Shuff);
+ return SDValue(ScalarLoad.getNode(), 1);
+ }
+
+ return SDValue();
+}
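
A rough scalar model of this rewrite for one case, an EXTLOAD of <4 x i8> widened to <4 x i32> (MemSz == 32, RegSz == 128, SizeRatio == 4). The function name is illustrative, the byte order assumes little-endian x86, and the zeroed lanes stand in for what are really undef lanes:

    #include <cstdint>
    #include <cstring>
    void extload_v4i8_to_v4i32(uint8_t Wide[16], const uint8_t *Src) {
      uint32_t Word;                   // SclrLoadTy == i32
      std::memcpy(&Word, Src, 4);      // the single scalar load
      std::memset(Wide, 0, 16);        // WideVecVT == v16i8
      for (int i = 0; i != 4; ++i)
        Wide[i * 4] = uint8_t(Word >> (8 * i));  // ShuffleVec[i*SizeRatio] = i
    }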
+
/// PerformSTORECombine - Do target-specific dag combines on STORE nodes.
static SDValue PerformSTORECombine(SDNode *N, SelectionDAG &DAG,
const X86Subtarget *Subtarget) {
+ StoreSDNode *St = cast<StoreSDNode>(N);
+ EVT VT = St->getValue().getValueType();
+ EVT StVT = St->getMemoryVT();
+ DebugLoc dl = St->getDebugLoc();
+ SDValue StoredVal = St->getOperand(1);
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+
+ // If we are saving a concatenation of two XMM registers, perform two stores.
+ // This is better on Sandy Bridge because one 256-bit mem op is done via two
+ // 128-bit ones. If in the future the cost becomes only one memory access, the
+ // first version would be better.
+ if (VT.getSizeInBits() == 256 &&
+ StoredVal.getNode()->getOpcode() == ISD::CONCAT_VECTORS &&
+ StoredVal.getNumOperands() == 2) {
+
+ SDValue Value0 = StoredVal.getOperand(0);
+ SDValue Value1 = StoredVal.getOperand(1);
+
+ SDValue Stride = DAG.getConstant(16, TLI.getPointerTy());
+ SDValue Ptr0 = St->getBasePtr();
+ SDValue Ptr1 = DAG.getNode(ISD::ADD, dl, Ptr0.getValueType(), Ptr0, Stride);
+
+ SDValue Ch0 = DAG.getStore(St->getChain(), dl, Value0, Ptr0,
+ St->getPointerInfo(), St->isVolatile(),
+ St->isNonTemporal(), St->getAlignment());
+ SDValue Ch1 = DAG.getStore(St->getChain(), dl, Value1, Ptr1,
+ St->getPointerInfo(), St->isVolatile(),
+ St->isNonTemporal(), St->getAlignment());
+ return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Ch0, Ch1);
+ }
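
For example, storing a <8 x float> that was built by concatenating two 128-bit values becomes the following pair of stores; the intrinsics are only an illustration, since the combine operates on the DAG:

    #include <immintrin.h>
    void store_two_halves(float *Ptr, __m128 Lo, __m128 Hi) {
      _mm_storeu_ps(Ptr,     Lo);   // Ch0: Value0 -> Ptr0
      _mm_storeu_ps(Ptr + 4, Hi);   // Ch1: Value1 -> Ptr0 + Stride (16 bytes)
    }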
+
+ // Optimize trunc store (of multiple scalars) to shuffle and store.
+ // First, pack all of the elements in one place. Next, store to memory
+ // in fewer chunks.
+ if (St->isTruncatingStore() && VT.isVector()) {
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ unsigned NumElems = VT.getVectorNumElements();
+ assert(StVT != VT && "Cannot truncate to the same type");
+ unsigned FromSz = VT.getVectorElementType().getSizeInBits();
+ unsigned ToSz = StVT.getVectorElementType().getSizeInBits();
+
+ // From, To sizes and ElemCount must be pow of two
+ if (!isPowerOf2_32(NumElems * FromSz * ToSz)) return SDValue();
+ // We are going to use the original vector elt for storing.
+ // Accumulated smaller vector elements must be a multiple of the store size.
+ if (0 != (NumElems * FromSz) % ToSz) return SDValue();
+
+ unsigned SizeRatio = FromSz / ToSz;
+
+ assert(SizeRatio * NumElems * ToSz == VT.getSizeInBits());
+
+ // Create a type on which we perform the shuffle
+ EVT WideVecVT = EVT::getVectorVT(*DAG.getContext(),
+ StVT.getScalarType(), NumElems*SizeRatio);
+
+ assert(WideVecVT.getSizeInBits() == VT.getSizeInBits());
+
+ SDValue WideVec = DAG.getNode(ISD::BITCAST, dl, WideVecVT, St->getValue());
+ SmallVector<int, 8> ShuffleVec(NumElems * SizeRatio, -1);
+ for (unsigned i = 0; i < NumElems; i++ ) ShuffleVec[i] = i * SizeRatio;
+
+ // Can't shuffle using an illegal type
+ if (!TLI.isTypeLegal(WideVecVT)) return SDValue();
+
+ SDValue Shuff = DAG.getVectorShuffle(WideVecVT, dl, WideVec,
+ DAG.getUNDEF(WideVec.getValueType()),
+ ShuffleVec.data());
+ // At this point all of the data is stored at the bottom of the
+ // register. We now need to save it to mem.
+
+ // Find the largest store unit
+ MVT StoreType = MVT::i8;
+ for (unsigned tp = MVT::FIRST_INTEGER_VALUETYPE;
+ tp < MVT::LAST_INTEGER_VALUETYPE; ++tp) {
+ MVT Tp = (MVT::SimpleValueType)tp;
+ if (TLI.isTypeLegal(Tp) && StoreType.getSizeInBits() < NumElems * ToSz)
+ StoreType = Tp;
+ }
+
+ // Bitcast the original vector into a vector of store-size units
+ EVT StoreVecVT = EVT::getVectorVT(*DAG.getContext(),
+ StoreType, VT.getSizeInBits()/EVT(StoreType).getSizeInBits());
+ assert(StoreVecVT.getSizeInBits() == VT.getSizeInBits());
+ SDValue ShuffWide = DAG.getNode(ISD::BITCAST, dl, StoreVecVT, Shuff);
+ SmallVector<SDValue, 8> Chains;
+ SDValue Increment = DAG.getConstant(StoreType.getSizeInBits()/8,
+ TLI.getPointerTy());
+ SDValue Ptr = St->getBasePtr();
+
+ // Perform one or more big stores into memory.
+ for (unsigned i = 0; i < (ToSz*NumElems)/StoreType.getSizeInBits() ; i++) {
+ SDValue SubVec = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl,
+ StoreType, ShuffWide,
+ DAG.getIntPtrConstant(i));
+ SDValue Ch = DAG.getStore(St->getChain(), dl, SubVec, Ptr,
+ St->getPointerInfo(), St->isVolatile(),
+ St->isNonTemporal(), St->getAlignment());
+ Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, Increment);
+ Chains.push_back(Ch);
+ }
+
+ return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &Chains[0],
+ Chains.size());
+ }
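
A scalar model of one instance, a truncating store of <4 x i32> as <4 x i8> (FromSz == 32, ToSz == 8, SizeRatio == 4): the shuffle packs the four low bytes together so a single i32-sized store replaces four i8 stores. Names are illustrative:

    #include <cstdint>
    #include <cstring>
    void truncstore_v4i32_as_v4i8(uint8_t *Dst, const uint32_t Val[4]) {
      uint8_t Packed[4];
      for (int i = 0; i != 4; ++i)
        Packed[i] = uint8_t(Val[i]);   // ShuffleVec[i] = i * SizeRatio
      std::memcpy(Dst, Packed, 4);     // one store of StoreType == i32
    }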
+
+
// Turn load->store of MMX types into GPR load/stores. This avoids clobbering
// the FP state in cases where an emms may be missing.
// A preferable solution to the general problem is to figure out the right
// places to insert EMMS. This qualifies as a quick hack.
// Similarly, turn load->store of i64 into double load/stores in 32-bit mode.
- StoreSDNode *St = cast<StoreSDNode>(N);
- EVT VT = St->getValue().getValueType();
if (VT.getSizeInBits() != 64)
return SDValue();
const Function *F = DAG.getMachineFunction().getFunction();
bool NoImplicitFloatOps = F->hasFnAttr(Attribute::NoImplicitFloat);
bool F64IsLegal = !UseSoftFloat && !NoImplicitFloatOps
- && Subtarget->hasSSE2();
+ && Subtarget->hasXMMInt();
if ((VT.isVector() ||
(VT == MVT::i64 && F64IsLegal && !Subtarget->is64Bit())) &&
isa<LoadSDNode>(St->getValue()) &&
@@ -12172,6 +13802,150 @@ static SDValue PerformSTORECombine(SDNode *N, SelectionDAG &DAG,
return SDValue();
}
+/// isHorizontalBinOp - Return 'true' if this vector operation is "horizontal"
+/// and return the operands for the horizontal operation in LHS and RHS. A
+/// horizontal operation performs the binary operation on successive elements
+/// of its first operand, then on successive elements of its second operand,
+/// returning the resulting values in a vector. For example, if
+/// A = < float a0, float a1, float a2, float a3 >
+/// and
+/// B = < float b0, float b1, float b2, float b3 >
+/// then the result of doing a horizontal operation on A and B is
+/// A horizontal-op B = < a0 op a1, a2 op a3, b0 op b1, b2 op b3 >.
+/// In short, LHS and RHS are inspected to see if LHS op RHS is of the form
+/// A horizontal-op B, for some already available A and B, and if so then LHS is
+/// set to A, RHS to B, and the routine returns 'true'.
+/// Note that the binary operation should have the property that if one of the
+/// operands is UNDEF then the result is UNDEF.
+static bool isHorizontalBinOp(SDValue &LHS, SDValue &RHS, bool isCommutative) {
+ // Look for the following pattern: if
+ // A = < float a0, float a1, float a2, float a3 >
+ // B = < float b0, float b1, float b2, float b3 >
+ // and
+ // LHS = VECTOR_SHUFFLE A, B, <0, 2, 4, 6>
+ // RHS = VECTOR_SHUFFLE A, B, <1, 3, 5, 7>
+ // then LHS op RHS = < a0 op a1, a2 op a3, b0 op b1, b2 op b3 >
+ // which is A horizontal-op B.
+
+ // At least one of the operands should be a vector shuffle.
+ if (LHS.getOpcode() != ISD::VECTOR_SHUFFLE &&
+ RHS.getOpcode() != ISD::VECTOR_SHUFFLE)
+ return false;
+
+ EVT VT = LHS.getValueType();
+ unsigned N = VT.getVectorNumElements();
+
+ // View LHS in the form
+ // LHS = VECTOR_SHUFFLE A, B, LMask
+ // If LHS is not a shuffle then pretend it is the shuffle
+ // LHS = VECTOR_SHUFFLE LHS, undef, <0, 1, ..., N-1>
+ // NOTE: in what follows a default initialized SDValue represents an UNDEF of
+ // type VT.
+ SDValue A, B;
+ SmallVector<int, 8> LMask(N);
+ if (LHS.getOpcode() == ISD::VECTOR_SHUFFLE) {
+ if (LHS.getOperand(0).getOpcode() != ISD::UNDEF)
+ A = LHS.getOperand(0);
+ if (LHS.getOperand(1).getOpcode() != ISD::UNDEF)
+ B = LHS.getOperand(1);
+ cast<ShuffleVectorSDNode>(LHS.getNode())->getMask(LMask);
+ } else {
+ if (LHS.getOpcode() != ISD::UNDEF)
+ A = LHS;
+ for (unsigned i = 0; i != N; ++i)
+ LMask[i] = i;
+ }
+
+ // Likewise, view RHS in the form
+ // RHS = VECTOR_SHUFFLE C, D, RMask
+ SDValue C, D;
+ SmallVector<int, 8> RMask(N);
+ if (RHS.getOpcode() == ISD::VECTOR_SHUFFLE) {
+ if (RHS.getOperand(0).getOpcode() != ISD::UNDEF)
+ C = RHS.getOperand(0);
+ if (RHS.getOperand(1).getOpcode() != ISD::UNDEF)
+ D = RHS.getOperand(1);
+ cast<ShuffleVectorSDNode>(RHS.getNode())->getMask(RMask);
+ } else {
+ if (RHS.getOpcode() != ISD::UNDEF)
+ C = RHS;
+ for (unsigned i = 0; i != N; ++i)
+ RMask[i] = i;
+ }
+
+ // Check that the shuffles are both shuffling the same vectors.
+ if (!(A == C && B == D) && !(A == D && B == C))
+ return false;
+
+ // If everything is UNDEF then bail out: it would be better to fold to UNDEF.
+ if (!A.getNode() && !B.getNode())
+ return false;
+
+ // If A and B occur in reverse order in RHS, then "swap" them (which means
+ // rewriting the mask).
+ if (A != C)
+ for (unsigned i = 0; i != N; ++i) {
+ unsigned Idx = RMask[i];
+ if (Idx < N)
+ RMask[i] += N;
+ else if (Idx < 2*N)
+ RMask[i] -= N;
+ }
+
+ // At this point LHS and RHS are equivalent to
+ // LHS = VECTOR_SHUFFLE A, B, LMask
+ // RHS = VECTOR_SHUFFLE A, B, RMask
+ // Check that the masks correspond to performing a horizontal operation.
+ for (unsigned i = 0; i != N; ++i) {
+ unsigned LIdx = LMask[i], RIdx = RMask[i];
+
+ // Ignore any UNDEF components.
+ if (LIdx >= 2*N || RIdx >= 2*N || (!A.getNode() && (LIdx < N || RIdx < N))
+ || (!B.getNode() && (LIdx >= N || RIdx >= N)))
+ continue;
+
+ // Check that successive elements are being operated on. If not, this is
+ // not a horizontal operation.
+ if (!(LIdx == 2*i && RIdx == 2*i + 1) &&
+ !(isCommutative && LIdx == 2*i + 1 && RIdx == 2*i))
+ return false;
+ }
+
+ LHS = A.getNode() ? A : B; // If A is 'UNDEF', use B for it.
+ RHS = B.getNode() ? B : A; // If B is 'UNDEF', use A for it.
+ return true;
+}
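
Writing out the doc comment's definition for v4f32 makes the pattern concrete; this scalar model is what the FHADD node emitted below computes (it matches HADDPS):

    void fhadd_v4f32(float R[4], const float A[4], const float B[4]) {
      R[0] = A[0] + A[1];   // successive elements of the first operand
      R[1] = A[2] + A[3];
      R[2] = B[0] + B[1];   // then of the second operand
      R[3] = B[2] + B[3];
    }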
+
+/// PerformFADDCombine - Do target-specific dag combines on floating point adds.
+static SDValue PerformFADDCombine(SDNode *N, SelectionDAG &DAG,
+ const X86Subtarget *Subtarget) {
+ EVT VT = N->getValueType(0);
+ SDValue LHS = N->getOperand(0);
+ SDValue RHS = N->getOperand(1);
+
+ // Try to synthesize horizontal adds from adds of shuffles.
+ if ((Subtarget->hasSSE3() || Subtarget->hasAVX()) &&
+ (VT == MVT::v4f32 || VT == MVT::v2f64) &&
+ isHorizontalBinOp(LHS, RHS, true))
+ return DAG.getNode(X86ISD::FHADD, N->getDebugLoc(), VT, LHS, RHS);
+ return SDValue();
+}
+
+/// PerformFSUBCombine - Do target-specific dag combines on floating point subs.
+static SDValue PerformFSUBCombine(SDNode *N, SelectionDAG &DAG,
+ const X86Subtarget *Subtarget) {
+ EVT VT = N->getValueType(0);
+ SDValue LHS = N->getOperand(0);
+ SDValue RHS = N->getOperand(1);
+
+ // Try to synthesize horizontal subs from subs of shuffles.
+ if ((Subtarget->hasSSE3() || Subtarget->hasAVX()) &&
+ (VT == MVT::v4f32 || VT == MVT::v2f64) &&
+ isHorizontalBinOp(LHS, RHS, false))
+ return DAG.getNode(X86ISD::FHSUB, N->getDebugLoc(), VT, LHS, RHS);
+ return SDValue();
+}
+
/// PerformFORCombine - Do target-specific dag combines on X86ISD::FOR and
/// X86ISD::FXOR nodes.
static SDValue PerformFORCombine(SDNode *N, SelectionDAG &DAG) {
@@ -12326,7 +14100,7 @@ static SDValue PerformADCCombine(SDNode *N, SelectionDAG &DAG,
// (add Y, (setne X, 0)) -> sbb -1, Y
// (sub (sete X, 0), Y) -> sbb 0, Y
// (sub (setne X, 0), Y) -> adc -1, Y
-static SDValue OptimizeConditonalInDecrement(SDNode *N, SelectionDAG &DAG) {
+static SDValue OptimizeConditionalInDecrement(SDNode *N, SelectionDAG &DAG) {
DebugLoc DL = N->getDebugLoc();
// Look through ZExts.
@@ -12362,6 +14136,31 @@ static SDValue OptimizeConditonalInDecrement(SDNode *N, SelectionDAG &DAG) {
DAG.getConstant(0, OtherVal.getValueType()), NewCmp);
}
+static SDValue PerformSubCombine(SDNode *N, SelectionDAG &DAG) {
+ SDValue Op0 = N->getOperand(0);
+ SDValue Op1 = N->getOperand(1);
+
+ // X86 can't encode an immediate LHS of a sub. See if we can push the
+ // negation into a preceding instruction.
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op0)) {
+ // If the RHS of the sub is a XOR with one use and a constant, invert the
+ // immediate. Then add one to the LHS of the sub so we can turn
+ // X-Y -> X+~Y+1, saving one register.
+ if (Op1->hasOneUse() && Op1.getOpcode() == ISD::XOR &&
+ isa<ConstantSDNode>(Op1.getOperand(1))) {
+ APInt XorC = cast<ConstantSDNode>(Op1.getOperand(1))->getAPIntValue();
+ EVT VT = Op0.getValueType();
+ SDValue NewXor = DAG.getNode(ISD::XOR, Op1.getDebugLoc(), VT,
+ Op1.getOperand(0),
+ DAG.getConstant(~XorC, VT));
+ return DAG.getNode(ISD::ADD, N->getDebugLoc(), VT, NewXor,
+ DAG.getConstant(C->getAPIntValue()+1, VT));
+ }
+ }
+
+ return OptimizeConditionalInDecrement(N, DAG);
+}
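
A worked instance of the identity behind this rewrite, with C == 5 and XorC == 3: since ~(x ^ k) == x ^ ~k and -v == ~v + 1 in two's complement, 5 - (x ^ 3) == (x ^ ~3) + 6 for every x, and the immediate-LHS subtract disappears:

    #include <cstdint>
    uint32_t before(uint32_t x) { return 5u - (x ^ 3u); }
    uint32_t after (uint32_t x) { return (x ^ ~3u) + 6u; }  // NewXor plus C + 1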
+
SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
DAGCombinerInfo &DCI) const {
SelectionDAG &DAG = DCI.DAG;
@@ -12369,10 +14168,11 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
default: break;
case ISD::EXTRACT_VECTOR_ELT:
return PerformEXTRACT_VECTOR_ELTCombine(N, DAG, *this);
+ case ISD::VSELECT:
case ISD::SELECT: return PerformSELECTCombine(N, DAG, Subtarget);
case X86ISD::CMOV: return PerformCMOVCombine(N, DAG, DCI);
- case ISD::ADD:
- case ISD::SUB: return OptimizeConditonalInDecrement(N, DAG);
+ case ISD::ADD: return OptimizeConditionalInDecrement(N, DAG);
+ case ISD::SUB: return PerformSubCombine(N, DAG);
case X86ISD::ADC: return PerformADCCombine(N, DAG, DCI);
case ISD::MUL: return PerformMulCombine(N, DAG, DCI);
case ISD::SHL:
@@ -12380,8 +14180,11 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
case ISD::SRL: return PerformShiftCombine(N, DAG, Subtarget);
case ISD::AND: return PerformAndCombine(N, DAG, DCI, Subtarget);
case ISD::OR: return PerformOrCombine(N, DAG, DCI, Subtarget);
+ case ISD::LOAD: return PerformLOADCombine(N, DAG, Subtarget);
case ISD::STORE: return PerformSTORECombine(N, DAG, Subtarget);
case ISD::SINT_TO_FP: return PerformSINT_TO_FPCombine(N, DAG, this);
+ case ISD::FADD: return PerformFADDCombine(N, DAG, Subtarget);
+ case ISD::FSUB: return PerformFSUBCombine(N, DAG, Subtarget);
case X86ISD::FXOR:
case X86ISD::FOR: return PerformFORCombine(N, DAG);
case X86ISD::FAND: return PerformFANDCombine(N, DAG);
@@ -12398,14 +14201,14 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
case X86ISD::PUNPCKHQDQ:
case X86ISD::UNPCKHPS:
case X86ISD::UNPCKHPD:
+ case X86ISD::VUNPCKHPSY:
+ case X86ISD::VUNPCKHPDY:
case X86ISD::PUNPCKLBW:
case X86ISD::PUNPCKLWD:
case X86ISD::PUNPCKLDQ:
case X86ISD::PUNPCKLQDQ:
case X86ISD::UNPCKLPS:
case X86ISD::UNPCKLPD:
- case X86ISD::VUNPCKLPS:
- case X86ISD::VUNPCKLPD:
case X86ISD::VUNPCKLPSY:
case X86ISD::VUNPCKLPDY:
case X86ISD::MOVHLPS:
@@ -12415,7 +14218,12 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
case X86ISD::PSHUFLW:
case X86ISD::MOVSS:
case X86ISD::MOVSD:
- case ISD::VECTOR_SHUFFLE: return PerformShuffleCombine(N, DAG, DCI);
+ case X86ISD::VPERMILPS:
+ case X86ISD::VPERMILPSY:
+ case X86ISD::VPERMILPD:
+ case X86ISD::VPERMILPDY:
+ case X86ISD::VPERM2F128:
+ case ISD::VECTOR_SHUFFLE: return PerformShuffleCombine(N, DAG, DCI,Subtarget);
}
return SDValue();
@@ -12551,7 +14359,7 @@ bool X86TargetLowering::ExpandInlineAsm(CallInst *CI) const {
AsmPieces[1] == "${0:q}")) {
// No need to check constraints, nothing other than the equivalent of
// "=r,0" would be valid here.
- const IntegerType *Ty = dyn_cast<IntegerType>(CI->getType());
+ IntegerType *Ty = dyn_cast<IntegerType>(CI->getType());
if (!Ty || Ty->getBitWidth() % 16 != 0)
return false;
return IntrinsicLowering::LowerToByteSwap(CI);
@@ -12572,7 +14380,7 @@ bool X86TargetLowering::ExpandInlineAsm(CallInst *CI) const {
AsmPieces[1] == "~{dirflag}" &&
AsmPieces[2] == "~{flags}" &&
AsmPieces[3] == "~{fpsr}") {
- const IntegerType *Ty = dyn_cast<IntegerType>(CI->getType());
+ IntegerType *Ty = dyn_cast<IntegerType>(CI->getType());
if (!Ty || Ty->getBitWidth() % 16 != 0)
return false;
return IntrinsicLowering::LowerToByteSwap(CI);
@@ -12603,7 +14411,7 @@ bool X86TargetLowering::ExpandInlineAsm(CallInst *CI) const {
AsmPieces[1] == "~{dirflag}" &&
AsmPieces[2] == "~{flags}" &&
AsmPieces[3] == "~{fpsr}") {
- const IntegerType *Ty = dyn_cast<IntegerType>(CI->getType());
+ IntegerType *Ty = dyn_cast<IntegerType>(CI->getType());
if (!Ty || Ty->getBitWidth() % 16 != 0)
return false;
return IntrinsicLowering::LowerToByteSwap(CI);
@@ -12629,7 +14437,7 @@ bool X86TargetLowering::ExpandInlineAsm(CallInst *CI) const {
SplitString(AsmPieces[2], Words, " \t,");
if (Words.size() == 3 && Words[0] == "xchgl" && Words[1] == "%eax" &&
Words[2] == "%edx") {
- const IntegerType *Ty = dyn_cast<IntegerType>(CI->getType());
+ IntegerType *Ty = dyn_cast<IntegerType>(CI->getType());
if (!Ty || Ty->getBitWidth() % 16 != 0)
return false;
return IntrinsicLowering::LowerToByteSwap(CI);
@@ -12700,7 +14508,7 @@ TargetLowering::ConstraintWeight
// but allow it at the lowest weight.
if (CallOperandVal == NULL)
return CW_Default;
- const Type *type = CallOperandVal->getType();
+ Type *type = CallOperandVal->getType();
// Look at the constraint type.
switch (*constraint) {
default:
diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h
index b603678..342a5e6 100644
--- a/lib/Target/X86/X86ISelLowering.h
+++ b/lib/Target/X86/X86ISelLowering.h
@@ -175,8 +175,14 @@ namespace llvm {
/// PSIGNB/W/D - Copy integer sign.
PSIGNB, PSIGNW, PSIGND,
- /// PBLENDVB - Variable blend
- PBLENDVB,
+ /// BLEND family of opcodes
+ BLENDV,
+
+ /// FHADD - Floating point horizontal add.
+ FHADD,
+
+ /// FHSUB - Floating point horizontal sub.
+ FHSUB,
/// FMAX, FMIN - Floating point max and min.
///
@@ -222,6 +228,8 @@ namespace llvm {
ADD, SUB, ADC, SBB, SMUL,
INC, DEC, OR, XOR, AND,
+ ANDN, // ANDN - Bitwise AND NOT with FLAGS results.
+
UMUL, // LOW, HI, FLAGS = umul LHS, RHS
// MUL_IMM - X86 specific multiply by immediate.
@@ -257,12 +265,12 @@ namespace llvm {
MOVSS,
UNPCKLPS,
UNPCKLPD,
- VUNPCKLPS,
- VUNPCKLPD,
VUNPCKLPSY,
VUNPCKLPDY,
UNPCKHPS,
UNPCKHPD,
+ VUNPCKHPSY,
+ VUNPCKHPDY,
PUNPCKLBW,
PUNPCKLWD,
PUNPCKLDQ,
@@ -271,6 +279,12 @@ namespace llvm {
PUNPCKHWD,
PUNPCKHDQ,
PUNPCKHQDQ,
+ VPERMILPS,
+ VPERMILPSY,
+ VPERMILPD,
+ VPERMILPDY,
+ VPERM2F128,
+ VBROADCAST,
// VASTART_SAVE_XMM_REGS - Save xmm argument registers to the stack,
// according to %al. An operator is needed so that this can be expanded
@@ -280,6 +294,11 @@ namespace llvm {
// WIN_ALLOCA - Windows's _chkstk call to do stack probing.
WIN_ALLOCA,
+ // SEG_ALLOCA - For allocating variable amounts of stack space when using
+ // segmented stacks. Checks if the current stacklet has enough space, and
+ // falls back to heap allocation if not.
+ SEG_ALLOCA,
+
// Memory barrier
MEMBARRIER,
MFENCE,
@@ -297,9 +316,10 @@ namespace llvm {
ATOMNAND64_DAG,
ATOMSWAP64_DAG,
- // LCMPXCHG_DAG, LCMPXCHG8_DAG - Compare and swap.
+ // LCMPXCHG_DAG, LCMPXCHG8_DAG, LCMPXCHG16_DAG - Compare and swap.
LCMPXCHG_DAG,
LCMPXCHG8_DAG,
+ LCMPXCHG16_DAG,
// VZEXT_LOAD - Load, scalar_to_vector, and zero extend.
VZEXT_LOAD,
@@ -407,20 +427,16 @@ namespace llvm {
/// isMOVSHDUPMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a shuffle of elements that is suitable for input to MOVSHDUP.
- bool isMOVSHDUPMask(ShuffleVectorSDNode *N);
+ bool isMOVSHDUPMask(ShuffleVectorSDNode *N, const X86Subtarget *Subtarget);
/// isMOVSLDUPMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a shuffle of elements that is suitable for input to MOVSLDUP.
- bool isMOVSLDUPMask(ShuffleVectorSDNode *N);
+ bool isMOVSLDUPMask(ShuffleVectorSDNode *N, const X86Subtarget *Subtarget);
/// isMOVDDUPMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a shuffle of elements that is suitable for input to MOVDDUP.
bool isMOVDDUPMask(ShuffleVectorSDNode *N);
- /// isPALIGNRMask - Return true if the specified VECTOR_SHUFFLE operand
- /// specifies a shuffle of elements that is suitable for input to PALIGNR.
- bool isPALIGNRMask(ShuffleVectorSDNode *N);
-
/// isVEXTRACTF128Index - Return true if the specified
/// EXTRACT_SUBVECTOR operand specifies a vector extract that is
/// suitable for input to VEXTRACTF128.
@@ -505,7 +521,7 @@ namespace llvm {
/// function arguments in the caller parameter area. For X86, aggregates
/// that contain SSE vectors are placed at 16-byte boundaries while the rest
/// are at 4-byte boundaries.
- virtual unsigned getByValTypeAlignment(const Type *Ty) const;
+ virtual unsigned getByValTypeAlignment(Type *Ty) const;
/// getOptimalMemOpType - Returns the target specific optimal type for load
/// and store operations as a result of memset, memcpy, and memmove
@@ -564,8 +580,8 @@ namespace llvm {
/// DAG node.
virtual const char *getTargetNodeName(unsigned Opcode) const;
- /// getSetCCResultType - Return the ISD::SETCC ValueType
- virtual MVT::SimpleValueType getSetCCResultType(EVT VT) const;
+ /// getSetCCResultType - Return the value type to use for ISD::SETCC.
+ virtual EVT getSetCCResultType(EVT VT) const;
/// computeMaskedBitsForTargetNode - Determine which of the bits specified
/// in Mask are known to be either zero or one and return them in the
@@ -617,12 +633,12 @@ namespace llvm {
/// isLegalAddressingMode - Return true if the addressing mode represented
/// by AM is legal for this target, for a load/store of the specified type.
- virtual bool isLegalAddressingMode(const AddrMode &AM, const Type *Ty)const;
+ virtual bool isLegalAddressingMode(const AddrMode &AM, Type *Ty)const;
/// isTruncateFree - Return true if it's free to truncate a value of
/// type Ty1 to type Ty2. e.g. On x86 it's free to truncate a i32 value in
/// register EAX to i16 by referencing its sub-register AX.
- virtual bool isTruncateFree(const Type *Ty1, const Type *Ty2) const;
+ virtual bool isTruncateFree(Type *Ty1, Type *Ty2) const;
virtual bool isTruncateFree(EVT VT1, EVT VT2) const;
/// isZExtFree - Return true if any actual instruction that defines a
@@ -633,7 +649,7 @@ namespace llvm {
/// does not necessarily apply to truncate instructions. e.g. on x86-64,
/// all instructions that define 32-bit values implicit zero-extend the
/// result out to 64 bits.
- virtual bool isZExtFree(const Type *Ty1, const Type *Ty2) const;
+ virtual bool isZExtFree(Type *Ty1, Type *Ty2) const;
virtual bool isZExtFree(EVT VT1, EVT VT2) const;
/// isNarrowingProfitable - Return true if it's profitable to narrow
@@ -813,11 +829,14 @@ namespace llvm {
SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFRAME_TO_ARGS_OFFSET(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerEH_RETURN(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerTRAMPOLINE(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerINIT_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerADJUST_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFLT_ROUNDS_(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerCTLZ(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerCTTZ(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerMUL_V2I64(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerADD(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerSUB(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerMUL(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerShift(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerXALUO(SDValue Op, SelectionDAG &DAG) const;
@@ -825,6 +844,7 @@ namespace llvm {
SDValue LowerLOAD_SUB(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerREADCYCLECOUNTER(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerMEMBARRIER(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerATOMIC_FENCE(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerSIGN_EXTEND_INREG(SDValue Op, SelectionDAG &DAG) const;
// Utility functions to help LowerVECTOR_SHUFFLE
@@ -931,6 +951,10 @@ namespace llvm {
MachineBasicBlock *EmitLoweredWinAlloca(MachineInstr *MI,
MachineBasicBlock *BB) const;
+ MachineBasicBlock *EmitLoweredSegAlloca(MachineInstr *MI,
+ MachineBasicBlock *BB,
+ bool Is64Bit) const;
+
MachineBasicBlock *EmitLoweredTLSCall(MachineInstr *MI,
MachineBasicBlock *BB) const;
diff --git a/lib/Target/X86/X86InstrArithmetic.td b/lib/Target/X86/X86InstrArithmetic.td
index 9f7a4b0..74b647a 100644
--- a/lib/Target/X86/X86InstrArithmetic.td
+++ b/lib/Target/X86/X86InstrArithmetic.td
@@ -650,6 +650,15 @@ class BinOpRR_Rev<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo>
let isCodeGenOnly = 1;
}
+// BinOpRR_F_Rev - Instructions like "cmp reg, reg" (reversed encoding).
+class BinOpRR_F_Rev<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo>
+ : ITy<opcode, MRMSrcReg, typeinfo, (outs),
+ (ins typeinfo.RegClass:$src1, typeinfo.RegClass:$src2),
+ mnemonic, "{$src2, $src1|$src1, $src2}", []> {
+ // The disassembler should know about this, but not the asmparser.
+ let isCodeGenOnly = 1;
+}
+
// BinOpRM - Instructions like "add reg, reg, [mem]".
class BinOpRM<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
dag outlist, list<dag> pattern>
@@ -857,11 +866,10 @@ class BinOpMI8_F<string mnemonic, X86TypeInfo typeinfo,
// BinOpAI - Instructions like "add %eax, %eax, imm".
class BinOpAI<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
- Register areg>
+ Register areg, string operands>
: ITy<opcode, RawFrm, typeinfo,
(outs), (ins typeinfo.ImmOperand:$src),
- mnemonic, !strconcat("{$src, %", areg.AsmName, "|%",
- areg.AsmName, ", $src}"), []> {
+ mnemonic, operands, []> {
let ImmT = typeinfo.ImmEncoding;
let Uses = [areg];
let Defs = [areg];
@@ -926,10 +934,14 @@ multiclass ArithBinOp_RF<bits<8> BaseOpc, bits<8> BaseOpc2, bits<8> BaseOpc4,
def #NAME#32mi : BinOpMI_RMW<mnemonic, Xi32, opnode, MemMRM>;
def #NAME#64mi32 : BinOpMI_RMW<mnemonic, Xi64, opnode, MemMRM>;
- def #NAME#8i8 : BinOpAI<BaseOpc4, mnemonic, Xi8 , AL>;
- def #NAME#16i16 : BinOpAI<BaseOpc4, mnemonic, Xi16, AX>;
- def #NAME#32i32 : BinOpAI<BaseOpc4, mnemonic, Xi32, EAX>;
- def #NAME#64i32 : BinOpAI<BaseOpc4, mnemonic, Xi64, RAX>;
+ def #NAME#8i8 : BinOpAI<BaseOpc4, mnemonic, Xi8 , AL,
+ "{$src, %al|AL, $src}">;
+ def #NAME#16i16 : BinOpAI<BaseOpc4, mnemonic, Xi16, AX,
+ "{$src, %ax|AX, $src}">;
+ def #NAME#32i32 : BinOpAI<BaseOpc4, mnemonic, Xi32, EAX,
+ "{$src, %eax|EAX, $src}">;
+ def #NAME#64i32 : BinOpAI<BaseOpc4, mnemonic, Xi64, RAX,
+ "{$src, %rax|RAX, $src}">;
}
}
@@ -993,10 +1005,14 @@ multiclass ArithBinOp_RFF<bits<8> BaseOpc, bits<8> BaseOpc2, bits<8> BaseOpc4,
def #NAME#32mi : BinOpMI_RMW_FF<mnemonic, Xi32, opnode, MemMRM>;
def #NAME#64mi32 : BinOpMI_RMW_FF<mnemonic, Xi64, opnode, MemMRM>;
- def #NAME#8i8 : BinOpAI<BaseOpc4, mnemonic, Xi8 , AL>;
- def #NAME#16i16 : BinOpAI<BaseOpc4, mnemonic, Xi16, AX>;
- def #NAME#32i32 : BinOpAI<BaseOpc4, mnemonic, Xi32, EAX>;
- def #NAME#64i32 : BinOpAI<BaseOpc4, mnemonic, Xi64, RAX>;
+ def #NAME#8i8 : BinOpAI<BaseOpc4, mnemonic, Xi8 , AL,
+ "{$src, %al|AL, $src}">;
+ def #NAME#16i16 : BinOpAI<BaseOpc4, mnemonic, Xi16, AX,
+ "{$src, %ax|AX, $src}">;
+ def #NAME#32i32 : BinOpAI<BaseOpc4, mnemonic, Xi32, EAX,
+ "{$src, %eax|EAX, $src}">;
+ def #NAME#64i32 : BinOpAI<BaseOpc4, mnemonic, Xi64, RAX,
+ "{$src, %rax|RAX, $src}">;
}
}
@@ -1017,10 +1033,10 @@ multiclass ArithBinOp_F<bits<8> BaseOpc, bits<8> BaseOpc2, bits<8> BaseOpc4,
def #NAME#64rr : BinOpRR_F<BaseOpc, mnemonic, Xi64, opnode>;
} // isCommutable
- def #NAME#8rr_REV : BinOpRR_Rev<BaseOpc2, mnemonic, Xi8>;
- def #NAME#16rr_REV : BinOpRR_Rev<BaseOpc2, mnemonic, Xi16>;
- def #NAME#32rr_REV : BinOpRR_Rev<BaseOpc2, mnemonic, Xi32>;
- def #NAME#64rr_REV : BinOpRR_Rev<BaseOpc2, mnemonic, Xi64>;
+ def #NAME#8rr_REV : BinOpRR_F_Rev<BaseOpc2, mnemonic, Xi8>;
+ def #NAME#16rr_REV : BinOpRR_F_Rev<BaseOpc2, mnemonic, Xi16>;
+ def #NAME#32rr_REV : BinOpRR_F_Rev<BaseOpc2, mnemonic, Xi32>;
+ def #NAME#64rr_REV : BinOpRR_F_Rev<BaseOpc2, mnemonic, Xi64>;
def #NAME#8rm : BinOpRM_F<BaseOpc2, mnemonic, Xi8 , opnode>;
def #NAME#16rm : BinOpRM_F<BaseOpc2, mnemonic, Xi16, opnode>;
@@ -1056,10 +1072,14 @@ multiclass ArithBinOp_F<bits<8> BaseOpc, bits<8> BaseOpc2, bits<8> BaseOpc4,
def #NAME#32mi : BinOpMI_F<mnemonic, Xi32, opnode, MemMRM>;
def #NAME#64mi32 : BinOpMI_F<mnemonic, Xi64, opnode, MemMRM>;
- def #NAME#8i8 : BinOpAI<BaseOpc4, mnemonic, Xi8 , AL>;
- def #NAME#16i16 : BinOpAI<BaseOpc4, mnemonic, Xi16, AX>;
- def #NAME#32i32 : BinOpAI<BaseOpc4, mnemonic, Xi32, EAX>;
- def #NAME#64i32 : BinOpAI<BaseOpc4, mnemonic, Xi64, RAX>;
+ def #NAME#8i8 : BinOpAI<BaseOpc4, mnemonic, Xi8 , AL,
+ "{$src, %al|AL, $src}">;
+ def #NAME#16i16 : BinOpAI<BaseOpc4, mnemonic, Xi16, AX,
+ "{$src, %ax|AX, $src}">;
+ def #NAME#32i32 : BinOpAI<BaseOpc4, mnemonic, Xi32, EAX,
+ "{$src, %eax|EAX, $src}">;
+ def #NAME#64i32 : BinOpAI<BaseOpc4, mnemonic, Xi64, RAX,
+ "{$src, %rax|RAX, $src}">;
}
}
@@ -1117,9 +1137,37 @@ let Defs = [EFLAGS] in {
def TEST32mi : BinOpMI_F<"test", Xi32, X86testpat, MRM0m, 0xF6>;
def TEST64mi32 : BinOpMI_F<"test", Xi64, X86testpat, MRM0m, 0xF6>;
- def TEST8i8 : BinOpAI<0xA8, "test", Xi8 , AL>;
- def TEST16i16 : BinOpAI<0xA8, "test", Xi16, AX>;
- def TEST32i32 : BinOpAI<0xA8, "test", Xi32, EAX>;
- def TEST64i32 : BinOpAI<0xA8, "test", Xi64, RAX>;
-}
+ def TEST8i8 : BinOpAI<0xA8, "test", Xi8 , AL,
+ "{$src, %al|AL, $src}">;
+ def TEST16i16 : BinOpAI<0xA8, "test", Xi16, AX,
+ "{$src, %ax|AX, $src}">;
+ def TEST32i32 : BinOpAI<0xA8, "test", Xi32, EAX,
+ "{$src, %eax|EAX, $src}">;
+ def TEST64i32 : BinOpAI<0xA8, "test", Xi64, RAX,
+ "{$src, %rax|RAX, $src}">;
+
+ // When testing the result of EXTRACT_SUBREG sub_8bit_hi, make sure the
+ // register class is constrained to GR8_NOREX.
+ let isPseudo = 1 in
+ def TEST8ri_NOREX : I<0, Pseudo, (outs), (ins GR8_NOREX:$src, i8imm:$mask),
+ "", []>;
+}
+//===----------------------------------------------------------------------===//
+// ANDN Instruction
+//
+multiclass bmi_andn<string mnemonic, RegisterClass RC, X86MemOperand x86memop,
+ PatFrag ld_frag> {
+ def rr : I<0xF2, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2),
+ !strconcat(mnemonic, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set RC:$dst, EFLAGS, (X86andn_flag RC:$src1, RC:$src2))]>;
+ def rm : I<0xF2, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2),
+ !strconcat(mnemonic, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set RC:$dst, EFLAGS,
+ (X86andn_flag RC:$src1, (ld_frag addr:$src2)))]>;
+}
+
+let Predicates = [HasBMI], Defs = [EFLAGS] in {
+ defm ANDN32 : bmi_andn<"andn{l}", GR32, i32mem, loadi32>, T8, VEX_4V;
+ defm ANDN64 : bmi_andn<"andn{q}", GR64, i64mem, loadi64>, T8, VEX_4V, VEX_W;
+}
diff --git a/lib/Target/X86/X86InstrCompiler.td b/lib/Target/X86/X86InstrCompiler.td
index adcc747..da28690 100644
--- a/lib/Target/X86/X86InstrCompiler.td
+++ b/lib/Target/X86/X86InstrCompiler.td
@@ -106,6 +106,26 @@ let Defs = [EAX, ESP, EFLAGS], Uses = [ESP] in
def WIN_ALLOCA : I<0, Pseudo, (outs), (ins),
"# dynamic stack allocation",
[(X86WinAlloca)]>;
+
+// When using segmented stacks these are lowered into instructions which first
+// check if the current stacklet has enough free memory. If it does, memory is
+// allocated by bumping the stack pointer. Otherwise memory is allocated from
+// the heap.
+
+let Defs = [EAX, ESP, EFLAGS], Uses = [ESP, EAX] in
+def SEG_ALLOCA_32 : I<0, Pseudo, (outs GR32:$dst), (ins GR32:$size),
+ "# variable sized alloca for segmented stacks",
+ [(set GR32:$dst,
+ (X86SegAlloca GR32:$size))]>,
+ Requires<[In32BitMode]>;
+
+let Defs = [RAX, RSP, EFLAGS], Uses = [RSP, RAX] in
+def SEG_ALLOCA_64 : I<0, Pseudo, (outs GR64:$dst), (ins GR64:$size),
+ "# variable sized alloca for segmented stacks",
+ [(set GR64:$dst,
+ (X86SegAlloca GR64:$size))]>,
+ Requires<[In64BitMode]>;
+
}
@@ -329,18 +349,11 @@ def TLSCall_64 : I<0, Pseudo, (outs), (ins i64mem:$sym),
//===----------------------------------------------------------------------===//
// Conditional Move Pseudo Instructions
-let Constraints = "$src1 = $dst" in {
-
-// Conditional moves
-let Uses = [EFLAGS] in {
-
// X86 doesn't have 8-bit conditional moves. Use a customInserter to
// emit control flow. An alternative to this is to mark i8 SELECT as Promote,
// however that requires promoting the operands, and can induce additional
-// i8 register pressure. Note that CMOV_GR8 is conservatively considered to
-// clobber EFLAGS, because if one of the operands is zero, the expansion
-// could involve an xor.
-let usesCustomInserter = 1, Constraints = "", Defs = [EFLAGS] in {
+// i8 register pressure.
+let usesCustomInserter = 1, Uses = [EFLAGS] in {
def CMOV_GR8 : I<0, Pseudo,
(outs GR8:$dst), (ins GR8:$src1, GR8:$src2, i8imm:$cond),
"#CMOV_GR8 PSEUDO!",
@@ -380,10 +393,7 @@ def CMOV_RFP80 : I<0, Pseudo,
(X86cmov RFP80:$src1, RFP80:$src2, imm:$cond,
EFLAGS))]>;
} // Predicates = [NoCMov]
-} // UsesCustomInserter = 1, Constraints = "", Defs = [EFLAGS]
-} // Uses = [EFLAGS]
-
-} // Constraints = "$src1 = $dst" in
+} // UsesCustomInserter = 1, Uses = [EFLAGS]
//===----------------------------------------------------------------------===//
@@ -532,7 +542,7 @@ def OR32mrLocked : I<0x09, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$zero),
let hasSideEffects = 1 in
def Int_MemBarrier : I<0, Pseudo, (outs), (ins),
"#MEMBARRIER",
- [(X86MemBarrier)]>, Requires<[HasSSE2]>;
+ [(X86MemBarrier)]>;
// TODO: Get this to fold the constant into the instruction.
let hasSideEffects = 1, Defs = [ESP], isCodeGenOnly = 1 in
@@ -630,8 +640,8 @@ def #NAME#64mi8 : RIi8<{ImmOpc8{7}, ImmOpc8{6}, ImmOpc8{5}, ImmOpc8{4},
defm LOCK_ADD : LOCK_ArithBinOp<0x00, 0x80, 0x83, MRM0m, "add">;
defm LOCK_SUB : LOCK_ArithBinOp<0x28, 0x80, 0x83, MRM5m, "sub">;
defm LOCK_OR : LOCK_ArithBinOp<0x08, 0x80, 0x83, MRM1m, "or">;
-defm LOCK_AND : LOCK_ArithBinOp<0x08, 0x80, 0x83, MRM4m, "and">;
-defm LOCK_XOR : LOCK_ArithBinOp<0x08, 0x80, 0x83, MRM6m, "xor">;
+defm LOCK_AND : LOCK_ArithBinOp<0x20, 0x80, 0x83, MRM4m, "and">;
+defm LOCK_XOR : LOCK_ArithBinOp<0x30, 0x80, 0x83, MRM6m, "xor">;
// Optimized codegen when the non-memory output is not used.
let Defs = [EFLAGS], mayLoad = 1, mayStore = 1, isCodeGenOnly = 1 in {
@@ -665,12 +675,20 @@ def LOCK_DEC64m : RI<0xFF, MRM1m, (outs), (ins i64mem:$dst),
// Atomic compare and swap.
let Defs = [EAX, EDX, EFLAGS], Uses = [EAX, EBX, ECX, EDX],
- isCodeGenOnly = 1 in {
+ isCodeGenOnly = 1 in
def LCMPXCHG8B : I<0xC7, MRM1m, (outs), (ins i64mem:$ptr),
"lock\n\t"
"cmpxchg8b\t$ptr",
[(X86cas8 addr:$ptr)]>, TB, LOCK;
-}
+
+let Defs = [RAX, RDX, EFLAGS], Uses = [RAX, RBX, RCX, RDX],
+ isCodeGenOnly = 1 in
+def LCMPXCHG16B : RI<0xC7, MRM1m, (outs), (ins i128mem:$ptr),
+ "lock\n\t"
+ "cmpxchg16b\t$ptr",
+ [(X86cas16 addr:$ptr)]>, TB, LOCK,
+ Requires<[HasCmpxchg16b]>;
+
let Defs = [AL, EFLAGS], Uses = [AL], isCodeGenOnly = 1 in {
def LCMPXCHG8 : I<0xB0, MRMDestMem, (outs), (ins i8mem:$ptr, GR8:$swap),
"lock\n\t"
@@ -695,7 +713,7 @@ def LCMPXCHG32 : I<0xB1, MRMDestMem, (outs), (ins i32mem:$ptr, GR32:$swap),
let Defs = [RAX, EFLAGS], Uses = [RAX], isCodeGenOnly = 1 in {
def LCMPXCHG64 : RI<0xB1, MRMDestMem, (outs), (ins i64mem:$ptr, GR64:$swap),
"lock\n\t"
- "cmpxchgq\t$swap,$ptr",
+ "cmpxchg{q}\t{$swap, $ptr|$ptr, $swap}",
[(X86cas addr:$ptr, GR64:$swap, 8)]>, TB, LOCK;
}
@@ -718,11 +736,37 @@ def LXADD32 : I<0xC1, MRMSrcMem, (outs GR32:$dst), (ins GR32:$val, i32mem:$ptr),
TB, LOCK;
def LXADD64 : RI<0xC1, MRMSrcMem, (outs GR64:$dst), (ins GR64:$val,i64mem:$ptr),
"lock\n\t"
- "xadd\t$val, $ptr",
+ "xadd{q}\t{$val, $ptr|$ptr, $val}",
[(set GR64:$dst, (atomic_load_add_64 addr:$ptr, GR64:$val))]>,
TB, LOCK;
}
+def ACQUIRE_MOV8rm : I<0, Pseudo, (outs GR8 :$dst), (ins i8mem :$src),
+ "#ACQUIRE_MOV PSEUDO!",
+ [(set GR8:$dst, (atomic_load_8 addr:$src))]>;
+def ACQUIRE_MOV16rm : I<0, Pseudo, (outs GR16:$dst), (ins i16mem:$src),
+ "#ACQUIRE_MOV PSEUDO!",
+ [(set GR16:$dst, (atomic_load_16 addr:$src))]>;
+def ACQUIRE_MOV32rm : I<0, Pseudo, (outs GR32:$dst), (ins i32mem:$src),
+ "#ACQUIRE_MOV PSEUDO!",
+ [(set GR32:$dst, (atomic_load_32 addr:$src))]>;
+def ACQUIRE_MOV64rm : I<0, Pseudo, (outs GR64:$dst), (ins i64mem:$src),
+ "#ACQUIRE_MOV PSEUDO!",
+ [(set GR64:$dst, (atomic_load_64 addr:$src))]>;
+
+def RELEASE_MOV8mr : I<0, Pseudo, (outs), (ins i8mem :$dst, GR8 :$src),
+ "#RELEASE_MOV PSEUDO!",
+ [(atomic_store_8 addr:$dst, GR8 :$src)]>;
+def RELEASE_MOV16mr : I<0, Pseudo, (outs), (ins i16mem:$dst, GR16:$src),
+ "#RELEASE_MOV PSEUDO!",
+ [(atomic_store_16 addr:$dst, GR16:$src)]>;
+def RELEASE_MOV32mr : I<0, Pseudo, (outs), (ins i32mem:$dst, GR32:$src),
+ "#RELEASE_MOV PSEUDO!",
+ [(atomic_store_32 addr:$dst, GR32:$src)]>;
+def RELEASE_MOV64mr : I<0, Pseudo, (outs), (ins i64mem:$dst, GR64:$src),
+ "#RELEASE_MOV PSEUDO!",
+ [(atomic_store_64 addr:$dst, GR64:$src)]>;
+
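(These pseudos exist because x86's strong, TSO-like memory model lets an
acquire load and a release store be emitted as ordinary MOVs, with no fence.
A minimal C++ sketch of the operations they match:)

    #include <atomic>

    std::atomic<int> Flag;

    int ReadAcquire() {
      return Flag.load(std::memory_order_acquire);   // a plain MOV load on x86
    }

    void WriteRelease(int V) {
      Flag.store(V, std::memory_order_release);      // a plain MOV store on x86
    }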
//===----------------------------------------------------------------------===//
// Conditional Move Pseudo Instructions.
//===----------------------------------------------------------------------===//
@@ -759,6 +803,24 @@ let Uses = [EFLAGS], usesCustomInserter = 1 in {
[(set VR128:$dst,
(v2i64 (X86cmov VR128:$t, VR128:$f, imm:$cond,
EFLAGS)))]>;
+ def CMOV_V8F32 : I<0, Pseudo,
+ (outs VR256:$dst), (ins VR256:$t, VR256:$f, i8imm:$cond),
+ "#CMOV_V8F32 PSEUDO!",
+ [(set VR256:$dst,
+ (v8f32 (X86cmov VR256:$t, VR256:$f, imm:$cond,
+ EFLAGS)))]>;
+ def CMOV_V4F64 : I<0, Pseudo,
+ (outs VR256:$dst), (ins VR256:$t, VR256:$f, i8imm:$cond),
+ "#CMOV_V4F64 PSEUDO!",
+ [(set VR256:$dst,
+ (v4f64 (X86cmov VR256:$t, VR256:$f, imm:$cond,
+ EFLAGS)))]>;
+ def CMOV_V4I64 : I<0, Pseudo,
+ (outs VR256:$dst), (ins VR256:$t, VR256:$f, i8imm:$cond),
+ "#CMOV_V4I64 PSEUDO!",
+ [(set VR256:$dst,
+ (v4i64 (X86cmov VR256:$t, VR256:$f, imm:$cond,
+ EFLAGS)))]>;
}
diff --git a/lib/Target/X86/X86InstrExtension.td b/lib/Target/X86/X86InstrExtension.td
index 2e1d523..e62e6b7 100644
--- a/lib/Target/X86/X86InstrExtension.td
+++ b/lib/Target/X86/X86InstrExtension.td
@@ -76,12 +76,12 @@ def MOVZX32rm16: I<0xB7, MRMSrcMem, (outs GR32:$dst), (ins i16mem:$src),
// except that they use GR32_NOREX for the output operand register class
// instead of GR32. This allows them to operate on h registers on x86-64.
def MOVZX32_NOREXrr8 : I<0xB6, MRMSrcReg,
- (outs GR32_NOREX:$dst), (ins GR8:$src),
+ (outs GR32_NOREX:$dst), (ins GR8_NOREX:$src),
"movz{bl|x}\t{$src, $dst|$dst, $src}",
[]>, TB;
let mayLoad = 1 in
def MOVZX32_NOREXrm8 : I<0xB6, MRMSrcMem,
- (outs GR32_NOREX:$dst), (ins i8mem:$src),
+ (outs GR32_NOREX:$dst), (ins i8mem_NOREX:$src),
"movz{bl|x}\t{$src, $dst|$dst, $src}",
[]>, TB;
diff --git a/lib/Target/X86/X86InstrFormats.td b/lib/Target/X86/X86InstrFormats.td
index 6d89bcc..0a1590b 100644
--- a/lib/Target/X86/X86InstrFormats.td
+++ b/lib/Target/X86/X86InstrFormats.td
@@ -113,6 +113,7 @@ class VEX_W { bit hasVEX_WPrefix = 1; }
class VEX_4V : VEX { bit hasVEX_4VPrefix = 1; }
class VEX_I8IMM { bit hasVEX_i8ImmReg = 1; }
class VEX_L { bit hasVEX_L = 1; }
+class VEX_LIG { bit ignoresVEX_L = 1; }
class Has3DNow0F0FOpcode { bit has3DNow0F0FOpcode = 1; }
class X86Inst<bits<8> opcod, Format f, ImmType i, dag outs, dag ins,
@@ -150,6 +151,7 @@ class X86Inst<bits<8> opcod, Format f, ImmType i, dag outs, dag ins,
bit hasVEX_i8ImmReg = 0; // Does this inst require the last source register
// to be encoded in a immediate field?
bit hasVEX_L = 0; // Does this inst use large (256-bit) registers?
+ bit ignoresVEX_L = 0; // Does this instruction ignore the L-bit?
bit has3DNow0F0FOpcode = 0; // Wacky 3DNow! encoding?
// TSFlags layout should be kept in sync with X86InstrInfo.h.
@@ -169,7 +171,8 @@ class X86Inst<bits<8> opcod, Format f, ImmType i, dag outs, dag ins,
let TSFlags{35} = hasVEX_4VPrefix;
let TSFlags{36} = hasVEX_i8ImmReg;
let TSFlags{37} = hasVEX_L;
- let TSFlags{38} = has3DNow0F0FOpcode;
+ let TSFlags{38} = ignoresVEX_L;
+ let TSFlags{39} = has3DNow0F0FOpcode;
}
class PseudoI<dag oops, dag iops, list<dag> pattern>
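(Because the new bit shifts has3DNow0F0FOpcode from bit 38 to bit 39, anything
that unpacks TSFlags has to move in lockstep — per the comment above, the
authoritative layout lives in X86InstrInfo.h. Hypothetical decode helpers
mirroring the lets above, not LLVM API:)

    #include <cstdint>

    // Hypothetical accessors for the TSFlags bits assigned above.
    bool IgnoresVEX_L(uint64_t TSFlags)   { return (TSFlags >> 38) & 1; }
    bool Has3DNow0F0F(uint64_t TSFlags)   { return (TSFlags >> 39) & 1; }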
@@ -501,6 +504,9 @@ class RSDI<bits<8> o, Format F, dag outs, dag ins, string asm,
class RPDI<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
: PDI<o, F, outs, ins, asm, pattern>, REX_W;
+class VRPDI<bits<8> o, Format F, dag outs, dag ins, string asm,
+ list<dag> pattern>
+ : VPDI<o, F, outs, ins, asm, pattern>, VEX_W;
// MMX Instruction templates
//
diff --git a/lib/Target/X86/X86InstrFragmentsSIMD.td b/lib/Target/X86/X86InstrFragmentsSIMD.td
index b00109c..af919fb 100644
--- a/lib/Target/X86/X86InstrFragmentsSIMD.td
+++ b/lib/Target/X86/X86InstrFragmentsSIMD.td
@@ -39,6 +39,8 @@ def X86frsqrt : SDNode<"X86ISD::FRSQRT", SDTFPUnaryOp>;
def X86frcp : SDNode<"X86ISD::FRCP", SDTFPUnaryOp>;
def X86fsrl : SDNode<"X86ISD::FSRL", SDTX86FPShiftOp>;
def X86fgetsign: SDNode<"X86ISD::FGETSIGNx86",SDTFPToIntOp>;
+def X86fhadd : SDNode<"X86ISD::FHADD", SDTFPBinOp>;
+def X86fhsub : SDNode<"X86ISD::FHSUB", SDTFPBinOp>;
def X86comi : SDNode<"X86ISD::COMI", SDTX86CmpTest>;
def X86ucomi : SDNode<"X86ISD::UCOMI", SDTX86CmpTest>;
def X86cmpss : SDNode<"X86ISD::FSETCCss", SDTX86Cmpss>;
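(The new X86fhadd and X86fhsub nodes model the SSE3/AVX horizontal add and
subtract instructions, haddps and friends. For orientation, a scalar C++
sketch of the v4f32 haddps semantics:)

    // r = haddps(a, b): pairwise sums, low half from a, high half from b.
    void haddps(const float a[4], const float b[4], float r[4]) {
      r[0] = a[0] + a[1];
      r[1] = a[2] + a[3];
      r[2] = b[0] + b[1];
      r[3] = b[2] + b[3];
    }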
@@ -49,18 +51,15 @@ def X86pshufb : SDNode<"X86ISD::PSHUFB",
def X86andnp : SDNode<"X86ISD::ANDNP",
SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0,1>,
SDTCisSameAs<0,2>]>>;
-def X86psignb : SDNode<"X86ISD::PSIGNB",
+def X86psignb : SDNode<"X86ISD::PSIGNB",
SDTypeProfile<1, 2, [SDTCisVT<0, v16i8>, SDTCisSameAs<0,1>,
SDTCisSameAs<0,2>]>>;
-def X86psignw : SDNode<"X86ISD::PSIGNW",
+def X86psignw : SDNode<"X86ISD::PSIGNW",
SDTypeProfile<1, 2, [SDTCisVT<0, v8i16>, SDTCisSameAs<0,1>,
SDTCisSameAs<0,2>]>>;
-def X86psignd : SDNode<"X86ISD::PSIGND",
+def X86psignd : SDNode<"X86ISD::PSIGND",
SDTypeProfile<1, 2, [SDTCisVT<0, v4i32>, SDTCisSameAs<0,1>,
SDTCisSameAs<0,2>]>>;
-def X86pblendv : SDNode<"X86ISD::PBLENDVB",
- SDTypeProfile<1, 3, [SDTCisVT<0, v16i8>, SDTCisSameAs<0,1>,
- SDTCisSameAs<0,2>, SDTCisSameAs<0,3>]>>;
def X86pextrb : SDNode<"X86ISD::PEXTRB",
SDTypeProfile<1, 2, [SDTCisVT<0, i32>, SDTCisPtrTy<2>]>>;
def X86pextrw : SDNode<"X86ISD::PEXTRW",
@@ -109,6 +108,8 @@ def SDTShuff2OpI : SDTypeProfile<1, 2, [SDTCisVec<0>,
def SDTShuff3OpI : SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0,1>,
SDTCisSameAs<0,2>, SDTCisInt<3>]>;
+def SDTVBroadcast : SDTypeProfile<1, 1, [SDTCisVec<0>]>;
+
def X86PAlign : SDNode<"X86ISD::PALIGN", SDTShuff3OpI>;
def X86PShufd : SDNode<"X86ISD::PSHUFD", SDTShuff2OpI>;
@@ -133,12 +134,15 @@ def X86Movhlpd : SDNode<"X86ISD::MOVHLPD", SDTShuff2Op>;
def X86Movlps : SDNode<"X86ISD::MOVLPS", SDTShuff2Op>;
def X86Movlpd : SDNode<"X86ISD::MOVLPD", SDTShuff2Op>;
-def X86Unpcklps : SDNode<"X86ISD::UNPCKLPS", SDTShuff2Op>;
-def X86Unpcklpd : SDNode<"X86ISD::UNPCKLPD", SDTShuff2Op>;
+def X86Unpcklps : SDNode<"X86ISD::UNPCKLPS", SDTShuff2Op>;
+def X86Unpcklpd : SDNode<"X86ISD::UNPCKLPD", SDTShuff2Op>;
def X86Unpcklpsy : SDNode<"X86ISD::VUNPCKLPSY", SDTShuff2Op>;
def X86Unpcklpdy : SDNode<"X86ISD::VUNPCKLPDY", SDTShuff2Op>;
-def X86Unpckhps : SDNode<"X86ISD::UNPCKHPS", SDTShuff2Op>;
-def X86Unpckhpd : SDNode<"X86ISD::UNPCKHPD", SDTShuff2Op>;
+
+def X86Unpckhps : SDNode<"X86ISD::UNPCKHPS", SDTShuff2Op>;
+def X86Unpckhpd : SDNode<"X86ISD::UNPCKHPD", SDTShuff2Op>;
+def X86Unpckhpsy : SDNode<"X86ISD::VUNPCKHPSY", SDTShuff2Op>;
+def X86Unpckhpdy : SDNode<"X86ISD::VUNPCKHPDY", SDTShuff2Op>;
def X86Punpcklbw : SDNode<"X86ISD::PUNPCKLBW", SDTShuff2Op>;
def X86Punpcklwd : SDNode<"X86ISD::PUNPCKLWD", SDTShuff2Op>;
@@ -150,6 +154,15 @@ def X86Punpckhwd : SDNode<"X86ISD::PUNPCKHWD", SDTShuff2Op>;
def X86Punpckhdq : SDNode<"X86ISD::PUNPCKHDQ", SDTShuff2Op>;
def X86Punpckhqdq : SDNode<"X86ISD::PUNPCKHQDQ", SDTShuff2Op>;
+def X86VPermilps : SDNode<"X86ISD::VPERMILPS", SDTShuff2OpI>;
+def X86VPermilpsy : SDNode<"X86ISD::VPERMILPSY", SDTShuff2OpI>;
+def X86VPermilpd : SDNode<"X86ISD::VPERMILPD", SDTShuff2OpI>;
+def X86VPermilpdy : SDNode<"X86ISD::VPERMILPDY", SDTShuff2OpI>;
+
+def X86VPerm2f128 : SDNode<"X86ISD::VPERM2F128", SDTShuff3OpI>;
+
+def X86VBroadcast : SDNode<"X86ISD::VBROADCAST", SDTVBroadcast>;
+
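(X86VBroadcast models the AVX vbroadcast family: replicate one scalar,
typically loaded from memory, into every vector lane — SDTVBroadcast only
constrains the result to be a vector. A scalar C++ sketch for a v8f32
broadcast:)

    // r = vbroadcastss(p): one scalar load splatted into all eight lanes.
    void vbroadcastss(const float *p, float r[8]) {
      for (int i = 0; i < 8; ++i)
        r[i] = *p;
    }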
//===----------------------------------------------------------------------===//
// SSE Complex Patterns
//===----------------------------------------------------------------------===//
@@ -193,17 +206,28 @@ def loadv4f64 : PatFrag<(ops node:$ptr), (v4f64 (load node:$ptr))>;
def loadv8i32 : PatFrag<(ops node:$ptr), (v8i32 (load node:$ptr))>;
def loadv4i64 : PatFrag<(ops node:$ptr), (v4i64 (load node:$ptr))>;
-// Like 'store', but always requires vector alignment.
+// Like 'store', but always requires 128-bit vector alignment.
def alignedstore : PatFrag<(ops node:$val, node:$ptr),
(store node:$val, node:$ptr), [{
return cast<StoreSDNode>(N)->getAlignment() >= 16;
}]>;
-// Like 'load', but always requires vector alignment.
+// Like 'store', but always requires 256-bit vector alignment.
+def alignedstore256 : PatFrag<(ops node:$val, node:$ptr),
+ (store node:$val, node:$ptr), [{
+ return cast<StoreSDNode>(N)->getAlignment() >= 32;
+}]>;
+
+// Like 'load', but always requires 128-bit vector alignment.
def alignedload : PatFrag<(ops node:$ptr), (load node:$ptr), [{
return cast<LoadSDNode>(N)->getAlignment() >= 16;
}]>;
+// Like 'load', but always requires 256-bit vector alignment.
+def alignedload256 : PatFrag<(ops node:$ptr), (load node:$ptr), [{
+ return cast<LoadSDNode>(N)->getAlignment() >= 32;
+}]>;
+
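(The predicates compare the alignment recorded on the load/store node against
the vector width in bytes: 16 for 128-bit accesses, 32 for the new 256-bit AVX
accesses. The runtime analogue of that compile-time check, as a hypothetical
C++ helper:)

    #include <cstdint>

    // True if Ptr meets the 32-byte alignment an aligned 256-bit access needs.
    bool IsAligned256(const void *Ptr) {
      return (reinterpret_cast<uintptr_t>(Ptr) & 31) == 0;
    }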
def alignedloadfsf32 : PatFrag<(ops node:$ptr),
(f32 (alignedload node:$ptr))>;
def alignedloadfsf64 : PatFrag<(ops node:$ptr),
@@ -221,13 +245,13 @@ def alignedloadv2i64 : PatFrag<(ops node:$ptr),
// 256-bit aligned load pattern fragments
def alignedloadv8f32 : PatFrag<(ops node:$ptr),
- (v8f32 (alignedload node:$ptr))>;
+ (v8f32 (alignedload256 node:$ptr))>;
def alignedloadv4f64 : PatFrag<(ops node:$ptr),
- (v4f64 (alignedload node:$ptr))>;
+ (v4f64 (alignedload256 node:$ptr))>;
def alignedloadv8i32 : PatFrag<(ops node:$ptr),
- (v8i32 (alignedload node:$ptr))>;
+ (v8i32 (alignedload256 node:$ptr))>;
def alignedloadv4i64 : PatFrag<(ops node:$ptr),
- (v4i64 (alignedload node:$ptr))>;
+ (v4i64 (alignedload256 node:$ptr))>;
// Like 'load', but uses special alignment checks suitable for use in
// memory operands in most SSE instructions, which are required to
@@ -356,7 +380,7 @@ def EXTRACT_get_vextractf128_imm : SDNodeXForm<extract_subvector, [{
return getI8Imm(X86::getExtractVEXTRACTF128Immediate(N));
}]>;
-// INSERT_get_vinsertf128_imm xform function: convert insert_subvector index to
+// INSERT_get_vinsertf128_imm xform function: convert insert_subvector index to
// VINSERTF128 imm.
def INSERT_get_vinsertf128_imm : SDNodeXForm<insert_subvector, [{
return getI8Imm(X86::getInsertVINSERTF128Immediate(N));
@@ -398,16 +422,6 @@ def movl : PatFrag<(ops node:$lhs, node:$rhs),
return X86::isMOVLMask(cast<ShuffleVectorSDNode>(N));
}]>;
-def movshdup : PatFrag<(ops node:$lhs, node:$rhs),
- (vector_shuffle node:$lhs, node:$rhs), [{
- return X86::isMOVSHDUPMask(cast<ShuffleVectorSDNode>(N));
-}]>;
-
-def movsldup : PatFrag<(ops node:$lhs, node:$rhs),
- (vector_shuffle node:$lhs, node:$rhs), [{
- return X86::isMOVSLDUPMask(cast<ShuffleVectorSDNode>(N));
-}]>;
-
def unpckl : PatFrag<(ops node:$lhs, node:$rhs),
(vector_shuffle node:$lhs, node:$rhs), [{
return X86::isUNPCKLMask(cast<ShuffleVectorSDNode>(N));
@@ -418,16 +432,6 @@ def unpckh : PatFrag<(ops node:$lhs, node:$rhs),
return X86::isUNPCKHMask(cast<ShuffleVectorSDNode>(N));
}]>;
-def unpckl_undef : PatFrag<(ops node:$lhs, node:$rhs),
- (vector_shuffle node:$lhs, node:$rhs), [{
- return X86::isUNPCKL_v_undef_Mask(cast<ShuffleVectorSDNode>(N));
-}]>;
-
-def unpckh_undef : PatFrag<(ops node:$lhs, node:$rhs),
- (vector_shuffle node:$lhs, node:$rhs), [{
- return X86::isUNPCKH_v_undef_Mask(cast<ShuffleVectorSDNode>(N));
-}]>;
-
def pshufd : PatFrag<(ops node:$lhs, node:$rhs),
(vector_shuffle node:$lhs, node:$rhs), [{
return X86::isPSHUFDMask(cast<ShuffleVectorSDNode>(N));
@@ -448,11 +452,6 @@ def pshuflw : PatFrag<(ops node:$lhs, node:$rhs),
return X86::isPSHUFLWMask(cast<ShuffleVectorSDNode>(N));
}], SHUFFLE_get_pshuflw_imm>;
-def palign : PatFrag<(ops node:$lhs, node:$rhs),
- (vector_shuffle node:$lhs, node:$rhs), [{
- return X86::isPALIGNRMask(cast<ShuffleVectorSDNode>(N));
-}], SHUFFLE_get_palign_imm>;
-
def vextractf128_extract : PatFrag<(ops node:$bigvec, node:$index),
(extract_subvector node:$bigvec,
node:$index), [{
@@ -465,3 +464,4 @@ def vinsertf128_insert : PatFrag<(ops node:$bigvec, node:$smallvec,
node:$index), [{
return X86::isVINSERTF128Index(N);
}], INSERT_get_vinsertf128_imm>;
+
diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp
index 55b5835..3a02de0 100644
--- a/lib/Target/X86/X86InstrInfo.cpp
+++ b/lib/Target/X86/X86InstrInfo.cpp
@@ -53,6 +53,36 @@ ReMatPICStubLoad("remat-pic-stub-load",
cl::desc("Re-materialize load from stub in PIC mode"),
cl::init(false), cl::Hidden);
+enum {
+ // Select which memory operand is being unfolded.
+ // (stored in bits 0 - 7)
+ TB_INDEX_0 = 0,
+ TB_INDEX_1 = 1,
+ TB_INDEX_2 = 2,
+ TB_INDEX_MASK = 0xff,
+
+ // Minimum alignment required for load/store.
+ // Used for RegOp->MemOp conversion.
+ // (stored in bits 8 - 15)
+ TB_ALIGN_SHIFT = 8,
+ TB_ALIGN_NONE = 0 << TB_ALIGN_SHIFT,
+ TB_ALIGN_16 = 16 << TB_ALIGN_SHIFT,
+ TB_ALIGN_32 = 32 << TB_ALIGN_SHIFT,
+ TB_ALIGN_MASK = 0xff << TB_ALIGN_SHIFT,
+
+ // Do not insert the reverse map (MemOp -> RegOp) into the table.
+ // This may be needed because there is a many -> one mapping.
+ TB_NO_REVERSE = 1 << 16,
+
+ // Do not insert the forward map (RegOp -> MemOp) into the table.
+ // This is needed for Native Client, which prohibits branch
+ // instructions from using a memory operand.
+ TB_NO_FORWARD = 1 << 17,
+
+ TB_FOLDED_LOAD = 1 << 18,
+ TB_FOLDED_STORE = 1 << 19
+};
+
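(The flag word packs what the fold tables used to spread over extra columns:
the unfold index in bits 0-7, the minimum alignment in bytes in bits 8-15, and
behaviour bits above those. Hypothetical accessors mirroring the layout,
assuming the TB_* enum above is in scope — not LLVM API:)

    unsigned GetFoldIndex(unsigned Flags) { return Flags & TB_INDEX_MASK; }
    unsigned GetMinAlign(unsigned Flags) {
      return (Flags & TB_ALIGN_MASK) >> TB_ALIGN_SHIFT;  // e.g. 0, 16, or 32
    }
    bool CanFold(unsigned Flags)   { return (Flags & TB_NO_FORWARD) == 0; }
    bool CanUnfold(unsigned Flags) { return (Flags & TB_NO_REVERSE) == 0; }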
X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
: X86GenInstrInfo((tm.getSubtarget<X86Subtarget>().is64Bit()
? X86::ADJCALLSTACKDOWN64
@@ -61,655 +91,829 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
? X86::ADJCALLSTACKUP64
: X86::ADJCALLSTACKUP32)),
TM(tm), RI(tm, *this) {
- enum {
- TB_NOT_REVERSABLE = 1U << 31,
- TB_FLAGS = TB_NOT_REVERSABLE
- };
- static const unsigned OpTbl2Addr[][2] = {
- { X86::ADC32ri, X86::ADC32mi },
- { X86::ADC32ri8, X86::ADC32mi8 },
- { X86::ADC32rr, X86::ADC32mr },
- { X86::ADC64ri32, X86::ADC64mi32 },
- { X86::ADC64ri8, X86::ADC64mi8 },
- { X86::ADC64rr, X86::ADC64mr },
- { X86::ADD16ri, X86::ADD16mi },
- { X86::ADD16ri8, X86::ADD16mi8 },
- { X86::ADD16ri_DB, X86::ADD16mi | TB_NOT_REVERSABLE },
- { X86::ADD16ri8_DB, X86::ADD16mi8 | TB_NOT_REVERSABLE },
- { X86::ADD16rr, X86::ADD16mr },
- { X86::ADD16rr_DB, X86::ADD16mr | TB_NOT_REVERSABLE },
- { X86::ADD32ri, X86::ADD32mi },
- { X86::ADD32ri8, X86::ADD32mi8 },
- { X86::ADD32ri_DB, X86::ADD32mi | TB_NOT_REVERSABLE },
- { X86::ADD32ri8_DB, X86::ADD32mi8 | TB_NOT_REVERSABLE },
- { X86::ADD32rr, X86::ADD32mr },
- { X86::ADD32rr_DB, X86::ADD32mr | TB_NOT_REVERSABLE },
- { X86::ADD64ri32, X86::ADD64mi32 },
- { X86::ADD64ri8, X86::ADD64mi8 },
- { X86::ADD64ri32_DB,X86::ADD64mi32 | TB_NOT_REVERSABLE },
- { X86::ADD64ri8_DB, X86::ADD64mi8 | TB_NOT_REVERSABLE },
- { X86::ADD64rr, X86::ADD64mr },
- { X86::ADD64rr_DB, X86::ADD64mr | TB_NOT_REVERSABLE },
- { X86::ADD8ri, X86::ADD8mi },
- { X86::ADD8rr, X86::ADD8mr },
- { X86::AND16ri, X86::AND16mi },
- { X86::AND16ri8, X86::AND16mi8 },
- { X86::AND16rr, X86::AND16mr },
- { X86::AND32ri, X86::AND32mi },
- { X86::AND32ri8, X86::AND32mi8 },
- { X86::AND32rr, X86::AND32mr },
- { X86::AND64ri32, X86::AND64mi32 },
- { X86::AND64ri8, X86::AND64mi8 },
- { X86::AND64rr, X86::AND64mr },
- { X86::AND8ri, X86::AND8mi },
- { X86::AND8rr, X86::AND8mr },
- { X86::DEC16r, X86::DEC16m },
- { X86::DEC32r, X86::DEC32m },
- { X86::DEC64_16r, X86::DEC64_16m },
- { X86::DEC64_32r, X86::DEC64_32m },
- { X86::DEC64r, X86::DEC64m },
- { X86::DEC8r, X86::DEC8m },
- { X86::INC16r, X86::INC16m },
- { X86::INC32r, X86::INC32m },
- { X86::INC64_16r, X86::INC64_16m },
- { X86::INC64_32r, X86::INC64_32m },
- { X86::INC64r, X86::INC64m },
- { X86::INC8r, X86::INC8m },
- { X86::NEG16r, X86::NEG16m },
- { X86::NEG32r, X86::NEG32m },
- { X86::NEG64r, X86::NEG64m },
- { X86::NEG8r, X86::NEG8m },
- { X86::NOT16r, X86::NOT16m },
- { X86::NOT32r, X86::NOT32m },
- { X86::NOT64r, X86::NOT64m },
- { X86::NOT8r, X86::NOT8m },
- { X86::OR16ri, X86::OR16mi },
- { X86::OR16ri8, X86::OR16mi8 },
- { X86::OR16rr, X86::OR16mr },
- { X86::OR32ri, X86::OR32mi },
- { X86::OR32ri8, X86::OR32mi8 },
- { X86::OR32rr, X86::OR32mr },
- { X86::OR64ri32, X86::OR64mi32 },
- { X86::OR64ri8, X86::OR64mi8 },
- { X86::OR64rr, X86::OR64mr },
- { X86::OR8ri, X86::OR8mi },
- { X86::OR8rr, X86::OR8mr },
- { X86::ROL16r1, X86::ROL16m1 },
- { X86::ROL16rCL, X86::ROL16mCL },
- { X86::ROL16ri, X86::ROL16mi },
- { X86::ROL32r1, X86::ROL32m1 },
- { X86::ROL32rCL, X86::ROL32mCL },
- { X86::ROL32ri, X86::ROL32mi },
- { X86::ROL64r1, X86::ROL64m1 },
- { X86::ROL64rCL, X86::ROL64mCL },
- { X86::ROL64ri, X86::ROL64mi },
- { X86::ROL8r1, X86::ROL8m1 },
- { X86::ROL8rCL, X86::ROL8mCL },
- { X86::ROL8ri, X86::ROL8mi },
- { X86::ROR16r1, X86::ROR16m1 },
- { X86::ROR16rCL, X86::ROR16mCL },
- { X86::ROR16ri, X86::ROR16mi },
- { X86::ROR32r1, X86::ROR32m1 },
- { X86::ROR32rCL, X86::ROR32mCL },
- { X86::ROR32ri, X86::ROR32mi },
- { X86::ROR64r1, X86::ROR64m1 },
- { X86::ROR64rCL, X86::ROR64mCL },
- { X86::ROR64ri, X86::ROR64mi },
- { X86::ROR8r1, X86::ROR8m1 },
- { X86::ROR8rCL, X86::ROR8mCL },
- { X86::ROR8ri, X86::ROR8mi },
- { X86::SAR16r1, X86::SAR16m1 },
- { X86::SAR16rCL, X86::SAR16mCL },
- { X86::SAR16ri, X86::SAR16mi },
- { X86::SAR32r1, X86::SAR32m1 },
- { X86::SAR32rCL, X86::SAR32mCL },
- { X86::SAR32ri, X86::SAR32mi },
- { X86::SAR64r1, X86::SAR64m1 },
- { X86::SAR64rCL, X86::SAR64mCL },
- { X86::SAR64ri, X86::SAR64mi },
- { X86::SAR8r1, X86::SAR8m1 },
- { X86::SAR8rCL, X86::SAR8mCL },
- { X86::SAR8ri, X86::SAR8mi },
- { X86::SBB32ri, X86::SBB32mi },
- { X86::SBB32ri8, X86::SBB32mi8 },
- { X86::SBB32rr, X86::SBB32mr },
- { X86::SBB64ri32, X86::SBB64mi32 },
- { X86::SBB64ri8, X86::SBB64mi8 },
- { X86::SBB64rr, X86::SBB64mr },
- { X86::SHL16rCL, X86::SHL16mCL },
- { X86::SHL16ri, X86::SHL16mi },
- { X86::SHL32rCL, X86::SHL32mCL },
- { X86::SHL32ri, X86::SHL32mi },
- { X86::SHL64rCL, X86::SHL64mCL },
- { X86::SHL64ri, X86::SHL64mi },
- { X86::SHL8rCL, X86::SHL8mCL },
- { X86::SHL8ri, X86::SHL8mi },
- { X86::SHLD16rrCL, X86::SHLD16mrCL },
- { X86::SHLD16rri8, X86::SHLD16mri8 },
- { X86::SHLD32rrCL, X86::SHLD32mrCL },
- { X86::SHLD32rri8, X86::SHLD32mri8 },
- { X86::SHLD64rrCL, X86::SHLD64mrCL },
- { X86::SHLD64rri8, X86::SHLD64mri8 },
- { X86::SHR16r1, X86::SHR16m1 },
- { X86::SHR16rCL, X86::SHR16mCL },
- { X86::SHR16ri, X86::SHR16mi },
- { X86::SHR32r1, X86::SHR32m1 },
- { X86::SHR32rCL, X86::SHR32mCL },
- { X86::SHR32ri, X86::SHR32mi },
- { X86::SHR64r1, X86::SHR64m1 },
- { X86::SHR64rCL, X86::SHR64mCL },
- { X86::SHR64ri, X86::SHR64mi },
- { X86::SHR8r1, X86::SHR8m1 },
- { X86::SHR8rCL, X86::SHR8mCL },
- { X86::SHR8ri, X86::SHR8mi },
- { X86::SHRD16rrCL, X86::SHRD16mrCL },
- { X86::SHRD16rri8, X86::SHRD16mri8 },
- { X86::SHRD32rrCL, X86::SHRD32mrCL },
- { X86::SHRD32rri8, X86::SHRD32mri8 },
- { X86::SHRD64rrCL, X86::SHRD64mrCL },
- { X86::SHRD64rri8, X86::SHRD64mri8 },
- { X86::SUB16ri, X86::SUB16mi },
- { X86::SUB16ri8, X86::SUB16mi8 },
- { X86::SUB16rr, X86::SUB16mr },
- { X86::SUB32ri, X86::SUB32mi },
- { X86::SUB32ri8, X86::SUB32mi8 },
- { X86::SUB32rr, X86::SUB32mr },
- { X86::SUB64ri32, X86::SUB64mi32 },
- { X86::SUB64ri8, X86::SUB64mi8 },
- { X86::SUB64rr, X86::SUB64mr },
- { X86::SUB8ri, X86::SUB8mi },
- { X86::SUB8rr, X86::SUB8mr },
- { X86::XOR16ri, X86::XOR16mi },
- { X86::XOR16ri8, X86::XOR16mi8 },
- { X86::XOR16rr, X86::XOR16mr },
- { X86::XOR32ri, X86::XOR32mi },
- { X86::XOR32ri8, X86::XOR32mi8 },
- { X86::XOR32rr, X86::XOR32mr },
- { X86::XOR64ri32, X86::XOR64mi32 },
- { X86::XOR64ri8, X86::XOR64mi8 },
- { X86::XOR64rr, X86::XOR64mr },
- { X86::XOR8ri, X86::XOR8mi },
- { X86::XOR8rr, X86::XOR8mr }
+ static const unsigned OpTbl2Addr[][3] = {
+ { X86::ADC32ri, X86::ADC32mi, 0 },
+ { X86::ADC32ri8, X86::ADC32mi8, 0 },
+ { X86::ADC32rr, X86::ADC32mr, 0 },
+ { X86::ADC64ri32, X86::ADC64mi32, 0 },
+ { X86::ADC64ri8, X86::ADC64mi8, 0 },
+ { X86::ADC64rr, X86::ADC64mr, 0 },
+ { X86::ADD16ri, X86::ADD16mi, 0 },
+ { X86::ADD16ri8, X86::ADD16mi8, 0 },
+ { X86::ADD16ri_DB, X86::ADD16mi, TB_NO_REVERSE },
+ { X86::ADD16ri8_DB, X86::ADD16mi8, TB_NO_REVERSE },
+ { X86::ADD16rr, X86::ADD16mr, 0 },
+ { X86::ADD16rr_DB, X86::ADD16mr, TB_NO_REVERSE },
+ { X86::ADD32ri, X86::ADD32mi, 0 },
+ { X86::ADD32ri8, X86::ADD32mi8, 0 },
+ { X86::ADD32ri_DB, X86::ADD32mi, TB_NO_REVERSE },
+ { X86::ADD32ri8_DB, X86::ADD32mi8, TB_NO_REVERSE },
+ { X86::ADD32rr, X86::ADD32mr, 0 },
+ { X86::ADD32rr_DB, X86::ADD32mr, TB_NO_REVERSE },
+ { X86::ADD64ri32, X86::ADD64mi32, 0 },
+ { X86::ADD64ri8, X86::ADD64mi8, 0 },
+ { X86::ADD64ri32_DB,X86::ADD64mi32, TB_NO_REVERSE },
+ { X86::ADD64ri8_DB, X86::ADD64mi8, TB_NO_REVERSE },
+ { X86::ADD64rr, X86::ADD64mr, 0 },
+ { X86::ADD64rr_DB, X86::ADD64mr, TB_NO_REVERSE },
+ { X86::ADD8ri, X86::ADD8mi, 0 },
+ { X86::ADD8rr, X86::ADD8mr, 0 },
+ { X86::AND16ri, X86::AND16mi, 0 },
+ { X86::AND16ri8, X86::AND16mi8, 0 },
+ { X86::AND16rr, X86::AND16mr, 0 },
+ { X86::AND32ri, X86::AND32mi, 0 },
+ { X86::AND32ri8, X86::AND32mi8, 0 },
+ { X86::AND32rr, X86::AND32mr, 0 },
+ { X86::AND64ri32, X86::AND64mi32, 0 },
+ { X86::AND64ri8, X86::AND64mi8, 0 },
+ { X86::AND64rr, X86::AND64mr, 0 },
+ { X86::AND8ri, X86::AND8mi, 0 },
+ { X86::AND8rr, X86::AND8mr, 0 },
+ { X86::DEC16r, X86::DEC16m, 0 },
+ { X86::DEC32r, X86::DEC32m, 0 },
+ { X86::DEC64_16r, X86::DEC64_16m, 0 },
+ { X86::DEC64_32r, X86::DEC64_32m, 0 },
+ { X86::DEC64r, X86::DEC64m, 0 },
+ { X86::DEC8r, X86::DEC8m, 0 },
+ { X86::INC16r, X86::INC16m, 0 },
+ { X86::INC32r, X86::INC32m, 0 },
+ { X86::INC64_16r, X86::INC64_16m, 0 },
+ { X86::INC64_32r, X86::INC64_32m, 0 },
+ { X86::INC64r, X86::INC64m, 0 },
+ { X86::INC8r, X86::INC8m, 0 },
+ { X86::NEG16r, X86::NEG16m, 0 },
+ { X86::NEG32r, X86::NEG32m, 0 },
+ { X86::NEG64r, X86::NEG64m, 0 },
+ { X86::NEG8r, X86::NEG8m, 0 },
+ { X86::NOT16r, X86::NOT16m, 0 },
+ { X86::NOT32r, X86::NOT32m, 0 },
+ { X86::NOT64r, X86::NOT64m, 0 },
+ { X86::NOT8r, X86::NOT8m, 0 },
+ { X86::OR16ri, X86::OR16mi, 0 },
+ { X86::OR16ri8, X86::OR16mi8, 0 },
+ { X86::OR16rr, X86::OR16mr, 0 },
+ { X86::OR32ri, X86::OR32mi, 0 },
+ { X86::OR32ri8, X86::OR32mi8, 0 },
+ { X86::OR32rr, X86::OR32mr, 0 },
+ { X86::OR64ri32, X86::OR64mi32, 0 },
+ { X86::OR64ri8, X86::OR64mi8, 0 },
+ { X86::OR64rr, X86::OR64mr, 0 },
+ { X86::OR8ri, X86::OR8mi, 0 },
+ { X86::OR8rr, X86::OR8mr, 0 },
+ { X86::ROL16r1, X86::ROL16m1, 0 },
+ { X86::ROL16rCL, X86::ROL16mCL, 0 },
+ { X86::ROL16ri, X86::ROL16mi, 0 },
+ { X86::ROL32r1, X86::ROL32m1, 0 },
+ { X86::ROL32rCL, X86::ROL32mCL, 0 },
+ { X86::ROL32ri, X86::ROL32mi, 0 },
+ { X86::ROL64r1, X86::ROL64m1, 0 },
+ { X86::ROL64rCL, X86::ROL64mCL, 0 },
+ { X86::ROL64ri, X86::ROL64mi, 0 },
+ { X86::ROL8r1, X86::ROL8m1, 0 },
+ { X86::ROL8rCL, X86::ROL8mCL, 0 },
+ { X86::ROL8ri, X86::ROL8mi, 0 },
+ { X86::ROR16r1, X86::ROR16m1, 0 },
+ { X86::ROR16rCL, X86::ROR16mCL, 0 },
+ { X86::ROR16ri, X86::ROR16mi, 0 },
+ { X86::ROR32r1, X86::ROR32m1, 0 },
+ { X86::ROR32rCL, X86::ROR32mCL, 0 },
+ { X86::ROR32ri, X86::ROR32mi, 0 },
+ { X86::ROR64r1, X86::ROR64m1, 0 },
+ { X86::ROR64rCL, X86::ROR64mCL, 0 },
+ { X86::ROR64ri, X86::ROR64mi, 0 },
+ { X86::ROR8r1, X86::ROR8m1, 0 },
+ { X86::ROR8rCL, X86::ROR8mCL, 0 },
+ { X86::ROR8ri, X86::ROR8mi, 0 },
+ { X86::SAR16r1, X86::SAR16m1, 0 },
+ { X86::SAR16rCL, X86::SAR16mCL, 0 },
+ { X86::SAR16ri, X86::SAR16mi, 0 },
+ { X86::SAR32r1, X86::SAR32m1, 0 },
+ { X86::SAR32rCL, X86::SAR32mCL, 0 },
+ { X86::SAR32ri, X86::SAR32mi, 0 },
+ { X86::SAR64r1, X86::SAR64m1, 0 },
+ { X86::SAR64rCL, X86::SAR64mCL, 0 },
+ { X86::SAR64ri, X86::SAR64mi, 0 },
+ { X86::SAR8r1, X86::SAR8m1, 0 },
+ { X86::SAR8rCL, X86::SAR8mCL, 0 },
+ { X86::SAR8ri, X86::SAR8mi, 0 },
+ { X86::SBB32ri, X86::SBB32mi, 0 },
+ { X86::SBB32ri8, X86::SBB32mi8, 0 },
+ { X86::SBB32rr, X86::SBB32mr, 0 },
+ { X86::SBB64ri32, X86::SBB64mi32, 0 },
+ { X86::SBB64ri8, X86::SBB64mi8, 0 },
+ { X86::SBB64rr, X86::SBB64mr, 0 },
+ { X86::SHL16rCL, X86::SHL16mCL, 0 },
+ { X86::SHL16ri, X86::SHL16mi, 0 },
+ { X86::SHL32rCL, X86::SHL32mCL, 0 },
+ { X86::SHL32ri, X86::SHL32mi, 0 },
+ { X86::SHL64rCL, X86::SHL64mCL, 0 },
+ { X86::SHL64ri, X86::SHL64mi, 0 },
+ { X86::SHL8rCL, X86::SHL8mCL, 0 },
+ { X86::SHL8ri, X86::SHL8mi, 0 },
+ { X86::SHLD16rrCL, X86::SHLD16mrCL, 0 },
+ { X86::SHLD16rri8, X86::SHLD16mri8, 0 },
+ { X86::SHLD32rrCL, X86::SHLD32mrCL, 0 },
+ { X86::SHLD32rri8, X86::SHLD32mri8, 0 },
+ { X86::SHLD64rrCL, X86::SHLD64mrCL, 0 },
+ { X86::SHLD64rri8, X86::SHLD64mri8, 0 },
+ { X86::SHR16r1, X86::SHR16m1, 0 },
+ { X86::SHR16rCL, X86::SHR16mCL, 0 },
+ { X86::SHR16ri, X86::SHR16mi, 0 },
+ { X86::SHR32r1, X86::SHR32m1, 0 },
+ { X86::SHR32rCL, X86::SHR32mCL, 0 },
+ { X86::SHR32ri, X86::SHR32mi, 0 },
+ { X86::SHR64r1, X86::SHR64m1, 0 },
+ { X86::SHR64rCL, X86::SHR64mCL, 0 },
+ { X86::SHR64ri, X86::SHR64mi, 0 },
+ { X86::SHR8r1, X86::SHR8m1, 0 },
+ { X86::SHR8rCL, X86::SHR8mCL, 0 },
+ { X86::SHR8ri, X86::SHR8mi, 0 },
+ { X86::SHRD16rrCL, X86::SHRD16mrCL, 0 },
+ { X86::SHRD16rri8, X86::SHRD16mri8, 0 },
+ { X86::SHRD32rrCL, X86::SHRD32mrCL, 0 },
+ { X86::SHRD32rri8, X86::SHRD32mri8, 0 },
+ { X86::SHRD64rrCL, X86::SHRD64mrCL, 0 },
+ { X86::SHRD64rri8, X86::SHRD64mri8, 0 },
+ { X86::SUB16ri, X86::SUB16mi, 0 },
+ { X86::SUB16ri8, X86::SUB16mi8, 0 },
+ { X86::SUB16rr, X86::SUB16mr, 0 },
+ { X86::SUB32ri, X86::SUB32mi, 0 },
+ { X86::SUB32ri8, X86::SUB32mi8, 0 },
+ { X86::SUB32rr, X86::SUB32mr, 0 },
+ { X86::SUB64ri32, X86::SUB64mi32, 0 },
+ { X86::SUB64ri8, X86::SUB64mi8, 0 },
+ { X86::SUB64rr, X86::SUB64mr, 0 },
+ { X86::SUB8ri, X86::SUB8mi, 0 },
+ { X86::SUB8rr, X86::SUB8mr, 0 },
+ { X86::XOR16ri, X86::XOR16mi, 0 },
+ { X86::XOR16ri8, X86::XOR16mi8, 0 },
+ { X86::XOR16rr, X86::XOR16mr, 0 },
+ { X86::XOR32ri, X86::XOR32mi, 0 },
+ { X86::XOR32ri8, X86::XOR32mi8, 0 },
+ { X86::XOR32rr, X86::XOR32mr, 0 },
+ { X86::XOR64ri32, X86::XOR64mi32, 0 },
+ { X86::XOR64ri8, X86::XOR64mi8, 0 },
+ { X86::XOR64rr, X86::XOR64mr, 0 },
+ { X86::XOR8ri, X86::XOR8mi, 0 },
+ { X86::XOR8rr, X86::XOR8mr, 0 }
};
for (unsigned i = 0, e = array_lengthof(OpTbl2Addr); i != e; ++i) {
unsigned RegOp = OpTbl2Addr[i][0];
- unsigned MemOp = OpTbl2Addr[i][1] & ~TB_FLAGS;
- assert(!RegOp2MemOpTable2Addr.count(RegOp) && "Duplicated entries?");
- RegOp2MemOpTable2Addr[RegOp] = std::make_pair(MemOp, 0U);
-
- // If this is not a reversible operation (because there is a many->one)
- // mapping, don't insert the reverse of the operation into MemOp2RegOpTable.
- if (OpTbl2Addr[i][1] & TB_NOT_REVERSABLE)
- continue;
-
- // Index 0, folded load and store, no alignment requirement.
- unsigned AuxInfo = 0 | (1 << 4) | (1 << 5);
-
- assert(!MemOp2RegOpTable.count(MemOp) &&
- "Duplicated entries in unfolding maps?");
- MemOp2RegOpTable[MemOp] = std::make_pair(RegOp, AuxInfo);
- }
-
- // If the third value is 1, then it's folding either a load or a store.
- static const unsigned OpTbl0[][4] = {
- { X86::BT16ri8, X86::BT16mi8, 1, 0 },
- { X86::BT32ri8, X86::BT32mi8, 1, 0 },
- { X86::BT64ri8, X86::BT64mi8, 1, 0 },
- { X86::CALL32r, X86::CALL32m, 1, 0 },
- { X86::CALL64r, X86::CALL64m, 1, 0 },
- { X86::WINCALL64r, X86::WINCALL64m, 1, 0 },
- { X86::CMP16ri, X86::CMP16mi, 1, 0 },
- { X86::CMP16ri8, X86::CMP16mi8, 1, 0 },
- { X86::CMP16rr, X86::CMP16mr, 1, 0 },
- { X86::CMP32ri, X86::CMP32mi, 1, 0 },
- { X86::CMP32ri8, X86::CMP32mi8, 1, 0 },
- { X86::CMP32rr, X86::CMP32mr, 1, 0 },
- { X86::CMP64ri32, X86::CMP64mi32, 1, 0 },
- { X86::CMP64ri8, X86::CMP64mi8, 1, 0 },
- { X86::CMP64rr, X86::CMP64mr, 1, 0 },
- { X86::CMP8ri, X86::CMP8mi, 1, 0 },
- { X86::CMP8rr, X86::CMP8mr, 1, 0 },
- { X86::DIV16r, X86::DIV16m, 1, 0 },
- { X86::DIV32r, X86::DIV32m, 1, 0 },
- { X86::DIV64r, X86::DIV64m, 1, 0 },
- { X86::DIV8r, X86::DIV8m, 1, 0 },
- { X86::EXTRACTPSrr, X86::EXTRACTPSmr, 0, 16 },
- { X86::FsMOVAPDrr, X86::MOVSDmr | TB_NOT_REVERSABLE , 0, 0 },
- { X86::FsMOVAPSrr, X86::MOVSSmr | TB_NOT_REVERSABLE , 0, 0 },
- { X86::IDIV16r, X86::IDIV16m, 1, 0 },
- { X86::IDIV32r, X86::IDIV32m, 1, 0 },
- { X86::IDIV64r, X86::IDIV64m, 1, 0 },
- { X86::IDIV8r, X86::IDIV8m, 1, 0 },
- { X86::IMUL16r, X86::IMUL16m, 1, 0 },
- { X86::IMUL32r, X86::IMUL32m, 1, 0 },
- { X86::IMUL64r, X86::IMUL64m, 1, 0 },
- { X86::IMUL8r, X86::IMUL8m, 1, 0 },
- { X86::JMP32r, X86::JMP32m, 1, 0 },
- { X86::JMP64r, X86::JMP64m, 1, 0 },
- { X86::MOV16ri, X86::MOV16mi, 0, 0 },
- { X86::MOV16rr, X86::MOV16mr, 0, 0 },
- { X86::MOV32ri, X86::MOV32mi, 0, 0 },
- { X86::MOV32rr, X86::MOV32mr, 0, 0 },
- { X86::MOV64ri32, X86::MOV64mi32, 0, 0 },
- { X86::MOV64rr, X86::MOV64mr, 0, 0 },
- { X86::MOV8ri, X86::MOV8mi, 0, 0 },
- { X86::MOV8rr, X86::MOV8mr, 0, 0 },
- { X86::MOV8rr_NOREX, X86::MOV8mr_NOREX, 0, 0 },
- { X86::MOVAPDrr, X86::MOVAPDmr, 0, 16 },
- { X86::MOVAPSrr, X86::MOVAPSmr, 0, 16 },
- { X86::MOVDQArr, X86::MOVDQAmr, 0, 16 },
- { X86::VMOVAPDYrr, X86::VMOVAPDYmr, 0, 32 },
- { X86::VMOVAPSYrr, X86::VMOVAPSYmr, 0, 32 },
- { X86::VMOVDQAYrr, X86::VMOVDQAYmr, 0, 32 },
- { X86::MOVPDI2DIrr, X86::MOVPDI2DImr, 0, 0 },
- { X86::MOVPQIto64rr,X86::MOVPQI2QImr, 0, 0 },
- { X86::MOVSDto64rr, X86::MOVSDto64mr, 0, 0 },
- { X86::MOVSS2DIrr, X86::MOVSS2DImr, 0, 0 },
- { X86::MOVUPDrr, X86::MOVUPDmr, 0, 0 },
- { X86::MOVUPSrr, X86::MOVUPSmr, 0, 0 },
- { X86::VMOVUPDYrr, X86::VMOVUPDYmr, 0, 0 },
- { X86::VMOVUPSYrr, X86::VMOVUPSYmr, 0, 0 },
- { X86::MUL16r, X86::MUL16m, 1, 0 },
- { X86::MUL32r, X86::MUL32m, 1, 0 },
- { X86::MUL64r, X86::MUL64m, 1, 0 },
- { X86::MUL8r, X86::MUL8m, 1, 0 },
- { X86::SETAEr, X86::SETAEm, 0, 0 },
- { X86::SETAr, X86::SETAm, 0, 0 },
- { X86::SETBEr, X86::SETBEm, 0, 0 },
- { X86::SETBr, X86::SETBm, 0, 0 },
- { X86::SETEr, X86::SETEm, 0, 0 },
- { X86::SETGEr, X86::SETGEm, 0, 0 },
- { X86::SETGr, X86::SETGm, 0, 0 },
- { X86::SETLEr, X86::SETLEm, 0, 0 },
- { X86::SETLr, X86::SETLm, 0, 0 },
- { X86::SETNEr, X86::SETNEm, 0, 0 },
- { X86::SETNOr, X86::SETNOm, 0, 0 },
- { X86::SETNPr, X86::SETNPm, 0, 0 },
- { X86::SETNSr, X86::SETNSm, 0, 0 },
- { X86::SETOr, X86::SETOm, 0, 0 },
- { X86::SETPr, X86::SETPm, 0, 0 },
- { X86::SETSr, X86::SETSm, 0, 0 },
- { X86::TAILJMPr, X86::TAILJMPm, 1, 0 },
- { X86::TAILJMPr64, X86::TAILJMPm64, 1, 0 },
- { X86::TEST16ri, X86::TEST16mi, 1, 0 },
- { X86::TEST32ri, X86::TEST32mi, 1, 0 },
- { X86::TEST64ri32, X86::TEST64mi32, 1, 0 },
- { X86::TEST8ri, X86::TEST8mi, 1, 0 }
+ unsigned MemOp = OpTbl2Addr[i][1];
+ unsigned Flags = OpTbl2Addr[i][2];
+ AddTableEntry(RegOp2MemOpTable2Addr, MemOp2RegOpTable,
+ RegOp, MemOp,
+ // Index 0, folded load and store, no alignment requirement.
+ Flags | TB_INDEX_0 | TB_FOLDED_LOAD | TB_FOLDED_STORE);
+ }
+
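(AddTableEntry replaces the insert-and-assert boilerplate each loop used to
carry. Its body is not part of this hunk; presumably it honours the two
suppression bits roughly as in this hypothetical C++ sketch:)

    #include <map>
    #include <utility>

    typedef std::map<unsigned, std::pair<unsigned, unsigned> > FoldTable;

    // Hypothetical stand-in for the real helper (assumes the TB_* enum above).
    void AddTableEntrySketch(FoldTable &R2M, FoldTable &M2R,
                             unsigned RegOp, unsigned MemOp, unsigned Flags) {
      if ((Flags & TB_NO_FORWARD) == 0)
        R2M[RegOp] = std::make_pair(MemOp, Flags);  // folding: reg -> mem form
      if ((Flags & TB_NO_REVERSE) == 0)
        M2R[MemOp] = std::make_pair(RegOp, Flags);  // unfolding: mem -> reg form
    }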
+ static const unsigned OpTbl0[][3] = {
+ { X86::BT16ri8, X86::BT16mi8, TB_FOLDED_LOAD },
+ { X86::BT32ri8, X86::BT32mi8, TB_FOLDED_LOAD },
+ { X86::BT64ri8, X86::BT64mi8, TB_FOLDED_LOAD },
+ { X86::CALL32r, X86::CALL32m, TB_FOLDED_LOAD },
+ { X86::CALL64r, X86::CALL64m, TB_FOLDED_LOAD },
+ { X86::WINCALL64r, X86::WINCALL64m, TB_FOLDED_LOAD },
+ { X86::CMP16ri, X86::CMP16mi, TB_FOLDED_LOAD },
+ { X86::CMP16ri8, X86::CMP16mi8, TB_FOLDED_LOAD },
+ { X86::CMP16rr, X86::CMP16mr, TB_FOLDED_LOAD },
+ { X86::CMP32ri, X86::CMP32mi, TB_FOLDED_LOAD },
+ { X86::CMP32ri8, X86::CMP32mi8, TB_FOLDED_LOAD },
+ { X86::CMP32rr, X86::CMP32mr, TB_FOLDED_LOAD },
+ { X86::CMP64ri32, X86::CMP64mi32, TB_FOLDED_LOAD },
+ { X86::CMP64ri8, X86::CMP64mi8, TB_FOLDED_LOAD },
+ { X86::CMP64rr, X86::CMP64mr, TB_FOLDED_LOAD },
+ { X86::CMP8ri, X86::CMP8mi, TB_FOLDED_LOAD },
+ { X86::CMP8rr, X86::CMP8mr, TB_FOLDED_LOAD },
+ { X86::DIV16r, X86::DIV16m, TB_FOLDED_LOAD },
+ { X86::DIV32r, X86::DIV32m, TB_FOLDED_LOAD },
+ { X86::DIV64r, X86::DIV64m, TB_FOLDED_LOAD },
+ { X86::DIV8r, X86::DIV8m, TB_FOLDED_LOAD },
+ { X86::EXTRACTPSrr, X86::EXTRACTPSmr, TB_FOLDED_STORE | TB_ALIGN_16 },
+ { X86::FsMOVAPDrr, X86::MOVSDmr, TB_FOLDED_STORE | TB_NO_REVERSE },
+ { X86::FsMOVAPSrr, X86::MOVSSmr, TB_FOLDED_STORE | TB_NO_REVERSE },
+ { X86::IDIV16r, X86::IDIV16m, TB_FOLDED_LOAD },
+ { X86::IDIV32r, X86::IDIV32m, TB_FOLDED_LOAD },
+ { X86::IDIV64r, X86::IDIV64m, TB_FOLDED_LOAD },
+ { X86::IDIV8r, X86::IDIV8m, TB_FOLDED_LOAD },
+ { X86::IMUL16r, X86::IMUL16m, TB_FOLDED_LOAD },
+ { X86::IMUL32r, X86::IMUL32m, TB_FOLDED_LOAD },
+ { X86::IMUL64r, X86::IMUL64m, TB_FOLDED_LOAD },
+ { X86::IMUL8r, X86::IMUL8m, TB_FOLDED_LOAD },
+ { X86::JMP32r, X86::JMP32m, TB_FOLDED_LOAD },
+ { X86::JMP64r, X86::JMP64m, TB_FOLDED_LOAD },
+ { X86::MOV16ri, X86::MOV16mi, TB_FOLDED_STORE },
+ { X86::MOV16rr, X86::MOV16mr, TB_FOLDED_STORE },
+ { X86::MOV32ri, X86::MOV32mi, TB_FOLDED_STORE },
+ { X86::MOV32rr, X86::MOV32mr, TB_FOLDED_STORE },
+ { X86::MOV64ri32, X86::MOV64mi32, TB_FOLDED_STORE },
+ { X86::MOV64rr, X86::MOV64mr, TB_FOLDED_STORE },
+ { X86::MOV8ri, X86::MOV8mi, TB_FOLDED_STORE },
+ { X86::MOV8rr, X86::MOV8mr, TB_FOLDED_STORE },
+ { X86::MOV8rr_NOREX, X86::MOV8mr_NOREX, TB_FOLDED_STORE },
+ { X86::MOVAPDrr, X86::MOVAPDmr, TB_FOLDED_STORE | TB_ALIGN_16 },
+ { X86::MOVAPSrr, X86::MOVAPSmr, TB_FOLDED_STORE | TB_ALIGN_16 },
+ { X86::MOVDQArr, X86::MOVDQAmr, TB_FOLDED_STORE | TB_ALIGN_16 },
+ { X86::MOVPDI2DIrr, X86::MOVPDI2DImr, TB_FOLDED_STORE },
+ { X86::MOVPQIto64rr,X86::MOVPQI2QImr, TB_FOLDED_STORE },
+ { X86::MOVSDto64rr, X86::MOVSDto64mr, TB_FOLDED_STORE },
+ { X86::MOVSS2DIrr, X86::MOVSS2DImr, TB_FOLDED_STORE },
+ { X86::MOVUPDrr, X86::MOVUPDmr, TB_FOLDED_STORE },
+ { X86::MOVUPSrr, X86::MOVUPSmr, TB_FOLDED_STORE },
+ { X86::MUL16r, X86::MUL16m, TB_FOLDED_LOAD },
+ { X86::MUL32r, X86::MUL32m, TB_FOLDED_LOAD },
+ { X86::MUL64r, X86::MUL64m, TB_FOLDED_LOAD },
+ { X86::MUL8r, X86::MUL8m, TB_FOLDED_LOAD },
+ { X86::SETAEr, X86::SETAEm, TB_FOLDED_STORE },
+ { X86::SETAr, X86::SETAm, TB_FOLDED_STORE },
+ { X86::SETBEr, X86::SETBEm, TB_FOLDED_STORE },
+ { X86::SETBr, X86::SETBm, TB_FOLDED_STORE },
+ { X86::SETEr, X86::SETEm, TB_FOLDED_STORE },
+ { X86::SETGEr, X86::SETGEm, TB_FOLDED_STORE },
+ { X86::SETGr, X86::SETGm, TB_FOLDED_STORE },
+ { X86::SETLEr, X86::SETLEm, TB_FOLDED_STORE },
+ { X86::SETLr, X86::SETLm, TB_FOLDED_STORE },
+ { X86::SETNEr, X86::SETNEm, TB_FOLDED_STORE },
+ { X86::SETNOr, X86::SETNOm, TB_FOLDED_STORE },
+ { X86::SETNPr, X86::SETNPm, TB_FOLDED_STORE },
+ { X86::SETNSr, X86::SETNSm, TB_FOLDED_STORE },
+ { X86::SETOr, X86::SETOm, TB_FOLDED_STORE },
+ { X86::SETPr, X86::SETPm, TB_FOLDED_STORE },
+ { X86::SETSr, X86::SETSm, TB_FOLDED_STORE },
+ { X86::TAILJMPr, X86::TAILJMPm, TB_FOLDED_LOAD },
+ { X86::TAILJMPr64, X86::TAILJMPm64, TB_FOLDED_LOAD },
+ { X86::TEST16ri, X86::TEST16mi, TB_FOLDED_LOAD },
+ { X86::TEST32ri, X86::TEST32mi, TB_FOLDED_LOAD },
+ { X86::TEST64ri32, X86::TEST64mi32, TB_FOLDED_LOAD },
+ { X86::TEST8ri, X86::TEST8mi, TB_FOLDED_LOAD },
+ // AVX 128-bit versions of foldable instructions
+ { X86::VEXTRACTPSrr,X86::VEXTRACTPSmr, TB_FOLDED_STORE | TB_ALIGN_16 },
+ { X86::FsVMOVAPDrr, X86::VMOVSDmr, TB_FOLDED_STORE | TB_NO_REVERSE },
+ { X86::FsVMOVAPSrr, X86::VMOVSSmr, TB_FOLDED_STORE | TB_NO_REVERSE },
+ { X86::VMOVAPDrr, X86::VMOVAPDmr, TB_FOLDED_STORE | TB_ALIGN_16 },
+ { X86::VMOVAPSrr, X86::VMOVAPSmr, TB_FOLDED_STORE | TB_ALIGN_16 },
+ { X86::VMOVDQArr, X86::VMOVDQAmr, TB_FOLDED_STORE | TB_ALIGN_16 },
+ { X86::VMOVPDI2DIrr,X86::VMOVPDI2DImr, TB_FOLDED_STORE },
+ { X86::VMOVPQIto64rr, X86::VMOVPQI2QImr,TB_FOLDED_STORE },
+ { X86::VMOVSDto64rr,X86::VMOVSDto64mr, TB_FOLDED_STORE },
+ { X86::VMOVSS2DIrr, X86::VMOVSS2DImr, TB_FOLDED_STORE },
+ { X86::VMOVUPDrr, X86::VMOVUPDmr, TB_FOLDED_STORE },
+ { X86::VMOVUPSrr, X86::VMOVUPSmr, TB_FOLDED_STORE },
+ // AVX 256-bit foldable instructions
+ { X86::VMOVAPDYrr, X86::VMOVAPDYmr, TB_FOLDED_STORE | TB_ALIGN_32 },
+ { X86::VMOVAPSYrr, X86::VMOVAPSYmr, TB_FOLDED_STORE | TB_ALIGN_32 },
+ { X86::VMOVDQAYrr, X86::VMOVDQAYmr, TB_FOLDED_STORE | TB_ALIGN_32 },
+ { X86::VMOVUPDYrr, X86::VMOVUPDYmr, TB_FOLDED_STORE },
+ { X86::VMOVUPSYrr, X86::VMOVUPSYmr, TB_FOLDED_STORE }
};
for (unsigned i = 0, e = array_lengthof(OpTbl0); i != e; ++i) {
unsigned RegOp = OpTbl0[i][0];
- unsigned MemOp = OpTbl0[i][1] & ~TB_FLAGS;
- unsigned FoldedLoad = OpTbl0[i][2];
- unsigned Align = OpTbl0[i][3];
- assert(!RegOp2MemOpTable0.count(RegOp) && "Duplicated entries?");
- RegOp2MemOpTable0[RegOp] = std::make_pair(MemOp, Align);
-
- // If this is not a reversible operation (because there is a many->one)
- // mapping, don't insert the reverse of the operation into MemOp2RegOpTable.
- if (OpTbl0[i][1] & TB_NOT_REVERSABLE)
- continue;
-
- // Index 0, folded load or store.
- unsigned AuxInfo = 0 | (FoldedLoad << 4) | ((FoldedLoad^1) << 5);
- assert(!MemOp2RegOpTable.count(MemOp) && "Duplicated entries?");
- MemOp2RegOpTable[MemOp] = std::make_pair(RegOp, AuxInfo);
+ unsigned MemOp = OpTbl0[i][1];
+ unsigned Flags = OpTbl0[i][2];
+ AddTableEntry(RegOp2MemOpTable0, MemOp2RegOpTable,
+ RegOp, MemOp, TB_INDEX_0 | Flags);
}
static const unsigned OpTbl1[][3] = {
- { X86::CMP16rr, X86::CMP16rm, 0 },
- { X86::CMP32rr, X86::CMP32rm, 0 },
- { X86::CMP64rr, X86::CMP64rm, 0 },
- { X86::CMP8rr, X86::CMP8rm, 0 },
- { X86::CVTSD2SSrr, X86::CVTSD2SSrm, 0 },
- { X86::CVTSI2SD64rr, X86::CVTSI2SD64rm, 0 },
- { X86::CVTSI2SDrr, X86::CVTSI2SDrm, 0 },
- { X86::CVTSI2SS64rr, X86::CVTSI2SS64rm, 0 },
- { X86::CVTSI2SSrr, X86::CVTSI2SSrm, 0 },
- { X86::CVTSS2SDrr, X86::CVTSS2SDrm, 0 },
- { X86::CVTTSD2SI64rr, X86::CVTTSD2SI64rm, 0 },
- { X86::CVTTSD2SIrr, X86::CVTTSD2SIrm, 0 },
- { X86::CVTTSS2SI64rr, X86::CVTTSS2SI64rm, 0 },
- { X86::CVTTSS2SIrr, X86::CVTTSS2SIrm, 0 },
- { X86::FsMOVAPDrr, X86::MOVSDrm | TB_NOT_REVERSABLE , 0 },
- { X86::FsMOVAPSrr, X86::MOVSSrm | TB_NOT_REVERSABLE , 0 },
- { X86::IMUL16rri, X86::IMUL16rmi, 0 },
- { X86::IMUL16rri8, X86::IMUL16rmi8, 0 },
- { X86::IMUL32rri, X86::IMUL32rmi, 0 },
- { X86::IMUL32rri8, X86::IMUL32rmi8, 0 },
- { X86::IMUL64rri32, X86::IMUL64rmi32, 0 },
- { X86::IMUL64rri8, X86::IMUL64rmi8, 0 },
- { X86::Int_COMISDrr, X86::Int_COMISDrm, 0 },
- { X86::Int_COMISSrr, X86::Int_COMISSrm, 0 },
- { X86::Int_CVTDQ2PDrr, X86::Int_CVTDQ2PDrm, 16 },
- { X86::Int_CVTDQ2PSrr, X86::Int_CVTDQ2PSrm, 16 },
- { X86::Int_CVTPD2DQrr, X86::Int_CVTPD2DQrm, 16 },
- { X86::Int_CVTPD2PSrr, X86::Int_CVTPD2PSrm, 16 },
- { X86::Int_CVTPS2DQrr, X86::Int_CVTPS2DQrm, 16 },
- { X86::Int_CVTPS2PDrr, X86::Int_CVTPS2PDrm, 0 },
- { X86::CVTSD2SI64rr, X86::CVTSD2SI64rm, 0 },
- { X86::CVTSD2SIrr, X86::CVTSD2SIrm, 0 },
- { X86::Int_CVTSD2SSrr, X86::Int_CVTSD2SSrm, 0 },
- { X86::Int_CVTSI2SD64rr,X86::Int_CVTSI2SD64rm, 0 },
- { X86::Int_CVTSI2SDrr, X86::Int_CVTSI2SDrm, 0 },
- { X86::Int_CVTSI2SS64rr,X86::Int_CVTSI2SS64rm, 0 },
- { X86::Int_CVTSI2SSrr, X86::Int_CVTSI2SSrm, 0 },
- { X86::Int_CVTSS2SDrr, X86::Int_CVTSS2SDrm, 0 },
- { X86::Int_CVTSS2SI64rr,X86::Int_CVTSS2SI64rm, 0 },
- { X86::Int_CVTSS2SIrr, X86::Int_CVTSS2SIrm, 0 },
- { X86::CVTTPD2DQrr, X86::CVTTPD2DQrm, 16 },
- { X86::CVTTPS2DQrr, X86::CVTTPS2DQrm, 16 },
- { X86::Int_CVTTSD2SI64rr,X86::Int_CVTTSD2SI64rm, 0 },
- { X86::Int_CVTTSD2SIrr, X86::Int_CVTTSD2SIrm, 0 },
- { X86::Int_CVTTSS2SI64rr,X86::Int_CVTTSS2SI64rm, 0 },
- { X86::Int_CVTTSS2SIrr, X86::Int_CVTTSS2SIrm, 0 },
- { X86::Int_UCOMISDrr, X86::Int_UCOMISDrm, 0 },
- { X86::Int_UCOMISSrr, X86::Int_UCOMISSrm, 0 },
- { X86::MOV16rr, X86::MOV16rm, 0 },
- { X86::MOV32rr, X86::MOV32rm, 0 },
- { X86::MOV64rr, X86::MOV64rm, 0 },
- { X86::MOV64toPQIrr, X86::MOVQI2PQIrm, 0 },
- { X86::MOV64toSDrr, X86::MOV64toSDrm, 0 },
- { X86::MOV8rr, X86::MOV8rm, 0 },
- { X86::MOVAPDrr, X86::MOVAPDrm, 16 },
- { X86::MOVAPSrr, X86::MOVAPSrm, 16 },
- { X86::VMOVAPDYrr, X86::VMOVAPDYrm, 32 },
- { X86::VMOVAPSYrr, X86::VMOVAPSYrm, 32 },
- { X86::MOVDDUPrr, X86::MOVDDUPrm, 0 },
- { X86::MOVDI2PDIrr, X86::MOVDI2PDIrm, 0 },
- { X86::MOVDI2SSrr, X86::MOVDI2SSrm, 0 },
- { X86::MOVDQArr, X86::MOVDQArm, 16 },
- { X86::VMOVDQAYrr, X86::VMOVDQAYrm, 16 },
- { X86::MOVSHDUPrr, X86::MOVSHDUPrm, 16 },
- { X86::MOVSLDUPrr, X86::MOVSLDUPrm, 16 },
- { X86::MOVSX16rr8, X86::MOVSX16rm8, 0 },
- { X86::MOVSX32rr16, X86::MOVSX32rm16, 0 },
- { X86::MOVSX32rr8, X86::MOVSX32rm8, 0 },
- { X86::MOVSX64rr16, X86::MOVSX64rm16, 0 },
- { X86::MOVSX64rr32, X86::MOVSX64rm32, 0 },
- { X86::MOVSX64rr8, X86::MOVSX64rm8, 0 },
- { X86::MOVUPDrr, X86::MOVUPDrm, 16 },
- { X86::MOVUPSrr, X86::MOVUPSrm, 0 },
- { X86::VMOVUPDYrr, X86::VMOVUPDYrm, 0 },
- { X86::VMOVUPSYrr, X86::VMOVUPSYrm, 0 },
- { X86::MOVZDI2PDIrr, X86::MOVZDI2PDIrm, 0 },
- { X86::MOVZQI2PQIrr, X86::MOVZQI2PQIrm, 0 },
- { X86::MOVZPQILo2PQIrr, X86::MOVZPQILo2PQIrm, 16 },
- { X86::MOVZX16rr8, X86::MOVZX16rm8, 0 },
- { X86::MOVZX32rr16, X86::MOVZX32rm16, 0 },
- { X86::MOVZX32_NOREXrr8, X86::MOVZX32_NOREXrm8, 0 },
- { X86::MOVZX32rr8, X86::MOVZX32rm8, 0 },
- { X86::MOVZX64rr16, X86::MOVZX64rm16, 0 },
- { X86::MOVZX64rr32, X86::MOVZX64rm32, 0 },
- { X86::MOVZX64rr8, X86::MOVZX64rm8, 0 },
- { X86::PSHUFDri, X86::PSHUFDmi, 16 },
- { X86::PSHUFHWri, X86::PSHUFHWmi, 16 },
- { X86::PSHUFLWri, X86::PSHUFLWmi, 16 },
- { X86::RCPPSr, X86::RCPPSm, 16 },
- { X86::RCPPSr_Int, X86::RCPPSm_Int, 16 },
- { X86::RSQRTPSr, X86::RSQRTPSm, 16 },
- { X86::RSQRTPSr_Int, X86::RSQRTPSm_Int, 16 },
- { X86::RSQRTSSr, X86::RSQRTSSm, 0 },
- { X86::RSQRTSSr_Int, X86::RSQRTSSm_Int, 0 },
- { X86::SQRTPDr, X86::SQRTPDm, 16 },
- { X86::SQRTPDr_Int, X86::SQRTPDm_Int, 16 },
- { X86::SQRTPSr, X86::SQRTPSm, 16 },
- { X86::SQRTPSr_Int, X86::SQRTPSm_Int, 16 },
- { X86::SQRTSDr, X86::SQRTSDm, 0 },
- { X86::SQRTSDr_Int, X86::SQRTSDm_Int, 0 },
- { X86::SQRTSSr, X86::SQRTSSm, 0 },
- { X86::SQRTSSr_Int, X86::SQRTSSm_Int, 0 },
- { X86::TEST16rr, X86::TEST16rm, 0 },
- { X86::TEST32rr, X86::TEST32rm, 0 },
- { X86::TEST64rr, X86::TEST64rm, 0 },
- { X86::TEST8rr, X86::TEST8rm, 0 },
+ { X86::CMP16rr, X86::CMP16rm, 0 },
+ { X86::CMP32rr, X86::CMP32rm, 0 },
+ { X86::CMP64rr, X86::CMP64rm, 0 },
+ { X86::CMP8rr, X86::CMP8rm, 0 },
+ { X86::CVTSD2SSrr, X86::CVTSD2SSrm, 0 },
+ { X86::CVTSI2SD64rr, X86::CVTSI2SD64rm, 0 },
+ { X86::CVTSI2SDrr, X86::CVTSI2SDrm, 0 },
+ { X86::CVTSI2SS64rr, X86::CVTSI2SS64rm, 0 },
+ { X86::CVTSI2SSrr, X86::CVTSI2SSrm, 0 },
+ { X86::CVTSS2SDrr, X86::CVTSS2SDrm, 0 },
+ { X86::CVTTSD2SI64rr, X86::CVTTSD2SI64rm, 0 },
+ { X86::CVTTSD2SIrr, X86::CVTTSD2SIrm, 0 },
+ { X86::CVTTSS2SI64rr, X86::CVTTSS2SI64rm, 0 },
+ { X86::CVTTSS2SIrr, X86::CVTTSS2SIrm, 0 },
+ { X86::FsMOVAPDrr, X86::MOVSDrm, TB_NO_REVERSE },
+ { X86::FsMOVAPSrr, X86::MOVSSrm, TB_NO_REVERSE },
+ { X86::IMUL16rri, X86::IMUL16rmi, 0 },
+ { X86::IMUL16rri8, X86::IMUL16rmi8, 0 },
+ { X86::IMUL32rri, X86::IMUL32rmi, 0 },
+ { X86::IMUL32rri8, X86::IMUL32rmi8, 0 },
+ { X86::IMUL64rri32, X86::IMUL64rmi32, 0 },
+ { X86::IMUL64rri8, X86::IMUL64rmi8, 0 },
+ { X86::Int_COMISDrr, X86::Int_COMISDrm, 0 },
+ { X86::Int_COMISSrr, X86::Int_COMISSrm, 0 },
+ { X86::Int_CVTDQ2PDrr, X86::Int_CVTDQ2PDrm, TB_ALIGN_16 },
+ { X86::Int_CVTDQ2PSrr, X86::Int_CVTDQ2PSrm, TB_ALIGN_16 },
+ { X86::Int_CVTPD2DQrr, X86::Int_CVTPD2DQrm, TB_ALIGN_16 },
+ { X86::Int_CVTPD2PSrr, X86::Int_CVTPD2PSrm, TB_ALIGN_16 },
+ { X86::Int_CVTPS2DQrr, X86::Int_CVTPS2DQrm, TB_ALIGN_16 },
+ { X86::Int_CVTPS2PDrr, X86::Int_CVTPS2PDrm, 0 },
+ { X86::CVTSD2SI64rr, X86::CVTSD2SI64rm, 0 },
+ { X86::CVTSD2SIrr, X86::CVTSD2SIrm, 0 },
+ { X86::Int_CVTSD2SSrr, X86::Int_CVTSD2SSrm, 0 },
+ { X86::Int_CVTSI2SD64rr,X86::Int_CVTSI2SD64rm, 0 },
+ { X86::Int_CVTSI2SDrr, X86::Int_CVTSI2SDrm, 0 },
+ { X86::Int_CVTSI2SS64rr,X86::Int_CVTSI2SS64rm, 0 },
+ { X86::Int_CVTSI2SSrr, X86::Int_CVTSI2SSrm, 0 },
+ { X86::Int_CVTSS2SDrr, X86::Int_CVTSS2SDrm, 0 },
+ { X86::CVTTPD2DQrr, X86::CVTTPD2DQrm, TB_ALIGN_16 },
+ { X86::CVTTPS2DQrr, X86::CVTTPS2DQrm, TB_ALIGN_16 },
+ { X86::Int_CVTTSD2SI64rr,X86::Int_CVTTSD2SI64rm, 0 },
+ { X86::Int_CVTTSD2SIrr, X86::Int_CVTTSD2SIrm, 0 },
+ { X86::Int_CVTTSS2SI64rr,X86::Int_CVTTSS2SI64rm, 0 },
+ { X86::Int_CVTTSS2SIrr, X86::Int_CVTTSS2SIrm, 0 },
+ { X86::Int_UCOMISDrr, X86::Int_UCOMISDrm, 0 },
+ { X86::Int_UCOMISSrr, X86::Int_UCOMISSrm, 0 },
+ { X86::MOV16rr, X86::MOV16rm, 0 },
+ { X86::MOV32rr, X86::MOV32rm, 0 },
+ { X86::MOV64rr, X86::MOV64rm, 0 },
+ { X86::MOV64toPQIrr, X86::MOVQI2PQIrm, 0 },
+ { X86::MOV64toSDrr, X86::MOV64toSDrm, 0 },
+ { X86::MOV8rr, X86::MOV8rm, 0 },
+ { X86::MOVAPDrr, X86::MOVAPDrm, TB_ALIGN_16 },
+ { X86::MOVAPSrr, X86::MOVAPSrm, TB_ALIGN_16 },
+ { X86::MOVDDUPrr, X86::MOVDDUPrm, 0 },
+ { X86::MOVDI2PDIrr, X86::MOVDI2PDIrm, 0 },
+ { X86::MOVDI2SSrr, X86::MOVDI2SSrm, 0 },
+ { X86::MOVDQArr, X86::MOVDQArm, TB_ALIGN_16 },
+ { X86::MOVSHDUPrr, X86::MOVSHDUPrm, TB_ALIGN_16 },
+ { X86::MOVSLDUPrr, X86::MOVSLDUPrm, TB_ALIGN_16 },
+ { X86::MOVSX16rr8, X86::MOVSX16rm8, 0 },
+ { X86::MOVSX32rr16, X86::MOVSX32rm16, 0 },
+ { X86::MOVSX32rr8, X86::MOVSX32rm8, 0 },
+ { X86::MOVSX64rr16, X86::MOVSX64rm16, 0 },
+ { X86::MOVSX64rr32, X86::MOVSX64rm32, 0 },
+ { X86::MOVSX64rr8, X86::MOVSX64rm8, 0 },
+ { X86::MOVUPDrr, X86::MOVUPDrm, TB_ALIGN_16 },
+ { X86::MOVUPSrr, X86::MOVUPSrm, 0 },
+ { X86::MOVZDI2PDIrr, X86::MOVZDI2PDIrm, 0 },
+ { X86::MOVZQI2PQIrr, X86::MOVZQI2PQIrm, 0 },
+ { X86::MOVZPQILo2PQIrr, X86::MOVZPQILo2PQIrm, TB_ALIGN_16 },
+ { X86::MOVZX16rr8, X86::MOVZX16rm8, 0 },
+ { X86::MOVZX32rr16, X86::MOVZX32rm16, 0 },
+ { X86::MOVZX32_NOREXrr8, X86::MOVZX32_NOREXrm8, 0 },
+ { X86::MOVZX32rr8, X86::MOVZX32rm8, 0 },
+ { X86::MOVZX64rr16, X86::MOVZX64rm16, 0 },
+ { X86::MOVZX64rr32, X86::MOVZX64rm32, 0 },
+ { X86::MOVZX64rr8, X86::MOVZX64rm8, 0 },
+ { X86::PSHUFDri, X86::PSHUFDmi, TB_ALIGN_16 },
+ { X86::PSHUFHWri, X86::PSHUFHWmi, TB_ALIGN_16 },
+ { X86::PSHUFLWri, X86::PSHUFLWmi, TB_ALIGN_16 },
+ { X86::RCPPSr, X86::RCPPSm, TB_ALIGN_16 },
+ { X86::RCPPSr_Int, X86::RCPPSm_Int, TB_ALIGN_16 },
+ { X86::RSQRTPSr, X86::RSQRTPSm, TB_ALIGN_16 },
+ { X86::RSQRTPSr_Int, X86::RSQRTPSm_Int, TB_ALIGN_16 },
+ { X86::RSQRTSSr, X86::RSQRTSSm, 0 },
+ { X86::RSQRTSSr_Int, X86::RSQRTSSm_Int, 0 },
+ { X86::SQRTPDr, X86::SQRTPDm, TB_ALIGN_16 },
+ { X86::SQRTPDr_Int, X86::SQRTPDm_Int, TB_ALIGN_16 },
+ { X86::SQRTPSr, X86::SQRTPSm, TB_ALIGN_16 },
+ { X86::SQRTPSr_Int, X86::SQRTPSm_Int, TB_ALIGN_16 },
+ { X86::SQRTSDr, X86::SQRTSDm, 0 },
+ { X86::SQRTSDr_Int, X86::SQRTSDm_Int, 0 },
+ { X86::SQRTSSr, X86::SQRTSSm, 0 },
+ { X86::SQRTSSr_Int, X86::SQRTSSm_Int, 0 },
+ { X86::TEST16rr, X86::TEST16rm, 0 },
+ { X86::TEST32rr, X86::TEST32rm, 0 },
+ { X86::TEST64rr, X86::TEST64rm, 0 },
+ { X86::TEST8rr, X86::TEST8rm, 0 },
// FIXME: TEST*rr EAX,EAX ---> CMP [mem], 0
- { X86::UCOMISDrr, X86::UCOMISDrm, 0 },
- { X86::UCOMISSrr, X86::UCOMISSrm, 0 }
+ { X86::UCOMISDrr, X86::UCOMISDrm, 0 },
+ { X86::UCOMISSrr, X86::UCOMISSrm, 0 },
+ // AVX 128-bit versions of foldable instructions
+ { X86::Int_VCOMISDrr, X86::Int_VCOMISDrm, 0 },
+ { X86::Int_VCOMISSrr, X86::Int_VCOMISSrm, 0 },
+ { X86::Int_VCVTDQ2PDrr, X86::Int_VCVTDQ2PDrm, TB_ALIGN_16 },
+ { X86::Int_VCVTDQ2PSrr, X86::Int_VCVTDQ2PSrm, TB_ALIGN_16 },
+ { X86::Int_VCVTPD2DQrr, X86::Int_VCVTPD2DQrm, TB_ALIGN_16 },
+ { X86::Int_VCVTPD2PSrr, X86::Int_VCVTPD2PSrm, TB_ALIGN_16 },
+ { X86::Int_VCVTPS2DQrr, X86::Int_VCVTPS2DQrm, TB_ALIGN_16 },
+ { X86::Int_VCVTPS2PDrr, X86::Int_VCVTPS2PDrm, 0 },
+ { X86::Int_VUCOMISDrr, X86::Int_VUCOMISDrm, 0 },
+ { X86::Int_VUCOMISSrr, X86::Int_VUCOMISSrm, 0 },
+ { X86::FsVMOVAPDrr, X86::VMOVSDrm, TB_NO_REVERSE },
+ { X86::FsVMOVAPSrr, X86::VMOVSSrm, TB_NO_REVERSE },
+ { X86::VMOV64toPQIrr, X86::VMOVQI2PQIrm, 0 },
+ { X86::VMOV64toSDrr, X86::VMOV64toSDrm, 0 },
+ { X86::VMOVAPDrr, X86::VMOVAPDrm, TB_ALIGN_16 },
+ { X86::VMOVAPSrr, X86::VMOVAPSrm, TB_ALIGN_16 },
+ { X86::VMOVDDUPrr, X86::VMOVDDUPrm, 0 },
+ { X86::VMOVDI2PDIrr, X86::VMOVDI2PDIrm, 0 },
+ { X86::VMOVDI2SSrr, X86::VMOVDI2SSrm, 0 },
+ { X86::VMOVDQArr, X86::VMOVDQArm, TB_ALIGN_16 },
+ { X86::VMOVSLDUPrr, X86::VMOVSLDUPrm, TB_ALIGN_16 },
+ { X86::VMOVSHDUPrr, X86::VMOVSHDUPrm, TB_ALIGN_16 },
+ { X86::VMOVUPDrr, X86::VMOVUPDrm, TB_ALIGN_16 },
+ { X86::VMOVUPSrr, X86::VMOVUPSrm, 0 },
+ { X86::VMOVZDI2PDIrr, X86::VMOVZDI2PDIrm, 0 },
+ { X86::VMOVZQI2PQIrr, X86::VMOVZQI2PQIrm, 0 },
+ { X86::VMOVZPQILo2PQIrr,X86::VMOVZPQILo2PQIrm, TB_ALIGN_16 },
+ { X86::VPSHUFDri, X86::VPSHUFDmi, TB_ALIGN_16 },
+ { X86::VPSHUFHWri, X86::VPSHUFHWmi, TB_ALIGN_16 },
+ { X86::VPSHUFLWri, X86::VPSHUFLWmi, TB_ALIGN_16 },
+ { X86::VRCPPSr, X86::VRCPPSm, TB_ALIGN_16 },
+ { X86::VRCPPSr_Int, X86::VRCPPSm_Int, TB_ALIGN_16 },
+ { X86::VRSQRTPSr, X86::VRSQRTPSm, TB_ALIGN_16 },
+ { X86::VRSQRTPSr_Int, X86::VRSQRTPSm_Int, TB_ALIGN_16 },
+ { X86::VSQRTPDr, X86::VSQRTPDm, TB_ALIGN_16 },
+ { X86::VSQRTPDr_Int, X86::VSQRTPDm_Int, TB_ALIGN_16 },
+ { X86::VSQRTPSr, X86::VSQRTPSm, TB_ALIGN_16 },
+ { X86::VSQRTPSr_Int, X86::VSQRTPSm_Int, TB_ALIGN_16 },
+ { X86::VUCOMISDrr, X86::VUCOMISDrm, 0 },
+ { X86::VUCOMISSrr, X86::VUCOMISSrm, 0 },
+ // AVX 256-bit foldable instructions
+ { X86::VMOVAPDYrr, X86::VMOVAPDYrm, TB_ALIGN_32 },
+ { X86::VMOVAPSYrr, X86::VMOVAPSYrm, TB_ALIGN_32 },
+ { X86::VMOVDQAYrr, X86::VMOVDQAYrm, TB_ALIGN_16 },
+ { X86::VMOVUPDYrr, X86::VMOVUPDYrm, 0 },
+ { X86::VMOVUPSYrr, X86::VMOVUPSYrm, 0 }
};
for (unsigned i = 0, e = array_lengthof(OpTbl1); i != e; ++i) {
unsigned RegOp = OpTbl1[i][0];
- unsigned MemOp = OpTbl1[i][1] & ~TB_FLAGS;
- unsigned Align = OpTbl1[i][2];
- assert(!RegOp2MemOpTable1.count(RegOp) && "Duplicate entries");
- RegOp2MemOpTable1[RegOp] = std::make_pair(MemOp, Align);
-
- // If this is not a reversible operation (because there is a many->one)
- // mapping, don't insert the reverse of the operation into MemOp2RegOpTable.
- if (OpTbl1[i][1] & TB_NOT_REVERSABLE)
- continue;
-
- // Index 1, folded load
- unsigned AuxInfo = 1 | (1 << 4);
- assert(!MemOp2RegOpTable.count(MemOp) && "Duplicate entries");
- MemOp2RegOpTable[MemOp] = std::make_pair(RegOp, AuxInfo);
+ unsigned MemOp = OpTbl1[i][1];
+ unsigned Flags = OpTbl1[i][2];
+ AddTableEntry(RegOp2MemOpTable1, MemOp2RegOpTable,
+ RegOp, MemOp,
+ // Index 1, folded load
+ Flags | TB_INDEX_1 | TB_FOLDED_LOAD);
}
static const unsigned OpTbl2[][3] = {
- { X86::ADC32rr, X86::ADC32rm, 0 },
- { X86::ADC64rr, X86::ADC64rm, 0 },
- { X86::ADD16rr, X86::ADD16rm, 0 },
- { X86::ADD16rr_DB, X86::ADD16rm | TB_NOT_REVERSABLE, 0 },
- { X86::ADD32rr, X86::ADD32rm, 0 },
- { X86::ADD32rr_DB, X86::ADD32rm | TB_NOT_REVERSABLE, 0 },
- { X86::ADD64rr, X86::ADD64rm, 0 },
- { X86::ADD64rr_DB, X86::ADD64rm | TB_NOT_REVERSABLE, 0 },
- { X86::ADD8rr, X86::ADD8rm, 0 },
- { X86::ADDPDrr, X86::ADDPDrm, 16 },
- { X86::ADDPSrr, X86::ADDPSrm, 16 },
- { X86::ADDSDrr, X86::ADDSDrm, 0 },
- { X86::ADDSSrr, X86::ADDSSrm, 0 },
- { X86::ADDSUBPDrr, X86::ADDSUBPDrm, 16 },
- { X86::ADDSUBPSrr, X86::ADDSUBPSrm, 16 },
- { X86::AND16rr, X86::AND16rm, 0 },
- { X86::AND32rr, X86::AND32rm, 0 },
- { X86::AND64rr, X86::AND64rm, 0 },
- { X86::AND8rr, X86::AND8rm, 0 },
- { X86::ANDNPDrr, X86::ANDNPDrm, 16 },
- { X86::ANDNPSrr, X86::ANDNPSrm, 16 },
- { X86::ANDPDrr, X86::ANDPDrm, 16 },
- { X86::ANDPSrr, X86::ANDPSrm, 16 },
- { X86::CMOVA16rr, X86::CMOVA16rm, 0 },
- { X86::CMOVA32rr, X86::CMOVA32rm, 0 },
- { X86::CMOVA64rr, X86::CMOVA64rm, 0 },
- { X86::CMOVAE16rr, X86::CMOVAE16rm, 0 },
- { X86::CMOVAE32rr, X86::CMOVAE32rm, 0 },
- { X86::CMOVAE64rr, X86::CMOVAE64rm, 0 },
- { X86::CMOVB16rr, X86::CMOVB16rm, 0 },
- { X86::CMOVB32rr, X86::CMOVB32rm, 0 },
- { X86::CMOVB64rr, X86::CMOVB64rm, 0 },
- { X86::CMOVBE16rr, X86::CMOVBE16rm, 0 },
- { X86::CMOVBE32rr, X86::CMOVBE32rm, 0 },
- { X86::CMOVBE64rr, X86::CMOVBE64rm, 0 },
- { X86::CMOVE16rr, X86::CMOVE16rm, 0 },
- { X86::CMOVE32rr, X86::CMOVE32rm, 0 },
- { X86::CMOVE64rr, X86::CMOVE64rm, 0 },
- { X86::CMOVG16rr, X86::CMOVG16rm, 0 },
- { X86::CMOVG32rr, X86::CMOVG32rm, 0 },
- { X86::CMOVG64rr, X86::CMOVG64rm, 0 },
- { X86::CMOVGE16rr, X86::CMOVGE16rm, 0 },
- { X86::CMOVGE32rr, X86::CMOVGE32rm, 0 },
- { X86::CMOVGE64rr, X86::CMOVGE64rm, 0 },
- { X86::CMOVL16rr, X86::CMOVL16rm, 0 },
- { X86::CMOVL32rr, X86::CMOVL32rm, 0 },
- { X86::CMOVL64rr, X86::CMOVL64rm, 0 },
- { X86::CMOVLE16rr, X86::CMOVLE16rm, 0 },
- { X86::CMOVLE32rr, X86::CMOVLE32rm, 0 },
- { X86::CMOVLE64rr, X86::CMOVLE64rm, 0 },
- { X86::CMOVNE16rr, X86::CMOVNE16rm, 0 },
- { X86::CMOVNE32rr, X86::CMOVNE32rm, 0 },
- { X86::CMOVNE64rr, X86::CMOVNE64rm, 0 },
- { X86::CMOVNO16rr, X86::CMOVNO16rm, 0 },
- { X86::CMOVNO32rr, X86::CMOVNO32rm, 0 },
- { X86::CMOVNO64rr, X86::CMOVNO64rm, 0 },
- { X86::CMOVNP16rr, X86::CMOVNP16rm, 0 },
- { X86::CMOVNP32rr, X86::CMOVNP32rm, 0 },
- { X86::CMOVNP64rr, X86::CMOVNP64rm, 0 },
- { X86::CMOVNS16rr, X86::CMOVNS16rm, 0 },
- { X86::CMOVNS32rr, X86::CMOVNS32rm, 0 },
- { X86::CMOVNS64rr, X86::CMOVNS64rm, 0 },
- { X86::CMOVO16rr, X86::CMOVO16rm, 0 },
- { X86::CMOVO32rr, X86::CMOVO32rm, 0 },
- { X86::CMOVO64rr, X86::CMOVO64rm, 0 },
- { X86::CMOVP16rr, X86::CMOVP16rm, 0 },
- { X86::CMOVP32rr, X86::CMOVP32rm, 0 },
- { X86::CMOVP64rr, X86::CMOVP64rm, 0 },
- { X86::CMOVS16rr, X86::CMOVS16rm, 0 },
- { X86::CMOVS32rr, X86::CMOVS32rm, 0 },
- { X86::CMOVS64rr, X86::CMOVS64rm, 0 },
- { X86::CMPPDrri, X86::CMPPDrmi, 16 },
- { X86::CMPPSrri, X86::CMPPSrmi, 16 },
- { X86::CMPSDrr, X86::CMPSDrm, 0 },
- { X86::CMPSSrr, X86::CMPSSrm, 0 },
- { X86::DIVPDrr, X86::DIVPDrm, 16 },
- { X86::DIVPSrr, X86::DIVPSrm, 16 },
- { X86::DIVSDrr, X86::DIVSDrm, 0 },
- { X86::DIVSSrr, X86::DIVSSrm, 0 },
- { X86::FsANDNPDrr, X86::FsANDNPDrm, 16 },
- { X86::FsANDNPSrr, X86::FsANDNPSrm, 16 },
- { X86::FsANDPDrr, X86::FsANDPDrm, 16 },
- { X86::FsANDPSrr, X86::FsANDPSrm, 16 },
- { X86::FsORPDrr, X86::FsORPDrm, 16 },
- { X86::FsORPSrr, X86::FsORPSrm, 16 },
- { X86::FsXORPDrr, X86::FsXORPDrm, 16 },
- { X86::FsXORPSrr, X86::FsXORPSrm, 16 },
- { X86::HADDPDrr, X86::HADDPDrm, 16 },
- { X86::HADDPSrr, X86::HADDPSrm, 16 },
- { X86::HSUBPDrr, X86::HSUBPDrm, 16 },
- { X86::HSUBPSrr, X86::HSUBPSrm, 16 },
- { X86::IMUL16rr, X86::IMUL16rm, 0 },
- { X86::IMUL32rr, X86::IMUL32rm, 0 },
- { X86::IMUL64rr, X86::IMUL64rm, 0 },
- { X86::Int_CMPSDrr, X86::Int_CMPSDrm, 0 },
- { X86::Int_CMPSSrr, X86::Int_CMPSSrm, 0 },
- { X86::MAXPDrr, X86::MAXPDrm, 16 },
- { X86::MAXPDrr_Int, X86::MAXPDrm_Int, 16 },
- { X86::MAXPSrr, X86::MAXPSrm, 16 },
- { X86::MAXPSrr_Int, X86::MAXPSrm_Int, 16 },
- { X86::MAXSDrr, X86::MAXSDrm, 0 },
- { X86::MAXSDrr_Int, X86::MAXSDrm_Int, 0 },
- { X86::MAXSSrr, X86::MAXSSrm, 0 },
- { X86::MAXSSrr_Int, X86::MAXSSrm_Int, 0 },
- { X86::MINPDrr, X86::MINPDrm, 16 },
- { X86::MINPDrr_Int, X86::MINPDrm_Int, 16 },
- { X86::MINPSrr, X86::MINPSrm, 16 },
- { X86::MINPSrr_Int, X86::MINPSrm_Int, 16 },
- { X86::MINSDrr, X86::MINSDrm, 0 },
- { X86::MINSDrr_Int, X86::MINSDrm_Int, 0 },
- { X86::MINSSrr, X86::MINSSrm, 0 },
- { X86::MINSSrr_Int, X86::MINSSrm_Int, 0 },
- { X86::MULPDrr, X86::MULPDrm, 16 },
- { X86::MULPSrr, X86::MULPSrm, 16 },
- { X86::MULSDrr, X86::MULSDrm, 0 },
- { X86::MULSSrr, X86::MULSSrm, 0 },
- { X86::OR16rr, X86::OR16rm, 0 },
- { X86::OR32rr, X86::OR32rm, 0 },
- { X86::OR64rr, X86::OR64rm, 0 },
- { X86::OR8rr, X86::OR8rm, 0 },
- { X86::ORPDrr, X86::ORPDrm, 16 },
- { X86::ORPSrr, X86::ORPSrm, 16 },
- { X86::PACKSSDWrr, X86::PACKSSDWrm, 16 },
- { X86::PACKSSWBrr, X86::PACKSSWBrm, 16 },
- { X86::PACKUSWBrr, X86::PACKUSWBrm, 16 },
- { X86::PADDBrr, X86::PADDBrm, 16 },
- { X86::PADDDrr, X86::PADDDrm, 16 },
- { X86::PADDQrr, X86::PADDQrm, 16 },
- { X86::PADDSBrr, X86::PADDSBrm, 16 },
- { X86::PADDSWrr, X86::PADDSWrm, 16 },
- { X86::PADDWrr, X86::PADDWrm, 16 },
- { X86::PANDNrr, X86::PANDNrm, 16 },
- { X86::PANDrr, X86::PANDrm, 16 },
- { X86::PAVGBrr, X86::PAVGBrm, 16 },
- { X86::PAVGWrr, X86::PAVGWrm, 16 },
- { X86::PCMPEQBrr, X86::PCMPEQBrm, 16 },
- { X86::PCMPEQDrr, X86::PCMPEQDrm, 16 },
- { X86::PCMPEQWrr, X86::PCMPEQWrm, 16 },
- { X86::PCMPGTBrr, X86::PCMPGTBrm, 16 },
- { X86::PCMPGTDrr, X86::PCMPGTDrm, 16 },
- { X86::PCMPGTWrr, X86::PCMPGTWrm, 16 },
- { X86::PINSRWrri, X86::PINSRWrmi, 16 },
- { X86::PMADDWDrr, X86::PMADDWDrm, 16 },
- { X86::PMAXSWrr, X86::PMAXSWrm, 16 },
- { X86::PMAXUBrr, X86::PMAXUBrm, 16 },
- { X86::PMINSWrr, X86::PMINSWrm, 16 },
- { X86::PMINUBrr, X86::PMINUBrm, 16 },
- { X86::PMULDQrr, X86::PMULDQrm, 16 },
- { X86::PMULHUWrr, X86::PMULHUWrm, 16 },
- { X86::PMULHWrr, X86::PMULHWrm, 16 },
- { X86::PMULLDrr, X86::PMULLDrm, 16 },
- { X86::PMULLWrr, X86::PMULLWrm, 16 },
- { X86::PMULUDQrr, X86::PMULUDQrm, 16 },
- { X86::PORrr, X86::PORrm, 16 },
- { X86::PSADBWrr, X86::PSADBWrm, 16 },
- { X86::PSLLDrr, X86::PSLLDrm, 16 },
- { X86::PSLLQrr, X86::PSLLQrm, 16 },
- { X86::PSLLWrr, X86::PSLLWrm, 16 },
- { X86::PSRADrr, X86::PSRADrm, 16 },
- { X86::PSRAWrr, X86::PSRAWrm, 16 },
- { X86::PSRLDrr, X86::PSRLDrm, 16 },
- { X86::PSRLQrr, X86::PSRLQrm, 16 },
- { X86::PSRLWrr, X86::PSRLWrm, 16 },
- { X86::PSUBBrr, X86::PSUBBrm, 16 },
- { X86::PSUBDrr, X86::PSUBDrm, 16 },
- { X86::PSUBSBrr, X86::PSUBSBrm, 16 },
- { X86::PSUBSWrr, X86::PSUBSWrm, 16 },
- { X86::PSUBWrr, X86::PSUBWrm, 16 },
- { X86::PUNPCKHBWrr, X86::PUNPCKHBWrm, 16 },
- { X86::PUNPCKHDQrr, X86::PUNPCKHDQrm, 16 },
- { X86::PUNPCKHQDQrr, X86::PUNPCKHQDQrm, 16 },
- { X86::PUNPCKHWDrr, X86::PUNPCKHWDrm, 16 },
- { X86::PUNPCKLBWrr, X86::PUNPCKLBWrm, 16 },
- { X86::PUNPCKLDQrr, X86::PUNPCKLDQrm, 16 },
- { X86::PUNPCKLQDQrr, X86::PUNPCKLQDQrm, 16 },
- { X86::PUNPCKLWDrr, X86::PUNPCKLWDrm, 16 },
- { X86::PXORrr, X86::PXORrm, 16 },
- { X86::SBB32rr, X86::SBB32rm, 0 },
- { X86::SBB64rr, X86::SBB64rm, 0 },
- { X86::SHUFPDrri, X86::SHUFPDrmi, 16 },
- { X86::SHUFPSrri, X86::SHUFPSrmi, 16 },
- { X86::SUB16rr, X86::SUB16rm, 0 },
- { X86::SUB32rr, X86::SUB32rm, 0 },
- { X86::SUB64rr, X86::SUB64rm, 0 },
- { X86::SUB8rr, X86::SUB8rm, 0 },
- { X86::SUBPDrr, X86::SUBPDrm, 16 },
- { X86::SUBPSrr, X86::SUBPSrm, 16 },
- { X86::SUBSDrr, X86::SUBSDrm, 0 },
- { X86::SUBSSrr, X86::SUBSSrm, 0 },
+ { X86::ADC32rr, X86::ADC32rm, 0 },
+ { X86::ADC64rr, X86::ADC64rm, 0 },
+ { X86::ADD16rr, X86::ADD16rm, 0 },
+ { X86::ADD16rr_DB, X86::ADD16rm, TB_NO_REVERSE },
+ { X86::ADD32rr, X86::ADD32rm, 0 },
+ { X86::ADD32rr_DB, X86::ADD32rm, TB_NO_REVERSE },
+ { X86::ADD64rr, X86::ADD64rm, 0 },
+ { X86::ADD64rr_DB, X86::ADD64rm, TB_NO_REVERSE },
+ { X86::ADD8rr, X86::ADD8rm, 0 },
+ { X86::ADDPDrr, X86::ADDPDrm, TB_ALIGN_16 },
+ { X86::ADDPSrr, X86::ADDPSrm, TB_ALIGN_16 },
+ { X86::ADDSDrr, X86::ADDSDrm, 0 },
+ { X86::ADDSSrr, X86::ADDSSrm, 0 },
+ { X86::ADDSUBPDrr, X86::ADDSUBPDrm, TB_ALIGN_16 },
+ { X86::ADDSUBPSrr, X86::ADDSUBPSrm, TB_ALIGN_16 },
+ { X86::AND16rr, X86::AND16rm, 0 },
+ { X86::AND32rr, X86::AND32rm, 0 },
+ { X86::AND64rr, X86::AND64rm, 0 },
+ { X86::AND8rr, X86::AND8rm, 0 },
+ { X86::ANDNPDrr, X86::ANDNPDrm, TB_ALIGN_16 },
+ { X86::ANDNPSrr, X86::ANDNPSrm, TB_ALIGN_16 },
+ { X86::ANDPDrr, X86::ANDPDrm, TB_ALIGN_16 },
+ { X86::ANDPSrr, X86::ANDPSrm, TB_ALIGN_16 },
+ { X86::CMOVA16rr, X86::CMOVA16rm, 0 },
+ { X86::CMOVA32rr, X86::CMOVA32rm, 0 },
+ { X86::CMOVA64rr, X86::CMOVA64rm, 0 },
+ { X86::CMOVAE16rr, X86::CMOVAE16rm, 0 },
+ { X86::CMOVAE32rr, X86::CMOVAE32rm, 0 },
+ { X86::CMOVAE64rr, X86::CMOVAE64rm, 0 },
+ { X86::CMOVB16rr, X86::CMOVB16rm, 0 },
+ { X86::CMOVB32rr, X86::CMOVB32rm, 0 },
+ { X86::CMOVB64rr, X86::CMOVB64rm, 0 },
+ { X86::CMOVBE16rr, X86::CMOVBE16rm, 0 },
+ { X86::CMOVBE32rr, X86::CMOVBE32rm, 0 },
+ { X86::CMOVBE64rr, X86::CMOVBE64rm, 0 },
+ { X86::CMOVE16rr, X86::CMOVE16rm, 0 },
+ { X86::CMOVE32rr, X86::CMOVE32rm, 0 },
+ { X86::CMOVE64rr, X86::CMOVE64rm, 0 },
+ { X86::CMOVG16rr, X86::CMOVG16rm, 0 },
+ { X86::CMOVG32rr, X86::CMOVG32rm, 0 },
+ { X86::CMOVG64rr, X86::CMOVG64rm, 0 },
+ { X86::CMOVGE16rr, X86::CMOVGE16rm, 0 },
+ { X86::CMOVGE32rr, X86::CMOVGE32rm, 0 },
+ { X86::CMOVGE64rr, X86::CMOVGE64rm, 0 },
+ { X86::CMOVL16rr, X86::CMOVL16rm, 0 },
+ { X86::CMOVL32rr, X86::CMOVL32rm, 0 },
+ { X86::CMOVL64rr, X86::CMOVL64rm, 0 },
+ { X86::CMOVLE16rr, X86::CMOVLE16rm, 0 },
+ { X86::CMOVLE32rr, X86::CMOVLE32rm, 0 },
+ { X86::CMOVLE64rr, X86::CMOVLE64rm, 0 },
+ { X86::CMOVNE16rr, X86::CMOVNE16rm, 0 },
+ { X86::CMOVNE32rr, X86::CMOVNE32rm, 0 },
+ { X86::CMOVNE64rr, X86::CMOVNE64rm, 0 },
+ { X86::CMOVNO16rr, X86::CMOVNO16rm, 0 },
+ { X86::CMOVNO32rr, X86::CMOVNO32rm, 0 },
+ { X86::CMOVNO64rr, X86::CMOVNO64rm, 0 },
+ { X86::CMOVNP16rr, X86::CMOVNP16rm, 0 },
+ { X86::CMOVNP32rr, X86::CMOVNP32rm, 0 },
+ { X86::CMOVNP64rr, X86::CMOVNP64rm, 0 },
+ { X86::CMOVNS16rr, X86::CMOVNS16rm, 0 },
+ { X86::CMOVNS32rr, X86::CMOVNS32rm, 0 },
+ { X86::CMOVNS64rr, X86::CMOVNS64rm, 0 },
+ { X86::CMOVO16rr, X86::CMOVO16rm, 0 },
+ { X86::CMOVO32rr, X86::CMOVO32rm, 0 },
+ { X86::CMOVO64rr, X86::CMOVO64rm, 0 },
+ { X86::CMOVP16rr, X86::CMOVP16rm, 0 },
+ { X86::CMOVP32rr, X86::CMOVP32rm, 0 },
+ { X86::CMOVP64rr, X86::CMOVP64rm, 0 },
+ { X86::CMOVS16rr, X86::CMOVS16rm, 0 },
+ { X86::CMOVS32rr, X86::CMOVS32rm, 0 },
+ { X86::CMOVS64rr, X86::CMOVS64rm, 0 },
+ { X86::CMPPDrri, X86::CMPPDrmi, TB_ALIGN_16 },
+ { X86::CMPPSrri, X86::CMPPSrmi, TB_ALIGN_16 },
+ { X86::CMPSDrr, X86::CMPSDrm, 0 },
+ { X86::CMPSSrr, X86::CMPSSrm, 0 },
+ { X86::DIVPDrr, X86::DIVPDrm, TB_ALIGN_16 },
+ { X86::DIVPSrr, X86::DIVPSrm, TB_ALIGN_16 },
+ { X86::DIVSDrr, X86::DIVSDrm, 0 },
+ { X86::DIVSSrr, X86::DIVSSrm, 0 },
+ { X86::FsANDNPDrr, X86::FsANDNPDrm, TB_ALIGN_16 },
+ { X86::FsANDNPSrr, X86::FsANDNPSrm, TB_ALIGN_16 },
+ { X86::FsANDPDrr, X86::FsANDPDrm, TB_ALIGN_16 },
+ { X86::FsANDPSrr, X86::FsANDPSrm, TB_ALIGN_16 },
+ { X86::FsORPDrr, X86::FsORPDrm, TB_ALIGN_16 },
+ { X86::FsORPSrr, X86::FsORPSrm, TB_ALIGN_16 },
+ { X86::FsXORPDrr, X86::FsXORPDrm, TB_ALIGN_16 },
+ { X86::FsXORPSrr, X86::FsXORPSrm, TB_ALIGN_16 },
+ { X86::HADDPDrr, X86::HADDPDrm, TB_ALIGN_16 },
+ { X86::HADDPSrr, X86::HADDPSrm, TB_ALIGN_16 },
+ { X86::HSUBPDrr, X86::HSUBPDrm, TB_ALIGN_16 },
+ { X86::HSUBPSrr, X86::HSUBPSrm, TB_ALIGN_16 },
+ { X86::IMUL16rr, X86::IMUL16rm, 0 },
+ { X86::IMUL32rr, X86::IMUL32rm, 0 },
+ { X86::IMUL64rr, X86::IMUL64rm, 0 },
+ { X86::Int_CMPSDrr, X86::Int_CMPSDrm, 0 },
+ { X86::Int_CMPSSrr, X86::Int_CMPSSrm, 0 },
+ { X86::MAXPDrr, X86::MAXPDrm, TB_ALIGN_16 },
+ { X86::MAXPDrr_Int, X86::MAXPDrm_Int, TB_ALIGN_16 },
+ { X86::MAXPSrr, X86::MAXPSrm, TB_ALIGN_16 },
+ { X86::MAXPSrr_Int, X86::MAXPSrm_Int, TB_ALIGN_16 },
+ { X86::MAXSDrr, X86::MAXSDrm, 0 },
+ { X86::MAXSDrr_Int, X86::MAXSDrm_Int, 0 },
+ { X86::MAXSSrr, X86::MAXSSrm, 0 },
+ { X86::MAXSSrr_Int, X86::MAXSSrm_Int, 0 },
+ { X86::MINPDrr, X86::MINPDrm, TB_ALIGN_16 },
+ { X86::MINPDrr_Int, X86::MINPDrm_Int, TB_ALIGN_16 },
+ { X86::MINPSrr, X86::MINPSrm, TB_ALIGN_16 },
+ { X86::MINPSrr_Int, X86::MINPSrm_Int, TB_ALIGN_16 },
+ { X86::MINSDrr, X86::MINSDrm, 0 },
+ { X86::MINSDrr_Int, X86::MINSDrm_Int, 0 },
+ { X86::MINSSrr, X86::MINSSrm, 0 },
+ { X86::MINSSrr_Int, X86::MINSSrm_Int, 0 },
+ { X86::MULPDrr, X86::MULPDrm, TB_ALIGN_16 },
+ { X86::MULPSrr, X86::MULPSrm, TB_ALIGN_16 },
+ { X86::MULSDrr, X86::MULSDrm, 0 },
+ { X86::MULSSrr, X86::MULSSrm, 0 },
+ { X86::OR16rr, X86::OR16rm, 0 },
+ { X86::OR32rr, X86::OR32rm, 0 },
+ { X86::OR64rr, X86::OR64rm, 0 },
+ { X86::OR8rr, X86::OR8rm, 0 },
+ { X86::ORPDrr, X86::ORPDrm, TB_ALIGN_16 },
+ { X86::ORPSrr, X86::ORPSrm, TB_ALIGN_16 },
+ { X86::PACKSSDWrr, X86::PACKSSDWrm, TB_ALIGN_16 },
+ { X86::PACKSSWBrr, X86::PACKSSWBrm, TB_ALIGN_16 },
+ { X86::PACKUSWBrr, X86::PACKUSWBrm, TB_ALIGN_16 },
+ { X86::PADDBrr, X86::PADDBrm, TB_ALIGN_16 },
+ { X86::PADDDrr, X86::PADDDrm, TB_ALIGN_16 },
+ { X86::PADDQrr, X86::PADDQrm, TB_ALIGN_16 },
+ { X86::PADDSBrr, X86::PADDSBrm, TB_ALIGN_16 },
+ { X86::PADDSWrr, X86::PADDSWrm, TB_ALIGN_16 },
+ { X86::PADDWrr, X86::PADDWrm, TB_ALIGN_16 },
+ { X86::PANDNrr, X86::PANDNrm, TB_ALIGN_16 },
+ { X86::PANDrr, X86::PANDrm, TB_ALIGN_16 },
+ { X86::PAVGBrr, X86::PAVGBrm, TB_ALIGN_16 },
+ { X86::PAVGWrr, X86::PAVGWrm, TB_ALIGN_16 },
+ { X86::PCMPEQBrr, X86::PCMPEQBrm, TB_ALIGN_16 },
+ { X86::PCMPEQDrr, X86::PCMPEQDrm, TB_ALIGN_16 },
+ { X86::PCMPEQWrr, X86::PCMPEQWrm, TB_ALIGN_16 },
+ { X86::PCMPGTBrr, X86::PCMPGTBrm, TB_ALIGN_16 },
+ { X86::PCMPGTDrr, X86::PCMPGTDrm, TB_ALIGN_16 },
+ { X86::PCMPGTWrr, X86::PCMPGTWrm, TB_ALIGN_16 },
+ { X86::PINSRWrri, X86::PINSRWrmi, TB_ALIGN_16 },
+ { X86::PMADDWDrr, X86::PMADDWDrm, TB_ALIGN_16 },
+ { X86::PMAXSWrr, X86::PMAXSWrm, TB_ALIGN_16 },
+ { X86::PMAXUBrr, X86::PMAXUBrm, TB_ALIGN_16 },
+ { X86::PMINSWrr, X86::PMINSWrm, TB_ALIGN_16 },
+ { X86::PMINUBrr, X86::PMINUBrm, TB_ALIGN_16 },
+ { X86::PMULDQrr, X86::PMULDQrm, TB_ALIGN_16 },
+ { X86::PMULHUWrr, X86::PMULHUWrm, TB_ALIGN_16 },
+ { X86::PMULHWrr, X86::PMULHWrm, TB_ALIGN_16 },
+ { X86::PMULLDrr, X86::PMULLDrm, TB_ALIGN_16 },
+ { X86::PMULLWrr, X86::PMULLWrm, TB_ALIGN_16 },
+ { X86::PMULUDQrr, X86::PMULUDQrm, TB_ALIGN_16 },
+ { X86::PORrr, X86::PORrm, TB_ALIGN_16 },
+ { X86::PSADBWrr, X86::PSADBWrm, TB_ALIGN_16 },
+ { X86::PSLLDrr, X86::PSLLDrm, TB_ALIGN_16 },
+ { X86::PSLLQrr, X86::PSLLQrm, TB_ALIGN_16 },
+ { X86::PSLLWrr, X86::PSLLWrm, TB_ALIGN_16 },
+ { X86::PSRADrr, X86::PSRADrm, TB_ALIGN_16 },
+ { X86::PSRAWrr, X86::PSRAWrm, TB_ALIGN_16 },
+ { X86::PSRLDrr, X86::PSRLDrm, TB_ALIGN_16 },
+ { X86::PSRLQrr, X86::PSRLQrm, TB_ALIGN_16 },
+ { X86::PSRLWrr, X86::PSRLWrm, TB_ALIGN_16 },
+ { X86::PSUBBrr, X86::PSUBBrm, TB_ALIGN_16 },
+ { X86::PSUBDrr, X86::PSUBDrm, TB_ALIGN_16 },
+ { X86::PSUBSBrr, X86::PSUBSBrm, TB_ALIGN_16 },
+ { X86::PSUBSWrr, X86::PSUBSWrm, TB_ALIGN_16 },
+ { X86::PSUBWrr, X86::PSUBWrm, TB_ALIGN_16 },
+ { X86::PUNPCKHBWrr, X86::PUNPCKHBWrm, TB_ALIGN_16 },
+ { X86::PUNPCKHDQrr, X86::PUNPCKHDQrm, TB_ALIGN_16 },
+ { X86::PUNPCKHQDQrr, X86::PUNPCKHQDQrm, TB_ALIGN_16 },
+ { X86::PUNPCKHWDrr, X86::PUNPCKHWDrm, TB_ALIGN_16 },
+ { X86::PUNPCKLBWrr, X86::PUNPCKLBWrm, TB_ALIGN_16 },
+ { X86::PUNPCKLDQrr, X86::PUNPCKLDQrm, TB_ALIGN_16 },
+ { X86::PUNPCKLQDQrr, X86::PUNPCKLQDQrm, TB_ALIGN_16 },
+ { X86::PUNPCKLWDrr, X86::PUNPCKLWDrm, TB_ALIGN_16 },
+ { X86::PXORrr, X86::PXORrm, TB_ALIGN_16 },
+ { X86::SBB32rr, X86::SBB32rm, 0 },
+ { X86::SBB64rr, X86::SBB64rm, 0 },
+ { X86::SHUFPDrri, X86::SHUFPDrmi, TB_ALIGN_16 },
+ { X86::SHUFPSrri, X86::SHUFPSrmi, TB_ALIGN_16 },
+ { X86::SUB16rr, X86::SUB16rm, 0 },
+ { X86::SUB32rr, X86::SUB32rm, 0 },
+ { X86::SUB64rr, X86::SUB64rm, 0 },
+ { X86::SUB8rr, X86::SUB8rm, 0 },
+ { X86::SUBPDrr, X86::SUBPDrm, TB_ALIGN_16 },
+ { X86::SUBPSrr, X86::SUBPSrm, TB_ALIGN_16 },
+ { X86::SUBSDrr, X86::SUBSDrm, 0 },
+ { X86::SUBSSrr, X86::SUBSSrm, 0 },
// FIXME: TEST*rr -> swapped operand of TEST*mr.
- { X86::UNPCKHPDrr, X86::UNPCKHPDrm, 16 },
- { X86::UNPCKHPSrr, X86::UNPCKHPSrm, 16 },
- { X86::UNPCKLPDrr, X86::UNPCKLPDrm, 16 },
- { X86::UNPCKLPSrr, X86::UNPCKLPSrm, 16 },
- { X86::XOR16rr, X86::XOR16rm, 0 },
- { X86::XOR32rr, X86::XOR32rm, 0 },
- { X86::XOR64rr, X86::XOR64rm, 0 },
- { X86::XOR8rr, X86::XOR8rm, 0 },
- { X86::XORPDrr, X86::XORPDrm, 16 },
- { X86::XORPSrr, X86::XORPSrm, 16 }
+ { X86::UNPCKHPDrr, X86::UNPCKHPDrm, TB_ALIGN_16 },
+ { X86::UNPCKHPSrr, X86::UNPCKHPSrm, TB_ALIGN_16 },
+ { X86::UNPCKLPDrr, X86::UNPCKLPDrm, TB_ALIGN_16 },
+ { X86::UNPCKLPSrr, X86::UNPCKLPSrm, TB_ALIGN_16 },
+ { X86::XOR16rr, X86::XOR16rm, 0 },
+ { X86::XOR32rr, X86::XOR32rm, 0 },
+ { X86::XOR64rr, X86::XOR64rm, 0 },
+ { X86::XOR8rr, X86::XOR8rm, 0 },
+ { X86::XORPDrr, X86::XORPDrm, TB_ALIGN_16 },
+ { X86::XORPSrr, X86::XORPSrm, TB_ALIGN_16 },
+ // AVX 128-bit versions of foldable instructions
+ { X86::VCVTSD2SSrr, X86::VCVTSD2SSrm, 0 },
+ { X86::Int_VCVTSD2SSrr, X86::Int_VCVTSD2SSrm, 0 },
+ { X86::VCVTSI2SD64rr, X86::VCVTSI2SD64rm, 0 },
+ { X86::Int_VCVTSI2SD64rr, X86::Int_VCVTSI2SD64rm, 0 },
+ { X86::VCVTSI2SDrr, X86::VCVTSI2SDrm, 0 },
+ { X86::Int_VCVTSI2SDrr, X86::Int_VCVTSI2SDrm, 0 },
+ { X86::VCVTSI2SS64rr, X86::VCVTSI2SS64rm, 0 },
+ { X86::Int_VCVTSI2SS64rr, X86::Int_VCVTSI2SS64rm, 0 },
+ { X86::VCVTSI2SSrr, X86::VCVTSI2SSrm, 0 },
+ { X86::Int_VCVTSI2SSrr, X86::Int_VCVTSI2SSrm, 0 },
+ { X86::VCVTSS2SDrr, X86::VCVTSS2SDrm, 0 },
+ { X86::Int_VCVTSS2SDrr, X86::Int_VCVTSS2SDrm, 0 },
+ { X86::VCVTTSD2SI64rr, X86::VCVTTSD2SI64rm, 0 },
+ { X86::Int_VCVTTSD2SI64rr,X86::Int_VCVTTSD2SI64rm, 0 },
+ { X86::VCVTTSD2SIrr, X86::VCVTTSD2SIrm, 0 },
+ { X86::Int_VCVTTSD2SIrr, X86::Int_VCVTTSD2SIrm, 0 },
+ { X86::VCVTTSS2SI64rr, X86::VCVTTSS2SI64rm, 0 },
+ { X86::Int_VCVTTSS2SI64rr,X86::Int_VCVTTSS2SI64rm, 0 },
+ { X86::VCVTTSS2SIrr, X86::VCVTTSS2SIrm, 0 },
+ { X86::Int_VCVTTSS2SIrr, X86::Int_VCVTTSS2SIrm, 0 },
+ { X86::VCVTSD2SI64rr, X86::VCVTSD2SI64rm, 0 },
+ { X86::VCVTSD2SIrr, X86::VCVTSD2SIrm, 0 },
+ { X86::VCVTTPD2DQrr, X86::VCVTTPD2DQrm, TB_ALIGN_16 },
+ { X86::VCVTTPS2DQrr, X86::VCVTTPS2DQrm, TB_ALIGN_16 },
+ { X86::VRSQRTSSr, X86::VRSQRTSSm, 0 },
+ { X86::VSQRTSDr, X86::VSQRTSDm, 0 },
+ { X86::VSQRTSSr, X86::VSQRTSSm, 0 },
+ { X86::VADDPDrr, X86::VADDPDrm, TB_ALIGN_16 },
+ { X86::VADDPSrr, X86::VADDPSrm, TB_ALIGN_16 },
+ { X86::VADDSDrr, X86::VADDSDrm, 0 },
+ { X86::VADDSSrr, X86::VADDSSrm, 0 },
+ { X86::VADDSUBPDrr, X86::VADDSUBPDrm, TB_ALIGN_16 },
+ { X86::VADDSUBPSrr, X86::VADDSUBPSrm, TB_ALIGN_16 },
+ { X86::VANDNPDrr, X86::VANDNPDrm, TB_ALIGN_16 },
+ { X86::VANDNPSrr, X86::VANDNPSrm, TB_ALIGN_16 },
+ { X86::VANDPDrr, X86::VANDPDrm, TB_ALIGN_16 },
+ { X86::VANDPSrr, X86::VANDPSrm, TB_ALIGN_16 },
+ { X86::VCMPPDrri, X86::VCMPPDrmi, TB_ALIGN_16 },
+ { X86::VCMPPSrri, X86::VCMPPSrmi, TB_ALIGN_16 },
+ { X86::VCMPSDrr, X86::VCMPSDrm, 0 },
+ { X86::VCMPSSrr, X86::VCMPSSrm, 0 },
+ { X86::VDIVPDrr, X86::VDIVPDrm, TB_ALIGN_16 },
+ { X86::VDIVPSrr, X86::VDIVPSrm, TB_ALIGN_16 },
+ { X86::VDIVSDrr, X86::VDIVSDrm, 0 },
+ { X86::VDIVSSrr, X86::VDIVSSrm, 0 },
+ { X86::VFsANDNPDrr, X86::VFsANDNPDrm, TB_ALIGN_16 },
+ { X86::VFsANDNPSrr, X86::VFsANDNPSrm, TB_ALIGN_16 },
+ { X86::VFsANDPDrr, X86::VFsANDPDrm, TB_ALIGN_16 },
+ { X86::VFsANDPSrr, X86::VFsANDPSrm, TB_ALIGN_16 },
+ { X86::VFsORPDrr, X86::VFsORPDrm, TB_ALIGN_16 },
+ { X86::VFsORPSrr, X86::VFsORPSrm, TB_ALIGN_16 },
+ { X86::VFsXORPDrr, X86::VFsXORPDrm, TB_ALIGN_16 },
+ { X86::VFsXORPSrr, X86::VFsXORPSrm, TB_ALIGN_16 },
+ { X86::VHADDPDrr, X86::VHADDPDrm, TB_ALIGN_16 },
+ { X86::VHADDPSrr, X86::VHADDPSrm, TB_ALIGN_16 },
+ { X86::VHSUBPDrr, X86::VHSUBPDrm, TB_ALIGN_16 },
+ { X86::VHSUBPSrr, X86::VHSUBPSrm, TB_ALIGN_16 },
+ { X86::Int_VCMPSDrr, X86::Int_VCMPSDrm, 0 },
+ { X86::Int_VCMPSSrr, X86::Int_VCMPSSrm, 0 },
+ { X86::VMAXPDrr, X86::VMAXPDrm, TB_ALIGN_16 },
+ { X86::VMAXPDrr_Int, X86::VMAXPDrm_Int, TB_ALIGN_16 },
+ { X86::VMAXPSrr, X86::VMAXPSrm, TB_ALIGN_16 },
+ { X86::VMAXPSrr_Int, X86::VMAXPSrm_Int, TB_ALIGN_16 },
+ { X86::VMAXSDrr, X86::VMAXSDrm, 0 },
+ { X86::VMAXSDrr_Int, X86::VMAXSDrm_Int, 0 },
+ { X86::VMAXSSrr, X86::VMAXSSrm, 0 },
+ { X86::VMAXSSrr_Int, X86::VMAXSSrm_Int, 0 },
+ { X86::VMINPDrr, X86::VMINPDrm, TB_ALIGN_16 },
+ { X86::VMINPDrr_Int, X86::VMINPDrm_Int, TB_ALIGN_16 },
+ { X86::VMINPSrr, X86::VMINPSrm, TB_ALIGN_16 },
+ { X86::VMINPSrr_Int, X86::VMINPSrm_Int, TB_ALIGN_16 },
+ { X86::VMINSDrr, X86::VMINSDrm, 0 },
+ { X86::VMINSDrr_Int, X86::VMINSDrm_Int, 0 },
+ { X86::VMINSSrr, X86::VMINSSrm, 0 },
+ { X86::VMINSSrr_Int, X86::VMINSSrm_Int, 0 },
+ { X86::VMULPDrr, X86::VMULPDrm, TB_ALIGN_16 },
+ { X86::VMULPSrr, X86::VMULPSrm, TB_ALIGN_16 },
+ { X86::VMULSDrr, X86::VMULSDrm, 0 },
+ { X86::VMULSSrr, X86::VMULSSrm, 0 },
+ { X86::VORPDrr, X86::VORPDrm, TB_ALIGN_16 },
+ { X86::VORPSrr, X86::VORPSrm, TB_ALIGN_16 },
+ { X86::VPACKSSDWrr, X86::VPACKSSDWrm, TB_ALIGN_16 },
+ { X86::VPACKSSWBrr, X86::VPACKSSWBrm, TB_ALIGN_16 },
+ { X86::VPACKUSWBrr, X86::VPACKUSWBrm, TB_ALIGN_16 },
+ { X86::VPADDBrr, X86::VPADDBrm, TB_ALIGN_16 },
+ { X86::VPADDDrr, X86::VPADDDrm, TB_ALIGN_16 },
+ { X86::VPADDQrr, X86::VPADDQrm, TB_ALIGN_16 },
+ { X86::VPADDSBrr, X86::VPADDSBrm, TB_ALIGN_16 },
+ { X86::VPADDSWrr, X86::VPADDSWrm, TB_ALIGN_16 },
+ { X86::VPADDWrr, X86::VPADDWrm, TB_ALIGN_16 },
+ { X86::VPANDNrr, X86::VPANDNrm, TB_ALIGN_16 },
+ { X86::VPANDrr, X86::VPANDrm, TB_ALIGN_16 },
+ { X86::VPCMPEQBrr, X86::VPCMPEQBrm, TB_ALIGN_16 },
+ { X86::VPCMPEQDrr, X86::VPCMPEQDrm, TB_ALIGN_16 },
+ { X86::VPCMPEQWrr, X86::VPCMPEQWrm, TB_ALIGN_16 },
+ { X86::VPCMPGTBrr, X86::VPCMPGTBrm, TB_ALIGN_16 },
+ { X86::VPCMPGTDrr, X86::VPCMPGTDrm, TB_ALIGN_16 },
+ { X86::VPCMPGTWrr, X86::VPCMPGTWrm, TB_ALIGN_16 },
+ { X86::VPINSRWrri, X86::VPINSRWrmi, TB_ALIGN_16 },
+ { X86::VPMADDWDrr, X86::VPMADDWDrm, TB_ALIGN_16 },
+ { X86::VPMAXSWrr, X86::VPMAXSWrm, TB_ALIGN_16 },
+ { X86::VPMAXUBrr, X86::VPMAXUBrm, TB_ALIGN_16 },
+ { X86::VPMINSWrr, X86::VPMINSWrm, TB_ALIGN_16 },
+ { X86::VPMINUBrr, X86::VPMINUBrm, TB_ALIGN_16 },
+ { X86::VPMULDQrr, X86::VPMULDQrm, TB_ALIGN_16 },
+ { X86::VPMULHUWrr, X86::VPMULHUWrm, TB_ALIGN_16 },
+ { X86::VPMULHWrr, X86::VPMULHWrm, TB_ALIGN_16 },
+ { X86::VPMULLDrr, X86::VPMULLDrm, TB_ALIGN_16 },
+ { X86::VPMULLWrr, X86::VPMULLWrm, TB_ALIGN_16 },
+ { X86::VPMULUDQrr, X86::VPMULUDQrm, TB_ALIGN_16 },
+ { X86::VPORrr, X86::VPORrm, TB_ALIGN_16 },
+ { X86::VPSADBWrr, X86::VPSADBWrm, TB_ALIGN_16 },
+ { X86::VPSLLDrr, X86::VPSLLDrm, TB_ALIGN_16 },
+ { X86::VPSLLQrr, X86::VPSLLQrm, TB_ALIGN_16 },
+ { X86::VPSLLWrr, X86::VPSLLWrm, TB_ALIGN_16 },
+ { X86::VPSRADrr, X86::VPSRADrm, TB_ALIGN_16 },
+ { X86::VPSRAWrr, X86::VPSRAWrm, TB_ALIGN_16 },
+ { X86::VPSRLDrr, X86::VPSRLDrm, TB_ALIGN_16 },
+ { X86::VPSRLQrr, X86::VPSRLQrm, TB_ALIGN_16 },
+ { X86::VPSRLWrr, X86::VPSRLWrm, TB_ALIGN_16 },
+ { X86::VPSUBBrr, X86::VPSUBBrm, TB_ALIGN_16 },
+ { X86::VPSUBDrr, X86::VPSUBDrm, TB_ALIGN_16 },
+ { X86::VPSUBSBrr, X86::VPSUBSBrm, TB_ALIGN_16 },
+ { X86::VPSUBSWrr, X86::VPSUBSWrm, TB_ALIGN_16 },
+ { X86::VPSUBWrr, X86::VPSUBWrm, TB_ALIGN_16 },
+ { X86::VPUNPCKHBWrr, X86::VPUNPCKHBWrm, TB_ALIGN_16 },
+ { X86::VPUNPCKHDQrr, X86::VPUNPCKHDQrm, TB_ALIGN_16 },
+ { X86::VPUNPCKHQDQrr, X86::VPUNPCKHQDQrm, TB_ALIGN_16 },
+ { X86::VPUNPCKHWDrr, X86::VPUNPCKHWDrm, TB_ALIGN_16 },
+ { X86::VPUNPCKLBWrr, X86::VPUNPCKLBWrm, TB_ALIGN_16 },
+ { X86::VPUNPCKLDQrr, X86::VPUNPCKLDQrm, TB_ALIGN_16 },
+ { X86::VPUNPCKLQDQrr, X86::VPUNPCKLQDQrm, TB_ALIGN_16 },
+ { X86::VPUNPCKLWDrr, X86::VPUNPCKLWDrm, TB_ALIGN_16 },
+ { X86::VPXORrr, X86::VPXORrm, TB_ALIGN_16 },
+ { X86::VSHUFPDrri, X86::VSHUFPDrmi, TB_ALIGN_16 },
+ { X86::VSHUFPSrri, X86::VSHUFPSrmi, TB_ALIGN_16 },
+ { X86::VSUBPDrr, X86::VSUBPDrm, TB_ALIGN_16 },
+ { X86::VSUBPSrr, X86::VSUBPSrm, TB_ALIGN_16 },
+ { X86::VSUBSDrr, X86::VSUBSDrm, 0 },
+ { X86::VSUBSSrr, X86::VSUBSSrm, 0 },
+ { X86::VUNPCKHPDrr, X86::VUNPCKHPDrm, TB_ALIGN_16 },
+ { X86::VUNPCKHPSrr, X86::VUNPCKHPSrm, TB_ALIGN_16 },
+ { X86::VUNPCKLPDrr, X86::VUNPCKLPDrm, TB_ALIGN_16 },
+ { X86::VUNPCKLPSrr, X86::VUNPCKLPSrm, TB_ALIGN_16 },
+ { X86::VXORPDrr, X86::VXORPDrm, TB_ALIGN_16 },
+ { X86::VXORPSrr, X86::VXORPSrm, TB_ALIGN_16 }
+ // FIXME: add AVX 256-bit foldable instructions
};
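// Each OpTbl2 entry is { register-form opcode, memory-form opcode, flags };
// e.g. TB_ALIGN_16 marks memory forms that need a 16-byte-aligned operand,
// which the folding code below checks before rewriting an instruction.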
for (unsigned i = 0, e = array_lengthof(OpTbl2); i != e; ++i) {
unsigned RegOp = OpTbl2[i][0];
- unsigned MemOp = OpTbl2[i][1] & ~TB_FLAGS;
- unsigned Align = OpTbl2[i][2];
-
- assert(!RegOp2MemOpTable2.count(RegOp) && "Duplicate entry!");
- RegOp2MemOpTable2[RegOp] = std::make_pair(MemOp, Align);
-
- // If this is not a reversible operation (because there is a many->one
- // mapping), don't insert the reverse of the operation into MemOp2RegOpTable.
- if (OpTbl2[i][1] & TB_NOT_REVERSABLE)
- continue;
+ unsigned MemOp = OpTbl2[i][1];
+ unsigned Flags = OpTbl2[i][2];
+ AddTableEntry(RegOp2MemOpTable2, MemOp2RegOpTable,
+ RegOp, MemOp,
+ // Index 2, folded load
+ Flags | TB_INDEX_2 | TB_FOLDED_LOAD);
+ }
+}
- // Index 2, folded load
- unsigned AuxInfo = 2 | (1 << 4);
- assert(!MemOp2RegOpTable.count(MemOp) &&
+void
+X86InstrInfo::AddTableEntry(RegOp2MemOpTableType &R2MTable,
+ MemOp2RegOpTableType &M2RTable,
+ unsigned RegOp, unsigned MemOp, unsigned Flags) {
+ if ((Flags & TB_NO_FORWARD) == 0) {
+ assert(!R2MTable.count(RegOp) && "Duplicate entry!");
+ R2MTable[RegOp] = std::make_pair(MemOp, Flags);
+ }
+ if ((Flags & TB_NO_REVERSE) == 0) {
+ assert(!M2RTable.count(MemOp) &&
"Duplicated entries in unfolding maps?");
- MemOp2RegOpTable[MemOp] = std::make_pair(RegOp, AuxInfo);
- }
+ M2RTable[MemOp] = std::make_pair(RegOp, Flags);
+ }
}
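// A sketch of how the packed Flags word decodes, mirroring the TB_* masks
// used by the fold/unfold paths below:
//   unsigned Index = Flags & TB_INDEX_MASK; // folded operand index
//   bool FoldedLoad = (Flags & TB_FOLDED_LOAD) != 0;
//   bool FoldedStore = (Flags & TB_FOLDED_STORE) != 0;
//   unsigned MinAlign = (Flags & TB_ALIGN_MASK) >> TB_ALIGN_SHIFT;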
bool
@@ -796,6 +1000,11 @@ static bool isFrameLoadOpcode(int Opcode) {
case X86::MOVAPSrm:
case X86::MOVAPDrm:
case X86::MOVDQArm:
+ case X86::VMOVSSrm:
+ case X86::VMOVSDrm:
+ case X86::VMOVAPSrm:
+ case X86::VMOVAPDrm:
+ case X86::VMOVDQArm:
case X86::VMOVAPSYrm:
case X86::VMOVAPDYrm:
case X86::VMOVDQAYrm:
@@ -820,6 +1029,11 @@ static bool isFrameStoreOpcode(int Opcode) {
case X86::MOVAPSmr:
case X86::MOVAPDmr:
case X86::MOVDQAmr:
+ case X86::VMOVSSmr:
+ case X86::VMOVSDmr:
+ case X86::VMOVAPSmr:
+ case X86::VMOVAPDmr:
+ case X86::VMOVDQAmr:
case X86::VMOVAPSYmr:
case X86::VMOVAPDYmr:
case X86::VMOVDQAYmr:
@@ -852,24 +1066,6 @@ unsigned X86InstrInfo::isLoadFromStackSlotPostFE(const MachineInstr *MI,
return 0;
}
-bool X86InstrInfo::hasLoadFromStackSlot(const MachineInstr *MI,
- const MachineMemOperand *&MMO,
- int &FrameIndex) const {
- for (MachineInstr::mmo_iterator o = MI->memoperands_begin(),
- oe = MI->memoperands_end();
- o != oe;
- ++o) {
- if ((*o)->isLoad() && (*o)->getValue())
- if (const FixedStackPseudoSourceValue *Value =
- dyn_cast<const FixedStackPseudoSourceValue>((*o)->getValue())) {
- FrameIndex = Value->getFrameIndex();
- MMO = *o;
- return true;
- }
- }
- return false;
-}
-
unsigned X86InstrInfo::isStoreToStackSlot(const MachineInstr *MI,
int &FrameIndex) const {
if (isFrameStoreOpcode(MI->getOpcode()))
@@ -892,24 +1088,6 @@ unsigned X86InstrInfo::isStoreToStackSlotPostFE(const MachineInstr *MI,
return 0;
}
-bool X86InstrInfo::hasStoreToStackSlot(const MachineInstr *MI,
- const MachineMemOperand *&MMO,
- int &FrameIndex) const {
- for (MachineInstr::mmo_iterator o = MI->memoperands_begin(),
- oe = MI->memoperands_end();
- o != oe;
- ++o) {
- if ((*o)->isStore() && (*o)->getValue())
- if (const FixedStackPseudoSourceValue *Value =
- dyn_cast<const FixedStackPseudoSourceValue>((*o)->getValue())) {
- FrameIndex = Value->getFrameIndex();
- MMO = *o;
- return true;
- }
- }
- return false;
-}
-
/// regIsPICBase - Return true if register is PIC base (i.e. defined by
/// X86::MOVPC32r).
static bool regIsPICBase(unsigned BaseReg, const MachineRegisterInfo &MRI) {
@@ -941,12 +1119,20 @@ X86InstrInfo::isReallyTriviallyReMaterializable(const MachineInstr *MI,
case X86::MOVUPSrm:
case X86::MOVAPDrm:
case X86::MOVDQArm:
+ case X86::VMOVSSrm:
+ case X86::VMOVSDrm:
+ case X86::VMOVAPSrm:
+ case X86::VMOVUPSrm:
+ case X86::VMOVAPDrm:
+ case X86::VMOVDQArm:
case X86::VMOVAPSYrm:
case X86::VMOVUPSYrm:
case X86::VMOVAPDYrm:
case X86::VMOVDQAYrm:
case X86::MMX_MOVD64rm:
case X86::MMX_MOVQ64rm:
+ case X86::FsVMOVAPSrm:
+ case X86::FsVMOVAPDrm:
case X86::FsMOVAPSrm:
case X86::FsMOVAPDrm: {
// Loads from constant pools are trivially rematerializable.
@@ -1009,15 +1195,11 @@ static bool isSafeToClobberEFLAGS(MachineBasicBlock &MBB,
MachineBasicBlock::iterator I) {
MachineBasicBlock::iterator E = MBB.end();
- // It's always safe to clobber EFLAGS at the end of a block.
- if (I == E)
- return true;
-
// For compile time consideration, if we are not able to determine the
// safety after visiting 4 instructions in each direction, we will assume
// it's not safe.
MachineBasicBlock::iterator Iter = I;
- for (unsigned i = 0; i < 4; ++i) {
+ for (unsigned i = 0; Iter != E && i < 4; ++i) {
bool SeenDef = false;
for (unsigned j = 0, e = Iter->getNumOperands(); j != e; ++j) {
MachineOperand &MO = Iter->getOperand(j);
@@ -1037,10 +1219,16 @@ static bool isSafeToClobberEFLAGS(MachineBasicBlock &MBB,
// Skip over DBG_VALUE.
while (Iter != E && Iter->isDebugValue())
++Iter;
+ }
- // If we make it to the end of the block, it's safe to clobber EFLAGS.
- if (Iter == E)
- return true;
+ // It is safe to clobber EFLAGS at the end of a block if no successor has it
+ // live in.
+ if (Iter == E) {
+ for (MachineBasicBlock::succ_iterator SI = MBB.succ_begin(),
+ SE = MBB.succ_end(); SI != SE; ++SI)
+ if ((*SI)->isLiveIn(X86::EFLAGS))
+ return false;
+ return true;
}
MachineBasicBlock::iterator B = MBB.begin();
@@ -1946,7 +2134,8 @@ static bool isHReg(unsigned Reg) {
}
// Try and copy between VR128/VR64 and GR64 registers.
-static unsigned CopyToFromAsymmetricReg(unsigned DestReg, unsigned SrcReg) {
+static unsigned CopyToFromAsymmetricReg(unsigned DestReg, unsigned SrcReg,
+ bool HasAVX) {
// SrcReg(VR128) -> DestReg(GR64)
// SrcReg(VR64) -> DestReg(GR64)
// SrcReg(GR64) -> DestReg(VR128)
@@ -1955,7 +2144,7 @@ static unsigned CopyToFromAsymmetricReg(unsigned DestReg, unsigned SrcReg) {
if (X86::GR64RegClass.contains(DestReg)) {
if (X86::VR128RegClass.contains(SrcReg)) {
// Copy from a VR128 register to a GR64 register.
- return X86::MOVPQIto64rr;
+ return HasAVX ? X86::VMOVPQIto64rr : X86::MOVPQIto64rr;
} else if (X86::VR64RegClass.contains(SrcReg)) {
// Copy from a VR64 register to a GR64 register.
return X86::MOVSDto64rr;
@@ -1963,12 +2152,23 @@ static unsigned CopyToFromAsymmetricReg(unsigned DestReg, unsigned SrcReg) {
} else if (X86::GR64RegClass.contains(SrcReg)) {
// Copy from a GR64 register to a VR128 register.
if (X86::VR128RegClass.contains(DestReg))
- return X86::MOV64toPQIrr;
+ return HasAVX ? X86::VMOV64toPQIrr : X86::MOV64toPQIrr;
// Copy from a GR64 register to a VR64 register.
else if (X86::VR64RegClass.contains(DestReg))
return X86::MOV64toSDrr;
}
+ // SrcReg(FR32) -> DestReg(GR32)
+ // SrcReg(GR32) -> DestReg(FR32)
+
+ if (X86::GR32RegClass.contains(DestReg) && X86::FR32RegClass.contains(SrcReg))
+ // Copy from a FR32 register to a GR32 register.
+ return HasAVX ? X86::VMOVSS2DIrr : X86::MOVSS2DIrr;
+
+ if (X86::FR32RegClass.contains(DestReg) && X86::GR32RegClass.contains(SrcReg))
+ // Copy from a GR32 register to a FR32 register.
+ return HasAVX ? X86::VMOVDI2SSrr : X86::MOVDI2SSrr;
+
return 0;
}
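// Illustration: with HasAVX set, a VR128 -> GR64 copy selected above becomes
// VMOVPQIto64rr rather than MOVPQIto64rr; copyPhysReg below passes the
// subtarget's AVX flag for exactly this choice.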
@@ -1977,6 +2177,7 @@ void X86InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
unsigned DestReg, unsigned SrcReg,
bool KillSrc) const {
// First deal with the normal symmetric copies.
+ bool HasAVX = TM.getSubtarget<X86Subtarget>().hasAVX();
unsigned Opc = 0;
if (X86::GR64RegClass.contains(DestReg, SrcReg))
Opc = X86::MOV64rr;
@@ -1988,18 +2189,21 @@ void X86InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
// Copying to or from a physical H register on x86-64 requires a NOREX
// move. Otherwise use a normal move.
if ((isHReg(DestReg) || isHReg(SrcReg)) &&
- TM.getSubtarget<X86Subtarget>().is64Bit())
+ TM.getSubtarget<X86Subtarget>().is64Bit()) {
Opc = X86::MOV8rr_NOREX;
- else
+ // Both operands must be encodable without a REX prefix.
+ assert(X86::GR8_NOREXRegClass.contains(SrcReg, DestReg) &&
+ "8-bit H register can not be copied outside GR8_NOREX");
+ } else
Opc = X86::MOV8rr;
} else if (X86::VR128RegClass.contains(DestReg, SrcReg))
- Opc = X86::MOVAPSrr;
+ Opc = HasAVX ? X86::VMOVAPSrr : X86::MOVAPSrr;
else if (X86::VR256RegClass.contains(DestReg, SrcReg))
Opc = X86::VMOVAPSYrr;
else if (X86::VR64RegClass.contains(DestReg, SrcReg))
Opc = X86::MMX_MOVQ64rr;
else
- Opc = CopyToFromAsymmetricReg(DestReg, SrcReg);
+ Opc = CopyToFromAsymmetricReg(DestReg, SrcReg, HasAVX);
if (Opc) {
BuildMI(MBB, MI, DL, get(Opc), DestReg)
@@ -2043,6 +2247,7 @@ static unsigned getLoadStoreRegOpcode(unsigned Reg,
bool isStackAligned,
const TargetMachine &TM,
bool load) {
+ bool HasAVX = TM.getSubtarget<X86Subtarget>().hasAVX();
switch (RC->getSize()) {
default:
llvm_unreachable("Unknown spill size");
@@ -2061,7 +2266,9 @@ static unsigned getLoadStoreRegOpcode(unsigned Reg,
if (X86::GR32RegClass.hasSubClassEq(RC))
return load ? X86::MOV32rm : X86::MOV32mr;
if (X86::FR32RegClass.hasSubClassEq(RC))
- return load ? X86::MOVSSrm : X86::MOVSSmr;
+ return load ?
+ (HasAVX ? X86::VMOVSSrm : X86::MOVSSrm) :
+ (HasAVX ? X86::VMOVSSmr : X86::MOVSSmr);
if (X86::RFP32RegClass.hasSubClassEq(RC))
return load ? X86::LD_Fp32m : X86::ST_Fp32m;
llvm_unreachable("Unknown 4-byte regclass");
@@ -2069,7 +2276,9 @@ static unsigned getLoadStoreRegOpcode(unsigned Reg,
if (X86::GR64RegClass.hasSubClassEq(RC))
return load ? X86::MOV64rm : X86::MOV64mr;
if (X86::FR64RegClass.hasSubClassEq(RC))
- return load ? X86::MOVSDrm : X86::MOVSDmr;
+ return load ?
+ (HasAVX ? X86::VMOVSDrm : X86::MOVSDrm) :
+ (HasAVX ? X86::VMOVSDmr : X86::MOVSDmr);
if (X86::VR64RegClass.hasSubClassEq(RC))
return load ? X86::MMX_MOVQ64rm : X86::MMX_MOVQ64mr;
if (X86::RFP64RegClass.hasSubClassEq(RC))
@@ -2078,13 +2287,18 @@ static unsigned getLoadStoreRegOpcode(unsigned Reg,
case 10:
assert(X86::RFP80RegClass.hasSubClassEq(RC) && "Unknown 10-byte regclass");
return load ? X86::LD_Fp80m : X86::ST_FpP80m;
- case 16:
+ case 16: {
assert(X86::VR128RegClass.hasSubClassEq(RC) && "Unknown 16-byte regclass");
// If stack is realigned we can use aligned stores.
if (isStackAligned)
- return load ? X86::MOVAPSrm : X86::MOVAPSmr;
+ return load ?
+ (HasAVX ? X86::VMOVAPSrm : X86::MOVAPSrm) :
+ (HasAVX ? X86::VMOVAPSmr : X86::MOVAPSmr);
else
- return load ? X86::MOVUPSrm : X86::MOVUPSmr;
+ return load ?
+ (HasAVX ? X86::VMOVUPSrm : X86::MOVUPSrm) :
+ (HasAVX ? X86::VMOVUPSmr : X86::MOVUPSmr);
+ }
case 32:
assert(X86::VR256RegClass.hasSubClassEq(RC) && "Unknown 32-byte regclass");
// If stack is realigned we can use aligned stores.
@@ -2118,7 +2332,8 @@ void X86InstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
const MachineFunction &MF = *MBB.getParent();
assert(MF.getFrameInfo()->getObjectSize(FrameIdx) >= RC->getSize() &&
"Stack slot too small for store");
- bool isAligned = (TM.getFrameLowering()->getStackAlignment() >= 16) ||
+ unsigned Alignment = RC->getSize() == 32 ? 32 : 16;
+ bool isAligned = (TM.getFrameLowering()->getStackAlignment() >= Alignment) ||
RI.canRealignStack(MF);
unsigned Opc = getStoreRegOpcode(SrcReg, RC, isAligned, TM);
DebugLoc DL = MBB.findDebugLoc(MI);
@@ -2133,7 +2348,9 @@ void X86InstrInfo::storeRegToAddr(MachineFunction &MF, unsigned SrcReg,
MachineInstr::mmo_iterator MMOBegin,
MachineInstr::mmo_iterator MMOEnd,
SmallVectorImpl<MachineInstr*> &NewMIs) const {
- bool isAligned = MMOBegin != MMOEnd && (*MMOBegin)->getAlignment() >= 16;
+ unsigned Alignment = RC->getSize() == 32 ? 32 : 16;
+ bool isAligned = MMOBegin != MMOEnd &&
+ (*MMOBegin)->getAlignment() >= Alignment;
unsigned Opc = getStoreRegOpcode(SrcReg, RC, isAligned, TM);
DebugLoc DL;
MachineInstrBuilder MIB = BuildMI(MF, DL, get(Opc));
@@ -2151,7 +2368,8 @@ void X86InstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
const TargetRegisterClass *RC,
const TargetRegisterInfo *TRI) const {
const MachineFunction &MF = *MBB.getParent();
- bool isAligned = (TM.getFrameLowering()->getStackAlignment() >= 16) ||
+ unsigned Alignment = RC->getSize() == 32 ? 32 : 16;
+ bool isAligned = (TM.getFrameLowering()->getStackAlignment() >= Alignment) ||
RI.canRealignStack(MF);
unsigned Opc = getLoadRegOpcode(DestReg, RC, isAligned, TM);
DebugLoc DL = MBB.findDebugLoc(MI);
@@ -2164,7 +2382,9 @@ void X86InstrInfo::loadRegFromAddr(MachineFunction &MF, unsigned DestReg,
MachineInstr::mmo_iterator MMOBegin,
MachineInstr::mmo_iterator MMOEnd,
SmallVectorImpl<MachineInstr*> &NewMIs) const {
- bool isAligned = MMOBegin != MMOEnd && (*MMOBegin)->getAlignment() >= 16;
+ unsigned Alignment = RC->getSize() == 32 ? 32 : 16;
+ bool isAligned = MMOBegin != MMOEnd &&
+ (*MMOBegin)->getAlignment() >= Alignment;
unsigned Opc = getLoadRegOpcode(DestReg, RC, isAligned, TM);
DebugLoc DL;
MachineInstrBuilder MIB = BuildMI(MF, DL, get(Opc), DestReg);
@@ -2174,6 +2394,40 @@ void X86InstrInfo::loadRegFromAddr(MachineFunction &MF, unsigned DestReg,
NewMIs.push_back(MIB);
}
+/// Expand2AddrUndef - Expand a single-def pseudo instruction to a two-addr
+/// instruction with two undef reads of the register being defined. This is
+/// used for mapping:
+/// %xmm4 = V_SET0
+/// to:
+/// %xmm4 = PXORrr %xmm4<undef>, %xmm4<undef>
+///
+static bool Expand2AddrUndef(MachineInstr *MI, const MCInstrDesc &Desc) {
+ assert(Desc.getNumOperands() == 3 && "Expected two-addr instruction.");
+ unsigned Reg = MI->getOperand(0).getReg();
+ MI->setDesc(Desc);
+
+ // MachineInstr::addOperand() will insert explicit operands before any
+ // implicit operands.
+ MachineInstrBuilder(MI).addReg(Reg, RegState::Undef)
+ .addReg(Reg, RegState::Undef);
+ // But we don't trust that.
+ assert(MI->getOperand(1).getReg() == Reg &&
+ MI->getOperand(2).getReg() == Reg && "Misplaced operand");
+ return true;
+}
+
+bool X86InstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const {
+ bool HasAVX = TM.getSubtarget<X86Subtarget>().hasAVX();
+ switch (MI->getOpcode()) {
+ case X86::V_SET0:
+ return Expand2AddrUndef(MI, get(HasAVX ? X86::VPXORrr : X86::PXORrr));
+ case X86::TEST8ri_NOREX:
+ MI->setDesc(get(X86::TEST8ri));
+ return true;
+ }
+ return false;
+}
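+// For illustration, with AVX available the V_SET0 case above rewrites
+//   %xmm4 = V_SET0
+// into
+//   %xmm4 = VPXORrr %xmm4<undef>, %xmm4<undef>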
+
MachineInstr*
X86InstrInfo::emitFrameIndexDebugValue(MachineFunction &MF,
int FrameIx, uint64_t Offset,
@@ -2305,7 +2559,7 @@ X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
OpcodeTablePtr->find(MI->getOpcode());
if (I != OpcodeTablePtr->end()) {
unsigned Opcode = I->second.first;
- unsigned MinAlign = I->second.second;
+ unsigned MinAlign = (I->second.second & TB_ALIGN_MASK) >> TB_ALIGN_SHIFT;
if (Align < MinAlign)
return NULL;
bool NarrowToMOV32rm = false;
@@ -2352,6 +2606,51 @@ X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
return NULL;
}
+/// hasPartialRegUpdate - Return true for all instructions that only update
+/// the first 32 or 64 bits of the destination register and leave the rest
+/// unmodified. This can be used to avoid folding loads if the instructions
+/// only update part of the destination register, and the non-updated part is
+/// not needed, e.g. cvtss2sd, sqrtss. Unfolding the load from these
+/// instructions breaks the partial register dependency and can improve
+/// performance, e.g.:
+///
+/// movss (%rdi), %xmm0
+/// cvtss2sd %xmm0, %xmm0
+///
+/// Instead of
+/// cvtss2sd (%rdi), %xmm0
+///
+/// FIXME: This should be turned into a TSFlags.
+///
+static bool hasPartialRegUpdate(unsigned Opcode) {
+ switch (Opcode) {
+ case X86::CVTSD2SSrr:
+ case X86::Int_CVTSD2SSrr:
+ case X86::CVTSS2SDrr:
+ case X86::Int_CVTSS2SDrr:
+ case X86::RCPSSr:
+ case X86::RCPSSr_Int:
+ case X86::ROUNDSDr:
+ case X86::ROUNDSSr:
+ case X86::RSQRTSSr:
+ case X86::RSQRTSSr_Int:
+ case X86::SQRTSSr:
+ case X86::SQRTSSr_Int:
+ // AVX encoded versions
+ case X86::VCVTSD2SSrr:
+ case X86::Int_VCVTSD2SSrr:
+ case X86::VCVTSS2SDrr:
+ case X86::Int_VCVTSS2SDrr:
+ case X86::VRCPSSr:
+ case X86::VROUNDSDr:
+ case X86::VROUNDSSr:
+ case X86::VRSQRTSSr:
+ case X86::VSQRTSSr:
+ return true;
+ }
+
+ return false;
+}
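+// Sketch of how the folding code below consults this predicate:
+//   if (!MF.getFunction()->hasFnAttr(Attribute::OptimizeForSize) &&
+//       hasPartialRegUpdate(MI->getOpcode()))
+//     return 0; // leave the load unfolded to break the false dependency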
MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
MachineInstr *MI,
@@ -2360,22 +2659,11 @@ MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
// Check switch flag
if (NoFusing) return NULL;
- if (!MF.getFunction()->hasFnAttr(Attribute::OptimizeForSize))
- switch (MI->getOpcode()) {
- case X86::CVTSD2SSrr:
- case X86::Int_CVTSD2SSrr:
- case X86::CVTSS2SDrr:
- case X86::Int_CVTSS2SDrr:
- case X86::RCPSSr:
- case X86::RCPSSr_Int:
- case X86::ROUNDSDr:
- case X86::ROUNDSSr:
- case X86::RSQRTSSr:
- case X86::RSQRTSSr_Int:
- case X86::SQRTSSr:
- case X86::SQRTSSr_Int:
- return 0;
- }
+ // Unless optimizing for size, don't fold to avoid partial
+ // register update stalls
+ if (!MF.getFunction()->hasFnAttr(Attribute::OptimizeForSize) &&
+ hasPartialRegUpdate(MI->getOpcode()))
+ return 0;
const MachineFrameInfo *MFI = MF.getFrameInfo();
unsigned Size = MFI->getObjectSize(FrameIndex);
@@ -2412,22 +2700,11 @@ MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
// Check switch flag
if (NoFusing) return NULL;
- if (!MF.getFunction()->hasFnAttr(Attribute::OptimizeForSize))
- switch (MI->getOpcode()) {
- case X86::CVTSD2SSrr:
- case X86::Int_CVTSD2SSrr:
- case X86::CVTSS2SDrr:
- case X86::Int_CVTSS2SDrr:
- case X86::RCPSSr:
- case X86::RCPSSr_Int:
- case X86::ROUNDSDr:
- case X86::ROUNDSSr:
- case X86::RSQRTSSr:
- case X86::RSQRTSSr_Int:
- case X86::SQRTSSr:
- case X86::SQRTSSr_Int:
- return 0;
- }
+ // Unless optimizing for size, don't fold to avoid partial
+ // register update stalls
+ if (!MF.getFunction()->hasFnAttr(Attribute::OptimizeForSize) &&
+ hasPartialRegUpdate(MI->getOpcode()))
+ return 0;
// Determine the alignment of the load.
unsigned Alignment = 0;
@@ -2439,13 +2716,9 @@ MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
case X86::AVX_SET0PDY:
Alignment = 32;
break;
- case X86::V_SET0PS:
- case X86::V_SET0PD:
- case X86::V_SET0PI:
+ case X86::V_SET0:
case X86::V_SETALLONES:
- case X86::AVX_SET0PS:
- case X86::AVX_SET0PD:
- case X86::AVX_SET0PI:
+ case X86::AVX_SETALLONES:
Alignment = 16;
break;
case X86::FsFLD0SD:
@@ -2481,18 +2754,16 @@ MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
SmallVector<MachineOperand,X86::AddrNumOperands> MOs;
switch (LoadMI->getOpcode()) {
- case X86::V_SET0PS:
- case X86::V_SET0PD:
- case X86::V_SET0PI:
+ case X86::V_SET0:
case X86::V_SETALLONES:
- case X86::AVX_SET0PS:
- case X86::AVX_SET0PD:
- case X86::AVX_SET0PI:
case X86::AVX_SET0PSY:
case X86::AVX_SET0PDY:
+ case X86::AVX_SETALLONES:
case X86::FsFLD0SD:
- case X86::FsFLD0SS: {
- // Folding a V_SET0P? or V_SETALLONES as a load, to ease register pressure.
+ case X86::FsFLD0SS:
+ case X86::VFsFLD0SD:
+ case X86::VFsFLD0SS: {
+ // Folding a V_SET0 or V_SETALLONES as a load, to ease register pressure.
// Create a constant-pool entry and operands to load from it.
// Medium and large mode can't fold loads this way.
@@ -2515,7 +2786,7 @@ MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
// Create a constant-pool entry.
MachineConstantPool &MCP = *MF.getConstantPool();
- const Type *Ty;
+ Type *Ty;
unsigned Opc = LoadMI->getOpcode();
if (Opc == X86::FsFLD0SS || Opc == X86::VFsFLD0SS)
Ty = Type::getFloatTy(MF.getFunction()->getContext());
@@ -2525,9 +2796,10 @@ MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
Ty = VectorType::get(Type::getFloatTy(MF.getFunction()->getContext()), 8);
else
Ty = VectorType::get(Type::getInt32Ty(MF.getFunction()->getContext()), 4);
- const Constant *C = LoadMI->getOpcode() == X86::V_SETALLONES ?
- Constant::getAllOnesValue(Ty) :
- Constant::getNullValue(Ty);
+
+ bool IsAllOnes = (Opc == X86::V_SETALLONES || Opc == X86::AVX_SETALLONES);
+ const Constant *C = IsAllOnes ? Constant::getAllOnesValue(Ty) :
+ Constant::getNullValue(Ty);
unsigned CPI = MCP.getConstantPoolIndex(C, Alignment);
// Create operands to load from the constant pool entry.
@@ -2615,9 +2887,9 @@ bool X86InstrInfo::unfoldMemoryOperand(MachineFunction &MF, MachineInstr *MI,
if (I == MemOp2RegOpTable.end())
return false;
unsigned Opc = I->second.first;
- unsigned Index = I->second.second & 0xf;
- bool FoldedLoad = I->second.second & (1 << 4);
- bool FoldedStore = I->second.second & (1 << 5);
+ unsigned Index = I->second.second & TB_INDEX_MASK;
+ bool FoldedLoad = I->second.second & TB_FOLDED_LOAD;
+ bool FoldedStore = I->second.second & TB_FOLDED_STORE;
if (UnfoldLoad && !FoldedLoad)
return false;
UnfoldLoad &= FoldedLoad;
@@ -2743,9 +3015,9 @@ X86InstrInfo::unfoldMemoryOperand(SelectionDAG &DAG, SDNode *N,
if (I == MemOp2RegOpTable.end())
return false;
unsigned Opc = I->second.first;
- unsigned Index = I->second.second & 0xf;
- bool FoldedLoad = I->second.second & (1 << 4);
- bool FoldedStore = I->second.second & (1 << 5);
+ unsigned Index = I->second.second & TB_INDEX_MASK;
+ bool FoldedLoad = I->second.second & TB_FOLDED_LOAD;
+ bool FoldedStore = I->second.second & TB_FOLDED_STORE;
const MCInstrDesc &MCID = get(Opc);
const TargetRegisterClass *RC = getRegClass(MCID, Index, &RI);
unsigned NumDefs = MCID.NumDefs;
@@ -2780,7 +3052,9 @@ X86InstrInfo::unfoldMemoryOperand(SelectionDAG &DAG, SDNode *N,
!TM.getSubtarget<X86Subtarget>().isUnalignedMemAccessFast())
// Do not introduce a slow unaligned load.
return false;
- bool isAligned = (*MMOs.first) && (*MMOs.first)->getAlignment() >= 16;
+ unsigned Alignment = RC->getSize() == 32 ? 32 : 16;
+ bool isAligned = (*MMOs.first) &&
+ (*MMOs.first)->getAlignment() >= Alignment;
Load = DAG.getMachineNode(getLoadRegOpcode(0, RC, isAligned, TM), dl,
VT, MVT::Other, &AddrOps[0], AddrOps.size());
NewNodes.push_back(Load);
@@ -2822,7 +3096,9 @@ X86InstrInfo::unfoldMemoryOperand(SelectionDAG &DAG, SDNode *N,
!TM.getSubtarget<X86Subtarget>().isUnalignedMemAccessFast())
// Do not introduce a slow unaligned store.
return false;
- bool isAligned = (*MMOs.first) && (*MMOs.first)->getAlignment() >= 16;
+ unsigned Alignment = RC->getSize() == 32 ? 32 : 16;
+ bool isAligned = (*MMOs.first) &&
+ (*MMOs.first)->getAlignment() >= Alignment;
SDNode *Store = DAG.getMachineNode(getStoreRegOpcode(0, DstRC,
isAligned, TM),
dl, MVT::Other,
@@ -2843,14 +3119,14 @@ unsigned X86InstrInfo::getOpcodeAfterMemoryUnfold(unsigned Opc,
MemOp2RegOpTable.find(Opc);
if (I == MemOp2RegOpTable.end())
return 0;
- bool FoldedLoad = I->second.second & (1 << 4);
- bool FoldedStore = I->second.second & (1 << 5);
+ bool FoldedLoad = I->second.second & TB_FOLDED_LOAD;
+ bool FoldedStore = I->second.second & TB_FOLDED_STORE;
if (UnfoldLoad && !FoldedLoad)
return 0;
if (UnfoldStore && !FoldedStore)
return 0;
if (LoadRegIndex)
- *LoadRegIndex = I->second.second & 0xf;
+ *LoadRegIndex = I->second.second & TB_INDEX_MASK;
return I->second.first;
}
@@ -2881,6 +3157,16 @@ X86InstrInfo::areLoadsFromSameBasePtr(SDNode *Load1, SDNode *Load2,
case X86::MOVAPDrm:
case X86::MOVDQArm:
case X86::MOVDQUrm:
+ // AVX load instructions
+ case X86::VMOVSSrm:
+ case X86::VMOVSDrm:
+ case X86::FsVMOVAPSrm:
+ case X86::FsVMOVAPDrm:
+ case X86::VMOVAPSrm:
+ case X86::VMOVUPSrm:
+ case X86::VMOVAPDrm:
+ case X86::VMOVDQArm:
+ case X86::VMOVDQUrm:
case X86::VMOVAPSYrm:
case X86::VMOVUPSYrm:
case X86::VMOVAPDYrm:
@@ -2908,6 +3194,16 @@ X86InstrInfo::areLoadsFromSameBasePtr(SDNode *Load1, SDNode *Load2,
case X86::MOVAPDrm:
case X86::MOVDQArm:
case X86::MOVDQUrm:
+ // AVX load instructions
+ case X86::VMOVSSrm:
+ case X86::VMOVSDrm:
+ case X86::FsVMOVAPSrm:
+ case X86::FsVMOVAPDrm:
+ case X86::VMOVAPSrm:
+ case X86::VMOVUPSrm:
+ case X86::VMOVAPDrm:
+ case X86::VMOVDQArm:
+ case X86::VMOVDQUrm:
case X86::VMOVAPSYrm:
case X86::VMOVUPSYrm:
case X86::VMOVAPDYrm:
@@ -3007,31 +3303,6 @@ isSafeToMoveRegClassDefs(const TargetRegisterClass *RC) const {
RC == &X86::RFP64RegClass || RC == &X86::RFP80RegClass);
}
-
-/// isX86_64ExtendedReg - Is the MachineOperand a x86-64 extended (r8 or higher)
-/// register? e.g. r8, xmm8, xmm13, etc.
-bool X86InstrInfo::isX86_64ExtendedReg(unsigned RegNo) {
- switch (RegNo) {
- default: break;
- case X86::R8: case X86::R9: case X86::R10: case X86::R11:
- case X86::R12: case X86::R13: case X86::R14: case X86::R15:
- case X86::R8D: case X86::R9D: case X86::R10D: case X86::R11D:
- case X86::R12D: case X86::R13D: case X86::R14D: case X86::R15D:
- case X86::R8W: case X86::R9W: case X86::R10W: case X86::R11W:
- case X86::R12W: case X86::R13W: case X86::R14W: case X86::R15W:
- case X86::R8B: case X86::R9B: case X86::R10B: case X86::R11B:
- case X86::R12B: case X86::R13B: case X86::R14B: case X86::R15B:
- case X86::XMM8: case X86::XMM9: case X86::XMM10: case X86::XMM11:
- case X86::XMM12: case X86::XMM13: case X86::XMM14: case X86::XMM15:
- case X86::YMM8: case X86::YMM9: case X86::YMM10: case X86::YMM11:
- case X86::YMM12: case X86::YMM13: case X86::YMM14: case X86::YMM15:
- case X86::CR8: case X86::CR9: case X86::CR10: case X86::CR11:
- case X86::CR12: case X86::CR13: case X86::CR14: case X86::CR15:
- return true;
- }
- return false;
-}
-
/// getGlobalBaseReg - Return a virtual register initialized with the
/// global base register value. Output instructions required to
/// initialize the register in the function entry block, if necessary.
@@ -3072,7 +3343,6 @@ static const unsigned ReplaceableInstrs[][3] = {
{ X86::ANDPSrr, X86::ANDPDrr, X86::PANDrr },
{ X86::ORPSrm, X86::ORPDrm, X86::PORrm },
{ X86::ORPSrr, X86::ORPDrr, X86::PORrr },
- { X86::V_SET0PS, X86::V_SET0PD, X86::V_SET0PI },
{ X86::XORPSrm, X86::XORPDrm, X86::PXORrm },
{ X86::XORPSrr, X86::XORPDrr, X86::PXORrr },
// AVX 128-bit support
@@ -3088,7 +3358,6 @@ static const unsigned ReplaceableInstrs[][3] = {
{ X86::VANDPSrr, X86::VANDPDrr, X86::VPANDrr },
{ X86::VORPSrm, X86::VORPDrm, X86::VPORrm },
{ X86::VORPSrr, X86::VORPDrr, X86::VPORrr },
- { X86::AVX_SET0PS, X86::AVX_SET0PD, X86::AVX_SET0PI },
{ X86::VXORPSrm, X86::VXORPDrm, X86::VPXORrm },
{ X86::VXORPSrr, X86::VXORPDrr, X86::VPXORrr },
// AVX 256-bit support
@@ -3111,13 +3380,13 @@ static const unsigned *lookup(unsigned opcode, unsigned domain) {
}
std::pair<uint16_t, uint16_t>
-X86InstrInfo::GetSSEDomain(const MachineInstr *MI) const {
+X86InstrInfo::getExecutionDomain(const MachineInstr *MI) const {
uint16_t domain = (MI->getDesc().TSFlags >> X86II::SSEDomainShift) & 3;
return std::make_pair(domain,
domain && lookup(MI->getOpcode(), domain) ? 0xe : 0);
}
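// The second element is a mask of alternative domains: 0xe (binary 1110)
// means the opcode can be reassigned to any of domains 1-3 through the
// ReplaceableInstrs tables above.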
-void X86InstrInfo::SetSSEDomain(MachineInstr *MI, unsigned Domain) const {
+void X86InstrInfo::setExecutionDomain(MachineInstr *MI, unsigned Domain) const {
assert(Domain>0 && Domain<4 && "Invalid execution domain");
uint16_t dom = (MI->getDesc().TSFlags >> X86II::SSEDomainShift) & 3;
assert(dom && "Not an SSE instruction");
@@ -3158,6 +3427,29 @@ bool X86InstrInfo::isHighLatencyDef(int opc) const {
case X86::SQRTSSm_Int:
case X86::SQRTSSr:
case X86::SQRTSSr_Int:
+ // AVX instructions with high latency
+ case X86::VDIVSDrm:
+ case X86::VDIVSDrm_Int:
+ case X86::VDIVSDrr:
+ case X86::VDIVSDrr_Int:
+ case X86::VDIVSSrm:
+ case X86::VDIVSSrm_Int:
+ case X86::VDIVSSrr:
+ case X86::VDIVSSrr_Int:
+ case X86::VSQRTPDm:
+ case X86::VSQRTPDm_Int:
+ case X86::VSQRTPDr:
+ case X86::VSQRTPDr_Int:
+ case X86::VSQRTPSm:
+ case X86::VSQRTPSm_Int:
+ case X86::VSQRTPSr:
+ case X86::VSQRTPSr_Int:
+ case X86::VSQRTSDm:
+ case X86::VSQRTSDm_Int:
+ case X86::VSQRTSDr:
+ case X86::VSQRTSSm:
+ case X86::VSQRTSSm_Int:
+ case X86::VSQRTSSr:
return true;
}
}
diff --git a/lib/Target/X86/X86InstrInfo.h b/lib/Target/X86/X86InstrInfo.h
index 5f2eba3..97009db 100644
--- a/lib/Target/X86/X86InstrInfo.h
+++ b/lib/Target/X86/X86InstrInfo.h
@@ -27,24 +27,6 @@ namespace llvm {
class X86TargetMachine;
namespace X86 {
- // Enums for memory operand decoding. Each memory operand is represented with
- // a 5 operand sequence in the form:
- // [BaseReg, ScaleAmt, IndexReg, Disp, Segment]
- // These enums help decode this.
- enum {
- AddrBaseReg = 0,
- AddrScaleAmt = 1,
- AddrIndexReg = 2,
- AddrDisp = 3,
-
- /// AddrSegmentReg - The operand # of the segment in the memory operand.
- AddrSegmentReg = 4,
-
- /// AddrNumOperands - Total number of operands in a memory reference.
- AddrNumOperands = 5
- };
-
-
// X86 specific condition code. These correspond to X86_*_COND in
// X86InstrInfo.td. They must be kept in synch.
enum CondCode {
@@ -82,133 +64,8 @@ namespace X86 {
/// GetOppositeBranchCondition - Return the inverse of the specified cond,
/// e.g. turning COND_E to COND_NE.
CondCode GetOppositeBranchCondition(X86::CondCode CC);
+} // end namespace X86;
-}
-
-/// X86II - This namespace holds all of the target specific flags that
-/// instruction info tracks.
-///
-namespace X86II {
- /// Target Operand Flag enum.
- enum TOF {
- //===------------------------------------------------------------------===//
- // X86 Specific MachineOperand flags.
-
- MO_NO_FLAG,
-
- /// MO_GOT_ABSOLUTE_ADDRESS - On a symbol operand, this represents a
- /// relocation of:
- /// SYMBOL_LABEL + [. - PICBASELABEL]
- MO_GOT_ABSOLUTE_ADDRESS,
-
- /// MO_PIC_BASE_OFFSET - On a symbol operand this indicates that the
- /// immediate should get the value of the symbol minus the PIC base label:
- /// SYMBOL_LABEL - PICBASELABEL
- MO_PIC_BASE_OFFSET,
-
- /// MO_GOT - On a symbol operand this indicates that the immediate is the
- /// offset to the GOT entry for the symbol name from the base of the GOT.
- ///
- /// See the X86-64 ELF ABI supplement for more details.
- /// SYMBOL_LABEL @GOT
- MO_GOT,
-
- /// MO_GOTOFF - On a symbol operand this indicates that the immediate is
- /// the offset to the location of the symbol name from the base of the GOT.
- ///
- /// See the X86-64 ELF ABI supplement for more details.
- /// SYMBOL_LABEL @GOTOFF
- MO_GOTOFF,
-
- /// MO_GOTPCREL - On a symbol operand this indicates that the immediate is
- /// offset to the GOT entry for the symbol name from the current code
- /// location.
- ///
- /// See the X86-64 ELF ABI supplement for more details.
- /// SYMBOL_LABEL @GOTPCREL
- MO_GOTPCREL,
-
- /// MO_PLT - On a symbol operand this indicates that the immediate is
- /// offset to the PLT entry of symbol name from the current code location.
- ///
- /// See the X86-64 ELF ABI supplement for more details.
- /// SYMBOL_LABEL @PLT
- MO_PLT,
-
- /// MO_TLSGD - On a symbol operand this indicates that the immediate is
- /// some TLS offset.
- ///
- /// See 'ELF Handling for Thread-Local Storage' for more details.
- /// SYMBOL_LABEL @TLSGD
- MO_TLSGD,
-
- /// MO_GOTTPOFF - On a symbol operand this indicates that the immediate is
- /// some TLS offset.
- ///
- /// See 'ELF Handling for Thread-Local Storage' for more details.
- /// SYMBOL_LABEL @GOTTPOFF
- MO_GOTTPOFF,
-
- /// MO_INDNTPOFF - On a symbol operand this indicates that the immediate is
- /// some TLS offset.
- ///
- /// See 'ELF Handling for Thread-Local Storage' for more details.
- /// SYMBOL_LABEL @INDNTPOFF
- MO_INDNTPOFF,
-
- /// MO_TPOFF - On a symbol operand this indicates that the immediate is
- /// some TLS offset.
- ///
- /// See 'ELF Handling for Thread-Local Storage' for more details.
- /// SYMBOL_LABEL @TPOFF
- MO_TPOFF,
-
- /// MO_NTPOFF - On a symbol operand this indicates that the immediate is
- /// some TLS offset.
- ///
- /// See 'ELF Handling for Thread-Local Storage' for more details.
- /// SYMBOL_LABEL @NTPOFF
- MO_NTPOFF,
-
- /// MO_DLLIMPORT - On a symbol operand "FOO", this indicates that the
- /// reference is actually to the "__imp_FOO" symbol. This is used for
- /// dllimport linkage on windows.
- MO_DLLIMPORT,
-
- /// MO_DARWIN_STUB - On a symbol operand "FOO", this indicates that the
- /// reference is actually to the "FOO$stub" symbol. This is used for calls
- /// and jumps to external functions on Tiger and earlier.
- MO_DARWIN_STUB,
-
- /// MO_DARWIN_NONLAZY - On a symbol operand "FOO", this indicates that the
- /// reference is actually to the "FOO$non_lazy_ptr" symbol, which is a
- /// non-PIC-base-relative reference to a non-hidden dyld lazy pointer stub.
- MO_DARWIN_NONLAZY,
-
- /// MO_DARWIN_NONLAZY_PIC_BASE - On a symbol operand "FOO", this indicates
- /// that the reference is actually to "FOO$non_lazy_ptr - PICBASE", which is
- /// a PIC-base-relative reference to a non-hidden dyld lazy pointer stub.
- MO_DARWIN_NONLAZY_PIC_BASE,
-
- /// MO_DARWIN_HIDDEN_NONLAZY_PIC_BASE - On a symbol operand "FOO", this
- /// indicates that the reference is actually to "FOO$non_lazy_ptr - PICBASE", which is
- /// which is a PIC-base-relative reference to a hidden dyld lazy pointer
- /// stub.
- MO_DARWIN_HIDDEN_NONLAZY_PIC_BASE,
-
- /// MO_TLVP - On a symbol operand this indicates that the immediate is
- /// some TLS offset.
- ///
- /// This is the TLS offset for the Darwin TLS mechanism.
- MO_TLVP,
-
- /// MO_TLVP_PIC_BASE - On a symbol operand this indicates that the immediate
- /// is some TLS offset from the picbase.
- ///
- /// This is the 32-bit TLS offset for Darwin TLS in PIC mode.
- MO_TLVP_PIC_BASE
- };
-}
/// isGlobalStubReference - Return true if the specified TargetFlag operand is
/// a reference to a stub for a global, not the global itself.
@@ -243,353 +100,6 @@ inline static bool isGlobalRelativeToPICBase(unsigned char TargetFlag) {
}
}
-/// X86II - This namespace holds all of the target specific flags that
-/// instruction info tracks.
-///
-namespace X86II {
- enum {
- //===------------------------------------------------------------------===//
- // Instruction encodings. These are the standard/most common forms for X86
- // instructions.
- //
-
- // PseudoFrm - This represents an instruction that is a pseudo instruction
- // or one that has not been implemented yet. It is illegal to code generate
- // it, but tolerated for intermediate implementation stages.
- Pseudo = 0,
-
- /// Raw - This form is for instructions that don't have any operands, so
- /// they are just a fixed opcode value, like 'leave'.
- RawFrm = 1,
-
- /// AddRegFrm - This form is used for instructions like 'push r32' that have
- /// their one register operand added to their opcode.
- AddRegFrm = 2,
-
- /// MRMDestReg - This form is used for instructions that use the Mod/RM byte
- /// to specify a destination, which in this case is a register.
- ///
- MRMDestReg = 3,
-
- /// MRMDestMem - This form is used for instructions that use the Mod/RM byte
- /// to specify a destination, which in this case is memory.
- ///
- MRMDestMem = 4,
-
- /// MRMSrcReg - This form is used for instructions that use the Mod/RM byte
- /// to specify a source, which in this case is a register.
- ///
- MRMSrcReg = 5,
-
- /// MRMSrcMem - This form is used for instructions that use the Mod/RM byte
- /// to specify a source, which in this case is memory.
- ///
- MRMSrcMem = 6,
-
- /// MRM[0-7][rm] - These forms are used to represent instructions that use
- /// a Mod/RM byte, and use the middle field to hold extended opcode
- /// information. In the intel manual these are represented as /0, /1, ...
- /// information. In the Intel manual these are represented as /0, /1, ...
-
- // First, instructions that operate on a register r/m operand...
- MRM0r = 16, MRM1r = 17, MRM2r = 18, MRM3r = 19, // Format /0 /1 /2 /3
- MRM4r = 20, MRM5r = 21, MRM6r = 22, MRM7r = 23, // Format /4 /5 /6 /7
-
- // Next, instructions that operate on a memory r/m operand...
- MRM0m = 24, MRM1m = 25, MRM2m = 26, MRM3m = 27, // Format /0 /1 /2 /3
- MRM4m = 28, MRM5m = 29, MRM6m = 30, MRM7m = 31, // Format /4 /5 /6 /7
-
- // MRMInitReg - This form is used for instructions whose source and
- // destinations are the same register.
- MRMInitReg = 32,
-
- // MRM_C1 - A mod/rm byte of exactly 0xC1.
- MRM_C1 = 33,
- MRM_C2 = 34,
- MRM_C3 = 35,
- MRM_C4 = 36,
- MRM_C8 = 37,
- MRM_C9 = 38,
- MRM_E8 = 39,
- MRM_F0 = 40,
- MRM_F8 = 41,
- MRM_F9 = 42,
- MRM_D0 = 45,
- MRM_D1 = 46,
-
- /// RawFrmImm8 - This is used for the ENTER instruction, which has two
- /// immediates, the first of which is a 16-bit immediate (specified by
- /// the imm encoding) and the second is a 8-bit fixed value.
- /// the imm encoding) and the second is an 8-bit fixed value.
-
- /// RawFrmImm16 - This is used for CALL FAR instructions, which have two
- /// immediates, the first of which is a 16 or 32-bit immediate (specified by
- /// the imm encoding) and the second is a 16-bit fixed value. In the AMD
- /// manual, this operand is described as pntr16:32 and pntr16:16
- RawFrmImm16 = 44,
-
- FormMask = 63,
-
- //===------------------------------------------------------------------===//
- // Actual flags...
-
- // OpSize - Set if this instruction requires an operand size prefix (0x66),
- // which most often indicates that the instruction operates on 16 bit data
- // instead of 32 bit data.
- OpSize = 1 << 6,
-
- // AdSize - Set if this instruction requires an address-size prefix (0x67),
- // which most often indicates that the instruction uses 16-bit addresses
- // instead of 32-bit addresses (or 32-bit addresses in 64-bit mode).
- AdSize = 1 << 7,
-
- //===------------------------------------------------------------------===//
- // Op0Mask - There are several prefix bytes that are used to form two byte
- // opcodes. These are currently 0x0F, 0xF3, and 0xD8-0xDF. This mask is
- // used to obtain the setting of this field. If no bits in this field are
- // set, there is no prefix byte for obtaining a multibyte opcode.
- //
- Op0Shift = 8,
- Op0Mask = 0x1F << Op0Shift,
-
- // TB - TwoByte - Set if this instruction has a two byte opcode, which
- // starts with a 0x0F byte before the real opcode.
- TB = 1 << Op0Shift,
-
- // REP - The 0xF3 prefix byte indicating repetition of the following
- // instruction.
- REP = 2 << Op0Shift,
-
- // D8-DF - These escape opcodes are used by the floating point unit. These
- // values must remain sequential.
- D8 = 3 << Op0Shift, D9 = 4 << Op0Shift,
- DA = 5 << Op0Shift, DB = 6 << Op0Shift,
- DC = 7 << Op0Shift, DD = 8 << Op0Shift,
- DE = 9 << Op0Shift, DF = 10 << Op0Shift,
-
- // XS, XD - These prefix codes are for single and double precision scalar
- // floating point operations performed in the SSE registers.
- XD = 11 << Op0Shift, XS = 12 << Op0Shift,
-
- // T8, TA, A6, A7 - Prefix after the 0x0F prefix.
- T8 = 13 << Op0Shift, TA = 14 << Op0Shift,
- A6 = 15 << Op0Shift, A7 = 16 << Op0Shift,
-
- // TF - Prefix before and after 0x0F
- TF = 17 << Op0Shift,
-
- //===------------------------------------------------------------------===//
- // REX_W - REX prefixes are instruction prefixes used in 64-bit mode.
- // They are used to specify GPRs and SSE registers, 64-bit operand size,
- // etc. We only care about REX.W and REX.R bits and only the former is
- // statically determined.
- //
- REXShift = Op0Shift + 5,
- REX_W = 1 << REXShift,
-
- //===------------------------------------------------------------------===//
- // This three-bit field describes the size of an immediate operand. Zero is
- // unused so that we can tell if we forgot to set a value.
- ImmShift = REXShift + 1,
- ImmMask = 7 << ImmShift,
- Imm8 = 1 << ImmShift,
- Imm8PCRel = 2 << ImmShift,
- Imm16 = 3 << ImmShift,
- Imm16PCRel = 4 << ImmShift,
- Imm32 = 5 << ImmShift,
- Imm32PCRel = 6 << ImmShift,
- Imm64 = 7 << ImmShift,
-
- //===------------------------------------------------------------------===//
- // FP Instruction Classification... Zero means a non-FP instruction.
-
- // FPTypeMask - Mask for all of the FP types...
- FPTypeShift = ImmShift + 3,
- FPTypeMask = 7 << FPTypeShift,
-
- // NotFP - The default, set for instructions that do not use FP registers.
- NotFP = 0 << FPTypeShift,
-
- // ZeroArgFP - 0 arg FP instruction which implicitly pushes ST(0), e.g. fld0
- ZeroArgFP = 1 << FPTypeShift,
-
- // OneArgFP - 1 arg FP instructions which implicitly read ST(0), such as fst
- OneArgFP = 2 << FPTypeShift,
-
- // OneArgFPRW - 1 arg FP instruction which implicitly reads ST(0) and writes a
- // result back to ST(0). For example, fcos, fsqrt, etc.
- //
- OneArgFPRW = 3 << FPTypeShift,
-
- // TwoArgFP - 2 arg FP instructions which implicitly read ST(0), and an
- // explicit argument, storing the result to either ST(0) or the implicit
- // argument. For example: fadd, fsub, fmul, etc...
- TwoArgFP = 4 << FPTypeShift,
-
- // CompareFP - 2 arg FP instructions which implicitly read ST(0) and an
- // explicit argument, but have no destination. Example: fucom, fucomi, ...
- CompareFP = 5 << FPTypeShift,
-
- // CondMovFP - "2 operand" floating point conditional move instructions.
- CondMovFP = 6 << FPTypeShift,
-
- // SpecialFP - Special instruction forms. Dispatch by opcode explicitly.
- SpecialFP = 7 << FPTypeShift,
-
- // Lock prefix
- LOCKShift = FPTypeShift + 3,
- LOCK = 1 << LOCKShift,
-
- // Segment override prefixes. Currently we just need the ability to address
- // stuff in gs and fs segments.
- SegOvrShift = LOCKShift + 1,
- SegOvrMask = 3 << SegOvrShift,
- FS = 1 << SegOvrShift,
- GS = 2 << SegOvrShift,
-
- // Execution domain for SSE instructions in bits 23, 24.
- // 0 in bits 23-24 means normal, non-SSE instruction.
- SSEDomainShift = SegOvrShift + 2,
-
- OpcodeShift = SSEDomainShift + 2,
-
- //===------------------------------------------------------------------===//
- /// VEX - The opcode prefix used by AVX instructions
- VEXShift = OpcodeShift + 8,
- VEX = 1U << 0,
-
- /// VEX_W - Has opcode-specific functionality, but is used in the same
- /// way as REX_W is for regular SSE instructions.
- VEX_W = 1U << 1,
-
- /// VEX_4V - Used to specify an additional AVX/SSE register. Several
- /// 2-address instructions in SSE are represented as 3-address ones in AVX,
- /// and the additional register is encoded in the VEX_VVVV prefix.
- VEX_4V = 1U << 2,
-
- /// VEX_I8IMM - Specifies that the last register used in an AVX instruction
- /// must be encoded in the i8 immediate field. This usually happens in
- /// instructions with 4 operands.
- VEX_I8IMM = 1U << 3,
-
- /// VEX_L - Stands for a bit in the VEX opcode prefix meaning the current
- /// instruction uses 256-bit wide registers. This is usually auto-detected
- /// when a VR256 register is used, but some AVX instructions also have this
- /// field marked when using an f256 memory reference.
- VEX_L = 1U << 4,
-
- /// Has3DNow0F0FOpcode - This flag indicates that the instruction uses the
- /// wacky 0x0F 0x0F prefix for 3DNow! instructions. The manual documents
- /// this as having a 0x0F prefix with a 0x0F opcode, and each instruction
- /// storing a classifier in the imm8 field. To simplify our implementation,
- /// we handle this by storing the classifier in the opcode field and using
- /// this flag to indicate that the encoder should do the wacky 3DNow! thing.
- Has3DNow0F0FOpcode = 1U << 5
- };
-
- // getBaseOpcodeFor - This function returns the "base" X86 opcode for the
- // specified machine instruction.
- //
- static inline unsigned char getBaseOpcodeFor(uint64_t TSFlags) {
- return TSFlags >> X86II::OpcodeShift;
- }
-
- static inline bool hasImm(uint64_t TSFlags) {
- return (TSFlags & X86II::ImmMask) != 0;
- }
-
- /// getSizeOfImm - Decode the "size of immediate" field from the TSFlags field
- /// of the specified instruction.
- static inline unsigned getSizeOfImm(uint64_t TSFlags) {
- switch (TSFlags & X86II::ImmMask) {
- default: assert(0 && "Unknown immediate size");
- case X86II::Imm8:
- case X86II::Imm8PCRel: return 1;
- case X86II::Imm16:
- case X86II::Imm16PCRel: return 2;
- case X86II::Imm32:
- case X86II::Imm32PCRel: return 4;
- case X86II::Imm64: return 8;
- }
- }
-
- /// isImmPCRel - Return true if the immediate of the specified instruction's
- /// TSFlags indicates that it is pc relative.
- static inline unsigned isImmPCRel(uint64_t TSFlags) {
- switch (TSFlags & X86II::ImmMask) {
- default: assert(0 && "Unknown immediate size");
- case X86II::Imm8PCRel:
- case X86II::Imm16PCRel:
- case X86II::Imm32PCRel:
- return true;
- case X86II::Imm8:
- case X86II::Imm16:
- case X86II::Imm32:
- case X86II::Imm64:
- return false;
- }
- }
-
- /// getMemoryOperandNo - The function returns the MCInst operand # for the
- /// first field of the memory operand. If the instruction doesn't have a
- /// memory operand, this returns -1.
- ///
- /// Note that this ignores tied operands. If there is a tied register which
- /// is duplicated in the MCInst (e.g. "EAX = addl EAX, [mem]") it is only
- /// counted as one operand.
- ///
- static inline int getMemoryOperandNo(uint64_t TSFlags) {
- switch (TSFlags & X86II::FormMask) {
- case X86II::MRMInitReg: assert(0 && "FIXME: Remove this form");
- default: assert(0 && "Unknown FormMask value in getMemoryOperandNo!");
- case X86II::Pseudo:
- case X86II::RawFrm:
- case X86II::AddRegFrm:
- case X86II::MRMDestReg:
- case X86II::MRMSrcReg:
- case X86II::RawFrmImm8:
- case X86II::RawFrmImm16:
- return -1;
- case X86II::MRMDestMem:
- return 0;
- case X86II::MRMSrcMem: {
- bool HasVEX_4V = (TSFlags >> X86II::VEXShift) & X86II::VEX_4V;
- unsigned FirstMemOp = 1;
- if (HasVEX_4V)
- ++FirstMemOp; // Skip the register source (which is encoded in VEX_VVVV).
-
- // FIXME: Maybe lea should have its own form? This is a horrible hack.
- //if (Opcode == X86::LEA64r || Opcode == X86::LEA64_32r ||
- // Opcode == X86::LEA16r || Opcode == X86::LEA32r)
- return FirstMemOp;
- }
- case X86II::MRM0r: case X86II::MRM1r:
- case X86II::MRM2r: case X86II::MRM3r:
- case X86II::MRM4r: case X86II::MRM5r:
- case X86II::MRM6r: case X86II::MRM7r:
- return -1;
- case X86II::MRM0m: case X86II::MRM1m:
- case X86II::MRM2m: case X86II::MRM3m:
- case X86II::MRM4m: case X86II::MRM5m:
- case X86II::MRM6m: case X86II::MRM7m:
- return 0;
- case X86II::MRM_C1:
- case X86II::MRM_C2:
- case X86II::MRM_C3:
- case X86II::MRM_C4:
- case X86II::MRM_C8:
- case X86II::MRM_C9:
- case X86II::MRM_E8:
- case X86II::MRM_F0:
- case X86II::MRM_F8:
- case X86II::MRM_F9:
- case X86II::MRM_D0:
- case X86II::MRM_D1:
- return -1;
- }
- }
-}
-
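The enum being removed above packs independent fields (prefix byte, REX.W, immediate size, FP type, lock, segment override, SSE domain, base opcode) into one 64-bit TSFlags word, and the helpers decode it by shift-and-mask. A standalone C++ sketch of that scheme, with the shift constants mirroring the enum and a made-up packed value for illustration (this is not the library header, just the idea):

// Sketch of the TSFlags packing described above. Shift/mask constants mirror
// the enum being removed; the packed value in main() is invented.
#include <cassert>
#include <cstdint>

namespace X86IISketch {
enum : uint64_t {
  Op0Shift    = 8,                  // 5-bit prefix-byte field
  REXShift    = Op0Shift + 5,       // REX.W flag
  ImmShift    = REXShift + 1,       // 3-bit immediate-size field
  ImmMask     = 7ULL << ImmShift,
  Imm8        = 1ULL << ImmShift,
  Imm32       = 5ULL << ImmShift,
  FPTypeShift = ImmShift + 3,       // 3-bit FP classification
  LOCKShift   = FPTypeShift + 3,
  SegOvrShift = LOCKShift + 1,      // 2-bit segment override
  SSEDomainShift = SegOvrShift + 2, // 2-bit SSE execution domain
  OpcodeShift = SSEDomainShift + 2  // base opcode byte sits on top
};

inline unsigned char getBaseOpcodeFor(uint64_t TSFlags) {
  return (unsigned char)(TSFlags >> OpcodeShift);
}

inline unsigned getSizeOfImm(uint64_t TSFlags) {
  switch (TSFlags & ImmMask) {
  case Imm8:  return 1;
  case Imm32: return 4;
  default:    return 0; // remaining sizes elided in this sketch
  }
}
} // namespace X86IISketch

int main() {
  // Pack a fictitious instruction: base opcode 0x81 with a 32-bit immediate.
  uint64_t TSFlags =
      (0x81ULL << X86IISketch::OpcodeShift) | X86IISketch::Imm32;
  assert(X86IISketch::getBaseOpcodeFor(TSFlags) == 0x81);
  assert(X86IISketch::getSizeOfImm(TSFlags) == 4);
  return 0;
}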
inline static bool isScale(const MachineOperand &MO) {
return MO.isImm() &&
(MO.getImm() == 1 || MO.getImm() == 2 ||
@@ -621,14 +131,22 @@ class X86InstrInfo : public X86GenInstrInfo {
/// RegOp2MemOpTable2Addr, RegOp2MemOpTable0, RegOp2MemOpTable1,
/// RegOp2MemOpTable2 - Load / store folding opcode maps.
///
- DenseMap<unsigned, std::pair<unsigned,unsigned> > RegOp2MemOpTable2Addr;
- DenseMap<unsigned, std::pair<unsigned,unsigned> > RegOp2MemOpTable0;
- DenseMap<unsigned, std::pair<unsigned,unsigned> > RegOp2MemOpTable1;
- DenseMap<unsigned, std::pair<unsigned,unsigned> > RegOp2MemOpTable2;
+ typedef DenseMap<unsigned,
+ std::pair<unsigned, unsigned> > RegOp2MemOpTableType;
+ RegOp2MemOpTableType RegOp2MemOpTable2Addr;
+ RegOp2MemOpTableType RegOp2MemOpTable0;
+ RegOp2MemOpTableType RegOp2MemOpTable1;
+ RegOp2MemOpTableType RegOp2MemOpTable2;
/// MemOp2RegOpTable - Load / store unfolding opcode map.
///
- DenseMap<unsigned, std::pair<unsigned, unsigned> > MemOp2RegOpTable;
+ typedef DenseMap<unsigned,
+ std::pair<unsigned, unsigned> > MemOp2RegOpTableType;
+ MemOp2RegOpTableType MemOp2RegOpTable;
+
+ void AddTableEntry(RegOp2MemOpTableType &R2MTable,
+ MemOp2RegOpTableType &M2RTable,
+ unsigned RegOp, unsigned MemOp, unsigned Flags);
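The two map types above form a bidirectional opcode table: folding looks up a register-form opcode to find its memory form, unfolding goes the other way, and the new AddTableEntry keeps both sides in sync. A minimal sketch of that pairing, using std::unordered_map in place of llvm::DenseMap so it stands alone; the opcode numbers and flags are invented:

// Sketch of the paired fold/unfold tables declared above; std::unordered_map
// stands in for llvm::DenseMap, and all numeric values are made up.
#include <cassert>
#include <unordered_map>
#include <utility>

typedef std::unordered_map<unsigned, std::pair<unsigned, unsigned> > OpTable;

// One call registers both directions, mirroring the intent of AddTableEntry.
static void AddTableEntry(OpTable &RegOp2MemOp, OpTable &MemOp2RegOp,
                          unsigned RegOp, unsigned MemOp, unsigned Flags) {
  RegOp2MemOp[RegOp] = std::make_pair(MemOp, Flags);
  MemOp2RegOp[MemOp] = std::make_pair(RegOp, Flags);
}

int main() {
  OpTable RegOp2MemOp, MemOp2RegOp;
  const unsigned ADD32rr = 100, ADD32rm = 101, Flags = 0; // fictitious
  AddTableEntry(RegOp2MemOp, MemOp2RegOp, ADD32rr, ADD32rm, Flags);
  assert(RegOp2MemOp.at(ADD32rr).first == ADD32rm); // fold: reg -> mem form
  assert(MemOp2RegOp.at(ADD32rm).first == ADD32rr); // unfold: mem -> reg form
  return 0;
}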
public:
explicit X86InstrInfo(X86TargetMachine &tm);
@@ -656,17 +174,6 @@ public:
unsigned isLoadFromStackSlotPostFE(const MachineInstr *MI,
int &FrameIndex) const;
- /// hasLoadFromStackSlot - If the specified machine instruction has
- /// a load from a stack slot, return true along with the FrameIndex
- /// of the loaded stack slot and the machine mem operand containing
- /// the reference. If not, return false. Unlike
- /// isLoadFromStackSlot, this returns true for any instruction that
- /// loads from the stack. This is a hint only and may not catch all
- /// cases.
- bool hasLoadFromStackSlot(const MachineInstr *MI,
- const MachineMemOperand *&MMO,
- int &FrameIndex) const;
-
unsigned isStoreToStackSlot(const MachineInstr *MI, int &FrameIndex) const;
/// isStoreToStackSlotPostFE - Check for post-frame ptr elimination
/// stack locations as well. This uses a heuristic so it isn't
@@ -674,16 +181,6 @@ public:
unsigned isStoreToStackSlotPostFE(const MachineInstr *MI,
int &FrameIndex) const;
- /// hasStoreToStackSlot - If the specified machine instruction has a
- /// store to a stack slot, return true along with the FrameIndex of
- /// the stored stack slot and the machine mem operand containing the
- /// reference. If not, return false. Unlike isStoreToStackSlot,
- /// this returns true for any instruction that stores to the
- /// stack. This is a hint only and may not catch all cases.
- bool hasStoreToStackSlot(const MachineInstr *MI,
- const MachineMemOperand *&MMO,
- int &FrameIndex) const;
-
bool isReallyTriviallyReMaterializable(const MachineInstr *MI,
AliasAnalysis *AA) const;
void reMaterialize(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
@@ -750,6 +247,9 @@ public:
MachineInstr::mmo_iterator MMOBegin,
MachineInstr::mmo_iterator MMOEnd,
SmallVectorImpl<MachineInstr*> &NewMIs) const;
+
+ virtual bool expandPostRAPseudo(MachineBasicBlock::iterator MI) const;
+
virtual
MachineInstr *emitFrameIndexDebugValue(MachineFunction &MF,
int FrameIx, uint64_t Offset,
@@ -829,32 +329,21 @@ public:
/// instruction that defines the specified register class.
bool isSafeToMoveRegClassDefs(const TargetRegisterClass *RC) const;
- static bool isX86_64NonExtLowByteReg(unsigned reg) {
- return (reg == X86::SPL || reg == X86::BPL ||
- reg == X86::SIL || reg == X86::DIL);
- }
-
static bool isX86_64ExtendedReg(const MachineOperand &MO) {
if (!MO.isReg()) return false;
- return isX86_64ExtendedReg(MO.getReg());
+ return X86II::isX86_64ExtendedReg(MO.getReg());
}
- /// isX86_64ExtendedReg - Is the MachineOperand an x86-64 extended (r8 or
- /// higher) register? e.g. r8, xmm8, xmm13, etc.
- static bool isX86_64ExtendedReg(unsigned RegNo);
-
/// getGlobalBaseReg - Return a virtual register initialized with the
/// global base register value. Output instructions required to
/// initialize the register in the function entry block, if necessary.
///
unsigned getGlobalBaseReg(MachineFunction *MF) const;
- /// GetSSEDomain - Return the SSE execution domain of MI as the first element,
- /// and a bitmask of possible arguments to SetSSEDomain as the second.
- std::pair<uint16_t, uint16_t> GetSSEDomain(const MachineInstr *MI) const;
+ std::pair<uint16_t, uint16_t>
+ getExecutionDomain(const MachineInstr *MI) const;
- /// SetSSEDomain - Set the SSEDomain of MI.
- void SetSSEDomain(MachineInstr *MI, unsigned Domain) const;
+ void setExecutionDomain(MachineInstr *MI, unsigned Domain) const;
MachineInstr* foldMemoryOperandImpl(MachineFunction &MF,
MachineInstr* MI,
diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td
index 7eb07b0..d54bf27 100644
--- a/lib/Target/X86/X86InstrInfo.td
+++ b/lib/Target/X86/X86InstrInfo.td
@@ -65,7 +65,7 @@ def SDTX86SetCC_C : SDTypeProfile<1, 2,
def SDTX86cas : SDTypeProfile<0, 3, [SDTCisPtrTy<0>, SDTCisInt<1>,
SDTCisVT<2, i8>]>;
-def SDTX86cas8 : SDTypeProfile<0, 1, [SDTCisPtrTy<0>]>;
+def SDTX86caspair : SDTypeProfile<0, 1, [SDTCisPtrTy<0>]>;
def SDTX86atomicBinary : SDTypeProfile<2, 3, [SDTCisInt<0>, SDTCisInt<1>,
SDTCisPtrTy<2>, SDTCisInt<3>,SDTCisInt<4>]>;
@@ -97,6 +97,8 @@ def SDT_X86TLSADDR : SDTypeProfile<0, 1, [SDTCisInt<0>]>;
def SDT_X86TLSCALL : SDTypeProfile<0, 1, [SDTCisInt<0>]>;
+def SDT_X86SEG_ALLOCA : SDTypeProfile<1, 1, [SDTCisVT<0, iPTR>, SDTCisVT<1, iPTR>]>;
+
def SDT_X86EHRET : SDTypeProfile<0, 1, [SDTCisInt<0>]>;
def SDT_X86TCRET : SDTypeProfile<0, 2, [SDTCisPtrTy<0>, SDTCisVT<1, i32>]>;
@@ -133,9 +135,13 @@ def X86setcc_c : SDNode<"X86ISD::SETCC_CARRY", SDTX86SetCC_C>;
def X86cas : SDNode<"X86ISD::LCMPXCHG_DAG", SDTX86cas,
[SDNPHasChain, SDNPInGlue, SDNPOutGlue, SDNPMayStore,
SDNPMayLoad, SDNPMemOperand]>;
-def X86cas8 : SDNode<"X86ISD::LCMPXCHG8_DAG", SDTX86cas8,
+def X86cas8 : SDNode<"X86ISD::LCMPXCHG8_DAG", SDTX86caspair,
+ [SDNPHasChain, SDNPInGlue, SDNPOutGlue, SDNPMayStore,
+ SDNPMayLoad, SDNPMemOperand]>;
+def X86cas16 : SDNode<"X86ISD::LCMPXCHG16_DAG", SDTX86caspair,
[SDNPHasChain, SDNPInGlue, SDNPOutGlue, SDNPMayStore,
SDNPMayLoad, SDNPMemOperand]>;
+
def X86AtomAdd64 : SDNode<"X86ISD::ATOMADD64_DAG", SDTX86atomicBinary,
[SDNPHasChain, SDNPMayStore,
SDNPMayLoad, SDNPMemOperand]>;
@@ -218,12 +224,16 @@ def X86xor_flag : SDNode<"X86ISD::XOR", SDTBinaryArithWithFlags,
[SDNPCommutative]>;
def X86and_flag : SDNode<"X86ISD::AND", SDTBinaryArithWithFlags,
[SDNPCommutative]>;
+def X86andn_flag : SDNode<"X86ISD::ANDN", SDTBinaryArithWithFlags>;
def X86mul_imm : SDNode<"X86ISD::MUL_IMM", SDTIntBinOp>;
def X86WinAlloca : SDNode<"X86ISD::WIN_ALLOCA", SDTX86Void,
[SDNPHasChain, SDNPInGlue, SDNPOutGlue]>;
+def X86SegAlloca : SDNode<"X86ISD::SEG_ALLOCA", SDT_X86SEG_ALLOCA,
+ [SDNPHasChain]>;
+
def X86TLSCall : SDNode<"X86ISD::TLSCALL", SDT_X86TLSCALL,
[SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>;
@@ -331,6 +341,11 @@ class ImmSExtAsmOperandClass : AsmOperandClass {
let RenderMethod = "addImmOperands";
}
+class ImmZExtAsmOperandClass : AsmOperandClass {
+ let SuperClasses = [ImmAsmOperand];
+ let RenderMethod = "addImmOperands";
+}
+
// Sign-extended immediate classes. We don't need to define the full lattice
// here because there is no instruction with an ambiguity between ImmSExti64i32
// and ImmSExti32i8.
@@ -358,6 +373,12 @@ def ImmSExti32i8AsmOperand : ImmSExtAsmOperandClass {
let Name = "ImmSExti32i8";
}
+// [0, 0x000000FF]
+def ImmZExtu32u8AsmOperand : ImmZExtAsmOperandClass {
+ let Name = "ImmZExtu32u8";
+}
+
+
// [0, 0x0000007F] |
// [0xFFFFFFFFFFFFFF80, 0xFFFFFFFFFFFFFFFF]
def ImmSExti64i8AsmOperand : ImmSExtAsmOperandClass {
@@ -377,6 +398,11 @@ def i32i8imm : Operand<i32> {
let ParserMatchClass = ImmSExti32i8AsmOperand;
let OperandType = "OPERAND_IMMEDIATE";
}
+// 32-bits but only 8 bits are significant, and those 8 bits are unsigned.
+def u32u8imm : Operand<i32> {
+ let ParserMatchClass = ImmZExtu32u8AsmOperand;
+ let OperandType = "OPERAND_IMMEDIATE";
+}
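The difference between the sign-extended i32i8imm and the new zero-extended u32u8imm is just a range check on which immediates the assembler accepts. A hedged sketch of the two predicates (the real matching lives in the X86 assembly parser, not in helpers like these):

// Illustrative range checks behind the operand classes above, assuming the
// immediate is held as a signed 64-bit value.
#include <cstdint>

static bool isImmSExti32i8(int64_t V) {
  // [0, 0x7F] or [0xFFFFFF80, 0xFFFFFFFF] as 32-bit values, i.e. the value
  // survives a sign-extend-from-8-bits round trip.
  return V >= -128 && V <= 127;
}

static bool isImmZExtu32u8(int64_t V) {
  return V >= 0 && V <= 0xFF; // [0, 0x000000FF]
}

int main() {
  return (isImmSExti32i8(-1) && !isImmZExtu32u8(-1) && isImmZExtu32u8(0xFF))
             ? 0 : 1;
}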
// 64-bits but only 32 bits are significant.
def i64i32imm : Operand<i64> {
@@ -389,11 +415,13 @@ def i64i32imm : Operand<i64> {
def i64i32imm_pcrel : Operand<i64> {
let PrintMethod = "print_pcrel_imm";
let ParserMatchClass = X86AbsMemAsmOperand;
+ let OperandType = "OPERAND_PCREL";
}
// 64-bits but only 8 bits are significant.
def i64i8imm : Operand<i64> {
let ParserMatchClass = ImmSExti64i8AsmOperand;
+ let OperandType = "OPERAND_IMMEDIATE";
}
def lea64_32mem : Operand<i32> {
@@ -442,18 +470,33 @@ def HasSSE4A : Predicate<"Subtarget->hasSSE4A()">;
def HasAVX : Predicate<"Subtarget->hasAVX()">;
def HasXMMInt : Predicate<"Subtarget->hasXMMInt()">;
+def HasPOPCNT : Predicate<"Subtarget->hasPOPCNT()">;
def HasAES : Predicate<"Subtarget->hasAES()">;
def HasCLMUL : Predicate<"Subtarget->hasCLMUL()">;
def HasFMA3 : Predicate<"Subtarget->hasFMA3()">;
def HasFMA4 : Predicate<"Subtarget->hasFMA4()">;
+def HasMOVBE : Predicate<"Subtarget->hasMOVBE()">;
+def HasRDRAND : Predicate<"Subtarget->hasRDRAND()">;
+def HasF16C : Predicate<"Subtarget->hasF16C()">;
+def HasLZCNT : Predicate<"Subtarget->hasLZCNT()">;
+def HasBMI : Predicate<"Subtarget->hasBMI()">;
def FPStackf32 : Predicate<"!Subtarget->hasXMM()">;
def FPStackf64 : Predicate<"!Subtarget->hasXMMInt()">;
+def HasCmpxchg16b: Predicate<"Subtarget->hasCmpxchg16b()">;
def In32BitMode : Predicate<"!Subtarget->is64Bit()">,
AssemblerPredicate<"!Mode64Bit">;
def In64BitMode : Predicate<"Subtarget->is64Bit()">,
AssemblerPredicate<"Mode64Bit">;
def IsWin64 : Predicate<"Subtarget->isTargetWin64()">;
def NotWin64 : Predicate<"!Subtarget->isTargetWin64()">;
+def IsNaCl : Predicate<"Subtarget->isTargetNaCl()">,
+ AssemblerPredicate<"ModeNaCl">;
+def IsNaCl32 : Predicate<"Subtarget->isTargetNaCl32()">,
+ AssemblerPredicate<"ModeNaCl,!Mode64Bit">;
+def IsNaCl64 : Predicate<"Subtarget->isTargetNaCl64()">,
+ AssemblerPredicate<"ModeNaCl,Mode64Bit">;
+def NotNaCl : Predicate<"!Subtarget->isTargetNaCl()">,
+ AssemblerPredicate<"!ModeNaCl">;
def SmallCode : Predicate<"TM.getCodeModel() == CodeModel::Small">;
def KernelCode : Predicate<"TM.getCodeModel() == CodeModel::Kernel">;
def FarData : Predicate<"TM.getCodeModel() != CodeModel::Small &&"
@@ -766,30 +809,30 @@ def BSR64rm : RI<0xBD, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src),
// These use the DF flag in the EFLAGS register to inc or dec EDI and ESI
let Defs = [EDI,ESI], Uses = [EDI,ESI,EFLAGS] in {
-def MOVSB : I<0xA4, RawFrm, (outs), (ins), "{movsb}", []>;
-def MOVSW : I<0xA5, RawFrm, (outs), (ins), "{movsw}", []>, OpSize;
-def MOVSD : I<0xA5, RawFrm, (outs), (ins), "{movsl|movsd}", []>;
+def MOVSB : I<0xA4, RawFrm, (outs), (ins), "movsb", []>;
+def MOVSW : I<0xA5, RawFrm, (outs), (ins), "movsw", []>, OpSize;
+def MOVSD : I<0xA5, RawFrm, (outs), (ins), "movs{l|d}", []>;
def MOVSQ : RI<0xA5, RawFrm, (outs), (ins), "movsq", []>;
}
// These use the DF flag in the EFLAGS register to inc or dec EDI and ESI
let Defs = [EDI], Uses = [AL,EDI,EFLAGS] in
-def STOSB : I<0xAA, RawFrm, (outs), (ins), "{stosb}", []>;
+def STOSB : I<0xAA, RawFrm, (outs), (ins), "stosb", []>;
let Defs = [EDI], Uses = [AX,EDI,EFLAGS] in
-def STOSW : I<0xAB, RawFrm, (outs), (ins), "{stosw}", []>, OpSize;
+def STOSW : I<0xAB, RawFrm, (outs), (ins), "stosw", []>, OpSize;
let Defs = [EDI], Uses = [EAX,EDI,EFLAGS] in
-def STOSD : I<0xAB, RawFrm, (outs), (ins), "{stosl|stosd}", []>;
+def STOSD : I<0xAB, RawFrm, (outs), (ins), "stos{l|d}", []>;
let Defs = [RCX,RDI], Uses = [RAX,RCX,RDI,EFLAGS] in
def STOSQ : RI<0xAB, RawFrm, (outs), (ins), "stosq", []>;
-def SCAS8 : I<0xAE, RawFrm, (outs), (ins), "scas{b}", []>;
-def SCAS16 : I<0xAF, RawFrm, (outs), (ins), "scas{w}", []>, OpSize;
-def SCAS32 : I<0xAF, RawFrm, (outs), (ins), "scas{l}", []>;
+def SCAS8 : I<0xAE, RawFrm, (outs), (ins), "scasb", []>;
+def SCAS16 : I<0xAF, RawFrm, (outs), (ins), "scasw", []>, OpSize;
+def SCAS32 : I<0xAF, RawFrm, (outs), (ins), "scas{l|d}", []>;
def SCAS64 : RI<0xAF, RawFrm, (outs), (ins), "scasq", []>;
-def CMPS8 : I<0xA6, RawFrm, (outs), (ins), "cmps{b}", []>;
-def CMPS16 : I<0xA7, RawFrm, (outs), (ins), "cmps{w}", []>, OpSize;
-def CMPS32 : I<0xA7, RawFrm, (outs), (ins), "cmps{l}", []>;
+def CMPS8 : I<0xA6, RawFrm, (outs), (ins), "cmpsb", []>;
+def CMPS16 : I<0xA7, RawFrm, (outs), (ins), "cmpsw", []>, OpSize;
+def CMPS32 : I<0xA7, RawFrm, (outs), (ins), "cmps{l|d}", []>;
def CMPS64 : RI<0xA7, RawFrm, (outs), (ins), "cmpsq", []>;
@@ -841,22 +884,22 @@ def MOV64mi32 : RIi32<0xC7, MRM0m, (outs), (ins i64mem:$dst, i64i32imm:$src),
/// moffs8, moffs16 and moffs32 versions of moves. The immediate is a
/// 32-bit offset from the PC. These are only valid in x86-32 mode.
def MOV8o8a : Ii32 <0xA0, RawFrm, (outs), (ins offset8:$src),
- "mov{b}\t{$src, %al|%al, $src}", []>,
+ "mov{b}\t{$src, %al|AL, $src}", []>,
Requires<[In32BitMode]>;
def MOV16o16a : Ii32 <0xA1, RawFrm, (outs), (ins offset16:$src),
- "mov{w}\t{$src, %ax|%ax, $src}", []>, OpSize,
+ "mov{w}\t{$src, %ax|AL, $src}", []>, OpSize,
Requires<[In32BitMode]>;
def MOV32o32a : Ii32 <0xA1, RawFrm, (outs), (ins offset32:$src),
- "mov{l}\t{$src, %eax|%eax, $src}", []>,
+ "mov{l}\t{$src, %eax|EAX, $src}", []>,
Requires<[In32BitMode]>;
def MOV8ao8 : Ii32 <0xA2, RawFrm, (outs offset8:$dst), (ins),
- "mov{b}\t{%al, $dst|$dst, %al}", []>,
+ "mov{b}\t{%al, $dst|$dst, AL}", []>,
Requires<[In32BitMode]>;
def MOV16ao16 : Ii32 <0xA3, RawFrm, (outs offset16:$dst), (ins),
- "mov{w}\t{%ax, $dst|$dst, %ax}", []>, OpSize,
+ "mov{w}\t{%ax, $dst|$dst, AL}", []>, OpSize,
Requires<[In32BitMode]>;
def MOV32ao32 : Ii32 <0xA3, RawFrm, (outs offset32:$dst), (ins),
- "mov{l}\t{%eax, $dst|$dst, %eax}", []>,
+ "mov{l}\t{%eax, $dst|$dst, EAX}", []>,
Requires<[In32BitMode]>;
// FIXME: These definitions are utterly broken
@@ -865,13 +908,13 @@ def MOV32ao32 : Ii32 <0xA3, RawFrm, (outs offset32:$dst), (ins),
// in question.
/*
def MOV64o8a : RIi8<0xA0, RawFrm, (outs), (ins offset8:$src),
- "mov{q}\t{$src, %rax|%rax, $src}", []>;
+ "mov{q}\t{$src, %rax|RAX, $src}", []>;
def MOV64o64a : RIi32<0xA1, RawFrm, (outs), (ins offset64:$src),
- "mov{q}\t{$src, %rax|%rax, $src}", []>;
+ "mov{q}\t{$src, %rax|RAX, $src}", []>;
def MOV64ao8 : RIi8<0xA2, RawFrm, (outs offset8:$dst), (ins),
- "mov{q}\t{%rax, $dst|$dst, %rax}", []>;
+ "mov{q}\t{%rax, $dst|$dst, RAX}", []>;
def MOV64ao64 : RIi32<0xA3, RawFrm, (outs offset64:$dst), (ins),
- "mov{q}\t{%rax, $dst|$dst, %rax}", []>;
+ "mov{q}\t{%rax, $dst|$dst, RAX}", []>;
*/
@@ -926,7 +969,7 @@ let mayStore = 1 in
def MOV8mr_NOREX : I<0x88, MRMDestMem,
(outs), (ins i8mem_NOREX:$dst, GR8_NOREX:$src),
"mov{b}\t{$src, $dst|$dst, $src} # NOREX", []>;
-let mayLoad = 1,
+let mayLoad = 1, neverHasSideEffects = 1,
canFoldAsLoad = 1, isReMaterializable = 1 in
def MOV8rm_NOREX : I<0x8A, MRMSrcMem,
(outs GR8_NOREX:$dst), (ins i8mem_NOREX:$src),
@@ -1117,11 +1160,15 @@ def XCHG64rr : RI<0x87, MRMSrcReg, (outs GR64:$dst), (ins GR64:$val,GR64:$src),
}
def XCHG16ar : I<0x90, AddRegFrm, (outs), (ins GR16:$src),
- "xchg{w}\t{$src, %ax|%ax, $src}", []>, OpSize;
+ "xchg{w}\t{$src, %ax|AX, $src}", []>, OpSize;
def XCHG32ar : I<0x90, AddRegFrm, (outs), (ins GR32:$src),
- "xchg{l}\t{$src, %eax|%eax, $src}", []>;
+ "xchg{l}\t{$src, %eax|EAX, $src}", []>, Requires<[In32BitMode]>;
+// Uses GR32_NOAX in 64-bit mode so this is never encoded as the 0x90 NOP.
+// xchg %eax, %eax must clear the upper 32 bits of RAX, so it is not a NOP.
+def XCHG32ar64 : I<0x90, AddRegFrm, (outs), (ins GR32_NOAX:$src),
+ "xchg{l}\t{$src, %eax|EAX, $src}", []>, Requires<[In64BitMode]>;
def XCHG64ar : RI<0x90, AddRegFrm, (outs), (ins GR64:$src),
- "xchg{q}\t{$src, %rax|%rax, $src}", []>;
+ "xchg{q}\t{$src, %rax|RAX, $src}", []>;
@@ -1172,7 +1219,7 @@ def CMPXCHG8B : I<0xC7, MRM1m, (outs), (ins i64mem:$dst),
let Defs = [RAX, RDX, EFLAGS], Uses = [RAX, RBX, RCX, RDX] in
def CMPXCHG16B : RI<0xC7, MRM1m, (outs), (ins i128mem:$dst),
- "cmpxchg16b\t$dst", []>, TB;
+ "cmpxchg16b\t$dst", []>, TB, Requires<[HasCmpxchg16b]>;
@@ -1261,6 +1308,104 @@ def ARPL16mr : I<0x63, MRMSrcMem, (outs GR16:$src), (ins i16mem:$dst),
"arpl\t{$src, $dst|$dst, $src}", []>, Requires<[In32BitMode]>;
//===----------------------------------------------------------------------===//
+// MOVBE Instructions
+//
+let Predicates = [HasMOVBE] in {
+ def MOVBE16rm : I<0xF0, MRMSrcMem, (outs GR16:$dst), (ins i16mem:$src),
+ "movbe{w}\t{$src, $dst|$dst, $src}",
+ [(set GR16:$dst, (bswap (loadi16 addr:$src)))]>, OpSize, T8;
+ def MOVBE32rm : I<0xF0, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src),
+ "movbe{l}\t{$src, $dst|$dst, $src}",
+ [(set GR32:$dst, (bswap (loadi32 addr:$src)))]>, T8;
+ def MOVBE64rm : RI<0xF0, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src),
+ "movbe{q}\t{$src, $dst|$dst, $src}",
+ [(set GR64:$dst, (bswap (loadi64 addr:$src)))]>, T8;
+ def MOVBE16mr : I<0xF1, MRMDestMem, (outs), (ins i16mem:$dst, GR16:$src),
+ "movbe{w}\t{$src, $dst|$dst, $src}",
+ [(store (bswap GR16:$src), addr:$dst)]>, OpSize, T8;
+ def MOVBE32mr : I<0xF1, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src),
+ "movbe{l}\t{$src, $dst|$dst, $src}",
+ [(store (bswap GR32:$src), addr:$dst)]>, T8;
+ def MOVBE64mr : RI<0xF1, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src),
+ "movbe{q}\t{$src, $dst|$dst, $src}",
+ [(store (bswap GR64:$src), addr:$dst)]>, T8;
+}
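The MOVBE patterns above match a load or store fused with a bswap. A portable C++ equivalent of what they select on (__builtin_bswap32 is a GCC/Clang builtin; whether the compiler actually emits movbe for this depends on -mmovbe and is not guaranteed):

// The operation MOVBE implements: a byte-swapping load/store. Illustrative.
#include <cstdint>
#include <cstring>

static uint32_t load_be32(const void *P) {
  uint32_t V;
  std::memcpy(&V, P, sizeof V); // the (loadi32 addr:$src) half of the pattern
  return __builtin_bswap32(V);  // the bswap half
}

static void store_be32(void *P, uint32_t V) {
  V = __builtin_bswap32(V);     // bswap, then store
  std::memcpy(P, &V, sizeof V);
}

int main() {
  unsigned char Buf[4];
  store_be32(Buf, 0x12345678u);
  return (Buf[0] == 0x12 && load_be32(Buf) == 0x12345678u) ? 0 : 1;
}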
+
+//===----------------------------------------------------------------------===//
+// RDRAND Instruction
+//
+let Predicates = [HasRDRAND], Defs = [EFLAGS] in {
+ def RDRAND16r : I<0xC7, MRM6r, (outs GR16:$dst), (ins),
+ "rdrand{w}\t$dst", []>, OpSize, TB;
+ def RDRAND32r : I<0xC7, MRM6r, (outs GR32:$dst), (ins),
+ "rdrand{l}\t$dst", []>, TB;
+ def RDRAND64r : RI<0xC7, MRM6r, (outs GR64:$dst), (ins),
+ "rdrand{q}\t$dst", []>, TB;
+}
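Defs = [EFLAGS] on the RDRAND definitions is not incidental: the instruction reports success in the carry flag, and a clear CF means the caller must retry. The intrinsic spelling makes this visible (needs a compiler targeting -mrdrnd; sketch only):

// RDRAND's EFLAGS def is its success signal: _rdrand32_step returns the
// carry flag. A bounded retry loop is the usual consumption pattern.
#include <immintrin.h>

static int get_random_u32(unsigned *Out) {
  for (int Tries = 0; Tries < 10; ++Tries)
    if (_rdrand32_step(Out)) // 1 = valid value, 0 = underflow, retry
      return 1;
  return 0;                  // hardware persistently failed
}

int main() {
  unsigned V;
  return get_random_u32(&V) ? 0 : 1;
}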
+
+//===----------------------------------------------------------------------===//
+// LZCNT Instruction
+//
+let Predicates = [HasLZCNT], Defs = [EFLAGS] in {
+ def LZCNT16rr : I<0xBD, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src),
+ "lzcnt{w}\t{$src, $dst|$dst, $src}",
+ [(set GR16:$dst, (ctlz GR16:$src)), (implicit EFLAGS)]>, XS,
+ OpSize;
+ def LZCNT16rm : I<0xBD, MRMSrcMem, (outs GR16:$dst), (ins i16mem:$src),
+ "lzcnt{w}\t{$src, $dst|$dst, $src}",
+ [(set GR16:$dst, (ctlz (loadi16 addr:$src))),
+ (implicit EFLAGS)]>, XS, OpSize;
+
+ def LZCNT32rr : I<0xBD, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src),
+ "lzcnt{l}\t{$src, $dst|$dst, $src}",
+ [(set GR32:$dst, (ctlz GR32:$src)), (implicit EFLAGS)]>, XS;
+ def LZCNT32rm : I<0xBD, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src),
+ "lzcnt{l}\t{$src, $dst|$dst, $src}",
+ [(set GR32:$dst, (ctlz (loadi32 addr:$src))),
+ (implicit EFLAGS)]>, XS;
+
+ def LZCNT64rr : RI<0xBD, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src),
+ "lzcnt{q}\t{$src, $dst|$dst, $src}",
+ [(set GR64:$dst, (ctlz GR64:$src)), (implicit EFLAGS)]>,
+ XS;
+ def LZCNT64rm : RI<0xBD, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src),
+ "lzcnt{q}\t{$src, $dst|$dst, $src}",
+ [(set GR64:$dst, (ctlz (loadi64 addr:$src))),
+ (implicit EFLAGS)]>, XS;
+}
+
+//===----------------------------------------------------------------------===//
+// TZCNT Instruction
+//
+let Predicates = [HasBMI], Defs = [EFLAGS] in {
+ def TZCNT16rr : I<0xBC, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src),
+ "tzcnt{w}\t{$src, $dst|$dst, $src}",
+ [(set GR16:$dst, (cttz GR16:$src)), (implicit EFLAGS)]>, XS,
+ OpSize;
+ def TZCNT16rm : I<0xBC, MRMSrcMem, (outs GR16:$dst), (ins i16mem:$src),
+ "tzcnt{w}\t{$src, $dst|$dst, $src}",
+ [(set GR16:$dst, (cttz (loadi16 addr:$src))),
+ (implicit EFLAGS)]>, XS, OpSize;
+
+ def TZCNT32rr : I<0xBC, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src),
+ "tzcnt{l}\t{$src, $dst|$dst, $src}",
+ [(set GR32:$dst, (cttz GR32:$src)), (implicit EFLAGS)]>, XS;
+ def TZCNT32rm : I<0xBC, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src),
+ "tzcnt{l}\t{$src, $dst|$dst, $src}",
+ [(set GR32:$dst, (cttz (loadi32 addr:$src))),
+ (implicit EFLAGS)]>, XS;
+
+ def TZCNT64rr : RI<0xBC, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src),
+ "tzcnt{q}\t{$src, $dst|$dst, $src}",
+ [(set GR64:$dst, (cttz GR64:$src)), (implicit EFLAGS)]>,
+ XS;
+ def TZCNT64rm : RI<0xBC, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src),
+ "tzcnt{q}\t{$src, $dst|$dst, $src}",
+ [(set GR64:$dst, (cttz (loadi64 addr:$src))),
+ (implicit EFLAGS)]>, XS;
+}
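Both blocks above select on the generic ctlz/cttz nodes. The GCC/Clang builtins compute the same thing, with one caveat the hardware versions don't share: __builtin_clz/__builtin_ctz are undefined at zero, whereas lzcnt/tzcnt return the operand width, so a faithful sketch has to special-case zero:

// What the ctlz/cttz patterns above compute, in builtin form.
#include <cstdint>

static unsigned lzcnt32(uint32_t X) {
  return X ? (unsigned)__builtin_clz(X) : 32; // lzcnt of 0 is the bit width
}
static unsigned tzcnt32(uint32_t X) {
  return X ? (unsigned)__builtin_ctz(X) : 32;
}

int main() {
  return (lzcnt32(1u << 20) == 11 && tzcnt32(1u << 20) == 20 &&
          lzcnt32(0) == 32) ? 0 : 1;
}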
+
+//===----------------------------------------------------------------------===//
// Subsystems.
//===----------------------------------------------------------------------===//
@@ -1646,3 +1791,9 @@ def : InstAlias<"xchgb $mem, $val", (XCHG8rm GR8 :$val, i8mem :$mem)>;
def : InstAlias<"xchgw $mem, $val", (XCHG16rm GR16:$val, i16mem:$mem)>;
def : InstAlias<"xchgl $mem, $val", (XCHG32rm GR32:$val, i32mem:$mem)>;
def : InstAlias<"xchgq $mem, $val", (XCHG64rm GR64:$val, i64mem:$mem)>;
+
+// xchg: We accept "xchgX <reg>, %eax" and "xchgX %eax, <reg>" as synonyms.
+def : InstAlias<"xchgw %ax, $src", (XCHG16ar GR16:$src)>;
+def : InstAlias<"xchgl %eax, $src", (XCHG32ar GR32:$src)>, Requires<[In32BitMode]>;
+def : InstAlias<"xchgl %eax, $src", (XCHG32ar64 GR32_NOAX:$src)>, Requires<[In64BitMode]>;
+def : InstAlias<"xchgq %rax, $src", (XCHG64ar GR64:$src)>;
diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td
index fe11d77..d3ced23 100644
--- a/lib/Target/X86/X86InstrSSE.td
+++ b/lib/Target/X86/X86InstrSSE.td
@@ -116,7 +116,217 @@ multiclass sse12_fp_packed_int<bits<8> opc, string OpcodeStr, RegisterClass RC,
}
//===----------------------------------------------------------------------===//
-// SSE 1 & 2 - Move Instructions
+// Non-instruction patterns
+//===----------------------------------------------------------------------===//
+
+// A vector extract of the first f32/f64 position is a subregister copy
+def : Pat<(f32 (vector_extract (v4f32 VR128:$src), (iPTR 0))),
+ (f32 (EXTRACT_SUBREG (v4f32 VR128:$src), sub_ss))>;
+def : Pat<(f64 (vector_extract (v2f64 VR128:$src), (iPTR 0))),
+ (f64 (EXTRACT_SUBREG (v2f64 VR128:$src), sub_sd))>;
+
+// A 128-bit subvector extract from the first 256-bit vector position
+// is a subregister copy that needs no instruction.
+def : Pat<(v4i32 (extract_subvector (v8i32 VR256:$src), (i32 0))),
+ (v4i32 (EXTRACT_SUBREG (v8i32 VR256:$src), sub_xmm))>;
+def : Pat<(v4f32 (extract_subvector (v8f32 VR256:$src), (i32 0))),
+ (v4f32 (EXTRACT_SUBREG (v8f32 VR256:$src), sub_xmm))>;
+
+def : Pat<(v2i64 (extract_subvector (v4i64 VR256:$src), (i32 0))),
+ (v2i64 (EXTRACT_SUBREG (v4i64 VR256:$src), sub_xmm))>;
+def : Pat<(v2f64 (extract_subvector (v4f64 VR256:$src), (i32 0))),
+ (v2f64 (EXTRACT_SUBREG (v4f64 VR256:$src), sub_xmm))>;
+
+def : Pat<(v8i16 (extract_subvector (v16i16 VR256:$src), (i32 0))),
+ (v8i16 (EXTRACT_SUBREG (v16i16 VR256:$src), sub_xmm))>;
+def : Pat<(v16i8 (extract_subvector (v32i8 VR256:$src), (i32 0))),
+ (v16i8 (EXTRACT_SUBREG (v32i8 VR256:$src), sub_xmm))>;
+
+// A 128-bit subvector insert to the first 256-bit vector position
+// is a subregister copy that needs no instruction.
+def : Pat<(insert_subvector undef, (v2i64 VR128:$src), (i32 0)),
+ (INSERT_SUBREG (v4i64 (IMPLICIT_DEF)), VR128:$src, sub_xmm)>;
+def : Pat<(insert_subvector undef, (v2f64 VR128:$src), (i32 0)),
+ (INSERT_SUBREG (v4f64 (IMPLICIT_DEF)), VR128:$src, sub_xmm)>;
+def : Pat<(insert_subvector undef, (v4i32 VR128:$src), (i32 0)),
+ (INSERT_SUBREG (v8i32 (IMPLICIT_DEF)), VR128:$src, sub_xmm)>;
+def : Pat<(insert_subvector undef, (v4f32 VR128:$src), (i32 0)),
+ (INSERT_SUBREG (v8f32 (IMPLICIT_DEF)), VR128:$src, sub_xmm)>;
+def : Pat<(insert_subvector undef, (v8i16 VR128:$src), (i32 0)),
+ (INSERT_SUBREG (v16i16 (IMPLICIT_DEF)), VR128:$src, sub_xmm)>;
+def : Pat<(insert_subvector undef, (v16i8 VR128:$src), (i32 0)),
+ (INSERT_SUBREG (v32i8 (IMPLICIT_DEF)), VR128:$src, sub_xmm)>;
+
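These extract/insert patterns encode the fact that the low 128 bits of a YMM register simply are the corresponding XMM register, so crossing between the two is a subregister copy rather than an instruction. The AVX cast intrinsics are the source-level counterpart and likewise compile to no code (requires -mavx; purely illustrative):

// Source-level view of the subvector patterns above.
#include <immintrin.h>

static __m128 LowHalf(__m256 V) {
  return _mm256_castps256_ps128(V); // extract_subvector ..., (i32 0)
}

static __m256 Widen(__m128 V) {
  // insert_subvector undef, ..., (i32 0); the upper 128 bits stay undefined.
  return _mm256_castps128_ps256(V);
}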
+// Implicitly promote a 32-bit scalar to a vector.
+def : Pat<(v4f32 (scalar_to_vector FR32:$src)),
+ (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), FR32:$src, sub_ss)>;
+def : Pat<(v8f32 (scalar_to_vector FR32:$src)),
+ (INSERT_SUBREG (v8f32 (IMPLICIT_DEF)), FR32:$src, sub_ss)>;
+// Implicitly promote a 64-bit scalar to a vector.
+def : Pat<(v2f64 (scalar_to_vector FR64:$src)),
+ (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), FR64:$src, sub_sd)>;
+def : Pat<(v4f64 (scalar_to_vector FR64:$src)),
+ (INSERT_SUBREG (v4f64 (IMPLICIT_DEF)), FR64:$src, sub_sd)>;
+
+// Bitcasts between 128-bit vector types. Return the original type since
+// no instruction is needed for the conversion.
+let Predicates = [HasXMMInt] in {
+ def : Pat<(v2i64 (bitconvert (v4i32 VR128:$src))), (v2i64 VR128:$src)>;
+ def : Pat<(v2i64 (bitconvert (v8i16 VR128:$src))), (v2i64 VR128:$src)>;
+ def : Pat<(v2i64 (bitconvert (v16i8 VR128:$src))), (v2i64 VR128:$src)>;
+ def : Pat<(v2i64 (bitconvert (v2f64 VR128:$src))), (v2i64 VR128:$src)>;
+ def : Pat<(v2i64 (bitconvert (v4f32 VR128:$src))), (v2i64 VR128:$src)>;
+ def : Pat<(v4i32 (bitconvert (v2i64 VR128:$src))), (v4i32 VR128:$src)>;
+ def : Pat<(v4i32 (bitconvert (v8i16 VR128:$src))), (v4i32 VR128:$src)>;
+ def : Pat<(v4i32 (bitconvert (v16i8 VR128:$src))), (v4i32 VR128:$src)>;
+ def : Pat<(v4i32 (bitconvert (v2f64 VR128:$src))), (v4i32 VR128:$src)>;
+ def : Pat<(v4i32 (bitconvert (v4f32 VR128:$src))), (v4i32 VR128:$src)>;
+ def : Pat<(v8i16 (bitconvert (v2i64 VR128:$src))), (v8i16 VR128:$src)>;
+ def : Pat<(v8i16 (bitconvert (v4i32 VR128:$src))), (v8i16 VR128:$src)>;
+ def : Pat<(v8i16 (bitconvert (v16i8 VR128:$src))), (v8i16 VR128:$src)>;
+ def : Pat<(v8i16 (bitconvert (v2f64 VR128:$src))), (v8i16 VR128:$src)>;
+ def : Pat<(v8i16 (bitconvert (v4f32 VR128:$src))), (v8i16 VR128:$src)>;
+ def : Pat<(v16i8 (bitconvert (v2i64 VR128:$src))), (v16i8 VR128:$src)>;
+ def : Pat<(v16i8 (bitconvert (v4i32 VR128:$src))), (v16i8 VR128:$src)>;
+ def : Pat<(v16i8 (bitconvert (v8i16 VR128:$src))), (v16i8 VR128:$src)>;
+ def : Pat<(v16i8 (bitconvert (v2f64 VR128:$src))), (v16i8 VR128:$src)>;
+ def : Pat<(v16i8 (bitconvert (v4f32 VR128:$src))), (v16i8 VR128:$src)>;
+ def : Pat<(v4f32 (bitconvert (v2i64 VR128:$src))), (v4f32 VR128:$src)>;
+ def : Pat<(v4f32 (bitconvert (v4i32 VR128:$src))), (v4f32 VR128:$src)>;
+ def : Pat<(v4f32 (bitconvert (v8i16 VR128:$src))), (v4f32 VR128:$src)>;
+ def : Pat<(v4f32 (bitconvert (v16i8 VR128:$src))), (v4f32 VR128:$src)>;
+ def : Pat<(v4f32 (bitconvert (v2f64 VR128:$src))), (v4f32 VR128:$src)>;
+ def : Pat<(v2f64 (bitconvert (v2i64 VR128:$src))), (v2f64 VR128:$src)>;
+ def : Pat<(v2f64 (bitconvert (v4i32 VR128:$src))), (v2f64 VR128:$src)>;
+ def : Pat<(v2f64 (bitconvert (v8i16 VR128:$src))), (v2f64 VR128:$src)>;
+ def : Pat<(v2f64 (bitconvert (v16i8 VR128:$src))), (v2f64 VR128:$src)>;
+ def : Pat<(v2f64 (bitconvert (v4f32 VR128:$src))), (v2f64 VR128:$src)>;
+}
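Likewise for the bitconvert patterns just above: a bitcast between 128-bit vector types never moves data, it only changes the type the register is viewed at. The SSE2 cast intrinsics express exactly this and cost no instructions (illustrative):

// Zero-cost reinterpretation, matching the bitconvert patterns above.
#include <emmintrin.h>

static __m128i AsInt(__m128 V)    { return _mm_castps_si128(V); }
static __m128  AsFloat(__m128i V) { return _mm_castsi128_ps(V); }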
+
+// Bitcasts between 256-bit vector types. Return the original type since
+// no instruction is needed for the conversion.
+let Predicates = [HasAVX] in {
+ def : Pat<(v4f64 (bitconvert (v8f32 VR256:$src))), (v4f64 VR256:$src)>;
+ def : Pat<(v4f64 (bitconvert (v8i32 VR256:$src))), (v4f64 VR256:$src)>;
+ def : Pat<(v4f64 (bitconvert (v4i64 VR256:$src))), (v4f64 VR256:$src)>;
+ def : Pat<(v4f64 (bitconvert (v16i16 VR256:$src))), (v4f64 VR256:$src)>;
+ def : Pat<(v4f64 (bitconvert (v32i8 VR256:$src))), (v4f64 VR256:$src)>;
+ def : Pat<(v8f32 (bitconvert (v8i32 VR256:$src))), (v8f32 VR256:$src)>;
+ def : Pat<(v8f32 (bitconvert (v4i64 VR256:$src))), (v8f32 VR256:$src)>;
+ def : Pat<(v8f32 (bitconvert (v4f64 VR256:$src))), (v8f32 VR256:$src)>;
+ def : Pat<(v8f32 (bitconvert (v32i8 VR256:$src))), (v8f32 VR256:$src)>;
+ def : Pat<(v8f32 (bitconvert (v16i16 VR256:$src))), (v8f32 VR256:$src)>;
+ def : Pat<(v4i64 (bitconvert (v8f32 VR256:$src))), (v4i64 VR256:$src)>;
+ def : Pat<(v4i64 (bitconvert (v8i32 VR256:$src))), (v4i64 VR256:$src)>;
+ def : Pat<(v4i64 (bitconvert (v4f64 VR256:$src))), (v4i64 VR256:$src)>;
+ def : Pat<(v4i64 (bitconvert (v32i8 VR256:$src))), (v4i64 VR256:$src)>;
+ def : Pat<(v4i64 (bitconvert (v16i16 VR256:$src))), (v4i64 VR256:$src)>;
+ def : Pat<(v32i8 (bitconvert (v4f64 VR256:$src))), (v32i8 VR256:$src)>;
+ def : Pat<(v32i8 (bitconvert (v4i64 VR256:$src))), (v32i8 VR256:$src)>;
+ def : Pat<(v32i8 (bitconvert (v8f32 VR256:$src))), (v32i8 VR256:$src)>;
+ def : Pat<(v32i8 (bitconvert (v8i32 VR256:$src))), (v32i8 VR256:$src)>;
+ def : Pat<(v32i8 (bitconvert (v16i16 VR256:$src))), (v32i8 VR256:$src)>;
+ def : Pat<(v8i32 (bitconvert (v32i8 VR256:$src))), (v8i32 VR256:$src)>;
+ def : Pat<(v8i32 (bitconvert (v16i16 VR256:$src))), (v8i32 VR256:$src)>;
+ def : Pat<(v8i32 (bitconvert (v8f32 VR256:$src))), (v8i32 VR256:$src)>;
+ def : Pat<(v8i32 (bitconvert (v4i64 VR256:$src))), (v8i32 VR256:$src)>;
+ def : Pat<(v8i32 (bitconvert (v4f64 VR256:$src))), (v8i32 VR256:$src)>;
+ def : Pat<(v16i16 (bitconvert (v8f32 VR256:$src))), (v16i16 VR256:$src)>;
+ def : Pat<(v16i16 (bitconvert (v8i32 VR256:$src))), (v16i16 VR256:$src)>;
+ def : Pat<(v16i16 (bitconvert (v4i64 VR256:$src))), (v16i16 VR256:$src)>;
+ def : Pat<(v16i16 (bitconvert (v4f64 VR256:$src))), (v16i16 VR256:$src)>;
+ def : Pat<(v16i16 (bitconvert (v32i8 VR256:$src))), (v16i16 VR256:$src)>;
+}
+
+// Alias instructions that map fld0 to pxor for sse.
+// FIXME: Set encoding to pseudo!
+let isReMaterializable = 1, isAsCheapAsAMove = 1, isCodeGenOnly = 1,
+ canFoldAsLoad = 1 in {
+ def FsFLD0SS : I<0xEF, MRMInitReg, (outs FR32:$dst), (ins), "",
+ [(set FR32:$dst, fp32imm0)]>,
+ Requires<[HasSSE1]>, TB, OpSize;
+ def FsFLD0SD : I<0xEF, MRMInitReg, (outs FR64:$dst), (ins), "",
+ [(set FR64:$dst, fpimm0)]>,
+ Requires<[HasSSE2]>, TB, OpSize;
+ def VFsFLD0SS : I<0xEF, MRMInitReg, (outs FR32:$dst), (ins), "",
+ [(set FR32:$dst, fp32imm0)]>,
+ Requires<[HasAVX]>, TB, OpSize, VEX_4V;
+ def VFsFLD0SD : I<0xEF, MRMInitReg, (outs FR64:$dst), (ins), "",
+ [(set FR64:$dst, fpimm0)]>,
+ Requires<[HasAVX]>, TB, OpSize, VEX_4V;
+}
+
+//===----------------------------------------------------------------------===//
+// AVX & SSE - Zero/One Vectors
+//===----------------------------------------------------------------------===//
+
+// Alias instruction that maps zero vector to pxor / xorp* for sse.
+// This is expanded by ExpandPostRAPseudos to an xorps / vxorps, and then
+// swizzled by ExecutionDepsFix to pxor.
+// We set canFoldAsLoad because this can be converted to a constant-pool
+// load of an all-zeros value if folding it would be beneficial.
+let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
+ isPseudo = 1, neverHasSideEffects = 1 in {
+def V_SET0 : I<0, Pseudo, (outs VR128:$dst), (ins), "", []>;
+}
+
+def : Pat<(v4f32 immAllZerosV), (V_SET0)>;
+def : Pat<(v2f64 immAllZerosV), (V_SET0)>;
+def : Pat<(v4i32 immAllZerosV), (V_SET0)>;
+def : Pat<(v2i64 immAllZerosV), (V_SET0)>;
+def : Pat<(v8i16 immAllZerosV), (V_SET0)>;
+def : Pat<(v16i8 immAllZerosV), (V_SET0)>;
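V_SET0 is the pseudo behind the zero-vector idiom: as the comment above says, it is expanded after register allocation to an xor of a register with itself, which x86 cores recognize as dependency-breaking. The intrinsic-level spelling of the same idiom (that it lowers to xorps/vxorps is typical codegen, not a contract):

// The zero idiom V_SET0 models; typically lowers to xorps %xmmN, %xmmN.
#include <xmmintrin.h>

static __m128 ZeroVec(void) {
  return _mm_setzero_ps();
}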
+
+
+// The same as above, but for AVX. The 256-bit ISA does not support packed
+// integer operations, and doesn't need them here because on Sandy Bridge the
+// register is set to zero at the rename stage without using any execution
+// unit, so SET0PSY and SET0PDY can be used for vector integer instructions
+// without penalty.
+// FIXME: Change encoding to pseudo! This is blocked right now by the x86
+// JIT implementation, which does not expand the instructions below like
+// X86MCInstLower does.
+let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
+ isCodeGenOnly = 1, Predicates = [HasAVX] in {
+def AVX_SET0PSY : PSI<0x57, MRMInitReg, (outs VR256:$dst), (ins), "",
+ [(set VR256:$dst, (v8f32 immAllZerosV))]>, VEX_4V;
+def AVX_SET0PDY : PDI<0x57, MRMInitReg, (outs VR256:$dst), (ins), "",
+ [(set VR256:$dst, (v4f64 immAllZerosV))]>, VEX_4V;
+}
+
+
+// AVX has no support for 256-bit integer instructions, but since the 128-bit
+// VPXOR instruction writes zero to its upper part, it's safe to build zeros
+// this way.
+def : Pat<(v8i32 immAllZerosV), (SUBREG_TO_REG (i32 0), (V_SET0), sub_xmm)>;
+def : Pat<(bc_v8i32 (v8f32 immAllZerosV)),
+ (SUBREG_TO_REG (i32 0), (V_SET0), sub_xmm)>;
+
+def : Pat<(v4i64 immAllZerosV), (SUBREG_TO_REG (i64 0), (V_SET0), sub_xmm)>;
+def : Pat<(bc_v4i64 (v8f32 immAllZerosV)),
+ (SUBREG_TO_REG (i64 0), (V_SET0), sub_xmm)>;
+
+// We set canFoldAsLoad because this can be converted to a constant-pool
+// load of an all-ones value if folding it would be beneficial.
+// FIXME: Change encoding to pseudo! This is blocked right now by the x86
+// JIT implementation, which does not expand the instructions below like
+// X86MCInstLower does.
+let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
+ isCodeGenOnly = 1, ExeDomain = SSEPackedInt in
+ def V_SETALLONES : PDI<0x76, MRMInitReg, (outs VR128:$dst), (ins), "",
+ [(set VR128:$dst, (v4i32 immAllOnesV))]>;
+let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
+ isCodeGenOnly = 1, ExeDomain = SSEPackedInt, Predicates = [HasAVX] in
+ def AVX_SETALLONES : PDI<0x76, MRMInitReg, (outs VR128:$dst), (ins), "",
+ [(set VR128:$dst, (v4i32 immAllOnesV))]>, VEX_4V;
+
+
+//===----------------------------------------------------------------------===//
+// SSE 1 & 2 - Move FP Scalar Instructions
+//
+// Move Instructions. Register-to-register movss/movsd is not used for FR32/64
+// register copies because it's a partial register update; FsMOVAPSrr/FsMOVAPDrr
+// is used instead. Register-to-register movss/movsd is not modeled as an
+// INSERT_SUBREG because INSERT_SUBREG requires that the insert be implementable
+// in terms of a copy, and as just mentioned, we don't use movss/movsd for copies.
//===----------------------------------------------------------------------===//
class sse12_move_rr<RegisterClass RC, ValueType vt, string asm> :
@@ -130,28 +340,57 @@ class sse12_move_rm<RegisterClass RC, X86MemOperand x86memop,
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
[(set RC:$dst, (mem_pat addr:$src))]>;
-// Move Instructions. Register-to-register movss/movsd is not used for FR32/64
-// register copies because it's a partial register update; FsMOVAPSrr/FsMOVAPDrr
-// is used instead. Register-to-register movss/movsd is not modeled as an
-// INSERT_SUBREG because INSERT_SUBREG requires that the insert be implementable
-// in terms of a copy, and as just mentioned, we don't use movss/movsd for copies.
+// AVX
def VMOVSSrr : sse12_move_rr<FR32, v4f32,
- "movss\t{$src2, $src1, $dst|$dst, $src1, $src2}">, XS, VEX_4V;
+ "movss\t{$src2, $src1, $dst|$dst, $src1, $src2}">, XS, VEX_4V,
+ VEX_LIG;
def VMOVSDrr : sse12_move_rr<FR64, v2f64,
- "movsd\t{$src2, $src1, $dst|$dst, $src1, $src2}">, XD, VEX_4V;
+ "movsd\t{$src2, $src1, $dst|$dst, $src1, $src2}">, XD, VEX_4V,
+ VEX_LIG;
-let canFoldAsLoad = 1, isReMaterializable = 1 in {
- def VMOVSSrm : sse12_move_rm<FR32, f32mem, loadf32, "movss">, XS, VEX;
+// For the disassembler
+let isCodeGenOnly = 1 in {
+ def VMOVSSrr_REV : SI<0x11, MRMDestReg, (outs VR128:$dst),
+ (ins VR128:$src1, FR32:$src2),
+ "movss\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
+ XS, VEX_4V, VEX_LIG;
+ def VMOVSDrr_REV : SI<0x11, MRMDestReg, (outs VR128:$dst),
+ (ins VR128:$src1, FR64:$src2),
+ "movsd\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
+ XD, VEX_4V, VEX_LIG;
+}
+let canFoldAsLoad = 1, isReMaterializable = 1 in {
+ def VMOVSSrm : sse12_move_rm<FR32, f32mem, loadf32, "movss">, XS, VEX,
+ VEX_LIG;
let AddedComplexity = 20 in
- def VMOVSDrm : sse12_move_rm<FR64, f64mem, loadf64, "movsd">, XD, VEX;
+ def VMOVSDrm : sse12_move_rm<FR64, f64mem, loadf64, "movsd">, XD, VEX,
+ VEX_LIG;
}
+def VMOVSSmr : SI<0x11, MRMDestMem, (outs), (ins f32mem:$dst, FR32:$src),
+ "movss\t{$src, $dst|$dst, $src}",
+ [(store FR32:$src, addr:$dst)]>, XS, VEX, VEX_LIG;
+def VMOVSDmr : SI<0x11, MRMDestMem, (outs), (ins f64mem:$dst, FR64:$src),
+ "movsd\t{$src, $dst|$dst, $src}",
+ [(store FR64:$src, addr:$dst)]>, XD, VEX, VEX_LIG;
+
+// SSE1 & 2
let Constraints = "$src1 = $dst" in {
def MOVSSrr : sse12_move_rr<FR32, v4f32,
"movss\t{$src2, $dst|$dst, $src2}">, XS;
def MOVSDrr : sse12_move_rr<FR64, v2f64,
"movsd\t{$src2, $dst|$dst, $src2}">, XD;
+
+ // For the disassembler
+ let isCodeGenOnly = 1 in {
+ def MOVSSrr_REV : SI<0x11, MRMDestReg, (outs VR128:$dst),
+ (ins VR128:$src1, FR32:$src2),
+ "movss\t{$src2, $dst|$dst, $src2}", []>, XS;
+ def MOVSDrr_REV : SI<0x11, MRMDestReg, (outs VR128:$dst),
+ (ins VR128:$src1, FR64:$src2),
+ "movsd\t{$src2, $dst|$dst, $src2}", []>, XD;
+ }
}
let canFoldAsLoad = 1, isReMaterializable = 1 in {
@@ -161,54 +400,6 @@ let canFoldAsLoad = 1, isReMaterializable = 1 in {
def MOVSDrm : sse12_move_rm<FR64, f64mem, loadf64, "movsd">, XD;
}
-let AddedComplexity = 15 in {
-// Extract the low 32-bit value from one vector and insert it into another.
-def : Pat<(v4f32 (movl VR128:$src1, VR128:$src2)),
- (MOVSSrr (v4f32 VR128:$src1),
- (EXTRACT_SUBREG (v4f32 VR128:$src2), sub_ss))>;
-// Extract the low 64-bit value from one vector and insert it into another.
-def : Pat<(v2f64 (movl VR128:$src1, VR128:$src2)),
- (MOVSDrr (v2f64 VR128:$src1),
- (EXTRACT_SUBREG (v2f64 VR128:$src2), sub_sd))>;
-}
-
-// Implicitly promote a 32-bit scalar to a vector.
-def : Pat<(v4f32 (scalar_to_vector FR32:$src)),
- (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), FR32:$src, sub_ss)>;
-// Implicitly promote a 64-bit scalar to a vector.
-def : Pat<(v2f64 (scalar_to_vector FR64:$src)),
- (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), FR64:$src, sub_sd)>;
-// Implicitly promote a 32-bit scalar to a vector.
-def : Pat<(v8f32 (scalar_to_vector FR32:$src)),
- (INSERT_SUBREG (v8f32 (IMPLICIT_DEF)), FR32:$src, sub_ss)>;
-// Implicitly promote a 64-bit scalar to a vector.
-def : Pat<(v4f64 (scalar_to_vector FR64:$src)),
- (INSERT_SUBREG (v4f64 (IMPLICIT_DEF)), FR64:$src, sub_sd)>;
-
-let AddedComplexity = 20 in {
-// MOVSSrm zeros the high parts of the register; represent this
-// with SUBREG_TO_REG.
-def : Pat<(v4f32 (X86vzmovl (v4f32 (scalar_to_vector (loadf32 addr:$src))))),
- (SUBREG_TO_REG (i32 0), (MOVSSrm addr:$src), sub_ss)>;
-def : Pat<(v4f32 (scalar_to_vector (loadf32 addr:$src))),
- (SUBREG_TO_REG (i32 0), (MOVSSrm addr:$src), sub_ss)>;
-def : Pat<(v4f32 (X86vzmovl (loadv4f32 addr:$src))),
- (SUBREG_TO_REG (i32 0), (MOVSSrm addr:$src), sub_ss)>;
-// MOVSDrm zeros the high parts of the register; represent this
-// with SUBREG_TO_REG.
-def : Pat<(v2f64 (X86vzmovl (v2f64 (scalar_to_vector (loadf64 addr:$src))))),
- (SUBREG_TO_REG (i64 0), (MOVSDrm addr:$src), sub_sd)>;
-def : Pat<(v2f64 (scalar_to_vector (loadf64 addr:$src))),
- (SUBREG_TO_REG (i64 0), (MOVSDrm addr:$src), sub_sd)>;
-def : Pat<(v2f64 (X86vzmovl (loadv2f64 addr:$src))),
- (SUBREG_TO_REG (i64 0), (MOVSDrm addr:$src), sub_sd)>;
-def : Pat<(v2f64 (X86vzmovl (bc_v2f64 (loadv4f32 addr:$src)))),
- (SUBREG_TO_REG (i64 0), (MOVSDrm addr:$src), sub_sd)>;
-def : Pat<(v2f64 (X86vzload addr:$src)),
- (SUBREG_TO_REG (i64 0), (MOVSDrm addr:$src), sub_sd)>;
-}
-
-// Store scalar value to memory.
def MOVSSmr : SSI<0x11, MRMDestMem, (outs), (ins f32mem:$dst, FR32:$src),
"movss\t{$src, $dst|$dst, $src}",
[(store FR32:$src, addr:$dst)]>;
@@ -216,24 +407,257 @@ def MOVSDmr : SDI<0x11, MRMDestMem, (outs), (ins f64mem:$dst, FR64:$src),
"movsd\t{$src, $dst|$dst, $src}",
[(store FR64:$src, addr:$dst)]>;
-def VMOVSSmr : SI<0x11, MRMDestMem, (outs), (ins f32mem:$dst, FR32:$src),
- "movss\t{$src, $dst|$dst, $src}",
- [(store FR32:$src, addr:$dst)]>, XS, VEX;
-def VMOVSDmr : SI<0x11, MRMDestMem, (outs), (ins f64mem:$dst, FR64:$src),
- "movsd\t{$src, $dst|$dst, $src}",
- [(store FR64:$src, addr:$dst)]>, XD, VEX;
+// Patterns
+let Predicates = [HasSSE1] in {
+ let AddedComplexity = 15 in {
+ // Extract the low 32-bit value from one vector and insert it into another.
+ def : Pat<(v4f32 (movl VR128:$src1, VR128:$src2)),
+ (MOVSSrr (v4f32 VR128:$src1),
+ (EXTRACT_SUBREG (v4f32 VR128:$src2), sub_ss))>;
+ def : Pat<(v4i32 (movl VR128:$src1, VR128:$src2)),
+ (MOVSSrr (v4i32 VR128:$src1),
+ (EXTRACT_SUBREG (v4i32 VR128:$src2), sub_ss))>;
+
+ // Move scalar to XMM zero-extended, zeroing a VR128 then do a
+ // MOVSS to the lower bits.
+ def : Pat<(v4f32 (X86vzmovl (v4f32 (scalar_to_vector FR32:$src)))),
+ (MOVSSrr (v4f32 (V_SET0)), FR32:$src)>;
+ def : Pat<(v4f32 (X86vzmovl (v4f32 VR128:$src))),
+ (MOVSSrr (v4f32 (V_SET0)),
+ (f32 (EXTRACT_SUBREG (v4f32 VR128:$src), sub_ss)))>;
+ def : Pat<(v4i32 (X86vzmovl (v4i32 VR128:$src))),
+ (MOVSSrr (v4i32 (V_SET0)),
+ (EXTRACT_SUBREG (v4i32 VR128:$src), sub_ss))>;
+ }
-// Extract and store.
-def : Pat<(store (f32 (vector_extract (v4f32 VR128:$src), (iPTR 0))),
- addr:$dst),
- (MOVSSmr addr:$dst,
- (EXTRACT_SUBREG (v4f32 VR128:$src), sub_ss))>;
-def : Pat<(store (f64 (vector_extract (v2f64 VR128:$src), (iPTR 0))),
- addr:$dst),
- (MOVSDmr addr:$dst,
- (EXTRACT_SUBREG (v2f64 VR128:$src), sub_sd))>;
+ let AddedComplexity = 20 in {
+ // MOVSSrm zeros the high parts of the register; represent this
+ // with SUBREG_TO_REG.
+ def : Pat<(v4f32 (X86vzmovl (v4f32 (scalar_to_vector (loadf32 addr:$src))))),
+ (SUBREG_TO_REG (i32 0), (MOVSSrm addr:$src), sub_ss)>;
+ def : Pat<(v4f32 (scalar_to_vector (loadf32 addr:$src))),
+ (SUBREG_TO_REG (i32 0), (MOVSSrm addr:$src), sub_ss)>;
+ def : Pat<(v4f32 (X86vzmovl (loadv4f32 addr:$src))),
+ (SUBREG_TO_REG (i32 0), (MOVSSrm addr:$src), sub_ss)>;
+ }
+
+ // Extract and store.
+ def : Pat<(store (f32 (vector_extract (v4f32 VR128:$src), (iPTR 0))),
+ addr:$dst),
+ (MOVSSmr addr:$dst,
+ (EXTRACT_SUBREG (v4f32 VR128:$src), sub_ss))>;
+
+ // Shuffle with MOVSS
+ def : Pat<(v4f32 (X86Movss VR128:$src1, (scalar_to_vector FR32:$src2))),
+ (MOVSSrr VR128:$src1, FR32:$src2)>;
+ def : Pat<(v4i32 (X86Movss VR128:$src1, VR128:$src2)),
+ (MOVSSrr (v4i32 VR128:$src1),
+ (EXTRACT_SUBREG (v4i32 VR128:$src2), sub_ss))>;
+ def : Pat<(v4f32 (X86Movss VR128:$src1, VR128:$src2)),
+ (MOVSSrr (v4f32 VR128:$src1),
+ (EXTRACT_SUBREG (v4f32 VR128:$src2), sub_ss))>;
+}
+
+let Predicates = [HasSSE2] in {
+ let AddedComplexity = 15 in {
+ // Extract the low 64-bit value from one vector and insert it into another.
+ def : Pat<(v2f64 (movl VR128:$src1, VR128:$src2)),
+ (MOVSDrr (v2f64 VR128:$src1),
+ (EXTRACT_SUBREG (v2f64 VR128:$src2), sub_sd))>;
+ def : Pat<(v2i64 (movl VR128:$src1, VR128:$src2)),
+ (MOVSDrr (v2i64 VR128:$src1),
+ (EXTRACT_SUBREG (v2i64 VR128:$src2), sub_sd))>;
+
+ // vector_shuffle v1, v2 <4, 5, 2, 3> using movsd
+ def : Pat<(v4f32 (movlp VR128:$src1, VR128:$src2)),
+ (MOVSDrr VR128:$src1, (EXTRACT_SUBREG VR128:$src2, sub_sd))>;
+ def : Pat<(v4i32 (movlp VR128:$src1, VR128:$src2)),
+ (MOVSDrr VR128:$src1, (EXTRACT_SUBREG VR128:$src2, sub_sd))>;
+
+ // Move scalar to XMM zero-extended, zeroing a VR128 then do a
+ // MOVSD to the lower bits.
+ def : Pat<(v2f64 (X86vzmovl (v2f64 (scalar_to_vector FR64:$src)))),
+ (MOVSDrr (v2f64 (V_SET0)), FR64:$src)>;
+ }
+
+ let AddedComplexity = 20 in {
+ // MOVSDrm zeros the high parts of the register; represent this
+ // with SUBREG_TO_REG.
+ def : Pat<(v2f64 (X86vzmovl (v2f64 (scalar_to_vector (loadf64 addr:$src))))),
+ (SUBREG_TO_REG (i64 0), (MOVSDrm addr:$src), sub_sd)>;
+ def : Pat<(v2f64 (scalar_to_vector (loadf64 addr:$src))),
+ (SUBREG_TO_REG (i64 0), (MOVSDrm addr:$src), sub_sd)>;
+ def : Pat<(v2f64 (X86vzmovl (loadv2f64 addr:$src))),
+ (SUBREG_TO_REG (i64 0), (MOVSDrm addr:$src), sub_sd)>;
+ def : Pat<(v2f64 (X86vzmovl (bc_v2f64 (loadv4f32 addr:$src)))),
+ (SUBREG_TO_REG (i64 0), (MOVSDrm addr:$src), sub_sd)>;
+ def : Pat<(v2f64 (X86vzload addr:$src)),
+ (SUBREG_TO_REG (i64 0), (MOVSDrm addr:$src), sub_sd)>;
+ }
+
+ // Extract and store.
+ def : Pat<(store (f64 (vector_extract (v2f64 VR128:$src), (iPTR 0))),
+ addr:$dst),
+ (MOVSDmr addr:$dst,
+ (EXTRACT_SUBREG (v2f64 VR128:$src), sub_sd))>;
+
+ // Shuffle with MOVSD
+ def : Pat<(v2f64 (X86Movsd VR128:$src1, (scalar_to_vector FR64:$src2))),
+ (MOVSDrr VR128:$src1, FR64:$src2)>;
+ def : Pat<(v2i64 (X86Movsd VR128:$src1, VR128:$src2)),
+ (MOVSDrr (v2i64 VR128:$src1),
+ (EXTRACT_SUBREG (v2i64 VR128:$src2), sub_sd))>;
+ def : Pat<(v2f64 (X86Movsd VR128:$src1, VR128:$src2)),
+ (MOVSDrr (v2f64 VR128:$src1),
+ (EXTRACT_SUBREG (v2f64 VR128:$src2), sub_sd))>;
+ def : Pat<(v4f32 (X86Movsd VR128:$src1, VR128:$src2)),
+ (MOVSDrr VR128:$src1, (EXTRACT_SUBREG (v4f32 VR128:$src2),sub_sd))>;
+ def : Pat<(v4i32 (X86Movsd VR128:$src1, VR128:$src2)),
+ (MOVSDrr VR128:$src1, (EXTRACT_SUBREG (v4i32 VR128:$src2),sub_sd))>;
+
+  // FIXME: Instead of an X86Movlps there should be an X86Movsd here; the
+  // problem is during lowering, where it's not possible to recognize the fold
+  // because it has two uses through a bitcast. One use disappears at isel time
+  // and the fold opportunity reappears.
+ def : Pat<(v4f32 (X86Movlps VR128:$src1, VR128:$src2)),
+ (MOVSDrr VR128:$src1, (EXTRACT_SUBREG (v4f32 VR128:$src2),sub_sd))>;
+ def : Pat<(v4i32 (X86Movlps VR128:$src1, VR128:$src2)),
+ (MOVSDrr VR128:$src1, (EXTRACT_SUBREG (v4i32 VR128:$src2),sub_sd))>;
+}
+
+let Predicates = [HasAVX] in {
+ let AddedComplexity = 15 in {
+ // Extract the low 32-bit value from one vector and insert it into another.
+ def : Pat<(v4f32 (movl VR128:$src1, VR128:$src2)),
+ (VMOVSSrr (v4f32 VR128:$src1),
+ (EXTRACT_SUBREG (v4f32 VR128:$src2), sub_ss))>;
+ def : Pat<(v4i32 (movl VR128:$src1, VR128:$src2)),
+ (VMOVSSrr (v4i32 VR128:$src1),
+ (EXTRACT_SUBREG (v4i32 VR128:$src2), sub_ss))>;
+
+ // Extract the low 64-bit value from one vector and insert it into another.
+ def : Pat<(v2f64 (movl VR128:$src1, VR128:$src2)),
+ (VMOVSDrr (v2f64 VR128:$src1),
+ (EXTRACT_SUBREG (v2f64 VR128:$src2), sub_sd))>;
+ def : Pat<(v2i64 (movl VR128:$src1, VR128:$src2)),
+ (VMOVSDrr (v2i64 VR128:$src1),
+ (EXTRACT_SUBREG (v2i64 VR128:$src2), sub_sd))>;
+
+ // vector_shuffle v1, v2 <4, 5, 2, 3> using movsd
+ def : Pat<(v4f32 (movlp VR128:$src1, VR128:$src2)),
+ (VMOVSDrr VR128:$src1, (EXTRACT_SUBREG VR128:$src2, sub_sd))>;
+ def : Pat<(v4i32 (movlp VR128:$src1, VR128:$src2)),
+ (VMOVSDrr VR128:$src1, (EXTRACT_SUBREG VR128:$src2, sub_sd))>;
+
+ // Move scalar to XMM zero-extended, zeroing a VR128 then do a
+ // MOVS{S,D} to the lower bits.
+ def : Pat<(v4f32 (X86vzmovl (v4f32 (scalar_to_vector FR32:$src)))),
+ (VMOVSSrr (v4f32 (V_SET0)), FR32:$src)>;
+ def : Pat<(v4f32 (X86vzmovl (v4f32 VR128:$src))),
+ (VMOVSSrr (v4f32 (V_SET0)),
+ (f32 (EXTRACT_SUBREG (v4f32 VR128:$src), sub_ss)))>;
+ def : Pat<(v4i32 (X86vzmovl (v4i32 VR128:$src))),
+ (VMOVSSrr (v4i32 (V_SET0)),
+ (EXTRACT_SUBREG (v4i32 VR128:$src), sub_ss))>;
+ def : Pat<(v2f64 (X86vzmovl (v2f64 (scalar_to_vector FR64:$src)))),
+ (VMOVSDrr (v2f64 (V_SET0)), FR64:$src)>;
+ }
+
+ let AddedComplexity = 20 in {
+ // MOVSSrm zeros the high parts of the register; represent this
+ // with SUBREG_TO_REG. The AVX versions also write: DST[255:128] <- 0
+ def : Pat<(v4f32 (X86vzmovl (v4f32 (scalar_to_vector (loadf32 addr:$src))))),
+ (SUBREG_TO_REG (i32 0), (VMOVSSrm addr:$src), sub_ss)>;
+ def : Pat<(v4f32 (scalar_to_vector (loadf32 addr:$src))),
+ (SUBREG_TO_REG (i32 0), (VMOVSSrm addr:$src), sub_ss)>;
+ def : Pat<(v4f32 (X86vzmovl (loadv4f32 addr:$src))),
+ (SUBREG_TO_REG (i32 0), (VMOVSSrm addr:$src), sub_ss)>;
+
+ // MOVSDrm zeros the high parts of the register; represent this
+ // with SUBREG_TO_REG. The AVX versions also write: DST[255:128] <- 0
+ def : Pat<(v2f64 (X86vzmovl (v2f64 (scalar_to_vector (loadf64 addr:$src))))),
+ (SUBREG_TO_REG (i64 0), (VMOVSDrm addr:$src), sub_sd)>;
+ def : Pat<(v2f64 (scalar_to_vector (loadf64 addr:$src))),
+ (SUBREG_TO_REG (i64 0), (VMOVSDrm addr:$src), sub_sd)>;
+ def : Pat<(v2f64 (X86vzmovl (loadv2f64 addr:$src))),
+ (SUBREG_TO_REG (i64 0), (VMOVSDrm addr:$src), sub_sd)>;
+ def : Pat<(v2f64 (X86vzmovl (bc_v2f64 (loadv4f32 addr:$src)))),
+ (SUBREG_TO_REG (i64 0), (VMOVSDrm addr:$src), sub_sd)>;
+ def : Pat<(v2f64 (X86vzload addr:$src)),
+ (SUBREG_TO_REG (i64 0), (VMOVSDrm addr:$src), sub_sd)>;
+
+  // Represent the same patterns as above, but in the form they appear for
+  // 256-bit types.
+ def : Pat<(v8f32 (X86vzmovl (insert_subvector undef,
+ (v4f32 (scalar_to_vector (loadf32 addr:$src))), (i32 0)))),
+ (SUBREG_TO_REG (i32 0), (VMOVSSrm addr:$src), sub_ss)>;
+ def : Pat<(v4f64 (X86vzmovl (insert_subvector undef,
+ (v2f64 (scalar_to_vector (loadf64 addr:$src))), (i32 0)))),
+ (SUBREG_TO_REG (i32 0), (VMOVSDrm addr:$src), sub_sd)>;
+ }
+ def : Pat<(v8f32 (X86vzmovl (insert_subvector undef,
+ (v4f32 (scalar_to_vector FR32:$src)), (i32 0)))),
+ (SUBREG_TO_REG (i32 0),
+ (v4f32 (VMOVSSrr (v4f32 (V_SET0)), FR32:$src)),
+ sub_xmm)>;
+ def : Pat<(v4f64 (X86vzmovl (insert_subvector undef,
+ (v2f64 (scalar_to_vector FR64:$src)), (i32 0)))),
+ (SUBREG_TO_REG (i64 0),
+ (v2f64 (VMOVSDrr (v2f64 (V_SET0)), FR64:$src)),
+ sub_xmm)>;
+
+ // Extract and store.
+ def : Pat<(store (f32 (vector_extract (v4f32 VR128:$src), (iPTR 0))),
+ addr:$dst),
+ (VMOVSSmr addr:$dst,
+ (EXTRACT_SUBREG (v4f32 VR128:$src), sub_ss))>;
+ def : Pat<(store (f64 (vector_extract (v2f64 VR128:$src), (iPTR 0))),
+ addr:$dst),
+ (VMOVSDmr addr:$dst,
+ (EXTRACT_SUBREG (v2f64 VR128:$src), sub_sd))>;
+
+ // Shuffle with VMOVSS
+ def : Pat<(v4f32 (X86Movss VR128:$src1, (scalar_to_vector FR32:$src2))),
+ (VMOVSSrr VR128:$src1, FR32:$src2)>;
+ def : Pat<(v4i32 (X86Movss VR128:$src1, VR128:$src2)),
+ (VMOVSSrr (v4i32 VR128:$src1),
+ (EXTRACT_SUBREG (v4i32 VR128:$src2), sub_ss))>;
+ def : Pat<(v4f32 (X86Movss VR128:$src1, VR128:$src2)),
+ (VMOVSSrr (v4f32 VR128:$src1),
+ (EXTRACT_SUBREG (v4f32 VR128:$src2), sub_ss))>;
+
+ // Shuffle with VMOVSD
+ def : Pat<(v2f64 (X86Movsd VR128:$src1, (scalar_to_vector FR64:$src2))),
+ (VMOVSDrr VR128:$src1, FR64:$src2)>;
+ def : Pat<(v2i64 (X86Movsd VR128:$src1, VR128:$src2)),
+ (VMOVSDrr (v2i64 VR128:$src1),
+ (EXTRACT_SUBREG (v2i64 VR128:$src2), sub_sd))>;
+ def : Pat<(v2f64 (X86Movsd VR128:$src1, VR128:$src2)),
+ (VMOVSDrr (v2f64 VR128:$src1),
+ (EXTRACT_SUBREG (v2f64 VR128:$src2), sub_sd))>;
+ def : Pat<(v4f32 (X86Movsd VR128:$src1, VR128:$src2)),
+ (VMOVSDrr VR128:$src1, (EXTRACT_SUBREG (v4f32 VR128:$src2),
+ sub_sd))>;
+ def : Pat<(v4i32 (X86Movsd VR128:$src1, VR128:$src2)),
+ (VMOVSDrr VR128:$src1, (EXTRACT_SUBREG (v4i32 VR128:$src2),
+ sub_sd))>;
+
+ // FIXME: Instead of an X86Movlps there should be an X86Movsd here. The
+ // problem arises during lowering, where it's not possible to recognize the
+ // fold because it has two uses through a bitcast. One use disappears at isel
+ // time and the fold opportunity reappears.
+ def : Pat<(v4f32 (X86Movlps VR128:$src1, VR128:$src2)),
+ (VMOVSDrr VR128:$src1, (EXTRACT_SUBREG (v4f32 VR128:$src2),
+ sub_sd))>;
+ def : Pat<(v4i32 (X86Movlps VR128:$src1, VR128:$src2)),
+ (VMOVSDrr VR128:$src1, (EXTRACT_SUBREG (v4i32 VR128:$src2),
+ sub_sd))>;
+}
+
+//===----------------------------------------------------------------------===//
+// SSE 1 & 2 - Move Aligned/Unaligned FP Instructions
+//===----------------------------------------------------------------------===//
-// Move Aligned/Unaligned floating point values
multiclass sse12_mov_packed<bits<8> opc, RegisterClass RC,
X86MemOperand x86memop, PatFrag ld_frag,
string asm, Domain d,
@@ -248,22 +672,22 @@ let canFoldAsLoad = 1, isReMaterializable = IsReMaterializable in
}
defm VMOVAPS : sse12_mov_packed<0x28, VR128, f128mem, alignedloadv4f32,
- "movaps", SSEPackedSingle>, VEX;
+ "movaps", SSEPackedSingle>, TB, VEX;
defm VMOVAPD : sse12_mov_packed<0x28, VR128, f128mem, alignedloadv2f64,
- "movapd", SSEPackedDouble>, OpSize, VEX;
+ "movapd", SSEPackedDouble>, TB, OpSize, VEX;
defm VMOVUPS : sse12_mov_packed<0x10, VR128, f128mem, loadv4f32,
- "movups", SSEPackedSingle>, VEX;
+ "movups", SSEPackedSingle>, TB, VEX;
defm VMOVUPD : sse12_mov_packed<0x10, VR128, f128mem, loadv2f64,
- "movupd", SSEPackedDouble, 0>, OpSize, VEX;
+ "movupd", SSEPackedDouble, 0>, TB, OpSize, VEX;
defm VMOVAPSY : sse12_mov_packed<0x28, VR256, f256mem, alignedloadv8f32,
- "movaps", SSEPackedSingle>, VEX;
+ "movaps", SSEPackedSingle>, TB, VEX;
defm VMOVAPDY : sse12_mov_packed<0x28, VR256, f256mem, alignedloadv4f64,
- "movapd", SSEPackedDouble>, OpSize, VEX;
+ "movapd", SSEPackedDouble>, TB, OpSize, VEX;
defm VMOVUPSY : sse12_mov_packed<0x10, VR256, f256mem, loadv8f32,
- "movups", SSEPackedSingle>, VEX;
+ "movups", SSEPackedSingle>, TB, VEX;
defm VMOVUPDY : sse12_mov_packed<0x10, VR256, f256mem, loadv4f64,
- "movupd", SSEPackedDouble, 0>, OpSize, VEX;
+ "movupd", SSEPackedDouble, 0>, TB, OpSize, VEX;
defm MOVAPS : sse12_mov_packed<0x28, VR128, f128mem, alignedloadv4f32,
"movaps", SSEPackedSingle>, TB;
defm MOVAPD : sse12_mov_packed<0x28, VR128, f128mem, alignedloadv2f64,
@@ -287,10 +711,10 @@ def VMOVUPDmr : VPDI<0x11, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
[(store (v2f64 VR128:$src), addr:$dst)]>, VEX;
def VMOVAPSYmr : VPSI<0x29, MRMDestMem, (outs), (ins f256mem:$dst, VR256:$src),
"movaps\t{$src, $dst|$dst, $src}",
- [(alignedstore (v8f32 VR256:$src), addr:$dst)]>, VEX;
+ [(alignedstore256 (v8f32 VR256:$src), addr:$dst)]>, VEX;
def VMOVAPDYmr : VPDI<0x29, MRMDestMem, (outs), (ins f256mem:$dst, VR256:$src),
"movapd\t{$src, $dst|$dst, $src}",
- [(alignedstore (v4f64 VR256:$src), addr:$dst)]>, VEX;
+ [(alignedstore256 (v4f64 VR256:$src), addr:$dst)]>, VEX;
def VMOVUPSYmr : VPSI<0x11, MRMDestMem, (outs), (ins f256mem:$dst, VR256:$src),
"movups\t{$src, $dst|$dst, $src}",
[(store (v8f32 VR256:$src), addr:$dst)]>, VEX;
@@ -298,6 +722,34 @@ def VMOVUPDYmr : VPDI<0x11, MRMDestMem, (outs), (ins f256mem:$dst, VR256:$src),
"movupd\t{$src, $dst|$dst, $src}",
[(store (v4f64 VR256:$src), addr:$dst)]>, VEX;
+// For disassembler
+let isCodeGenOnly = 1 in {
+ def VMOVAPSrr_REV : VPSI<0x29, MRMDestReg, (outs VR128:$dst),
+ (ins VR128:$src),
+ "movaps\t{$src, $dst|$dst, $src}", []>, VEX;
+ def VMOVAPDrr_REV : VPDI<0x29, MRMDestReg, (outs VR128:$dst),
+ (ins VR128:$src),
+ "movapd\t{$src, $dst|$dst, $src}", []>, VEX;
+ def VMOVUPSrr_REV : VPSI<0x11, MRMDestReg, (outs VR128:$dst),
+ (ins VR128:$src),
+ "movups\t{$src, $dst|$dst, $src}", []>, VEX;
+ def VMOVUPDrr_REV : VPDI<0x11, MRMDestReg, (outs VR128:$dst),
+ (ins VR128:$src),
+ "movupd\t{$src, $dst|$dst, $src}", []>, VEX;
+ def VMOVAPSYrr_REV : VPSI<0x29, MRMDestReg, (outs VR256:$dst),
+ (ins VR256:$src),
+ "movaps\t{$src, $dst|$dst, $src}", []>, VEX;
+ def VMOVAPDYrr_REV : VPDI<0x29, MRMDestReg, (outs VR256:$dst),
+ (ins VR256:$src),
+ "movapd\t{$src, $dst|$dst, $src}", []>, VEX;
+ def VMOVUPSYrr_REV : VPSI<0x11, MRMDestReg, (outs VR256:$dst),
+ (ins VR256:$src),
+ "movups\t{$src, $dst|$dst, $src}", []>, VEX;
+ def VMOVUPDYrr_REV : VPDI<0x11, MRMDestReg, (outs VR256:$dst),
+ (ins VR256:$src),
+ "movupd\t{$src, $dst|$dst, $src}", []>, VEX;
+}
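+
+// Note: these _REV forms reuse the store-direction opcodes (0x29/0x11) with
+// MRMDestReg so the disassembler can represent both valid encodings of a
+// register-to-register move; isCodeGenOnly = 1 keeps the duplicates out of
+// instruction selection.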
+
def : Pat<(int_x86_avx_loadu_ps_256 addr:$src), (VMOVUPSYrm addr:$src)>;
def : Pat<(int_x86_avx_storeu_ps_256 addr:$dst, VR256:$src),
(VMOVUPSYmr addr:$dst, VR256:$src)>;
@@ -319,24 +771,155 @@ def MOVUPDmr : PDI<0x11, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
"movupd\t{$src, $dst|$dst, $src}",
[(store (v2f64 VR128:$src), addr:$dst)]>;
-// Intrinsic forms of MOVUPS/D load and store
-def VMOVUPSmr_Int : VPSI<0x11, MRMDestMem, (outs),
- (ins f128mem:$dst, VR128:$src),
- "movups\t{$src, $dst|$dst, $src}",
- [(int_x86_sse_storeu_ps addr:$dst, VR128:$src)]>, VEX;
-def VMOVUPDmr_Int : VPDI<0x11, MRMDestMem, (outs),
- (ins f128mem:$dst, VR128:$src),
- "movupd\t{$src, $dst|$dst, $src}",
- [(int_x86_sse2_storeu_pd addr:$dst, VR128:$src)]>, VEX;
-
-def MOVUPSmr_Int : PSI<0x11, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
- "movups\t{$src, $dst|$dst, $src}",
- [(int_x86_sse_storeu_ps addr:$dst, VR128:$src)]>;
-def MOVUPDmr_Int : PDI<0x11, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
- "movupd\t{$src, $dst|$dst, $src}",
- [(int_x86_sse2_storeu_pd addr:$dst, VR128:$src)]>;
-
-// Move Low/High packed floating point values
+// For disassembler
+let isCodeGenOnly = 1 in {
+ def MOVAPSrr_REV : PSI<0x29, MRMDestReg, (outs VR128:$dst), (ins VR128:$src),
+ "movaps\t{$src, $dst|$dst, $src}", []>;
+ def MOVAPDrr_REV : PDI<0x29, MRMDestReg, (outs VR128:$dst), (ins VR128:$src),
+ "movapd\t{$src, $dst|$dst, $src}", []>;
+ def MOVUPSrr_REV : PSI<0x11, MRMDestReg, (outs VR128:$dst), (ins VR128:$src),
+ "movups\t{$src, $dst|$dst, $src}", []>;
+ def MOVUPDrr_REV : PDI<0x11, MRMDestReg, (outs VR128:$dst), (ins VR128:$src),
+ "movupd\t{$src, $dst|$dst, $src}", []>;
+}
+
+let Predicates = [HasAVX] in {
+ def : Pat<(int_x86_sse_storeu_ps addr:$dst, VR128:$src),
+ (VMOVUPSmr addr:$dst, VR128:$src)>;
+ def : Pat<(int_x86_sse2_storeu_pd addr:$dst, VR128:$src),
+ (VMOVUPDmr addr:$dst, VR128:$src)>;
+}
+
+let Predicates = [HasSSE1] in
+ def : Pat<(int_x86_sse_storeu_ps addr:$dst, VR128:$src),
+ (MOVUPSmr addr:$dst, VR128:$src)>;
+let Predicates = [HasSSE2] in
+ def : Pat<(int_x86_sse2_storeu_pd addr:$dst, VR128:$src),
+ (MOVUPDmr addr:$dst, VR128:$src)>;
+
+// Use movaps / movups for SSE integer load / store (one byte shorter).
+// The instructions selected below are then converted to MOVDQA/MOVDQU
+// during the SSE domain pass.
+let Predicates = [HasSSE1] in {
+ def : Pat<(alignedloadv4i32 addr:$src),
+ (MOVAPSrm addr:$src)>;
+ def : Pat<(loadv4i32 addr:$src),
+ (MOVUPSrm addr:$src)>;
+ def : Pat<(alignedloadv2i64 addr:$src),
+ (MOVAPSrm addr:$src)>;
+ def : Pat<(loadv2i64 addr:$src),
+ (MOVUPSrm addr:$src)>;
+
+ def : Pat<(alignedstore (v2i64 VR128:$src), addr:$dst),
+ (MOVAPSmr addr:$dst, VR128:$src)>;
+ def : Pat<(alignedstore (v4i32 VR128:$src), addr:$dst),
+ (MOVAPSmr addr:$dst, VR128:$src)>;
+ def : Pat<(alignedstore (v8i16 VR128:$src), addr:$dst),
+ (MOVAPSmr addr:$dst, VR128:$src)>;
+ def : Pat<(alignedstore (v16i8 VR128:$src), addr:$dst),
+ (MOVAPSmr addr:$dst, VR128:$src)>;
+ def : Pat<(store (v2i64 VR128:$src), addr:$dst),
+ (MOVUPSmr addr:$dst, VR128:$src)>;
+ def : Pat<(store (v4i32 VR128:$src), addr:$dst),
+ (MOVUPSmr addr:$dst, VR128:$src)>;
+ def : Pat<(store (v8i16 VR128:$src), addr:$dst),
+ (MOVUPSmr addr:$dst, VR128:$src)>;
+ def : Pat<(store (v16i8 VR128:$src), addr:$dst),
+ (MOVUPSmr addr:$dst, VR128:$src)>;
+}
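+
+// For example, (store (v4i32 VR128:$src), addr:$dst) selects MOVUPSmr here;
+// if the surrounding code runs in the integer domain, the SSE domain pass
+// mentioned above may later rewrite it to MOVDQU.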
+
+// Use vmovaps/vmovups for AVX integer load/store.
+let Predicates = [HasAVX] in {
+ // 128-bit load/store
+ def : Pat<(alignedloadv4i32 addr:$src),
+ (VMOVAPSrm addr:$src)>;
+ def : Pat<(loadv4i32 addr:$src),
+ (VMOVUPSrm addr:$src)>;
+ def : Pat<(alignedloadv2i64 addr:$src),
+ (VMOVAPSrm addr:$src)>;
+ def : Pat<(loadv2i64 addr:$src),
+ (VMOVUPSrm addr:$src)>;
+
+ def : Pat<(alignedstore (v2i64 VR128:$src), addr:$dst),
+ (VMOVAPSmr addr:$dst, VR128:$src)>;
+ def : Pat<(alignedstore (v4i32 VR128:$src), addr:$dst),
+ (VMOVAPSmr addr:$dst, VR128:$src)>;
+ def : Pat<(alignedstore (v8i16 VR128:$src), addr:$dst),
+ (VMOVAPSmr addr:$dst, VR128:$src)>;
+ def : Pat<(alignedstore (v16i8 VR128:$src), addr:$dst),
+ (VMOVAPSmr addr:$dst, VR128:$src)>;
+ def : Pat<(store (v2i64 VR128:$src), addr:$dst),
+ (VMOVUPSmr addr:$dst, VR128:$src)>;
+ def : Pat<(store (v4i32 VR128:$src), addr:$dst),
+ (VMOVUPSmr addr:$dst, VR128:$src)>;
+ def : Pat<(store (v8i16 VR128:$src), addr:$dst),
+ (VMOVUPSmr addr:$dst, VR128:$src)>;
+ def : Pat<(store (v16i8 VR128:$src), addr:$dst),
+ (VMOVUPSmr addr:$dst, VR128:$src)>;
+
+ // 256-bit load/store
+ def : Pat<(alignedloadv4i64 addr:$src),
+ (VMOVAPSYrm addr:$src)>;
+ def : Pat<(loadv4i64 addr:$src),
+ (VMOVUPSYrm addr:$src)>;
+ def : Pat<(alignedloadv8i32 addr:$src),
+ (VMOVAPSYrm addr:$src)>;
+ def : Pat<(loadv8i32 addr:$src),
+ (VMOVUPSYrm addr:$src)>;
+ def : Pat<(alignedstore256 (v4i64 VR256:$src), addr:$dst),
+ (VMOVAPSYmr addr:$dst, VR256:$src)>;
+ def : Pat<(alignedstore256 (v8i32 VR256:$src), addr:$dst),
+ (VMOVAPSYmr addr:$dst, VR256:$src)>;
+ def : Pat<(alignedstore256 (v16i16 VR256:$src), addr:$dst),
+ (VMOVAPSYmr addr:$dst, VR256:$src)>;
+ def : Pat<(alignedstore256 (v32i8 VR256:$src), addr:$dst),
+ (VMOVAPSYmr addr:$dst, VR256:$src)>;
+ def : Pat<(store (v4i64 VR256:$src), addr:$dst),
+ (VMOVUPSYmr addr:$dst, VR256:$src)>;
+ def : Pat<(store (v8i32 VR256:$src), addr:$dst),
+ (VMOVUPSYmr addr:$dst, VR256:$src)>;
+ def : Pat<(store (v16i16 VR256:$src), addr:$dst),
+ (VMOVUPSYmr addr:$dst, VR256:$src)>;
+ def : Pat<(store (v32i8 VR256:$src), addr:$dst),
+ (VMOVUPSYmr addr:$dst, VR256:$src)>;
+}
+
+// Alias instruction to do FR32 or FR64 reg-to-reg copy using movaps. Upper
+// bits are disregarded. FIXME: Set encoding to pseudo!
+let neverHasSideEffects = 1 in {
+def FsMOVAPSrr : PSI<0x28, MRMSrcReg, (outs FR32:$dst), (ins FR32:$src),
+ "movaps\t{$src, $dst|$dst, $src}", []>;
+def FsMOVAPDrr : PDI<0x28, MRMSrcReg, (outs FR64:$dst), (ins FR64:$src),
+ "movapd\t{$src, $dst|$dst, $src}", []>;
+def FsVMOVAPSrr : VPSI<0x28, MRMSrcReg, (outs FR32:$dst), (ins FR32:$src),
+ "movaps\t{$src, $dst|$dst, $src}", []>, VEX;
+def FsVMOVAPDrr : VPDI<0x28, MRMSrcReg, (outs FR64:$dst), (ins FR64:$src),
+ "movapd\t{$src, $dst|$dst, $src}", []>, VEX;
+}
+
+// Alias instruction to load FR32 or FR64 from f128mem using movaps. Upper
+// bits are disregarded. FIXME: Set encoding to pseudo!
+let canFoldAsLoad = 1, isReMaterializable = 1 in {
+def FsMOVAPSrm : PSI<0x28, MRMSrcMem, (outs FR32:$dst), (ins f128mem:$src),
+ "movaps\t{$src, $dst|$dst, $src}",
+ [(set FR32:$dst, (alignedloadfsf32 addr:$src))]>;
+def FsMOVAPDrm : PDI<0x28, MRMSrcMem, (outs FR64:$dst), (ins f128mem:$src),
+ "movapd\t{$src, $dst|$dst, $src}",
+ [(set FR64:$dst, (alignedloadfsf64 addr:$src))]>;
+let isCodeGenOnly = 1 in {
+ def FsVMOVAPSrm : VPSI<0x28, MRMSrcMem, (outs FR32:$dst), (ins f128mem:$src),
+ "movaps\t{$src, $dst|$dst, $src}",
+ [(set FR32:$dst, (alignedloadfsf32 addr:$src))]>, VEX;
+ def FsVMOVAPDrm : VPDI<0x28, MRMSrcMem, (outs FR64:$dst), (ins f128mem:$src),
+ "movapd\t{$src, $dst|$dst, $src}",
+ [(set FR64:$dst, (alignedloadfsf64 addr:$src))]>, VEX;
+}
+}
+
+//===----------------------------------------------------------------------===//
+// SSE 1 & 2 - Move Low packed FP Instructions
+//===----------------------------------------------------------------------===//
+
multiclass sse12_mov_hilo_packed<bits<8>opc, RegisterClass RC,
PatFrag mov_frag, string base_opc,
string asm_opr> {
@@ -359,14 +942,10 @@ multiclass sse12_mov_hilo_packed<bits<8>opc, RegisterClass RC,
let AddedComplexity = 20 in {
defm VMOVL : sse12_mov_hilo_packed<0x12, VR128, movlp, "movlp",
"\t{$src2, $src1, $dst|$dst, $src1, $src2}">, VEX_4V;
- defm VMOVH : sse12_mov_hilo_packed<0x16, VR128, movlhps, "movhp",
- "\t{$src2, $src1, $dst|$dst, $src1, $src2}">, VEX_4V;
}
let Constraints = "$src1 = $dst", AddedComplexity = 20 in {
defm MOVL : sse12_mov_hilo_packed<0x12, VR128, movlp, "movlp",
"\t{$src2, $dst|$dst, $src2}">;
- defm MOVH : sse12_mov_hilo_packed<0x16, VR128, movlhps, "movhp",
- "\t{$src2, $dst|$dst, $src2}">;
}
def VMOVLPSmr : VPSI<0x13, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
@@ -386,6 +965,147 @@ def MOVLPDmr : PDI<0x13, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
[(store (f64 (vector_extract (v2f64 VR128:$src),
(iPTR 0))), addr:$dst)]>;
+let Predicates = [HasAVX] in {
+ let AddedComplexity = 20 in {
+ // vector_shuffle v1, (load v2) <4, 5, 2, 3> using MOVLPS
+ def : Pat<(v4f32 (movlp VR128:$src1, (load addr:$src2))),
+ (VMOVLPSrm VR128:$src1, addr:$src2)>;
+ def : Pat<(v4i32 (movlp VR128:$src1, (load addr:$src2))),
+ (VMOVLPSrm VR128:$src1, addr:$src2)>;
+ // vector_shuffle v1, (load v2) <2, 1> using MOVLPS
+ def : Pat<(v2f64 (movlp VR128:$src1, (load addr:$src2))),
+ (VMOVLPDrm VR128:$src1, addr:$src2)>;
+ def : Pat<(v2i64 (movlp VR128:$src1, (load addr:$src2))),
+ (VMOVLPDrm VR128:$src1, addr:$src2)>;
+ }
+
+ // (store (vector_shuffle (load addr), v2, <4, 5, 2, 3>), addr) using MOVLPS
+ def : Pat<(store (v4f32 (movlp (load addr:$src1), VR128:$src2)), addr:$src1),
+ (VMOVLPSmr addr:$src1, VR128:$src2)>;
+ def : Pat<(store (v4i32 (movlp (bc_v4i32 (loadv2i64 addr:$src1)),
+ VR128:$src2)), addr:$src1),
+ (VMOVLPSmr addr:$src1, VR128:$src2)>;
+
+ // (store (vector_shuffle (load addr), v2, <2, 1>), addr) using MOVLPS
+ def : Pat<(store (v2f64 (movlp (load addr:$src1), VR128:$src2)), addr:$src1),
+ (VMOVLPDmr addr:$src1, VR128:$src2)>;
+ def : Pat<(store (v2i64 (movlp (load addr:$src1), VR128:$src2)), addr:$src1),
+ (VMOVLPDmr addr:$src1, VR128:$src2)>;
+
+ // Shuffle with VMOVLPS
+ def : Pat<(v4f32 (X86Movlps VR128:$src1, (load addr:$src2))),
+ (VMOVLPSrm VR128:$src1, addr:$src2)>;
+ def : Pat<(v4i32 (X86Movlps VR128:$src1, (load addr:$src2))),
+ (VMOVLPSrm VR128:$src1, addr:$src2)>;
+ def : Pat<(X86Movlps VR128:$src1,
+ (bc_v4f32 (v2f64 (scalar_to_vector (loadf64 addr:$src2))))),
+ (VMOVLPSrm VR128:$src1, addr:$src2)>;
+
+ // Shuffle with VMOVLPD
+ def : Pat<(v2f64 (X86Movlpd VR128:$src1, (load addr:$src2))),
+ (VMOVLPDrm VR128:$src1, addr:$src2)>;
+ def : Pat<(v2i64 (X86Movlpd VR128:$src1, (load addr:$src2))),
+ (VMOVLPDrm VR128:$src1, addr:$src2)>;
+ def : Pat<(v2f64 (X86Movlpd VR128:$src1,
+ (scalar_to_vector (loadf64 addr:$src2)))),
+ (VMOVLPDrm VR128:$src1, addr:$src2)>;
+
+ // Store patterns
+ def : Pat<(store (v4f32 (X86Movlps (load addr:$src1), VR128:$src2)),
+ addr:$src1),
+ (VMOVLPSmr addr:$src1, VR128:$src2)>;
+ def : Pat<(store (v4i32 (X86Movlps
+ (bc_v4i32 (loadv2i64 addr:$src1)), VR128:$src2)), addr:$src1),
+ (VMOVLPSmr addr:$src1, VR128:$src2)>;
+ def : Pat<(store (v2f64 (X86Movlpd (load addr:$src1), VR128:$src2)),
+ addr:$src1),
+ (VMOVLPDmr addr:$src1, VR128:$src2)>;
+ def : Pat<(store (v2i64 (X86Movlpd (load addr:$src1), VR128:$src2)),
+ addr:$src1),
+ (VMOVLPDmr addr:$src1, VR128:$src2)>;
+}
+
+let Predicates = [HasSSE1] in {
+ let AddedComplexity = 20 in {
+ // vector_shuffle v1, (load v2) <4, 5, 2, 3> using MOVLPS
+ def : Pat<(v4f32 (movlp VR128:$src1, (load addr:$src2))),
+ (MOVLPSrm VR128:$src1, addr:$src2)>;
+ def : Pat<(v4i32 (movlp VR128:$src1, (load addr:$src2))),
+ (MOVLPSrm VR128:$src1, addr:$src2)>;
+ }
+
+ // (store (vector_shuffle (load addr), v2, <4, 5, 2, 3>), addr) using MOVLPS
+ def : Pat<(store (v4f32 (movlp (load addr:$src1), VR128:$src2)), addr:$src1),
+ (MOVLPSmr addr:$src1, VR128:$src2)>;
+ def : Pat<(store (v4i32 (movlp (bc_v4i32 (loadv2i64 addr:$src1)),
+ VR128:$src2)), addr:$src1),
+ (MOVLPSmr addr:$src1, VR128:$src2)>;
+
+ // Shuffle with MOVLPS
+ def : Pat<(v4f32 (X86Movlps VR128:$src1, (load addr:$src2))),
+ (MOVLPSrm VR128:$src1, addr:$src2)>;
+ def : Pat<(v4i32 (X86Movlps VR128:$src1, (load addr:$src2))),
+ (MOVLPSrm VR128:$src1, addr:$src2)>;
+ def : Pat<(X86Movlps VR128:$src1,
+ (bc_v4f32 (v2f64 (scalar_to_vector (loadf64 addr:$src2))))),
+ (MOVLPSrm VR128:$src1, addr:$src2)>;
+
+ // Store patterns
+ def : Pat<(store (v4f32 (X86Movlps (load addr:$src1), VR128:$src2)),
+ addr:$src1),
+ (MOVLPSmr addr:$src1, VR128:$src2)>;
+ def : Pat<(store (v4i32 (X86Movlps
+ (bc_v4i32 (loadv2i64 addr:$src1)), VR128:$src2)),
+ addr:$src1),
+ (MOVLPSmr addr:$src1, VR128:$src2)>;
+}
+
+let Predicates = [HasSSE2] in {
+ let AddedComplexity = 20 in {
+ // vector_shuffle v1, (load v2) <2, 1> using MOVLPS
+ def : Pat<(v2f64 (movlp VR128:$src1, (load addr:$src2))),
+ (MOVLPDrm VR128:$src1, addr:$src2)>;
+ def : Pat<(v2i64 (movlp VR128:$src1, (load addr:$src2))),
+ (MOVLPDrm VR128:$src1, addr:$src2)>;
+ }
+
+ // (store (vector_shuffle (load addr), v2, <2, 1>), addr) using MOVLPS
+ def : Pat<(store (v2f64 (movlp (load addr:$src1), VR128:$src2)), addr:$src1),
+ (MOVLPDmr addr:$src1, VR128:$src2)>;
+ def : Pat<(store (v2i64 (movlp (load addr:$src1), VR128:$src2)), addr:$src1),
+ (MOVLPDmr addr:$src1, VR128:$src2)>;
+
+ // Shuffle with MOVLPD
+ def : Pat<(v2f64 (X86Movlpd VR128:$src1, (load addr:$src2))),
+ (MOVLPDrm VR128:$src1, addr:$src2)>;
+ def : Pat<(v2i64 (X86Movlpd VR128:$src1, (load addr:$src2))),
+ (MOVLPDrm VR128:$src1, addr:$src2)>;
+ def : Pat<(v2f64 (X86Movlpd VR128:$src1,
+ (scalar_to_vector (loadf64 addr:$src2)))),
+ (MOVLPDrm VR128:$src1, addr:$src2)>;
+
+ // Store patterns
+ def : Pat<(store (v2f64 (X86Movlpd (load addr:$src1), VR128:$src2)),
+ addr:$src1),
+ (MOVLPDmr addr:$src1, VR128:$src2)>;
+ def : Pat<(store (v2i64 (X86Movlpd (load addr:$src1), VR128:$src2)),
+ addr:$src1),
+ (MOVLPDmr addr:$src1, VR128:$src2)>;
+}
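+
+// Note: the MOVLP patterns above are duplicated under explicit HasAVX,
+// HasSSE1, and HasSSE2 predicate blocks so that AVX targets select the
+// VEX-encoded VMOVLP* forms while plain SSE targets keep the legacy
+// encodings.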
+
+//===----------------------------------------------------------------------===//
+// SSE 1 & 2 - Move Hi packed FP Instructions
+//===----------------------------------------------------------------------===//
+
+let AddedComplexity = 20 in {
+ defm VMOVH : sse12_mov_hilo_packed<0x16, VR128, movlhps, "movhp",
+ "\t{$src2, $src1, $dst|$dst, $src1, $src2}">, VEX_4V;
+}
+let Constraints = "$src1 = $dst", AddedComplexity = 20 in {
+ defm MOVH : sse12_mov_hilo_packed<0x16, VR128, movlhps, "movhp",
+ "\t{$src2, $dst|$dst, $src2}">;
+}
+
// v2f64 extract element 1 is always custom lowered to unpack high to low
// and extract element 0 so the non-store version isn't too horrible.
def VMOVHPSmr : VPSI<0x17, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
@@ -411,6 +1131,80 @@ def MOVHPDmr : PDI<0x17, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
(v2f64 (unpckh VR128:$src, (undef))),
(iPTR 0))), addr:$dst)]>;
+let Predicates = [HasAVX] in {
+ // VMOVHPS patterns
+ def : Pat<(movlhps VR128:$src1, (bc_v4i32 (v2i64 (X86vzload addr:$src2)))),
+ (VMOVHPSrm (v4i32 VR128:$src1), addr:$src2)>;
+ def : Pat<(X86Movlhps VR128:$src1,
+ (bc_v4f32 (v2f64 (scalar_to_vector (loadf64 addr:$src2))))),
+ (VMOVHPSrm VR128:$src1, addr:$src2)>;
+ def : Pat<(X86Movlhps VR128:$src1,
+ (bc_v4i32 (v2i64 (X86vzload addr:$src2)))),
+ (VMOVHPSrm VR128:$src1, addr:$src2)>;
+
+ // FIXME: Instead of X86Unpcklpd, there should be an X86Movlhpd here. The
+ // problem arises during lowering, where it's not possible to recognize the
+ // load fold because it has two uses through a bitcast. One use disappears at
+ // isel time and the fold opportunity reappears.
+ def : Pat<(v2f64 (X86Unpcklpd VR128:$src1,
+ (scalar_to_vector (loadf64 addr:$src2)))),
+ (VMOVHPDrm VR128:$src1, addr:$src2)>;
+
+ // FIXME: This should be matched by an X86Movhpd instead. Same as above.
+ def : Pat<(v2f64 (X86Movlhpd VR128:$src1,
+ (scalar_to_vector (loadf64 addr:$src2)))),
+ (VMOVHPDrm VR128:$src1, addr:$src2)>;
+
+ // Store patterns
+ def : Pat<(store (f64 (vector_extract
+ (v2f64 (X86Unpckhps VR128:$src, (undef))), (iPTR 0))), addr:$dst),
+ (VMOVHPSmr addr:$dst, VR128:$src)>;
+ def : Pat<(store (f64 (vector_extract
+ (v2f64 (X86Unpckhpd VR128:$src, (undef))), (iPTR 0))), addr:$dst),
+ (VMOVHPDmr addr:$dst, VR128:$src)>;
+}
+
+let Predicates = [HasSSE1] in {
+ // MOVHPS patterns
+ def : Pat<(movlhps VR128:$src1, (bc_v4i32 (v2i64 (X86vzload addr:$src2)))),
+ (MOVHPSrm (v4i32 VR128:$src1), addr:$src2)>;
+ def : Pat<(X86Movlhps VR128:$src1,
+ (bc_v4f32 (v2f64 (scalar_to_vector (loadf64 addr:$src2))))),
+ (MOVHPSrm VR128:$src1, addr:$src2)>;
+ def : Pat<(X86Movlhps VR128:$src1,
+ (bc_v4f32 (v2i64 (X86vzload addr:$src2)))),
+ (MOVHPSrm VR128:$src1, addr:$src2)>;
+
+ // Store patterns
+ def : Pat<(store (f64 (vector_extract
+ (v2f64 (X86Unpckhps VR128:$src, (undef))), (iPTR 0))), addr:$dst),
+ (MOVHPSmr addr:$dst, VR128:$src)>;
+}
+
+let Predicates = [HasSSE2] in {
+ // FIXME: Instead of X86Unpcklpd, there should be an X86Movlhpd here. The
+ // problem arises during lowering, where it's not possible to recognize the
+ // load fold because it has two uses through a bitcast. One use disappears at
+ // isel time and the fold opportunity reappears.
+ def : Pat<(v2f64 (X86Unpcklpd VR128:$src1,
+ (scalar_to_vector (loadf64 addr:$src2)))),
+ (MOVHPDrm VR128:$src1, addr:$src2)>;
+
+ // FIXME: This should be matched by an X86Movhpd instead. Same as above.
+ def : Pat<(v2f64 (X86Movlhpd VR128:$src1,
+ (scalar_to_vector (loadf64 addr:$src2)))),
+ (MOVHPDrm VR128:$src1, addr:$src2)>;
+
+ // Store patterns
+ def : Pat<(store (f64 (vector_extract
+ (v2f64 (X86Unpckhpd VR128:$src, (undef))), (iPTR 0))),addr:$dst),
+ (MOVHPDmr addr:$dst, VR128:$src)>;
+}
+
+//===----------------------------------------------------------------------===//
+// SSE 1 & 2 - Move Low to High and High to Low packed FP Instructions
+//===----------------------------------------------------------------------===//
+
let AddedComplexity = 20 in {
def VMOVLHPSrr : VPSI<0x16, MRMSrcReg, (outs VR128:$dst),
(ins VR128:$src1, VR128:$src2),
@@ -438,13 +1232,80 @@ let Constraints = "$src1 = $dst", AddedComplexity = 20 in {
(v4f32 (movhlps VR128:$src1, VR128:$src2)))]>;
}
-def : Pat<(movlhps VR128:$src1, (bc_v4i32 (v2i64 (X86vzload addr:$src2)))),
- (MOVHPSrm (v4i32 VR128:$src1), addr:$src2)>;
-let AddedComplexity = 20 in {
- def : Pat<(v4f32 (movddup VR128:$src, (undef))),
- (MOVLHPSrr (v4f32 VR128:$src), (v4f32 VR128:$src))>;
- def : Pat<(v2i64 (movddup VR128:$src, (undef))),
- (MOVLHPSrr (v2i64 VR128:$src), (v2i64 VR128:$src))>;
+let Predicates = [HasAVX] in {
+ // MOVLHPS patterns
+ let AddedComplexity = 20 in {
+ def : Pat<(v4f32 (movddup VR128:$src, (undef))),
+ (VMOVLHPSrr (v4f32 VR128:$src), (v4f32 VR128:$src))>;
+ def : Pat<(v2i64 (movddup VR128:$src, (undef))),
+ (VMOVLHPSrr (v2i64 VR128:$src), (v2i64 VR128:$src))>;
+
+ // vector_shuffle v1, v2 <0, 1, 4, 5> using MOVLHPS
+ def : Pat<(v4i32 (movlhps VR128:$src1, VR128:$src2)),
+ (VMOVLHPSrr VR128:$src1, VR128:$src2)>;
+ }
+ def : Pat<(v4f32 (X86Movlhps VR128:$src1, VR128:$src2)),
+ (VMOVLHPSrr VR128:$src1, VR128:$src2)>;
+ def : Pat<(v4i32 (X86Movlhps VR128:$src1, VR128:$src2)),
+ (VMOVLHPSrr VR128:$src1, VR128:$src2)>;
+ def : Pat<(v2i64 (X86Movlhps VR128:$src1, VR128:$src2)),
+ (VMOVLHPSrr (v2i64 VR128:$src1), VR128:$src2)>;
+
+ // MOVHLPS patterns
+ let AddedComplexity = 20 in {
+ // vector_shuffle v1, v2 <6, 7, 2, 3> using MOVHLPS
+ def : Pat<(v4i32 (movhlps VR128:$src1, VR128:$src2)),
+ (VMOVHLPSrr VR128:$src1, VR128:$src2)>;
+
+ // vector_shuffle v1, undef <2, ?, ?, ?> using MOVHLPS
+ def : Pat<(v4f32 (movhlps_undef VR128:$src1, (undef))),
+ (VMOVHLPSrr VR128:$src1, VR128:$src1)>;
+ def : Pat<(v4i32 (movhlps_undef VR128:$src1, (undef))),
+ (VMOVHLPSrr VR128:$src1, VR128:$src1)>;
+ }
+
+ def : Pat<(v4f32 (X86Movhlps VR128:$src1, VR128:$src2)),
+ (VMOVHLPSrr VR128:$src1, VR128:$src2)>;
+ def : Pat<(v4i32 (X86Movhlps VR128:$src1, VR128:$src2)),
+ (VMOVHLPSrr VR128:$src1, VR128:$src2)>;
+}
+
+let Predicates = [HasSSE1] in {
+ // MOVLHPS patterns
+ let AddedComplexity = 20 in {
+ def : Pat<(v4f32 (movddup VR128:$src, (undef))),
+ (MOVLHPSrr (v4f32 VR128:$src), (v4f32 VR128:$src))>;
+ def : Pat<(v2i64 (movddup VR128:$src, (undef))),
+ (MOVLHPSrr (v2i64 VR128:$src), (v2i64 VR128:$src))>;
+
+ // vector_shuffle v1, v2 <0, 1, 4, 5> using MOVLHPS
+ def : Pat<(v4i32 (movlhps VR128:$src1, VR128:$src2)),
+ (MOVLHPSrr VR128:$src1, VR128:$src2)>;
+ }
+ def : Pat<(v4f32 (X86Movlhps VR128:$src1, VR128:$src2)),
+ (MOVLHPSrr VR128:$src1, VR128:$src2)>;
+ def : Pat<(v4i32 (X86Movlhps VR128:$src1, VR128:$src2)),
+ (MOVLHPSrr VR128:$src1, VR128:$src2)>;
+ def : Pat<(v2i64 (X86Movlhps VR128:$src1, VR128:$src2)),
+ (MOVLHPSrr (v2i64 VR128:$src1), VR128:$src2)>;
+
+ // MOVHLPS patterns
+ let AddedComplexity = 20 in {
+ // vector_shuffle v1, v2 <6, 7, 2, 3> using MOVHLPS
+ def : Pat<(v4i32 (movhlps VR128:$src1, VR128:$src2)),
+ (MOVHLPSrr VR128:$src1, VR128:$src2)>;
+
+ // vector_shuffle v1, undef <2, ?, ?, ?> using MOVHLPS
+ def : Pat<(v4f32 (movhlps_undef VR128:$src1, (undef))),
+ (MOVHLPSrr VR128:$src1, VR128:$src1)>;
+ def : Pat<(v4i32 (movhlps_undef VR128:$src1, (undef))),
+ (MOVHLPSrr VR128:$src1, VR128:$src1)>;
+ }
+
+ def : Pat<(v4f32 (X86Movhlps VR128:$src1, VR128:$src2)),
+ (MOVHLPSrr VR128:$src1, VR128:$src2)>;
+ def : Pat<(v4i32 (X86Movhlps VR128:$src1, VR128:$src2)),
+ (MOVHLPSrr VR128:$src1, VR128:$src2)>;
}
//===----------------------------------------------------------------------===//
@@ -462,10 +1323,9 @@ multiclass sse12_cvt_s<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC,
multiclass sse12_cvt_s_np<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC,
X86MemOperand x86memop, string asm> {
- def rr : SI<opc, MRMSrcReg, (outs DstRC:$dst), (ins SrcRC:$src), asm,
- []>;
- def rm : SI<opc, MRMSrcMem, (outs DstRC:$dst), (ins x86memop:$src), asm,
- []>;
+ def rr : SI<opc, MRMSrcReg, (outs DstRC:$dst), (ins SrcRC:$src), asm, []>;
+ let mayLoad = 1 in
+ def rm : SI<opc, MRMSrcMem, (outs DstRC:$dst), (ins x86memop:$src), asm, []>;
}
multiclass sse12_cvt_p<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC,
@@ -481,36 +1341,39 @@ multiclass sse12_vcvt_avx<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC,
X86MemOperand x86memop, string asm> {
def rr : SI<opc, MRMSrcReg, (outs DstRC:$dst), (ins DstRC:$src1, SrcRC:$src),
!strconcat(asm,"\t{$src, $src1, $dst|$dst, $src1, $src}"), []>;
+ let mayLoad = 1 in
def rm : SI<opc, MRMSrcMem, (outs DstRC:$dst),
(ins DstRC:$src1, x86memop:$src),
!strconcat(asm,"\t{$src, $src1, $dst|$dst, $src1, $src}"), []>;
}
defm VCVTTSS2SI : sse12_cvt_s<0x2C, FR32, GR32, fp_to_sint, f32mem, loadf32,
- "cvttss2si\t{$src, $dst|$dst, $src}">, XS, VEX;
+ "cvttss2si\t{$src, $dst|$dst, $src}">, XS, VEX,
+ VEX_LIG;
defm VCVTTSS2SI64 : sse12_cvt_s<0x2C, FR32, GR64, fp_to_sint, f32mem, loadf32,
"cvttss2si\t{$src, $dst|$dst, $src}">, XS, VEX,
- VEX_W;
+ VEX_W, VEX_LIG;
defm VCVTTSD2SI : sse12_cvt_s<0x2C, FR64, GR32, fp_to_sint, f64mem, loadf64,
- "cvttsd2si\t{$src, $dst|$dst, $src}">, XD, VEX;
+ "cvttsd2si\t{$src, $dst|$dst, $src}">, XD, VEX,
+ VEX_LIG;
defm VCVTTSD2SI64 : sse12_cvt_s<0x2C, FR64, GR64, fp_to_sint, f64mem, loadf64,
"cvttsd2si\t{$src, $dst|$dst, $src}">, XD,
- VEX, VEX_W;
+ VEX, VEX_W, VEX_LIG;
// The assembler can recognize rr 64-bit instructions by seeing an rxx
// register, but the same isn't true when only using memory operands, so
// provide other assembly "l" and "q" forms to address this explicitly
// where appropriate.
defm VCVTSI2SS : sse12_vcvt_avx<0x2A, GR32, FR32, i32mem, "cvtsi2ss">, XS,
- VEX_4V;
+ VEX_4V, VEX_LIG;
defm VCVTSI2SS64 : sse12_vcvt_avx<0x2A, GR64, FR32, i64mem, "cvtsi2ss{q}">, XS,
- VEX_4V, VEX_W;
+ VEX_4V, VEX_W, VEX_LIG;
defm VCVTSI2SD : sse12_vcvt_avx<0x2A, GR32, FR64, i32mem, "cvtsi2sd">, XD,
- VEX_4V;
+ VEX_4V, VEX_LIG;
defm VCVTSI2SDL : sse12_vcvt_avx<0x2A, GR32, FR64, i32mem, "cvtsi2sd{l}">, XD,
- VEX_4V;
+ VEX_4V, VEX_LIG;
defm VCVTSI2SD64 : sse12_vcvt_avx<0x2A, GR64, FR64, i64mem, "cvtsi2sd{q}">, XD,
- VEX_4V, VEX_W;
+ VEX_4V, VEX_W, VEX_LIG;
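+
+// Note: VEX_LIG marks scalar VEX instructions that ignore the VEX.L bit, so
+// encodings with either L value are accepted; this is why it appears on these
+// scalar conversions but not on the packed forms.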
let Predicates = [HasAVX] in {
def : Pat<(f32 (sint_to_fp (loadi32 addr:$src))),
@@ -579,11 +1442,6 @@ multiclass sse12_cvt_sint_3addr<bits<8> opc, RegisterClass SrcRC,
[(set DstRC:$dst, (Int DstRC:$src1, (ld_frag addr:$src2)))]>;
}
-defm Int_VCVTSS2SI : sse12_cvt_sint<0x2D, VR128, GR32, int_x86_sse_cvtss2si,
- f32mem, load, "cvtss2si">, XS, VEX;
-defm Int_VCVTSS2SI64 : sse12_cvt_sint<0x2D, VR128, GR64,
- int_x86_sse_cvtss2si64, f32mem, load, "cvtss2si">,
- XS, VEX, VEX_W;
defm Int_VCVTSD2SI : sse12_cvt_sint<0x2D, VR128, GR32, int_x86_sse2_cvtsd2si,
f128mem, load, "cvtsd2si">, XD, VEX;
defm Int_VCVTSD2SI64 : sse12_cvt_sint<0x2D, VR128, GR64,
@@ -594,14 +1452,12 @@ defm Int_VCVTSD2SI64 : sse12_cvt_sint<0x2D, VR128, GR64,
// Get rid of this hack or rename the intrinsics; there are several
// instructions that only match with the intrinsic form. Why create
// duplicates just to let them be recognized by the assembler?
-defm VCVTSD2SI_alt : sse12_cvt_s_np<0x2D, FR64, GR32, f64mem,
- "cvtsd2si\t{$src, $dst|$dst, $src}">, XD, VEX;
+defm VCVTSD2SI : sse12_cvt_s_np<0x2D, FR64, GR32, f64mem,
+ "cvtsd2si\t{$src, $dst|$dst, $src}">, XD, VEX, VEX_LIG;
defm VCVTSD2SI64 : sse12_cvt_s_np<0x2D, FR64, GR64, f64mem,
- "cvtsd2si\t{$src, $dst|$dst, $src}">, XD, VEX, VEX_W;
-defm Int_CVTSS2SI : sse12_cvt_sint<0x2D, VR128, GR32, int_x86_sse_cvtss2si,
- f32mem, load, "cvtss2si">, XS;
-defm Int_CVTSS2SI64 : sse12_cvt_sint<0x2D, VR128, GR64, int_x86_sse_cvtss2si64,
- f32mem, load, "cvtss2si{q}">, XS, REX_W;
+ "cvtsd2si\t{$src, $dst|$dst, $src}">, XD, VEX, VEX_W,
+ VEX_LIG;
+
defm CVTSD2SI : sse12_cvt_sint<0x2D, VR128, GR32, int_x86_sse2_cvtsd2si,
f128mem, load, "cvtsd2si{l}">, XD;
defm CVTSD2SI64 : sse12_cvt_sint<0x2D, VR128, GR64, int_x86_sse2_cvtsd2si64,
@@ -660,10 +1516,11 @@ defm Int_CVTTSD2SI64 : sse12_cvt_sint<0x2C, VR128, GR64,
let Pattern = []<dag> in {
defm VCVTSS2SI : sse12_cvt_s<0x2D, FR32, GR32, undef, f32mem, load,
- "cvtss2si{l}\t{$src, $dst|$dst, $src}">, XS, VEX;
+ "cvtss2si{l}\t{$src, $dst|$dst, $src}">, XS,
+ VEX, VEX_LIG;
defm VCVTSS2SI64 : sse12_cvt_s<0x2D, FR32, GR64, undef, f32mem, load,
"cvtss2si\t{$src, $dst|$dst, $src}">, XS, VEX,
- VEX_W;
+ VEX_W, VEX_LIG;
defm VCVTDQ2PS : sse12_cvt_p<0x5B, VR128, VR128, undef, i128mem, load,
"cvtdq2ps\t{$src, $dst|$dst, $src}",
SSEPackedSingle>, TB, VEX;
@@ -671,6 +1528,7 @@ defm VCVTDQ2PSY : sse12_cvt_p<0x5B, VR256, VR256, undef, i256mem, load,
"cvtdq2ps\t{$src, $dst|$dst, $src}",
SSEPackedSingle>, TB, VEX;
}
+
let Pattern = []<dag> in {
defm CVTSS2SI : sse12_cvt_s<0x2D, FR32, GR32, undef, f32mem, load /*dummy*/,
"cvtss2si{l}\t{$src, $dst|$dst, $src}">, XS;
@@ -681,19 +1539,43 @@ defm CVTDQ2PS : sse12_cvt_p<0x5B, VR128, VR128, undef, i128mem, load /*dummy*/,
SSEPackedSingle>, TB; /* PD SSE3 form is available */
}
+let Predicates = [HasSSE1] in {
+ def : Pat<(int_x86_sse_cvtss2si VR128:$src),
+ (CVTSS2SIrr (EXTRACT_SUBREG (v4f32 VR128:$src), sub_ss))>;
+ def : Pat<(int_x86_sse_cvtss2si (load addr:$src)),
+ (CVTSS2SIrm addr:$src)>;
+ def : Pat<(int_x86_sse_cvtss2si64 VR128:$src),
+ (CVTSS2SI64rr (EXTRACT_SUBREG (v4f32 VR128:$src), sub_ss))>;
+ def : Pat<(int_x86_sse_cvtss2si64 (load addr:$src)),
+ (CVTSS2SI64rm addr:$src)>;
+}
+
+let Predicates = [HasAVX] in {
+ def : Pat<(int_x86_sse_cvtss2si VR128:$src),
+ (VCVTSS2SIrr (EXTRACT_SUBREG (v4f32 VR128:$src), sub_ss))>;
+ def : Pat<(int_x86_sse_cvtss2si (load addr:$src)),
+ (VCVTSS2SIrm addr:$src)>;
+ def : Pat<(int_x86_sse_cvtss2si64 VR128:$src),
+ (VCVTSS2SI64rr (EXTRACT_SUBREG (v4f32 VR128:$src), sub_ss))>;
+ def : Pat<(int_x86_sse_cvtss2si64 (load addr:$src)),
+ (VCVTSS2SI64rm addr:$src)>;
+}
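+
+// Note: the cvtss2si intrinsics take a v4f32 operand while the CVTSS2SI
+// instructions take an FR32, so the register patterns above extract the
+// scalar element with EXTRACT_SUBREG ... sub_ss first; the load forms fold
+// the memory operand directly.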
+
/// SSE 2 Only
// Convert scalar double to scalar single
def VCVTSD2SSrr : VSDI<0x5A, MRMSrcReg, (outs FR32:$dst),
(ins FR64:$src1, FR64:$src2),
"cvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
- VEX_4V;
+ VEX_4V, VEX_LIG;
+let mayLoad = 1 in
def VCVTSD2SSrm : I<0x5A, MRMSrcMem, (outs FR32:$dst),
(ins FR64:$src1, f64mem:$src2),
"vcvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- []>, XD, Requires<[HasAVX, OptForSize]>, VEX_4V;
+ []>, XD, Requires<[HasAVX, OptForSize]>, VEX_4V, VEX_LIG;
+
def : Pat<(f32 (fround FR64:$src)), (VCVTSD2SSrr FR64:$src, FR64:$src)>,
- Requires<[HasAVX]>;
+ Requires<[HasAVX]>;
def CVTSD2SSrr : SDI<0x5A, MRMSrcReg, (outs FR32:$dst), (ins FR64:$src),
"cvtsd2ss\t{$src, $dst|$dst, $src}",
@@ -715,13 +1597,25 @@ defm Int_CVTSD2SS: sse12_cvt_sint_3addr<0x5A, VR128, VR128,
def VCVTSS2SDrr : I<0x5A, MRMSrcReg, (outs FR64:$dst),
(ins FR32:$src1, FR32:$src2),
"vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- []>, XS, Requires<[HasAVX]>, VEX_4V;
+ []>, XS, Requires<[HasAVX]>, VEX_4V, VEX_LIG;
+let mayLoad = 1 in
def VCVTSS2SDrm : I<0x5A, MRMSrcMem, (outs FR64:$dst),
(ins FR32:$src1, f32mem:$src2),
"vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- []>, XS, VEX_4V, Requires<[HasAVX, OptForSize]>;
-def : Pat<(f64 (fextend FR32:$src)), (VCVTSS2SDrr FR32:$src, FR32:$src)>,
- Requires<[HasAVX]>;
+ []>, XS, VEX_4V, VEX_LIG, Requires<[HasAVX, OptForSize]>;
+
+let Predicates = [HasAVX] in {
+ def : Pat<(f64 (fextend FR32:$src)),
+ (VCVTSS2SDrr FR32:$src, FR32:$src)>;
+ def : Pat<(fextend (loadf32 addr:$src)),
+ (VCVTSS2SDrm (f32 (IMPLICIT_DEF)), addr:$src)>;
+ def : Pat<(extloadf32 addr:$src),
+ (VCVTSS2SDrm (f32 (IMPLICIT_DEF)), addr:$src)>;
+}
+
+def : Pat<(extloadf32 addr:$src),
+ (VCVTSS2SDrr (f32 (IMPLICIT_DEF)), (MOVSSrm addr:$src))>,
+ Requires<[HasAVX, OptForSpeed]>;
def CVTSS2SDrr : I<0x5A, MRMSrcReg, (outs FR64:$dst), (ins FR32:$src),
"cvtss2sd\t{$src, $dst|$dst, $src}",
@@ -732,6 +1626,16 @@ def CVTSS2SDrm : I<0x5A, MRMSrcMem, (outs FR64:$dst), (ins f32mem:$src),
[(set FR64:$dst, (extloadf32 addr:$src))]>, XS,
Requires<[HasSSE2, OptForSize]>;
+// extload f32 -> f64. This matches load+fextend because we have a hack in
+// the isel (PreprocessForFPConvert) that can introduce loads after DAG
+// combine. Since these loads aren't folded into the fextend, we have to
+// match the combination explicitly here.
+def : Pat<(fextend (loadf32 addr:$src)),
+ (CVTSS2SDrm addr:$src)>, Requires<[HasSSE2]>;
+def : Pat<(extloadf32 addr:$src),
+ (CVTSS2SDrr (MOVSSrm addr:$src))>, Requires<[HasSSE2, OptForSpeed]>;
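+
+// Note: under OptForSpeed the extload is split into a MOVSSrm load plus a
+// register-to-register CVTSS2SDrr, while the folded CVTSS2SDrm form above is
+// kept for OptForSize; presumably this trades an extra instruction for
+// avoiding the memory-form conversion on hot paths.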
+
def Int_VCVTSS2SDrr: I<0x5A, MRMSrcReg,
(outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
"vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
@@ -759,10 +1663,6 @@ def Int_CVTSS2SDrm: I<0x5A, MRMSrcMem,
Requires<[HasSSE2]>;
}
-def : Pat<(extloadf32 addr:$src),
- (CVTSS2SDrr (MOVSSrm addr:$src))>,
- Requires<[HasSSE2, OptForSpeed]>;
-
// Convert doubleword to packed single/double fp
// SSE2 instructions without OpSize prefix
def Int_VCVTDQ2PSrr : I<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
@@ -862,10 +1762,12 @@ def Int_CVTPD2DQrm : I<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
// SSE2 packed instructions with XS prefix
def VCVTTPS2DQrr : VSSI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"cvttps2dq\t{$src, $dst|$dst, $src}", []>, VEX;
+let mayLoad = 1 in
def VCVTTPS2DQrm : VSSI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
"cvttps2dq\t{$src, $dst|$dst, $src}", []>, VEX;
def VCVTTPS2DQYrr : VSSI<0x5B, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
"cvttps2dq\t{$src, $dst|$dst, $src}", []>, VEX;
+let mayLoad = 1 in
def VCVTTPS2DQYrm : VSSI<0x5B, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src),
"cvttps2dq\t{$src, $dst|$dst, $src}", []>, VEX;
def CVTTPS2DQrr : SSI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
@@ -877,7 +1779,6 @@ def CVTTPS2DQrm : SSI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
[(set VR128:$dst,
(int_x86_sse2_cvttps2dq (memop addr:$src)))]>;
-
def Int_VCVTTPS2DQrr : I<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"vcvttps2dq\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
@@ -889,16 +1790,33 @@ def Int_VCVTTPS2DQrm : I<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
(memop addr:$src)))]>,
XS, VEX, Requires<[HasAVX]>;
-def Int_VCVTTPD2DQrr : VPDI<0xE6, MRMSrcReg, (outs VR128:$dst),
- (ins VR128:$src),
- "cvttpd2dq\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst, (int_x86_sse2_cvttpd2dq VR128:$src))]>,
- VEX;
-def Int_VCVTTPD2DQrm : VPDI<0xE6, MRMSrcMem, (outs VR128:$dst),
- (ins f128mem:$src),
- "cvttpd2dq\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst, (int_x86_sse2_cvttpd2dq
- (memop addr:$src)))]>, VEX;
+let Predicates = [HasSSE2] in {
+ def : Pat<(v4f32 (sint_to_fp (v4i32 VR128:$src))),
+ (Int_CVTDQ2PSrr VR128:$src)>;
+ def : Pat<(v4i32 (fp_to_sint (v4f32 VR128:$src))),
+ (CVTTPS2DQrr VR128:$src)>;
+}
+
+let Predicates = [HasAVX] in {
+ def : Pat<(v4f32 (sint_to_fp (v4i32 VR128:$src))),
+ (Int_VCVTDQ2PSrr VR128:$src)>;
+ def : Pat<(v4i32 (fp_to_sint (v4f32 VR128:$src))),
+ (VCVTTPS2DQrr VR128:$src)>;
+ def : Pat<(v8f32 (sint_to_fp (v8i32 VR256:$src))),
+ (VCVTDQ2PSYrr VR256:$src)>;
+ def : Pat<(v8i32 (fp_to_sint (v8f32 VR256:$src))),
+ (VCVTTPS2DQYrr VR256:$src)>;
+}
+
+def VCVTTPD2DQrr : VPDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ "cvttpd2dq\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst,
+ (int_x86_sse2_cvttpd2dq VR128:$src))]>, VEX;
+let isCodeGenOnly = 1 in
+def VCVTTPD2DQrm : VPDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
+ "cvttpd2dq\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (int_x86_sse2_cvttpd2dq
+ (memop addr:$src)))]>, VEX;
def CVTTPD2DQrr : PDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"cvttpd2dq\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (int_x86_sse2_cvttpd2dq VR128:$src))]>;
@@ -910,8 +1828,6 @@ def CVTTPD2DQrm : PDI<0xE6, MRMSrcMem, (outs VR128:$dst),(ins f128mem:$src),
// The assembler can recognize rr 256-bit instructions by seeing a ymm
// register, but the same isn't true when using memory operands instead.
// Provide other assembly rr and rm forms to address this explicitly.
-def VCVTTPD2DQrr : VPDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
- "cvttpd2dq\t{$src, $dst|$dst, $src}", []>, VEX;
def VCVTTPD2DQXrYr : VPDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR256:$src),
"cvttpd2dq\t{$src, $dst|$dst, $src}", []>, VEX;
@@ -931,13 +1847,13 @@ def VCVTTPD2DQYrm : VPDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f256mem:$src),
let Predicates = [HasAVX] in {
// SSE2 instructions without OpSize prefix
def VCVTPS2PDrr : I<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
- "vcvtps2pd\t{$src, $dst|$dst, $src}", []>, VEX;
+ "vcvtps2pd\t{$src, $dst|$dst, $src}", []>, TB, VEX;
def VCVTPS2PDrm : I<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src),
- "vcvtps2pd\t{$src, $dst|$dst, $src}", []>, VEX;
+ "vcvtps2pd\t{$src, $dst|$dst, $src}", []>, TB, VEX;
def VCVTPS2PDYrr : I<0x5A, MRMSrcReg, (outs VR256:$dst), (ins VR128:$src),
- "vcvtps2pd\t{$src, $dst|$dst, $src}", []>, VEX;
+ "vcvtps2pd\t{$src, $dst|$dst, $src}", []>, TB, VEX;
def VCVTPS2PDYrm : I<0x5A, MRMSrcMem, (outs VR256:$dst), (ins f128mem:$src),
- "vcvtps2pd\t{$src, $dst|$dst, $src}", []>, VEX;
+ "vcvtps2pd\t{$src, $dst|$dst, $src}", []>, TB, VEX;
}
def CVTPS2PDrr : I<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"cvtps2pd\t{$src, $dst|$dst, $src}", []>, TB;
@@ -947,12 +1863,12 @@ def CVTPS2PDrm : I<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src),
def Int_VCVTPS2PDrr : I<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"vcvtps2pd\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (int_x86_sse2_cvtps2pd VR128:$src))]>,
- VEX, Requires<[HasAVX]>;
+ TB, VEX, Requires<[HasAVX]>;
def Int_VCVTPS2PDrm : I<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src),
"vcvtps2pd\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (int_x86_sse2_cvtps2pd
(load addr:$src)))]>,
- VEX, Requires<[HasAVX]>;
+ TB, VEX, Requires<[HasAVX]>;
def Int_CVTPS2PDrr : I<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"cvtps2pd\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (int_x86_sse2_cvtps2pd VR128:$src))]>,
@@ -1038,75 +1954,61 @@ def : Pat<(int_x86_avx_cvtt_ps2dq_256 VR256:$src),
def : Pat<(int_x86_avx_cvtt_ps2dq_256 (memopv8f32 addr:$src)),
(VCVTTPS2DQYrm addr:$src)>;
+// Match fround and fextend for 128/256-bit conversions
+def : Pat<(v4f32 (fround (v4f64 VR256:$src))),
+ (VCVTPD2PSYrr VR256:$src)>;
+def : Pat<(v4f32 (fround (loadv4f64 addr:$src))),
+ (VCVTPD2PSYrm addr:$src)>;
+
+def : Pat<(v4f64 (fextend (v4f32 VR128:$src))),
+ (VCVTPS2PDYrr VR128:$src)>;
+def : Pat<(v4f64 (fextend (loadv4f32 addr:$src))),
+ (VCVTPS2PDYrm addr:$src)>;
+
//===----------------------------------------------------------------------===//
// SSE 1 & 2 - Compare Instructions
//===----------------------------------------------------------------------===//
// sse12_cmp_scalar - sse 1 & 2 compare scalar instructions
multiclass sse12_cmp_scalar<RegisterClass RC, X86MemOperand x86memop,
+ SDNode OpNode, ValueType VT, PatFrag ld_frag,
string asm, string asm_alt> {
- let isAsmParserOnly = 1 in {
- def rr : SIi8<0xC2, MRMSrcReg,
- (outs RC:$dst), (ins RC:$src1, RC:$src, SSECC:$cc),
- asm, []>;
- let mayLoad = 1 in
- def rm : SIi8<0xC2, MRMSrcMem,
- (outs RC:$dst), (ins RC:$src1, x86memop:$src, SSECC:$cc),
- asm, []>;
- }
+ def rr : SIi8<0xC2, MRMSrcReg,
+ (outs RC:$dst), (ins RC:$src1, RC:$src2, SSECC:$cc), asm,
+ [(set RC:$dst, (OpNode (VT RC:$src1), RC:$src2, imm:$cc))]>;
+ def rm : SIi8<0xC2, MRMSrcMem,
+ (outs RC:$dst), (ins RC:$src1, x86memop:$src2, SSECC:$cc), asm,
+ [(set RC:$dst, (OpNode (VT RC:$src1),
+ (ld_frag addr:$src2), imm:$cc))]>;
// Accept explicit immediate argument form instead of comparison code.
- def rr_alt : SIi8<0xC2, MRMSrcReg,
- (outs RC:$dst), (ins RC:$src1, RC:$src, i8imm:$src2),
- asm_alt, []>;
- let mayLoad = 1 in
- def rm_alt : SIi8<0xC2, MRMSrcMem,
- (outs RC:$dst), (ins RC:$src1, x86memop:$src, i8imm:$src2),
- asm_alt, []>;
+ let neverHasSideEffects = 1 in {
+ def rr_alt : SIi8<0xC2, MRMSrcReg, (outs RC:$dst),
+ (ins RC:$src1, RC:$src2, i8imm:$cc), asm_alt, []>;
+ let mayLoad = 1 in
+ def rm_alt : SIi8<0xC2, MRMSrcMem, (outs RC:$dst),
+ (ins RC:$src1, x86memop:$src2, i8imm:$cc), asm_alt, []>;
+ }
}
-let neverHasSideEffects = 1 in {
- defm VCMPSS : sse12_cmp_scalar<FR32, f32mem,
- "cmp${cc}ss\t{$src, $src1, $dst|$dst, $src1, $src}",
- "cmpss\t{$src2, $src, $src1, $dst|$dst, $src1, $src, $src2}">,
- XS, VEX_4V;
- defm VCMPSD : sse12_cmp_scalar<FR64, f64mem,
- "cmp${cc}sd\t{$src, $src1, $dst|$dst, $src1, $src}",
- "cmpsd\t{$src2, $src, $src1, $dst|$dst, $src1, $src, $src2}">,
- XD, VEX_4V;
-}
+defm VCMPSS : sse12_cmp_scalar<FR32, f32mem, X86cmpss, f32, loadf32,
+ "cmp${cc}ss\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ "cmpss\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}">,
+ XS, VEX_4V, VEX_LIG;
+defm VCMPSD : sse12_cmp_scalar<FR64, f64mem, X86cmpsd, f64, loadf64,
+ "cmp${cc}sd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ "cmpsd\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}">,
+ XD, VEX_4V, VEX_LIG;
let Constraints = "$src1 = $dst" in {
-def CMPSSrr : SIi8<0xC2, MRMSrcReg,
- (outs FR32:$dst), (ins FR32:$src1, FR32:$src2, SSECC:$cc),
+ defm CMPSS : sse12_cmp_scalar<FR32, f32mem, X86cmpss, f32, loadf32,
"cmp${cc}ss\t{$src2, $dst|$dst, $src2}",
- [(set FR32:$dst, (X86cmpss (f32 FR32:$src1), FR32:$src2, imm:$cc))]>, XS;
-def CMPSSrm : SIi8<0xC2, MRMSrcMem,
- (outs FR32:$dst), (ins FR32:$src1, f32mem:$src2, SSECC:$cc),
- "cmp${cc}ss\t{$src2, $dst|$dst, $src2}",
- [(set FR32:$dst, (X86cmpss (f32 FR32:$src1), (loadf32 addr:$src2), imm:$cc))]>, XS;
-def CMPSDrr : SIi8<0xC2, MRMSrcReg,
- (outs FR64:$dst), (ins FR64:$src1, FR64:$src2, SSECC:$cc),
- "cmp${cc}sd\t{$src2, $dst|$dst, $src2}",
- [(set FR64:$dst, (X86cmpsd (f64 FR64:$src1), FR64:$src2, imm:$cc))]>, XD;
-def CMPSDrm : SIi8<0xC2, MRMSrcMem,
- (outs FR64:$dst), (ins FR64:$src1, f64mem:$src2, SSECC:$cc),
+ "cmpss\t{$cc, $src2, $dst|$dst, $src2, $cc}">,
+ XS;
+ defm CMPSD : sse12_cmp_scalar<FR64, f64mem, X86cmpsd, f64, loadf64,
"cmp${cc}sd\t{$src2, $dst|$dst, $src2}",
- [(set FR64:$dst, (X86cmpsd (f64 FR64:$src1), (loadf64 addr:$src2), imm:$cc))]>, XD;
-}
-let Constraints = "$src1 = $dst", neverHasSideEffects = 1 in {
-def CMPSSrr_alt : SIi8<0xC2, MRMSrcReg,
- (outs FR32:$dst), (ins FR32:$src1, FR32:$src, i8imm:$src2),
- "cmpss\t{$src2, $src, $dst|$dst, $src, $src2}", []>, XS;
-def CMPSSrm_alt : SIi8<0xC2, MRMSrcMem,
- (outs FR32:$dst), (ins FR32:$src1, f32mem:$src, i8imm:$src2),
- "cmpss\t{$src2, $src, $dst|$dst, $src, $src2}", []>, XS;
-def CMPSDrr_alt : SIi8<0xC2, MRMSrcReg,
- (outs FR64:$dst), (ins FR64:$src1, FR64:$src, i8imm:$src2),
- "cmpsd\t{$src2, $src, $dst|$dst, $src, $src2}", []>, XD;
-def CMPSDrm_alt : SIi8<0xC2, MRMSrcMem,
- (outs FR64:$dst), (ins FR64:$src1, f64mem:$src, i8imm:$src2),
- "cmpsd\t{$src2, $src, $dst|$dst, $src, $src2}", []>, XD;
+ "cmpsd\t{$cc, $src2, $dst|$dst, $src2, $cc}">,
+ XD;
}
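+
+// Note: the primary forms above bake the condition code into the mnemonic
+// (cmp${cc}ss / cmp${cc}sd), while the _alt forms in the multiclass take an
+// explicit i8 immediate so the assembler can also encode condition codes
+// that have no mnemonic alias.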
multiclass sse12_cmp_scalar_int<RegisterClass RC, X86MemOperand x86memop,
@@ -1151,25 +2053,28 @@ multiclass sse12_ord_cmp<bits<8> opc, RegisterClass RC, SDNode OpNode,
let Defs = [EFLAGS] in {
defm VUCOMISS : sse12_ord_cmp<0x2E, FR32, X86cmp, f32, f32mem, loadf32,
- "ucomiss", SSEPackedSingle>, VEX;
+ "ucomiss", SSEPackedSingle>, TB, VEX, VEX_LIG;
defm VUCOMISD : sse12_ord_cmp<0x2E, FR64, X86cmp, f64, f64mem, loadf64,
- "ucomisd", SSEPackedDouble>, OpSize, VEX;
+ "ucomisd", SSEPackedDouble>, TB, OpSize, VEX,
+ VEX_LIG;
let Pattern = []<dag> in {
defm VCOMISS : sse12_ord_cmp<0x2F, VR128, undef, v4f32, f128mem, load,
- "comiss", SSEPackedSingle>, VEX;
+ "comiss", SSEPackedSingle>, TB, VEX,
+ VEX_LIG;
defm VCOMISD : sse12_ord_cmp<0x2F, VR128, undef, v2f64, f128mem, load,
- "comisd", SSEPackedDouble>, OpSize, VEX;
+ "comisd", SSEPackedDouble>, TB, OpSize, VEX,
+ VEX_LIG;
}
defm Int_VUCOMISS : sse12_ord_cmp<0x2E, VR128, X86ucomi, v4f32, f128mem,
- load, "ucomiss", SSEPackedSingle>, VEX;
+ load, "ucomiss", SSEPackedSingle>, TB, VEX;
defm Int_VUCOMISD : sse12_ord_cmp<0x2E, VR128, X86ucomi, v2f64, f128mem,
- load, "ucomisd", SSEPackedDouble>, OpSize, VEX;
+ load, "ucomisd", SSEPackedDouble>, TB, OpSize, VEX;
defm Int_VCOMISS : sse12_ord_cmp<0x2F, VR128, X86comi, v4f32, f128mem,
- load, "comiss", SSEPackedSingle>, VEX;
+ load, "comiss", SSEPackedSingle>, TB, VEX;
defm Int_VCOMISD : sse12_ord_cmp<0x2F, VR128, X86comi, v2f64, f128mem,
- load, "comisd", SSEPackedDouble>, OpSize, VEX;
+ load, "comisd", SSEPackedDouble>, TB, OpSize, VEX;
defm UCOMISS : sse12_ord_cmp<0x2E, FR32, X86cmp, f32, f32mem, loadf32,
"ucomiss", SSEPackedSingle>, TB;
defm UCOMISD : sse12_ord_cmp<0x2E, FR64, X86cmp, f64, f64mem, loadf64,
@@ -1199,57 +2104,82 @@ multiclass sse12_cmp_packed<RegisterClass RC, X86MemOperand x86memop,
Domain d> {
let isAsmParserOnly = 1 in {
def rri : PIi8<0xC2, MRMSrcReg,
- (outs RC:$dst), (ins RC:$src1, RC:$src, SSECC:$cc), asm,
- [(set RC:$dst, (Int RC:$src1, RC:$src, imm:$cc))], d>;
+ (outs RC:$dst), (ins RC:$src1, RC:$src2, SSECC:$cc), asm,
+ [(set RC:$dst, (Int RC:$src1, RC:$src2, imm:$cc))], d>;
def rmi : PIi8<0xC2, MRMSrcMem,
- (outs RC:$dst), (ins RC:$src1, f128mem:$src, SSECC:$cc), asm,
- [(set RC:$dst, (Int RC:$src1, (memop addr:$src), imm:$cc))], d>;
+ (outs RC:$dst), (ins RC:$src1, f128mem:$src2, SSECC:$cc), asm,
+ [(set RC:$dst, (Int RC:$src1, (memop addr:$src2), imm:$cc))], d>;
}
// Accept explicit immediate argument form instead of comparison code.
def rri_alt : PIi8<0xC2, MRMSrcReg,
- (outs RC:$dst), (ins RC:$src1, RC:$src, i8imm:$src2),
+ (outs RC:$dst), (ins RC:$src1, RC:$src2, i8imm:$cc),
asm_alt, [], d>;
def rmi_alt : PIi8<0xC2, MRMSrcMem,
- (outs RC:$dst), (ins RC:$src1, f128mem:$src, i8imm:$src2),
+ (outs RC:$dst), (ins RC:$src1, f128mem:$src2, i8imm:$cc),
asm_alt, [], d>;
}
defm VCMPPS : sse12_cmp_packed<VR128, f128mem, int_x86_sse_cmp_ps,
- "cmp${cc}ps\t{$src, $src1, $dst|$dst, $src1, $src}",
- "cmpps\t{$src2, $src, $src1, $dst|$dst, $src1, $src, $src2}",
- SSEPackedSingle>, VEX_4V;
+ "cmp${cc}ps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ "cmpps\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}",
+ SSEPackedSingle>, TB, VEX_4V;
defm VCMPPD : sse12_cmp_packed<VR128, f128mem, int_x86_sse2_cmp_pd,
- "cmp${cc}pd\t{$src, $src1, $dst|$dst, $src1, $src}",
- "cmppd\t{$src2, $src, $src1, $dst|$dst, $src1, $src, $src2}",
- SSEPackedDouble>, OpSize, VEX_4V;
+ "cmp${cc}pd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ "cmppd\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}",
+ SSEPackedDouble>, TB, OpSize, VEX_4V;
defm VCMPPSY : sse12_cmp_packed<VR256, f256mem, int_x86_avx_cmp_ps_256,
- "cmp${cc}ps\t{$src, $src1, $dst|$dst, $src1, $src}",
- "cmpps\t{$src2, $src, $src1, $dst|$dst, $src1, $src, $src2}",
- SSEPackedSingle>, VEX_4V;
+ "cmp${cc}ps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ "cmpps\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}",
+ SSEPackedSingle>, TB, VEX_4V;
defm VCMPPDY : sse12_cmp_packed<VR256, f256mem, int_x86_avx_cmp_pd_256,
- "cmp${cc}pd\t{$src, $src1, $dst|$dst, $src1, $src}",
- "cmppd\t{$src2, $src, $src1, $dst|$dst, $src1, $src, $src2}",
- SSEPackedDouble>, OpSize, VEX_4V;
+ "cmp${cc}pd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ "cmppd\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}",
+ SSEPackedDouble>, TB, OpSize, VEX_4V;
let Constraints = "$src1 = $dst" in {
defm CMPPS : sse12_cmp_packed<VR128, f128mem, int_x86_sse_cmp_ps,
- "cmp${cc}ps\t{$src, $dst|$dst, $src}",
- "cmpps\t{$src2, $src, $dst|$dst, $src, $src2}",
+ "cmp${cc}ps\t{$src2, $dst|$dst, $src2}",
+ "cmpps\t{$cc, $src2, $dst|$dst, $src2, $cc}",
SSEPackedSingle>, TB;
defm CMPPD : sse12_cmp_packed<VR128, f128mem, int_x86_sse2_cmp_pd,
- "cmp${cc}pd\t{$src, $dst|$dst, $src}",
- "cmppd\t{$src2, $src, $dst|$dst, $src, $src2}",
+ "cmp${cc}pd\t{$src2, $dst|$dst, $src2}",
+ "cmppd\t{$cc, $src2, $dst|$dst, $src2, $cc}",
SSEPackedDouble>, TB, OpSize;
}
+let Predicates = [HasSSE1] in {
def : Pat<(v4i32 (X86cmpps (v4f32 VR128:$src1), VR128:$src2, imm:$cc)),
(CMPPSrri (v4f32 VR128:$src1), (v4f32 VR128:$src2), imm:$cc)>;
def : Pat<(v4i32 (X86cmpps (v4f32 VR128:$src1), (memop addr:$src2), imm:$cc)),
(CMPPSrmi (v4f32 VR128:$src1), addr:$src2, imm:$cc)>;
+}
+
+let Predicates = [HasSSE2] in {
def : Pat<(v2i64 (X86cmppd (v2f64 VR128:$src1), VR128:$src2, imm:$cc)),
(CMPPDrri VR128:$src1, VR128:$src2, imm:$cc)>;
def : Pat<(v2i64 (X86cmppd (v2f64 VR128:$src1), (memop addr:$src2), imm:$cc)),
(CMPPDrmi VR128:$src1, addr:$src2, imm:$cc)>;
+}
+
+let Predicates = [HasAVX] in {
+def : Pat<(v4i32 (X86cmpps (v4f32 VR128:$src1), VR128:$src2, imm:$cc)),
+ (VCMPPSrri (v4f32 VR128:$src1), (v4f32 VR128:$src2), imm:$cc)>;
+def : Pat<(v4i32 (X86cmpps (v4f32 VR128:$src1), (memop addr:$src2), imm:$cc)),
+ (VCMPPSrmi (v4f32 VR128:$src1), addr:$src2, imm:$cc)>;
+def : Pat<(v2i64 (X86cmppd (v2f64 VR128:$src1), VR128:$src2, imm:$cc)),
+ (VCMPPDrri VR128:$src1, VR128:$src2, imm:$cc)>;
+def : Pat<(v2i64 (X86cmppd (v2f64 VR128:$src1), (memop addr:$src2), imm:$cc)),
+ (VCMPPDrmi VR128:$src1, addr:$src2, imm:$cc)>;
+
+def : Pat<(v8i32 (X86cmpps (v8f32 VR256:$src1), VR256:$src2, imm:$cc)),
+ (VCMPPSYrri (v8f32 VR256:$src1), (v8f32 VR256:$src2), imm:$cc)>;
+def : Pat<(v8i32 (X86cmpps (v8f32 VR256:$src1), (memop addr:$src2), imm:$cc)),
+ (VCMPPSYrmi (v8f32 VR256:$src1), addr:$src2, imm:$cc)>;
+def : Pat<(v4i64 (X86cmppd (v4f64 VR256:$src1), VR256:$src2, imm:$cc)),
+ (VCMPPDYrri VR256:$src1, VR256:$src2, imm:$cc)>;
+def : Pat<(v4i64 (X86cmppd (v4f64 VR256:$src1), (memop addr:$src2), imm:$cc)),
+ (VCMPPDYrmi VR256:$src1, addr:$src2, imm:$cc)>;
+}
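+
+// Note: these compare patterns produce integer vector results (v4i32, v2i64,
+// v8i32, v4i64) because cmpps/cmppd return an all-ones or all-zeros mask per
+// element, which the DAG models as an integer vector.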
//===----------------------------------------------------------------------===//
// SSE 1 & 2 - Shuffle Instructions
@@ -1293,6 +2223,132 @@ let Constraints = "$src1 = $dst" in {
memopv2f64, SSEPackedDouble>, TB, OpSize;
}
+let Predicates = [HasSSE1] in {
+ def : Pat<(v4f32 (X86Shufps VR128:$src1,
+ (memopv4f32 addr:$src2), (i8 imm:$imm))),
+ (SHUFPSrmi VR128:$src1, addr:$src2, imm:$imm)>;
+ def : Pat<(v4f32 (X86Shufps VR128:$src1, VR128:$src2, (i8 imm:$imm))),
+ (SHUFPSrri VR128:$src1, VR128:$src2, imm:$imm)>;
+ def : Pat<(v4i32 (X86Shufps VR128:$src1,
+ (bc_v4i32 (memopv2i64 addr:$src2)), (i8 imm:$imm))),
+ (SHUFPSrmi VR128:$src1, addr:$src2, imm:$imm)>;
+ def : Pat<(v4i32 (X86Shufps VR128:$src1, VR128:$src2, (i8 imm:$imm))),
+ (SHUFPSrri VR128:$src1, VR128:$src2, imm:$imm)>;
+ // vector_shuffle v1, v2 <4, 5, 2, 3> using SHUFPSrri (we prefer movsd, but
+ // fall back to this for SSE1)
+ def : Pat<(v4f32 (movlp:$src3 VR128:$src1, (v4f32 VR128:$src2))),
+ (SHUFPSrri VR128:$src2, VR128:$src1,
+ (SHUFFLE_get_shuf_imm VR128:$src3))>;
+ // Special unary SHUFPSrri case.
+ def : Pat<(v4f32 (pshufd:$src3 VR128:$src1, (undef))),
+ (SHUFPSrri VR128:$src1, VR128:$src1,
+ (SHUFFLE_get_shuf_imm VR128:$src3))>;
+}
+
+let Predicates = [HasSSE2] in {
+ // Special binary v4i32 shuffle cases with SHUFPS.
+ def : Pat<(v4i32 (shufp:$src3 VR128:$src1, (v4i32 VR128:$src2))),
+ (SHUFPSrri VR128:$src1, VR128:$src2,
+ (SHUFFLE_get_shuf_imm VR128:$src3))>;
+ def : Pat<(v4i32 (shufp:$src3 VR128:$src1,
+ (bc_v4i32 (memopv2i64 addr:$src2)))),
+ (SHUFPSrmi VR128:$src1, addr:$src2,
+ (SHUFFLE_get_shuf_imm VR128:$src3))>;
+ // Special unary SHUFPDrri cases.
+ def : Pat<(v2i64 (pshufd:$src3 VR128:$src1, (undef))),
+ (SHUFPDrri VR128:$src1, VR128:$src1,
+ (SHUFFLE_get_shuf_imm VR128:$src3))>;
+ def : Pat<(v2f64 (pshufd:$src3 VR128:$src1, (undef))),
+ (SHUFPDrri VR128:$src1, VR128:$src1,
+ (SHUFFLE_get_shuf_imm VR128:$src3))>;
+ // Special binary v2i64 shuffle cases using SHUFPDrri.
+ def : Pat<(v2i64 (shufp:$src3 VR128:$src1, VR128:$src2)),
+ (SHUFPDrri VR128:$src1, VR128:$src2,
+ (SHUFFLE_get_shuf_imm VR128:$src3))>;
+ // Generic SHUFPD patterns
+ def : Pat<(v2f64 (X86Shufps VR128:$src1,
+ (memopv2f64 addr:$src2), (i8 imm:$imm))),
+ (SHUFPDrmi VR128:$src1, addr:$src2, imm:$imm)>;
+ def : Pat<(v2i64 (X86Shufpd VR128:$src1, VR128:$src2, (i8 imm:$imm))),
+ (SHUFPDrri VR128:$src1, VR128:$src2, imm:$imm)>;
+ def : Pat<(v2f64 (X86Shufpd VR128:$src1, VR128:$src2, (i8 imm:$imm))),
+ (SHUFPDrri VR128:$src1, VR128:$src2, imm:$imm)>;
+}
+
+let Predicates = [HasAVX] in {
+ def : Pat<(v4f32 (X86Shufps VR128:$src1,
+ (memopv4f32 addr:$src2), (i8 imm:$imm))),
+ (VSHUFPSrmi VR128:$src1, addr:$src2, imm:$imm)>;
+ def : Pat<(v4f32 (X86Shufps VR128:$src1, VR128:$src2, (i8 imm:$imm))),
+ (VSHUFPSrri VR128:$src1, VR128:$src2, imm:$imm)>;
+ def : Pat<(v4i32 (X86Shufps VR128:$src1,
+ (bc_v4i32 (memopv2i64 addr:$src2)), (i8 imm:$imm))),
+ (VSHUFPSrmi VR128:$src1, addr:$src2, imm:$imm)>;
+ def : Pat<(v4i32 (X86Shufps VR128:$src1, VR128:$src2, (i8 imm:$imm))),
+ (VSHUFPSrri VR128:$src1, VR128:$src2, imm:$imm)>;
+ // vector_shuffle v1, v2 <4, 5, 2, 3> using SHUFPSrri (we prefer movsd, but
+ // fall back to this for SSE1)
+ def : Pat<(v4f32 (movlp:$src3 VR128:$src1, (v4f32 VR128:$src2))),
+ (VSHUFPSrri VR128:$src2, VR128:$src1,
+ (SHUFFLE_get_shuf_imm VR128:$src3))>;
+ // Special unary SHUFPSrri case.
+ def : Pat<(v4f32 (pshufd:$src3 VR128:$src1, (undef))),
+ (VSHUFPSrri VR128:$src1, VR128:$src1,
+ (SHUFFLE_get_shuf_imm VR128:$src3))>;
+ // Special binary v4i32 shuffle cases with SHUFPS.
+ def : Pat<(v4i32 (shufp:$src3 VR128:$src1, (v4i32 VR128:$src2))),
+ (VSHUFPSrri VR128:$src1, VR128:$src2,
+ (SHUFFLE_get_shuf_imm VR128:$src3))>;
+ def : Pat<(v4i32 (shufp:$src3 VR128:$src1,
+ (bc_v4i32 (memopv2i64 addr:$src2)))),
+ (VSHUFPSrmi VR128:$src1, addr:$src2,
+ (SHUFFLE_get_shuf_imm VR128:$src3))>;
+ // Special unary SHUFPDrri cases.
+ def : Pat<(v2i64 (pshufd:$src3 VR128:$src1, (undef))),
+ (VSHUFPDrri VR128:$src1, VR128:$src1,
+ (SHUFFLE_get_shuf_imm VR128:$src3))>;
+ def : Pat<(v2f64 (pshufd:$src3 VR128:$src1, (undef))),
+ (VSHUFPDrri VR128:$src1, VR128:$src1,
+ (SHUFFLE_get_shuf_imm VR128:$src3))>;
+ // Special binary v2i64 shuffle cases using SHUFPDrri.
+ def : Pat<(v2i64 (shufp:$src3 VR128:$src1, VR128:$src2)),
+ (VSHUFPDrri VR128:$src1, VR128:$src2,
+ (SHUFFLE_get_shuf_imm VR128:$src3))>;
+
+ def : Pat<(v2f64 (X86Shufps VR128:$src1,
+ (memopv2f64 addr:$src2), (i8 imm:$imm))),
+ (VSHUFPDrmi VR128:$src1, addr:$src2, imm:$imm)>;
+ def : Pat<(v2i64 (X86Shufpd VR128:$src1, VR128:$src2, (i8 imm:$imm))),
+ (VSHUFPDrri VR128:$src1, VR128:$src2, imm:$imm)>;
+ def : Pat<(v2f64 (X86Shufpd VR128:$src1, VR128:$src2, (i8 imm:$imm))),
+ (VSHUFPDrri VR128:$src1, VR128:$src2, imm:$imm)>;
+
+ // 256-bit patterns
+ def : Pat<(v8i32 (X86Shufps VR256:$src1, VR256:$src2, (i8 imm:$imm))),
+ (VSHUFPSYrri VR256:$src1, VR256:$src2, imm:$imm)>;
+ def : Pat<(v8i32 (X86Shufps VR256:$src1,
+ (bc_v8i32 (memopv4i64 addr:$src2)), (i8 imm:$imm))),
+ (VSHUFPSYrmi VR256:$src1, addr:$src2, imm:$imm)>;
+
+ def : Pat<(v8f32 (X86Shufps VR256:$src1, VR256:$src2, (i8 imm:$imm))),
+ (VSHUFPSYrri VR256:$src1, VR256:$src2, imm:$imm)>;
+ def : Pat<(v8f32 (X86Shufps VR256:$src1,
+ (memopv8f32 addr:$src2), (i8 imm:$imm))),
+ (VSHUFPSYrmi VR256:$src1, addr:$src2, imm:$imm)>;
+
+ def : Pat<(v4i64 (X86Shufpd VR256:$src1, VR256:$src2, (i8 imm:$imm))),
+ (VSHUFPDYrri VR256:$src1, VR256:$src2, imm:$imm)>;
+ def : Pat<(v4i64 (X86Shufpd VR256:$src1,
+ (memopv4i64 addr:$src2), (i8 imm:$imm))),
+ (VSHUFPDYrmi VR256:$src1, addr:$src2, imm:$imm)>;
+
+ def : Pat<(v4f64 (X86Shufpd VR256:$src1, VR256:$src2, (i8 imm:$imm))),
+ (VSHUFPDYrri VR256:$src1, VR256:$src2, imm:$imm)>;
+ def : Pat<(v4f64 (X86Shufpd VR256:$src1,
+ (memopv4f64 addr:$src2), (i8 imm:$imm))),
+ (VSHUFPDYrmi VR256:$src1, addr:$src2, imm:$imm)>;
+}
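+// Note on the 256-bit patterns above: the VSHUFPS/VSHUFPD ymm forms apply
+// the same immediate-controlled shuffle independently to each 128-bit lane;
+// the immediate cannot move elements across the two lanes.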
+
//===----------------------------------------------------------------------===//
// SSE 1 & 2 - Unpack Instructions
//===----------------------------------------------------------------------===//
@@ -1316,29 +2372,29 @@ multiclass sse12_unpack_interleave<bits<8> opc, PatFrag OpNode, ValueType vt,
let AddedComplexity = 10 in {
defm VUNPCKHPS: sse12_unpack_interleave<0x15, unpckh, v4f32, memopv4f32,
VR128, f128mem, "unpckhps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- SSEPackedSingle>, VEX_4V;
+ SSEPackedSingle>, TB, VEX_4V;
defm VUNPCKHPD: sse12_unpack_interleave<0x15, unpckh, v2f64, memopv2f64,
VR128, f128mem, "unpckhpd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- SSEPackedDouble>, OpSize, VEX_4V;
+ SSEPackedDouble>, TB, OpSize, VEX_4V;
defm VUNPCKLPS: sse12_unpack_interleave<0x14, unpckl, v4f32, memopv4f32,
VR128, f128mem, "unpcklps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- SSEPackedSingle>, VEX_4V;
+ SSEPackedSingle>, TB, VEX_4V;
defm VUNPCKLPD: sse12_unpack_interleave<0x14, unpckl, v2f64, memopv2f64,
VR128, f128mem, "unpcklpd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- SSEPackedDouble>, OpSize, VEX_4V;
+ SSEPackedDouble>, TB, OpSize, VEX_4V;
defm VUNPCKHPSY: sse12_unpack_interleave<0x15, unpckh, v8f32, memopv8f32,
VR256, f256mem, "unpckhps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- SSEPackedSingle>, VEX_4V;
+ SSEPackedSingle>, TB, VEX_4V;
defm VUNPCKHPDY: sse12_unpack_interleave<0x15, unpckh, v4f64, memopv4f64,
VR256, f256mem, "unpckhpd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- SSEPackedDouble>, OpSize, VEX_4V;
+ SSEPackedDouble>, TB, OpSize, VEX_4V;
defm VUNPCKLPSY: sse12_unpack_interleave<0x14, unpckl, v8f32, memopv8f32,
VR256, f256mem, "unpcklps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- SSEPackedSingle>, VEX_4V;
+ SSEPackedSingle>, TB, VEX_4V;
defm VUNPCKLPDY: sse12_unpack_interleave<0x14, unpckl, v4f64, memopv4f64,
VR256, f256mem, "unpcklpd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- SSEPackedDouble>, OpSize, VEX_4V;
+ SSEPackedDouble>, TB, OpSize, VEX_4V;
let Constraints = "$src1 = $dst" in {
defm UNPCKHPS: sse12_unpack_interleave<0x15, unpckh, v4f32, memopv4f32,
@@ -1356,6 +2412,103 @@ let AddedComplexity = 10 in {
} // Constraints = "$src1 = $dst"
} // AddedComplexity
+let Predicates = [HasSSE1] in {
+ def : Pat<(v4f32 (X86Unpcklps VR128:$src1, (memopv4f32 addr:$src2))),
+ (UNPCKLPSrm VR128:$src1, addr:$src2)>;
+ def : Pat<(v4f32 (X86Unpcklps VR128:$src1, VR128:$src2)),
+ (UNPCKLPSrr VR128:$src1, VR128:$src2)>;
+ def : Pat<(v4f32 (X86Unpckhps VR128:$src1, (memopv4f32 addr:$src2))),
+ (UNPCKHPSrm VR128:$src1, addr:$src2)>;
+ def : Pat<(v4f32 (X86Unpckhps VR128:$src1, VR128:$src2)),
+ (UNPCKHPSrr VR128:$src1, VR128:$src2)>;
+}
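+// Worked example of the unpack semantics matched above: for
+//   a = <a0, a1, a2, a3>, b = <b0, b1, b2, b3>
+// unpcklps a, b interleaves the low halves, giving <a0, b0, a1, b1>, and
+// unpckhps a, b interleaves the high halves, giving <a2, b2, a3, b3>.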
+
+let Predicates = [HasSSE2] in {
+ def : Pat<(v2f64 (X86Unpcklpd VR128:$src1, (memopv2f64 addr:$src2))),
+ (UNPCKLPDrm VR128:$src1, addr:$src2)>;
+ def : Pat<(v2f64 (X86Unpcklpd VR128:$src1, VR128:$src2)),
+ (UNPCKLPDrr VR128:$src1, VR128:$src2)>;
+ def : Pat<(v2f64 (X86Unpckhpd VR128:$src1, (memopv2f64 addr:$src2))),
+ (UNPCKHPDrm VR128:$src1, addr:$src2)>;
+ def : Pat<(v2f64 (X86Unpckhpd VR128:$src1, VR128:$src2)),
+ (UNPCKHPDrr VR128:$src1, VR128:$src2)>;
+
+ // FIXME: Instead of X86Movddup, there should be an X86Unpcklpd here; the
+ // problem is during lowering, where it's not possible to recognize the load
+ // fold because it has two uses through a bitcast. One use disappears at isel
+ // time and the fold opportunity reappears.
+ def : Pat<(v2f64 (X86Movddup VR128:$src)),
+ (UNPCKLPDrr VR128:$src, VR128:$src)>;
+
+ let AddedComplexity = 10 in
+ def : Pat<(splat_lo (v2f64 VR128:$src), (undef)),
+ (UNPCKLPDrr VR128:$src, VR128:$src)>;
+}
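+// Worked example: with x = <x0, x1>, unpcklpd x, x yields <x0, x0>, which
+// is exactly the movddup / splat_lo result the two patterns above select,
+// so no dedicated splat instruction is needed on SSE2.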
+
+let Predicates = [HasAVX] in {
+ def : Pat<(v4f32 (X86Unpcklps VR128:$src1, (memopv4f32 addr:$src2))),
+ (VUNPCKLPSrm VR128:$src1, addr:$src2)>;
+ def : Pat<(v4f32 (X86Unpcklps VR128:$src1, VR128:$src2)),
+ (VUNPCKLPSrr VR128:$src1, VR128:$src2)>;
+ def : Pat<(v4f32 (X86Unpckhps VR128:$src1, (memopv4f32 addr:$src2))),
+ (VUNPCKHPSrm VR128:$src1, addr:$src2)>;
+ def : Pat<(v4f32 (X86Unpckhps VR128:$src1, VR128:$src2)),
+ (VUNPCKHPSrr VR128:$src1, VR128:$src2)>;
+
+ def : Pat<(v8f32 (X86Unpcklpsy VR256:$src1, (memopv8f32 addr:$src2))),
+ (VUNPCKLPSYrm VR256:$src1, addr:$src2)>;
+ def : Pat<(v8f32 (X86Unpcklpsy VR256:$src1, VR256:$src2)),
+ (VUNPCKLPSYrr VR256:$src1, VR256:$src2)>;
+ def : Pat<(v8i32 (X86Unpcklpsy VR256:$src1, VR256:$src2)),
+ (VUNPCKLPSYrr VR256:$src1, VR256:$src2)>;
+ def : Pat<(v8i32 (X86Unpcklpsy VR256:$src1, (memopv8i32 addr:$src2))),
+ (VUNPCKLPSYrm VR256:$src1, addr:$src2)>;
+ def : Pat<(v8f32 (X86Unpckhpsy VR256:$src1, (memopv8f32 addr:$src2))),
+ (VUNPCKHPSYrm VR256:$src1, addr:$src2)>;
+ def : Pat<(v8f32 (X86Unpckhpsy VR256:$src1, VR256:$src2)),
+ (VUNPCKHPSYrr VR256:$src1, VR256:$src2)>;
+ def : Pat<(v8i32 (X86Unpckhpsy VR256:$src1, (memopv8i32 addr:$src2))),
+ (VUNPCKHPSYrm VR256:$src1, addr:$src2)>;
+ def : Pat<(v8i32 (X86Unpckhpsy VR256:$src1, VR256:$src2)),
+ (VUNPCKHPSYrr VR256:$src1, VR256:$src2)>;
+
+ def : Pat<(v2f64 (X86Unpcklpd VR128:$src1, (memopv2f64 addr:$src2))),
+ (VUNPCKLPDrm VR128:$src1, addr:$src2)>;
+ def : Pat<(v2f64 (X86Unpcklpd VR128:$src1, VR128:$src2)),
+ (VUNPCKLPDrr VR128:$src1, VR128:$src2)>;
+ def : Pat<(v2f64 (X86Unpckhpd VR128:$src1, (memopv2f64 addr:$src2))),
+ (VUNPCKHPDrm VR128:$src1, addr:$src2)>;
+ def : Pat<(v2f64 (X86Unpckhpd VR128:$src1, VR128:$src2)),
+ (VUNPCKHPDrr VR128:$src1, VR128:$src2)>;
+
+ def : Pat<(v4f64 (X86Unpcklpdy VR256:$src1, (memopv4f64 addr:$src2))),
+ (VUNPCKLPDYrm VR256:$src1, addr:$src2)>;
+ def : Pat<(v4f64 (X86Unpcklpdy VR256:$src1, VR256:$src2)),
+ (VUNPCKLPDYrr VR256:$src1, VR256:$src2)>;
+ def : Pat<(v4i64 (X86Unpcklpdy VR256:$src1, (memopv4i64 addr:$src2))),
+ (VUNPCKLPDYrm VR256:$src1, addr:$src2)>;
+ def : Pat<(v4i64 (X86Unpcklpdy VR256:$src1, VR256:$src2)),
+ (VUNPCKLPDYrr VR256:$src1, VR256:$src2)>;
+ def : Pat<(v4f64 (X86Unpckhpdy VR256:$src1, (memopv4f64 addr:$src2))),
+ (VUNPCKHPDYrm VR256:$src1, addr:$src2)>;
+ def : Pat<(v4f64 (X86Unpckhpdy VR256:$src1, VR256:$src2)),
+ (VUNPCKHPDYrr VR256:$src1, VR256:$src2)>;
+ def : Pat<(v4i64 (X86Unpckhpdy VR256:$src1, (memopv4i64 addr:$src2))),
+ (VUNPCKHPDYrm VR256:$src1, addr:$src2)>;
+ def : Pat<(v4i64 (X86Unpckhpdy VR256:$src1, VR256:$src2)),
+ (VUNPCKHPDYrr VR256:$src1, VR256:$src2)>;
+
+ // FIXME: Instead of X86Movddup, there should be an X86Unpcklpd here; the
+ // problem is during lowering, where it's not possible to recognize the load
+ // fold because it has two uses through a bitcast. One use disappears at isel
+ // time and the fold opportunity reappears.
+ def : Pat<(v2f64 (X86Movddup VR128:$src)),
+ (VUNPCKLPDrr VR128:$src, VR128:$src)>;
+ let AddedComplexity = 10 in
+ def : Pat<(splat_lo (v2f64 VR128:$src), (undef)),
+ (VUNPCKLPDrr VR128:$src, VR128:$src)>;
+}
+
//===----------------------------------------------------------------------===//
// SSE 1 & 2 - Extract Floating-Point Sign mask
//===----------------------------------------------------------------------===//
@@ -1370,91 +2523,60 @@ multiclass sse12_extr_sign_mask<RegisterClass RC, Intrinsic Int, string asm,
!strconcat(asm, "\t{$src, $dst|$dst, $src}"), [], d>, REX_W;
}
-// Mask creation
-defm VMOVMSKPS : sse12_extr_sign_mask<VR128, int_x86_sse_movmsk_ps,
- "movmskps", SSEPackedSingle>, VEX;
-defm VMOVMSKPD : sse12_extr_sign_mask<VR128, int_x86_sse2_movmsk_pd,
- "movmskpd", SSEPackedDouble>, OpSize,
- VEX;
-defm VMOVMSKPSY : sse12_extr_sign_mask<VR256, int_x86_avx_movmsk_ps_256,
- "movmskps", SSEPackedSingle>, VEX;
-defm VMOVMSKPDY : sse12_extr_sign_mask<VR256, int_x86_avx_movmsk_pd_256,
- "movmskpd", SSEPackedDouble>, OpSize,
- VEX;
defm MOVMSKPS : sse12_extr_sign_mask<VR128, int_x86_sse_movmsk_ps, "movmskps",
SSEPackedSingle>, TB;
defm MOVMSKPD : sse12_extr_sign_mask<VR128, int_x86_sse2_movmsk_pd, "movmskpd",
SSEPackedDouble>, TB, OpSize;
-// X86fgetsign
-def MOVMSKPDrr32_alt : PI<0x50, MRMSrcReg, (outs GR32:$dst), (ins FR64:$src),
- "movmskpd\t{$src, $dst|$dst, $src}",
- [(set GR32:$dst, (X86fgetsign FR64:$src))], SSEPackedDouble>, TB, OpSize;
-def MOVMSKPDrr64_alt : PI<0x50, MRMSrcReg, (outs GR64:$dst), (ins FR64:$src),
- "movmskpd\t{$src, $dst|$dst, $src}",
- [(set GR64:$dst, (X86fgetsign FR64:$src))], SSEPackedDouble>, TB, OpSize;
-def MOVMSKPSrr32_alt : PI<0x50, MRMSrcReg, (outs GR32:$dst), (ins FR32:$src),
- "movmskps\t{$src, $dst|$dst, $src}",
- [(set GR32:$dst, (X86fgetsign FR32:$src))], SSEPackedSingle>, TB;
-def MOVMSKPSrr64_alt : PI<0x50, MRMSrcReg, (outs GR64:$dst), (ins FR32:$src),
- "movmskps\t{$src, $dst|$dst, $src}",
- [(set GR64:$dst, (X86fgetsign FR32:$src))], SSEPackedSingle>, TB;
-
-// Assembler Only
-def VMOVMSKPSr64r : PI<0x50, MRMSrcReg, (outs GR64:$dst), (ins VR128:$src),
- "movmskps\t{$src, $dst|$dst, $src}", [], SSEPackedSingle>, VEX;
-def VMOVMSKPDr64r : PI<0x50, MRMSrcReg, (outs GR64:$dst), (ins VR128:$src),
- "movmskpd\t{$src, $dst|$dst, $src}", [], SSEPackedDouble>, OpSize,
- VEX;
-def VMOVMSKPSYr64r : PI<0x50, MRMSrcReg, (outs GR64:$dst), (ins VR256:$src),
- "movmskps\t{$src, $dst|$dst, $src}", [], SSEPackedSingle>, VEX;
-def VMOVMSKPDYr64r : PI<0x50, MRMSrcReg, (outs GR64:$dst), (ins VR256:$src),
- "movmskpd\t{$src, $dst|$dst, $src}", [], SSEPackedDouble>, OpSize,
- VEX;
-
-//===----------------------------------------------------------------------===//
-// SSE 1 & 2 - Misc aliasing of packed SSE 1 & 2 instructions
-//===----------------------------------------------------------------------===//
-
-// Aliases of packed SSE1 & SSE2 instructions for scalar use. These all have
-// names that start with 'Fs'.
-
-// Alias instructions that map fld0 to pxor for sse.
-let isReMaterializable = 1, isAsCheapAsAMove = 1, isCodeGenOnly = 1,
- canFoldAsLoad = 1 in {
- // FIXME: Set encoding to pseudo!
-def FsFLD0SS : I<0xEF, MRMInitReg, (outs FR32:$dst), (ins), "",
- [(set FR32:$dst, fp32imm0)]>,
- Requires<[HasSSE1]>, TB, OpSize;
-def FsFLD0SD : I<0xEF, MRMInitReg, (outs FR64:$dst), (ins), "",
- [(set FR64:$dst, fpimm0)]>,
- Requires<[HasSSE2]>, TB, OpSize;
-def VFsFLD0SS : I<0xEF, MRMInitReg, (outs FR32:$dst), (ins), "",
- [(set FR32:$dst, fp32imm0)]>,
- Requires<[HasAVX]>, TB, OpSize, VEX_4V;
-def VFsFLD0SD : I<0xEF, MRMInitReg, (outs FR64:$dst), (ins), "",
- [(set FR64:$dst, fpimm0)]>,
- Requires<[HasAVX]>, TB, OpSize, VEX_4V;
-}
-
-// Alias instruction to do FR32 or FR64 reg-to-reg copy using movaps. Upper
-// bits are disregarded.
-let neverHasSideEffects = 1 in {
-def FsMOVAPSrr : PSI<0x28, MRMSrcReg, (outs FR32:$dst), (ins FR32:$src),
- "movaps\t{$src, $dst|$dst, $src}", []>;
-def FsMOVAPDrr : PDI<0x28, MRMSrcReg, (outs FR64:$dst), (ins FR64:$src),
- "movapd\t{$src, $dst|$dst, $src}", []>;
-}
+def : Pat<(i32 (X86fgetsign FR32:$src)),
+ (MOVMSKPSrr32 (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), FR32:$src,
+ sub_ss))>, Requires<[HasSSE1]>;
+def : Pat<(i64 (X86fgetsign FR32:$src)),
+ (MOVMSKPSrr64 (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), FR32:$src,
+ sub_ss))>, Requires<[HasSSE1]>;
+def : Pat<(i32 (X86fgetsign FR64:$src)),
+ (MOVMSKPDrr32 (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), FR64:$src,
+ sub_sd))>, Requires<[HasSSE2]>;
+def : Pat<(i64 (X86fgetsign FR64:$src)),
+ (MOVMSKPDrr64 (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), FR64:$src,
+ sub_sd))>, Requires<[HasSSE2]>;
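+// Worked example for the X86fgetsign patterns above: movmskps packs the
+// sign bit of every f32 element into the low bits of the GPR result. The
+// INSERT_SUBREG places the scalar input into element 0 of an otherwise
+// IMPLICIT_DEF vector, so bit 0 of the mask is the requested sign bit (the
+// higher bits, coming from undefined elements, are presumably masked off
+// by the consumer of the node).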
-// Alias instruction to load FR32 or FR64 from f128mem using movaps. Upper
-// bits are disregarded.
-let canFoldAsLoad = 1, isReMaterializable = 1 in {
-def FsMOVAPSrm : PSI<0x28, MRMSrcMem, (outs FR32:$dst), (ins f128mem:$src),
- "movaps\t{$src, $dst|$dst, $src}",
- [(set FR32:$dst, (alignedloadfsf32 addr:$src))]>;
-def FsMOVAPDrm : PDI<0x28, MRMSrcMem, (outs FR64:$dst), (ins f128mem:$src),
- "movapd\t{$src, $dst|$dst, $src}",
- [(set FR64:$dst, (alignedloadfsf64 addr:$src))]>;
+let Predicates = [HasAVX] in {
+ defm VMOVMSKPS : sse12_extr_sign_mask<VR128, int_x86_sse_movmsk_ps,
+ "movmskps", SSEPackedSingle>, TB, VEX;
+ defm VMOVMSKPD : sse12_extr_sign_mask<VR128, int_x86_sse2_movmsk_pd,
+ "movmskpd", SSEPackedDouble>, TB,
+ OpSize, VEX;
+ defm VMOVMSKPSY : sse12_extr_sign_mask<VR256, int_x86_avx_movmsk_ps_256,
+ "movmskps", SSEPackedSingle>, TB, VEX;
+ defm VMOVMSKPDY : sse12_extr_sign_mask<VR256, int_x86_avx_movmsk_pd_256,
+ "movmskpd", SSEPackedDouble>, TB,
+ OpSize, VEX;
+
+ def : Pat<(i32 (X86fgetsign FR32:$src)),
+ (VMOVMSKPSrr32 (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), FR32:$src,
+ sub_ss))>;
+ def : Pat<(i64 (X86fgetsign FR32:$src)),
+ (VMOVMSKPSrr64 (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), FR32:$src,
+ sub_ss))>;
+ def : Pat<(i32 (X86fgetsign FR64:$src)),
+ (VMOVMSKPDrr32 (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), FR64:$src,
+ sub_sd))>;
+ def : Pat<(i64 (X86fgetsign FR64:$src)),
+ (VMOVMSKPDrr64 (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), FR64:$src,
+ sub_sd))>;
+
+ // Assembler Only
+ def VMOVMSKPSr64r : PI<0x50, MRMSrcReg, (outs GR64:$dst), (ins VR128:$src),
+ "movmskps\t{$src, $dst|$dst, $src}", [], SSEPackedSingle>, TB, VEX;
+ def VMOVMSKPDr64r : PI<0x50, MRMSrcReg, (outs GR64:$dst), (ins VR128:$src),
+ "movmskpd\t{$src, $dst|$dst, $src}", [], SSEPackedDouble>, TB,
+ OpSize, VEX;
+ def VMOVMSKPSYr64r : PI<0x50, MRMSrcReg, (outs GR64:$dst), (ins VR256:$src),
+ "movmskps\t{$src, $dst|$dst, $src}", [], SSEPackedSingle>, TB, VEX;
+ def VMOVMSKPDYr64r : PI<0x50, MRMSrcReg, (outs GR64:$dst), (ins VR256:$src),
+ "movmskpd\t{$src, $dst|$dst, $src}", [], SSEPackedDouble>, TB,
+ OpSize, VEX;
}
//===----------------------------------------------------------------------===//
@@ -1466,10 +2588,10 @@ def FsMOVAPDrm : PDI<0x28, MRMSrcMem, (outs FR64:$dst), (ins f128mem:$src),
multiclass sse12_fp_alias_pack_logical<bits<8> opc, string OpcodeStr,
SDNode OpNode> {
defm V#NAME#PS : sse12_fp_packed<opc, !strconcat(OpcodeStr, "ps"), OpNode,
- FR32, f32, f128mem, memopfsf32, SSEPackedSingle, 0>, VEX_4V;
+ FR32, f32, f128mem, memopfsf32, SSEPackedSingle, 0>, TB, VEX_4V;
defm V#NAME#PD : sse12_fp_packed<opc, !strconcat(OpcodeStr, "pd"), OpNode,
- FR64, f64, f128mem, memopfsf64, SSEPackedDouble, 0>, OpSize, VEX_4V;
+ FR64, f64, f128mem, memopfsf64, SSEPackedDouble, 0>, TB, OpSize, VEX_4V;
let Constraints = "$src1 = $dst" in {
defm PS : sse12_fp_packed<opc, !strconcat(OpcodeStr, "ps"), OpNode, FR32,
@@ -1494,21 +2616,22 @@ let neverHasSideEffects = 1, Pattern = []<dag>, isCommutable = 0 in
///
multiclass sse12_fp_packed_logical<bits<8> opc, string OpcodeStr,
SDNode OpNode> {
- let Pattern = []<dag> in {
- defm V#NAME#PS : sse12_fp_packed_logical_rm<opc, VR128, SSEPackedSingle,
- !strconcat(OpcodeStr, "ps"), f128mem,
- [(set VR128:$dst, (v2i64 (OpNode VR128:$src1, VR128:$src2)))],
- [(set VR128:$dst, (OpNode (bc_v2i64 (v4f32 VR128:$src1)),
- (memopv2i64 addr:$src2)))], 0>, VEX_4V;
-
- defm V#NAME#PD : sse12_fp_packed_logical_rm<opc, VR128, SSEPackedDouble,
- !strconcat(OpcodeStr, "pd"), f128mem,
- [(set VR128:$dst, (OpNode (bc_v2i64 (v2f64 VR128:$src1)),
- (bc_v2i64 (v2f64 VR128:$src2))))],
- [(set VR128:$dst, (OpNode (bc_v2i64 (v2f64 VR128:$src1)),
- (memopv2i64 addr:$src2)))], 0>,
- OpSize, VEX_4V;
- }
+ // In AVX there is no need to add a pattern for 128-bit logical rr ps,
+ // because those are all promoted to v2i64 and the patterns are covered by
+ // the int version. The pattern is needed only in SSE, because v2i64 is
+ // supported on SSE2 but not on SSE1.
+ defm V#NAME#PS : sse12_fp_packed_logical_rm<opc, VR128, SSEPackedSingle,
+ !strconcat(OpcodeStr, "ps"), f128mem, [],
+ [(set VR128:$dst, (OpNode (bc_v2i64 (v4f32 VR128:$src1)),
+ (memopv2i64 addr:$src2)))], 0>, TB, VEX_4V;
+
+ defm V#NAME#PD : sse12_fp_packed_logical_rm<opc, VR128, SSEPackedDouble,
+ !strconcat(OpcodeStr, "pd"), f128mem,
+ [(set VR128:$dst, (OpNode (bc_v2i64 (v2f64 VR128:$src1)),
+ (bc_v2i64 (v2f64 VR128:$src2))))],
+ [(set VR128:$dst, (OpNode (bc_v2i64 (v2f64 VR128:$src1)),
+ (memopv2i64 addr:$src2)))], 0>,
+ TB, OpSize, VEX_4V;
let Constraints = "$src1 = $dst" in {
defm PS : sse12_fp_packed_logical_rm<opc, VR128, SSEPackedSingle,
!strconcat(OpcodeStr, "ps"), f128mem,
@@ -1533,7 +2656,7 @@ multiclass sse12_fp_packed_logical_y<bits<8> opc, string OpcodeStr,
!strconcat(OpcodeStr, "ps"), f256mem,
[(set VR256:$dst, (v4i64 (OpNode VR256:$src1, VR256:$src2)))],
[(set VR256:$dst, (OpNode (bc_v4i64 (v8f32 VR256:$src1)),
- (memopv4i64 addr:$src2)))], 0>, VEX_4V;
+ (memopv4i64 addr:$src2)))], 0>, TB, VEX_4V;
defm PDY : sse12_fp_packed_logical_rm<opc, VR256, SSEPackedDouble,
!strconcat(OpcodeStr, "pd"), f256mem,
@@ -1541,7 +2664,7 @@ multiclass sse12_fp_packed_logical_y<bits<8> opc, string OpcodeStr,
(bc_v4i64 (v4f64 VR256:$src2))))],
[(set VR256:$dst, (OpNode (bc_v4i64 (v4f64 VR256:$src1)),
(memopv4i64 addr:$src2)))], 0>,
- OpSize, VEX_4V;
+ TB, OpSize, VEX_4V;
}
// AVX 256-bit packed logical ops forms
@@ -1632,32 +2755,32 @@ multiclass basic_sse12_fp_binop_p_y_int<bits<8> opc, string OpcodeStr> {
// Binary Arithmetic instructions
defm VADD : basic_sse12_fp_binop_s<0x58, "add", fadd, 0>,
- basic_sse12_fp_binop_s_int<0x58, "add", 0>,
- basic_sse12_fp_binop_p<0x58, "add", fadd, 0>,
+ basic_sse12_fp_binop_s_int<0x58, "add", 0>, VEX_4V, VEX_LIG;
+defm VADD : basic_sse12_fp_binop_p<0x58, "add", fadd, 0>,
basic_sse12_fp_binop_p_y<0x58, "add", fadd>, VEX_4V;
defm VMUL : basic_sse12_fp_binop_s<0x59, "mul", fmul, 0>,
- basic_sse12_fp_binop_s_int<0x59, "mul", 0>,
- basic_sse12_fp_binop_p<0x59, "mul", fmul, 0>,
+ basic_sse12_fp_binop_s_int<0x59, "mul", 0>, VEX_4V, VEX_LIG;
+defm VMUL : basic_sse12_fp_binop_p<0x59, "mul", fmul, 0>,
basic_sse12_fp_binop_p_y<0x59, "mul", fmul>, VEX_4V;
let isCommutable = 0 in {
defm VSUB : basic_sse12_fp_binop_s<0x5C, "sub", fsub, 0>,
- basic_sse12_fp_binop_s_int<0x5C, "sub", 0>,
- basic_sse12_fp_binop_p<0x5C, "sub", fsub, 0>,
+ basic_sse12_fp_binop_s_int<0x5C, "sub", 0>, VEX_4V, VEX_LIG;
+ defm VSUB : basic_sse12_fp_binop_p<0x5C, "sub", fsub, 0>,
basic_sse12_fp_binop_p_y<0x5C, "sub", fsub>, VEX_4V;
defm VDIV : basic_sse12_fp_binop_s<0x5E, "div", fdiv, 0>,
- basic_sse12_fp_binop_s_int<0x5E, "div", 0>,
- basic_sse12_fp_binop_p<0x5E, "div", fdiv, 0>,
+ basic_sse12_fp_binop_s_int<0x5E, "div", 0>, VEX_4V, VEX_LIG;
+ defm VDIV : basic_sse12_fp_binop_p<0x5E, "div", fdiv, 0>,
basic_sse12_fp_binop_p_y<0x5E, "div", fdiv>, VEX_4V;
defm VMAX : basic_sse12_fp_binop_s<0x5F, "max", X86fmax, 0>,
- basic_sse12_fp_binop_s_int<0x5F, "max", 0>,
- basic_sse12_fp_binop_p<0x5F, "max", X86fmax, 0>,
+ basic_sse12_fp_binop_s_int<0x5F, "max", 0>, VEX_4V, VEX_LIG;
+ defm VMAX : basic_sse12_fp_binop_p<0x5F, "max", X86fmax, 0>,
basic_sse12_fp_binop_p_int<0x5F, "max", 0>,
basic_sse12_fp_binop_p_y<0x5F, "max", X86fmax>,
basic_sse12_fp_binop_p_y_int<0x5F, "max">, VEX_4V;
defm VMIN : basic_sse12_fp_binop_s<0x5D, "min", X86fmin, 0>,
- basic_sse12_fp_binop_s_int<0x5D, "min", 0>,
- basic_sse12_fp_binop_p<0x5D, "min", X86fmin, 0>,
+ basic_sse12_fp_binop_s_int<0x5D, "min", 0>, VEX_4V, VEX_LIG;
+ defm VMIN : basic_sse12_fp_binop_p<0x5D, "min", X86fmin, 0>,
basic_sse12_fp_binop_p_int<0x5D, "min", 0>,
basic_sse12_fp_binop_p_y_int<0x5D, "min">,
basic_sse12_fp_binop_p_y<0x5D, "min", X86fmin>, VEX_4V;
@@ -1720,23 +2843,18 @@ multiclass sse1_fp_unop_s<bits<8> opc, string OpcodeStr,
}
/// sse1_fp_unop_s_avx - AVX SSE1 unops in scalar form.
-multiclass sse1_fp_unop_s_avx<bits<8> opc, string OpcodeStr,
- SDNode OpNode, Intrinsic F32Int> {
+multiclass sse1_fp_unop_s_avx<bits<8> opc, string OpcodeStr> {
def SSr : SSI<opc, MRMSrcReg, (outs FR32:$dst), (ins FR32:$src1, FR32:$src2),
!strconcat(OpcodeStr,
"ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>;
- def SSm : I<opc, MRMSrcMem, (outs FR32:$dst), (ins FR32:$src1, f32mem:$src2),
- !strconcat(OpcodeStr,
- "ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- []>, XS, Requires<[HasAVX, OptForSize]>;
- def SSr_Int : SSI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ let mayLoad = 1 in
+ def SSm : SSI<opc, MRMSrcMem, (outs FR32:$dst), (ins FR32:$src1,f32mem:$src2),
!strconcat(OpcodeStr,
- "ss\t{$src, $dst, $dst|$dst, $dst, $src}"),
- [(set VR128:$dst, (F32Int VR128:$src))]>;
- def SSm_Int : SSI<opc, MRMSrcMem, (outs VR128:$dst), (ins ssmem:$src),
+ "ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>;
+ def SSm_Int : SSI<opc, MRMSrcMem, (outs VR128:$dst),
+ (ins ssmem:$src1, VR128:$src2),
!strconcat(OpcodeStr,
- "ss\t{$src, $dst, $dst|$dst, $dst, $src}"),
- [(set VR128:$dst, (F32Int sse_load_f32:$src))]>;
+ "ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>;
}
/// sse1_fp_unop_p - SSE1 unops in packed form.
@@ -1801,21 +2919,17 @@ multiclass sse2_fp_unop_s<bits<8> opc, string OpcodeStr,
}
/// sse2_fp_unop_s_avx - AVX SSE2 unops in scalar form.
-multiclass sse2_fp_unop_s_avx<bits<8> opc, string OpcodeStr,
- SDNode OpNode, Intrinsic F64Int> {
+multiclass sse2_fp_unop_s_avx<bits<8> opc, string OpcodeStr> {
def SDr : SDI<opc, MRMSrcReg, (outs FR64:$dst), (ins FR64:$src1, FR64:$src2),
!strconcat(OpcodeStr,
"sd\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>;
- def SDm : SDI<opc, MRMSrcMem, (outs FR64:$dst),
- (ins FR64:$src1, f64mem:$src2),
+ def SDm : SDI<opc, MRMSrcMem, (outs FR64:$dst), (ins FR64:$src1,f64mem:$src2),
+ !strconcat(OpcodeStr,
+ "sd\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>;
+ def SDm_Int : SDI<opc, MRMSrcMem, (outs VR128:$dst),
+ (ins VR128:$src1, sdmem:$src2),
!strconcat(OpcodeStr,
"sd\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>;
- def SDr_Int : SDI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
- !strconcat(OpcodeStr, "sd\t{$src, $dst, $dst|$dst, $dst, $src}"),
- [(set VR128:$dst, (F64Int VR128:$src))]>;
- def SDm_Int : SDI<opc, MRMSrcMem, (outs VR128:$dst), (ins sdmem:$src),
- !strconcat(OpcodeStr, "sd\t{$src, $dst, $dst|$dst, $dst, $src}"),
- [(set VR128:$dst, (F64Int sse_load_f64:$src))]>;
}
/// sse2_fp_unop_p - SSE2 unops in vector forms.
@@ -1863,9 +2977,8 @@ multiclass sse2_fp_unop_p_y_int<bits<8> opc, string OpcodeStr,
let Predicates = [HasAVX] in {
// Square root.
- defm VSQRT : sse1_fp_unop_s_avx<0x51, "vsqrt", fsqrt, int_x86_sse_sqrt_ss>,
- sse2_fp_unop_s_avx<0x51, "vsqrt", fsqrt, int_x86_sse2_sqrt_sd>,
- VEX_4V;
+ defm VSQRT : sse1_fp_unop_s_avx<0x51, "vsqrt">,
+ sse2_fp_unop_s_avx<0x51, "vsqrt">, VEX_4V, VEX_LIG;
defm VSQRT : sse1_fp_unop_p<0x51, "vsqrt", fsqrt>,
sse2_fp_unop_p<0x51, "vsqrt", fsqrt>,
@@ -1879,21 +2992,76 @@ let Predicates = [HasAVX] in {
  // Reciprocal approximations. Note that these typically require refinement
  // in order to obtain suitable precision (see the refinement sketch after
  // this block).
- defm VRSQRT : sse1_fp_unop_s_avx<0x52, "vrsqrt", X86frsqrt,
- int_x86_sse_rsqrt_ss>, VEX_4V;
+ defm VRSQRT : sse1_fp_unop_s_avx<0x52, "vrsqrt">, VEX_4V, VEX_LIG;
defm VRSQRT : sse1_fp_unop_p<0x52, "vrsqrt", X86frsqrt>,
sse1_fp_unop_p_y<0x52, "vrsqrt", X86frsqrt>,
sse1_fp_unop_p_y_int<0x52, "vrsqrt", int_x86_avx_rsqrt_ps_256>,
sse1_fp_unop_p_int<0x52, "vrsqrt", int_x86_sse_rsqrt_ps>, VEX;
- defm VRCP : sse1_fp_unop_s_avx<0x53, "vrcp", X86frcp, int_x86_sse_rcp_ss>,
- VEX_4V;
+ defm VRCP : sse1_fp_unop_s_avx<0x53, "vrcp">, VEX_4V, VEX_LIG;
defm VRCP : sse1_fp_unop_p<0x53, "vrcp", X86frcp>,
sse1_fp_unop_p_y<0x53, "vrcp", X86frcp>,
sse1_fp_unop_p_y_int<0x53, "vrcp", int_x86_avx_rcp_ps_256>,
sse1_fp_unop_p_int<0x53, "vrcp", int_x86_sse_rcp_ps>, VEX;
}
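+// Refinement sketch for the reciprocal approximations above: rcpss and
+// rsqrtss guarantee only about 12 bits of precision, and one Newton-Raphson
+// step roughly doubles the number of accurate bits:
+//   rcp:   x1 = x0 * (2.0 - a * x0)              (x0 = rcpss(a))
+//   rsqrt: x1 = x0 * (1.5 - 0.5 * a * x0 * x0)   (x0 = rsqrtss(a))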
+def : Pat<(f32 (fsqrt FR32:$src)),
+ (VSQRTSSr (f32 (IMPLICIT_DEF)), FR32:$src)>, Requires<[HasAVX]>;
+def : Pat<(f32 (fsqrt (load addr:$src))),
+ (VSQRTSSm (f32 (IMPLICIT_DEF)), addr:$src)>,
+ Requires<[HasAVX, OptForSize]>;
+def : Pat<(f64 (fsqrt FR64:$src)),
+ (VSQRTSDr (f64 (IMPLICIT_DEF)), FR64:$src)>, Requires<[HasAVX]>;
+def : Pat<(f64 (fsqrt (load addr:$src))),
+ (VSQRTSDm (f64 (IMPLICIT_DEF)), addr:$src)>,
+ Requires<[HasAVX, OptForSize]>;
+
+def : Pat<(f32 (X86frsqrt FR32:$src)),
+ (VRSQRTSSr (f32 (IMPLICIT_DEF)), FR32:$src)>, Requires<[HasAVX]>;
+def : Pat<(f32 (X86frsqrt (load addr:$src))),
+ (VRSQRTSSm (f32 (IMPLICIT_DEF)), addr:$src)>,
+ Requires<[HasAVX, OptForSize]>;
+
+def : Pat<(f32 (X86frcp FR32:$src)),
+ (VRCPSSr (f32 (IMPLICIT_DEF)), FR32:$src)>, Requires<[HasAVX]>;
+def : Pat<(f32 (X86frcp (load addr:$src))),
+ (VRCPSSm (f32 (IMPLICIT_DEF)), addr:$src)>,
+ Requires<[HasAVX, OptForSize]>;
+
+let Predicates = [HasAVX] in {
+ def : Pat<(int_x86_sse_sqrt_ss VR128:$src),
+ (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)),
+ (VSQRTSSr (f32 (IMPLICIT_DEF)),
+ (EXTRACT_SUBREG (v4f32 VR128:$src), sub_ss)),
+ sub_ss)>;
+ def : Pat<(int_x86_sse_sqrt_ss sse_load_f32:$src),
+ (VSQRTSSm_Int (v4f32 (IMPLICIT_DEF)), sse_load_f32:$src)>;
+
+ def : Pat<(int_x86_sse2_sqrt_sd VR128:$src),
+ (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)),
+ (VSQRTSDr (f64 (IMPLICIT_DEF)),
+ (EXTRACT_SUBREG (v2f64 VR128:$src), sub_sd)),
+ sub_sd)>;
+ def : Pat<(int_x86_sse2_sqrt_sd sse_load_f64:$src),
+ (VSQRTSDm_Int (v2f64 (IMPLICIT_DEF)), sse_load_f64:$src)>;
+
+ def : Pat<(int_x86_sse_rsqrt_ss VR128:$src),
+ (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)),
+ (VRSQRTSSr (f32 (IMPLICIT_DEF)),
+ (EXTRACT_SUBREG (v4f32 VR128:$src), sub_ss)),
+ sub_ss)>;
+ def : Pat<(int_x86_sse_rsqrt_ss sse_load_f32:$src),
+ (VRSQRTSSm_Int (v4f32 (IMPLICIT_DEF)), sse_load_f32:$src)>;
+
+ def : Pat<(int_x86_sse_rcp_ss VR128:$src),
+ (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)),
+ (VRCPSSr (f32 (IMPLICIT_DEF)),
+ (EXTRACT_SUBREG (v4f32 VR128:$src), sub_ss)),
+ sub_ss)>;
+ def : Pat<(int_x86_sse_rcp_ss sse_load_f32:$src),
+ (VRCPSSm_Int (v4f32 (IMPLICIT_DEF)), sse_load_f32:$src)>;
+}
+
// Square root.
defm SQRT : sse1_fp_unop_s<0x51, "sqrt", fsqrt, int_x86_sse_sqrt_ss>,
sse1_fp_unop_p<0x51, "sqrt", fsqrt>,
@@ -1992,7 +3160,7 @@ def MOVNTDQmr : PDI<0xE7, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
[(alignednontemporalstore (v4f32 VR128:$src), addr:$dst)]>;
def : Pat<(alignednontemporalstore (v2i64 VR128:$src), addr:$dst),
- (MOVNTDQmr addr:$dst, VR128:$src)>;
+ (MOVNTDQmr addr:$dst, VR128:$src)>, Requires<[HasSSE2]>;
// There is no AVX form for instructions below this point
def MOVNTImr : I<0xC3, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src),
@@ -2006,7 +3174,7 @@ def MOVNTI_64mr : RI<0xC3, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src),
}
//===----------------------------------------------------------------------===//
-// SSE 1 & 2 - Misc Instructions (No AVX form)
+// SSE 1 & 2 - Prefetch and memory fence
//===----------------------------------------------------------------------===//
// Prefetch intrinsic.
@@ -2019,66 +3187,26 @@ def PREFETCHT2 : PSI<0x18, MRM3m, (outs), (ins i8mem:$src),
def PREFETCHNTA : PSI<0x18, MRM0m, (outs), (ins i8mem:$src),
"prefetchnta\t$src", [(prefetch addr:$src, imm, (i32 0), (i32 1))]>;
-// Load, store, and memory fence
-def SFENCE : I<0xAE, MRM_F8, (outs), (ins), "sfence", [(int_x86_sse_sfence)]>,
- TB, Requires<[HasSSE1]>;
-def : Pat<(X86SFence), (SFENCE)>;
-
-// Alias instructions that map zero vector to pxor / xorp* for sse.
-// We set canFoldAsLoad because this can be converted to a constant-pool
-// load of an all-zeros value if folding it would be beneficial.
-// FIXME: Change encoding to pseudo! This is blocked right now by the x86
-// JIT implementation, it does not expand the instructions below like
-// X86MCInstLower does.
-let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
- isCodeGenOnly = 1 in {
-def V_SET0PS : PSI<0x57, MRMInitReg, (outs VR128:$dst), (ins), "",
- [(set VR128:$dst, (v4f32 immAllZerosV))]>;
-def V_SET0PD : PDI<0x57, MRMInitReg, (outs VR128:$dst), (ins), "",
- [(set VR128:$dst, (v2f64 immAllZerosV))]>;
-let ExeDomain = SSEPackedInt in
-def V_SET0PI : PDI<0xEF, MRMInitReg, (outs VR128:$dst), (ins), "",
- [(set VR128:$dst, (v4i32 immAllZerosV))]>;
-}
-
-// The same as done above but for AVX. The 128-bit versions are the
-// same, but re-encoded. The 256-bit does not support PI version, and
-// doesn't need it because on sandy bridge the register is set to zero
-// at the rename stage without using any execution unit, so SET0PSY
-// and SET0PDY can be used for vector int instructions without penalty
-// FIXME: Change encoding to pseudo! This is blocked right now by the x86
-// JIT implementatioan, it does not expand the instructions below like
-// X86MCInstLower does.
-let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
- isCodeGenOnly = 1, Predicates = [HasAVX] in {
-def AVX_SET0PS : PSI<0x57, MRMInitReg, (outs VR128:$dst), (ins), "",
- [(set VR128:$dst, (v4f32 immAllZerosV))]>, VEX_4V;
-def AVX_SET0PD : PDI<0x57, MRMInitReg, (outs VR128:$dst), (ins), "",
- [(set VR128:$dst, (v2f64 immAllZerosV))]>, VEX_4V;
-def AVX_SET0PSY : PSI<0x57, MRMInitReg, (outs VR256:$dst), (ins), "",
- [(set VR256:$dst, (v8f32 immAllZerosV))]>, VEX_4V;
-def AVX_SET0PDY : PDI<0x57, MRMInitReg, (outs VR256:$dst), (ins), "",
- [(set VR256:$dst, (v4f64 immAllZerosV))]>, VEX_4V;
-let ExeDomain = SSEPackedInt in
-def AVX_SET0PI : PDI<0xEF, MRMInitReg, (outs VR128:$dst), (ins), "",
- [(set VR128:$dst, (v4i32 immAllZerosV))]>;
-}
+// Flush cache
+def CLFLUSH : I<0xAE, MRM7m, (outs), (ins i8mem:$src),
+ "clflush\t$src", [(int_x86_sse2_clflush addr:$src)]>,
+ TB, Requires<[HasSSE2]>;
-def : Pat<(v2i64 immAllZerosV), (V_SET0PI)>;
-def : Pat<(v8i16 immAllZerosV), (V_SET0PI)>;
-def : Pat<(v16i8 immAllZerosV), (V_SET0PI)>;
+// Pause. This "instruction" is encoded as "rep; nop", so even though it
+// was introduced with SSE2, it's backward compatible.
+def PAUSE : I<0x90, RawFrm, (outs), (ins), "pause", []>, REP;
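+// For example, "pause" assembles to the byte sequence F3 90 (a REP prefix
+// followed by NOP), which pre-SSE2 processors execute as a plain nop.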
-def : Pat<(f32 (vector_extract (v4f32 VR128:$src), (iPTR 0))),
- (f32 (EXTRACT_SUBREG (v4f32 VR128:$src), sub_ss))>;
+// Load, store, and memory fence
+def SFENCE : I<0xAE, MRM_F8, (outs), (ins),
+ "sfence", [(int_x86_sse_sfence)]>, TB, Requires<[HasSSE1]>;
+def LFENCE : I<0xAE, MRM_E8, (outs), (ins),
+ "lfence", [(int_x86_sse2_lfence)]>, TB, Requires<[HasSSE2]>;
+def MFENCE : I<0xAE, MRM_F0, (outs), (ins),
+ "mfence", [(int_x86_sse2_mfence)]>, TB, Requires<[HasSSE2]>;
-// FIXME: According to the intel manual, DEST[127:64] <- SRC1[127:64], while
-// in the non-AVX version bits 127:64 aren't touched. Find a better way to
-// represent this instead of always zeroing SRC1. One possible solution is
-// to represent the instruction w/ something similar as the "$src1 = $dst"
-// constraint but without the tied operands.
-def : Pat<(extloadf32 addr:$src),
- (VCVTSS2SDrm (f32 (EXTRACT_SUBREG (AVX_SET0PS), sub_ss)), addr:$src)>,
- Requires<[HasAVX, OptForSpeed]>;
+def : Pat<(X86SFence), (SFENCE)>;
+def : Pat<(X86LFence), (LFENCE)>;
+def : Pat<(X86MFence), (MFENCE)>;
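+// All three fences share opcode 0F AE and are distinguished only by the
+// ModRM byte, matching the MRM_* forms above: sfence (0F AE F8) orders
+// stores, lfence (0F AE E8) orders loads, and mfence (0F AE F0) orders both.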
//===----------------------------------------------------------------------===//
// SSE 1 & 2 - Load/Store XCSR register
@@ -2106,10 +3234,22 @@ def VMOVDQArr : VPDI<0x6F, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
def VMOVDQAYrr : VPDI<0x6F, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
"movdqa\t{$src, $dst|$dst, $src}", []>, VEX;
}
-def VMOVDQUrr : VPDI<0x6F, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
- "movdqu\t{$src, $dst|$dst, $src}", []>, XS, VEX;
-def VMOVDQUYrr : VPDI<0x6F, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
- "movdqu\t{$src, $dst|$dst, $src}", []>, XS, VEX;
+def VMOVDQUrr : VSSI<0x6F, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ "movdqu\t{$src, $dst|$dst, $src}", []>, VEX;
+def VMOVDQUYrr : VSSI<0x6F, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
+ "movdqu\t{$src, $dst|$dst, $src}", []>, VEX;
+
+// For Disassembler
+let isCodeGenOnly = 1 in {
+def VMOVDQArr_REV : VPDI<0x7F, MRMDestReg, (outs VR128:$dst), (ins VR128:$src),
+ "movdqa\t{$src, $dst|$dst, $src}", []>, VEX;
+def VMOVDQAYrr_REV : VPDI<0x7F, MRMDestReg, (outs VR256:$dst), (ins VR256:$src),
+ "movdqa\t{$src, $dst|$dst, $src}", []>, VEX;
+def VMOVDQUrr_REV : VSSI<0x7F, MRMDestReg, (outs VR128:$dst), (ins VR128:$src),
+ "movdqu\t{$src, $dst|$dst, $src}", []>, VEX;
+def VMOVDQUYrr_REV : VSSI<0x7F, MRMDestReg, (outs VR256:$dst), (ins VR256:$src),
+ "movdqu\t{$src, $dst|$dst, $src}", []>, VEX;
+}
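+// Note on the _REV defs above: a register-to-register movdqa/movdqu has two
+// valid encodings, the load form (opcode 6F) and the store form (opcode 7F).
+// Codegen only emits the 6F form, so these isCodeGenOnly defs exist purely
+// to let the disassembler round-trip the 7F register-to-register encoding.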
let canFoldAsLoad = 1, mayLoad = 1 in {
def VMOVDQArm : VPDI<0x6F, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
@@ -2147,6 +3287,16 @@ def MOVDQUrr : I<0x6F, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"movdqu\t{$src, $dst|$dst, $src}",
[]>, XS, Requires<[HasSSE2]>;
+// For Disassembler
+let isCodeGenOnly = 1 in {
+def MOVDQArr_REV : PDI<0x7F, MRMDestReg, (outs VR128:$dst), (ins VR128:$src),
+ "movdqa\t{$src, $dst|$dst, $src}", []>;
+
+def MOVDQUrr_REV : I<0x7F, MRMDestReg, (outs VR128:$dst), (ins VR128:$src),
+ "movdqu\t{$src, $dst|$dst, $src}",
+ []>, XS, Requires<[HasSSE2]>;
+}
+
let canFoldAsLoad = 1, mayLoad = 1 in {
def MOVDQArm : PDI<0x6F, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
"movdqa\t{$src, $dst|$dst, $src}",
@@ -2180,9 +3330,11 @@ def MOVDQUmr_Int : I<0x7F, MRMDestMem, (outs), (ins i128mem:$dst, VR128:$src),
} // ExeDomain = SSEPackedInt
-def : Pat<(int_x86_avx_loadu_dq_256 addr:$src), (VMOVDQUYrm addr:$src)>;
-def : Pat<(int_x86_avx_storeu_dq_256 addr:$dst, VR256:$src),
- (VMOVDQUYmr addr:$dst, VR256:$src)>;
+let Predicates = [HasAVX] in {
+ def : Pat<(int_x86_avx_loadu_dq_256 addr:$src), (VMOVDQUYrm addr:$src)>;
+ def : Pat<(int_x86_avx_storeu_dq_256 addr:$dst, VR256:$src),
+ (VMOVDQUYmr addr:$dst, VR256:$src)>;
+}
//===---------------------------------------------------------------------===//
// SSE2 - Packed Integer Arithmetic Instructions
@@ -2415,15 +3567,14 @@ let ExeDomain = SSEPackedInt in {
def VPANDNrr : PDI<0xDF, MRMSrcReg,
(outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
"vpandn\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- [(set VR128:$dst, (v2i64 (and (vnot VR128:$src1),
- VR128:$src2)))]>, VEX_4V;
+ [(set VR128:$dst,
+ (v2i64 (X86andnp VR128:$src1, VR128:$src2)))]>,VEX_4V;
def VPANDNrm : PDI<0xDF, MRMSrcMem,
(outs VR128:$dst), (ins VR128:$src1, i128mem:$src2),
"vpandn\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- [(set VR128:$dst, (v2i64 (and (vnot VR128:$src1),
- (memopv2i64 addr:$src2))))]>,
- VEX_4V;
+ [(set VR128:$dst, (X86andnp VR128:$src1,
+ (memopv2i64 addr:$src2)))]>, VEX_4V;
}
}
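+// Note the operand order captured by X86andnp: pandn computes
+//   dst = ~src1 & src2
+// i.e. the first source is the one complemented, which is why the node can
+// replace the old (and (vnot $src1), $src2) patterns one-for-one.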
@@ -2527,6 +3678,32 @@ let Predicates = [HasAVX] in {
0>, VEX_4V;
defm VPCMPGTD : PDI_binop_rm_int<0x66, "vpcmpgtd", int_x86_sse2_pcmpgt_d, 0,
0>, VEX_4V;
+
+ def : Pat<(v16i8 (X86pcmpeqb VR128:$src1, VR128:$src2)),
+ (VPCMPEQBrr VR128:$src1, VR128:$src2)>;
+ def : Pat<(v16i8 (X86pcmpeqb VR128:$src1, (memop addr:$src2))),
+ (VPCMPEQBrm VR128:$src1, addr:$src2)>;
+ def : Pat<(v8i16 (X86pcmpeqw VR128:$src1, VR128:$src2)),
+ (VPCMPEQWrr VR128:$src1, VR128:$src2)>;
+ def : Pat<(v8i16 (X86pcmpeqw VR128:$src1, (memop addr:$src2))),
+ (VPCMPEQWrm VR128:$src1, addr:$src2)>;
+ def : Pat<(v4i32 (X86pcmpeqd VR128:$src1, VR128:$src2)),
+ (VPCMPEQDrr VR128:$src1, VR128:$src2)>;
+ def : Pat<(v4i32 (X86pcmpeqd VR128:$src1, (memop addr:$src2))),
+ (VPCMPEQDrm VR128:$src1, addr:$src2)>;
+
+ def : Pat<(v16i8 (X86pcmpgtb VR128:$src1, VR128:$src2)),
+ (VPCMPGTBrr VR128:$src1, VR128:$src2)>;
+ def : Pat<(v16i8 (X86pcmpgtb VR128:$src1, (memop addr:$src2))),
+ (VPCMPGTBrm VR128:$src1, addr:$src2)>;
+ def : Pat<(v8i16 (X86pcmpgtw VR128:$src1, VR128:$src2)),
+ (VPCMPGTWrr VR128:$src1, VR128:$src2)>;
+ def : Pat<(v8i16 (X86pcmpgtw VR128:$src1, (memop addr:$src2))),
+ (VPCMPGTWrm VR128:$src1, addr:$src2)>;
+ def : Pat<(v4i32 (X86pcmpgtd VR128:$src1, VR128:$src2)),
+ (VPCMPGTDrr VR128:$src1, VR128:$src2)>;
+ def : Pat<(v4i32 (X86pcmpgtd VR128:$src1, (memop addr:$src2))),
+ (VPCMPGTDrm VR128:$src1, addr:$src2)>;
}
let Constraints = "$src1 = $dst" in {
@@ -2538,31 +3715,33 @@ let Constraints = "$src1 = $dst" in {
defm PCMPGTD : PDI_binop_rm_int<0x66, "pcmpgtd", int_x86_sse2_pcmpgt_d>;
} // Constraints = "$src1 = $dst"
-def : Pat<(v16i8 (X86pcmpeqb VR128:$src1, VR128:$src2)),
- (PCMPEQBrr VR128:$src1, VR128:$src2)>;
-def : Pat<(v16i8 (X86pcmpeqb VR128:$src1, (memop addr:$src2))),
- (PCMPEQBrm VR128:$src1, addr:$src2)>;
-def : Pat<(v8i16 (X86pcmpeqw VR128:$src1, VR128:$src2)),
- (PCMPEQWrr VR128:$src1, VR128:$src2)>;
-def : Pat<(v8i16 (X86pcmpeqw VR128:$src1, (memop addr:$src2))),
- (PCMPEQWrm VR128:$src1, addr:$src2)>;
-def : Pat<(v4i32 (X86pcmpeqd VR128:$src1, VR128:$src2)),
- (PCMPEQDrr VR128:$src1, VR128:$src2)>;
-def : Pat<(v4i32 (X86pcmpeqd VR128:$src1, (memop addr:$src2))),
- (PCMPEQDrm VR128:$src1, addr:$src2)>;
-
-def : Pat<(v16i8 (X86pcmpgtb VR128:$src1, VR128:$src2)),
- (PCMPGTBrr VR128:$src1, VR128:$src2)>;
-def : Pat<(v16i8 (X86pcmpgtb VR128:$src1, (memop addr:$src2))),
- (PCMPGTBrm VR128:$src1, addr:$src2)>;
-def : Pat<(v8i16 (X86pcmpgtw VR128:$src1, VR128:$src2)),
- (PCMPGTWrr VR128:$src1, VR128:$src2)>;
-def : Pat<(v8i16 (X86pcmpgtw VR128:$src1, (memop addr:$src2))),
- (PCMPGTWrm VR128:$src1, addr:$src2)>;
-def : Pat<(v4i32 (X86pcmpgtd VR128:$src1, VR128:$src2)),
- (PCMPGTDrr VR128:$src1, VR128:$src2)>;
-def : Pat<(v4i32 (X86pcmpgtd VR128:$src1, (memop addr:$src2))),
- (PCMPGTDrm VR128:$src1, addr:$src2)>;
+let Predicates = [HasSSE2] in {
+ def : Pat<(v16i8 (X86pcmpeqb VR128:$src1, VR128:$src2)),
+ (PCMPEQBrr VR128:$src1, VR128:$src2)>;
+ def : Pat<(v16i8 (X86pcmpeqb VR128:$src1, (memop addr:$src2))),
+ (PCMPEQBrm VR128:$src1, addr:$src2)>;
+ def : Pat<(v8i16 (X86pcmpeqw VR128:$src1, VR128:$src2)),
+ (PCMPEQWrr VR128:$src1, VR128:$src2)>;
+ def : Pat<(v8i16 (X86pcmpeqw VR128:$src1, (memop addr:$src2))),
+ (PCMPEQWrm VR128:$src1, addr:$src2)>;
+ def : Pat<(v4i32 (X86pcmpeqd VR128:$src1, VR128:$src2)),
+ (PCMPEQDrr VR128:$src1, VR128:$src2)>;
+ def : Pat<(v4i32 (X86pcmpeqd VR128:$src1, (memop addr:$src2))),
+ (PCMPEQDrm VR128:$src1, addr:$src2)>;
+
+ def : Pat<(v16i8 (X86pcmpgtb VR128:$src1, VR128:$src2)),
+ (PCMPGTBrr VR128:$src1, VR128:$src2)>;
+ def : Pat<(v16i8 (X86pcmpgtb VR128:$src1, (memop addr:$src2))),
+ (PCMPGTBrm VR128:$src1, addr:$src2)>;
+ def : Pat<(v8i16 (X86pcmpgtw VR128:$src1, VR128:$src2)),
+ (PCMPGTWrr VR128:$src1, VR128:$src2)>;
+ def : Pat<(v8i16 (X86pcmpgtw VR128:$src1, (memop addr:$src2))),
+ (PCMPGTWrm VR128:$src1, addr:$src2)>;
+ def : Pat<(v4i32 (X86pcmpgtd VR128:$src1, VR128:$src2)),
+ (PCMPGTDrr VR128:$src1, VR128:$src2)>;
+ def : Pat<(v4i32 (X86pcmpgtd VR128:$src1, (memop addr:$src2))),
+ (PCMPGTDrm VR128:$src1, addr:$src2)>;
+}
//===---------------------------------------------------------------------===//
// SSE2 - Packed Integer Pack Instructions
@@ -2608,7 +3787,7 @@ def mi : Ii8<0x70, MRMSrcMem,
let Predicates = [HasAVX] in {
let AddedComplexity = 5 in
- defm VPSHUFD : sse2_pshuffle<"vpshufd", v4i32, pshufd, bc_v4i32>, OpSize,
+ defm VPSHUFD : sse2_pshuffle<"vpshufd", v4i32, pshufd, bc_v4i32>, TB, OpSize,
VEX;
// SSE2 with ImmT == Imm8 and XS prefix.
@@ -2618,6 +3797,34 @@ let Predicates = [HasAVX] in {
// SSE2 with ImmT == Imm8 and XD prefix.
defm VPSHUFLW : sse2_pshuffle<"vpshuflw", v8i16, pshuflw, bc_v8i16>, XD,
VEX;
+
+ let AddedComplexity = 5 in
+ def : Pat<(v4f32 (pshufd:$src2 VR128:$src1, (undef))),
+ (VPSHUFDri VR128:$src1, (SHUFFLE_get_shuf_imm VR128:$src2))>;
+ // Unary v4f32 shuffle with VPSHUF* in order to fold a load.
+ def : Pat<(pshufd:$src2 (bc_v4i32 (memopv4f32 addr:$src1)), (undef)),
+ (VPSHUFDmi addr:$src1, (SHUFFLE_get_shuf_imm VR128:$src2))>;
+
+ def : Pat<(v4i32 (X86PShufd (bc_v4i32 (memopv2i64 addr:$src1)),
+ (i8 imm:$imm))),
+ (VPSHUFDmi addr:$src1, imm:$imm)>;
+ def : Pat<(v4i32 (X86PShufd (bc_v4i32 (memopv4f32 addr:$src1)),
+ (i8 imm:$imm))),
+ (VPSHUFDmi addr:$src1, imm:$imm)>;
+ def : Pat<(v4f32 (X86PShufd VR128:$src1, (i8 imm:$imm))),
+ (VPSHUFDri VR128:$src1, imm:$imm)>;
+ def : Pat<(v4i32 (X86PShufd VR128:$src1, (i8 imm:$imm))),
+ (VPSHUFDri VR128:$src1, imm:$imm)>;
+ def : Pat<(v8i16 (X86PShufhw VR128:$src, (i8 imm:$imm))),
+ (VPSHUFHWri VR128:$src, imm:$imm)>;
+ def : Pat<(v8i16 (X86PShufhw (bc_v8i16 (memopv2i64 addr:$src)),
+ (i8 imm:$imm))),
+ (VPSHUFHWmi addr:$src, imm:$imm)>;
+ def : Pat<(v8i16 (X86PShuflw VR128:$src, (i8 imm:$imm))),
+ (VPSHUFLWri VR128:$src, imm:$imm)>;
+ def : Pat<(v8i16 (X86PShuflw (bc_v8i16 (memopv2i64 addr:$src)),
+ (i8 imm:$imm))),
+ (VPSHUFLWmi addr:$src, imm:$imm)>;
}
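+// Worked example of the PSHUFD immediate used above: each 2-bit field of
+// the immediate selects the source dword for one result dword,
+//   dst[i] = src[(imm >> 2*i) & 3]   for i = 0..3
+// so imm = 0x1B reverses the four elements and imm = 0x00 splats element 0.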
let Predicates = [HasSSE2] in {
@@ -2629,6 +3836,34 @@ let Predicates = [HasSSE2] in {
// SSE2 with ImmT == Imm8 and XD prefix.
defm PSHUFLW : sse2_pshuffle<"pshuflw", v8i16, pshuflw, bc_v8i16>, XD;
+
+ let AddedComplexity = 5 in
+ def : Pat<(v4f32 (pshufd:$src2 VR128:$src1, (undef))),
+ (PSHUFDri VR128:$src1, (SHUFFLE_get_shuf_imm VR128:$src2))>;
+ // Unary v4f32 shuffle with PSHUF* in order to fold a load.
+ def : Pat<(pshufd:$src2 (bc_v4i32 (memopv4f32 addr:$src1)), (undef)),
+ (PSHUFDmi addr:$src1, (SHUFFLE_get_shuf_imm VR128:$src2))>;
+
+ def : Pat<(v4i32 (X86PShufd (bc_v4i32 (memopv2i64 addr:$src1)),
+ (i8 imm:$imm))),
+ (PSHUFDmi addr:$src1, imm:$imm)>;
+ def : Pat<(v4i32 (X86PShufd (bc_v4i32 (memopv4f32 addr:$src1)),
+ (i8 imm:$imm))),
+ (PSHUFDmi addr:$src1, imm:$imm)>;
+ def : Pat<(v4f32 (X86PShufd VR128:$src1, (i8 imm:$imm))),
+ (PSHUFDri VR128:$src1, imm:$imm)>;
+ def : Pat<(v4i32 (X86PShufd VR128:$src1, (i8 imm:$imm))),
+ (PSHUFDri VR128:$src1, imm:$imm)>;
+ def : Pat<(v8i16 (X86PShufhw VR128:$src, (i8 imm:$imm))),
+ (PSHUFHWri VR128:$src, imm:$imm)>;
+ def : Pat<(v8i16 (X86PShufhw (bc_v8i16 (memopv2i64 addr:$src)),
+ (i8 imm:$imm))),
+ (PSHUFHWmi addr:$src, imm:$imm)>;
+ def : Pat<(v8i16 (X86PShuflw VR128:$src, (i8 imm:$imm))),
+ (PSHUFLWri VR128:$src, imm:$imm)>;
+ def : Pat<(v8i16 (X86PShuflw (bc_v8i16 (memopv2i64 addr:$src)),
+ (i8 imm:$imm))),
+ (PSHUFLWmi addr:$src, imm:$imm)>;
}
//===---------------------------------------------------------------------===//
@@ -2637,71 +3872,69 @@ let Predicates = [HasSSE2] in {
let ExeDomain = SSEPackedInt in {
multiclass sse2_unpack<bits<8> opc, string OpcodeStr, ValueType vt,
- PatFrag unp_frag, PatFrag bc_frag, bit Is2Addr = 1> {
+ SDNode OpNode, PatFrag bc_frag, bit Is2Addr = 1> {
def rr : PDI<opc, MRMSrcReg,
(outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
!if(Is2Addr,
!strconcat(OpcodeStr,"\t{$src2, $dst|$dst, $src2}"),
!strconcat(OpcodeStr,"\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
- [(set VR128:$dst, (vt (unp_frag VR128:$src1, VR128:$src2)))]>;
+ [(set VR128:$dst, (vt (OpNode VR128:$src1, VR128:$src2)))]>;
def rm : PDI<opc, MRMSrcMem,
(outs VR128:$dst), (ins VR128:$src1, i128mem:$src2),
!if(Is2Addr,
!strconcat(OpcodeStr,"\t{$src2, $dst|$dst, $src2}"),
!strconcat(OpcodeStr,"\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
- [(set VR128:$dst, (unp_frag VR128:$src1,
+ [(set VR128:$dst, (OpNode VR128:$src1,
(bc_frag (memopv2i64
addr:$src2))))]>;
}
let Predicates = [HasAVX] in {
- defm VPUNPCKLBW : sse2_unpack<0x60, "vpunpcklbw", v16i8, unpckl, bc_v16i8,
- 0>, VEX_4V;
- defm VPUNPCKLWD : sse2_unpack<0x61, "vpunpcklwd", v8i16, unpckl, bc_v8i16,
- 0>, VEX_4V;
- defm VPUNPCKLDQ : sse2_unpack<0x62, "vpunpckldq", v4i32, unpckl, bc_v4i32,
- 0>, VEX_4V;
+ defm VPUNPCKLBW : sse2_unpack<0x60, "vpunpcklbw", v16i8, X86Punpcklbw,
+ bc_v16i8, 0>, VEX_4V;
+ defm VPUNPCKLWD : sse2_unpack<0x61, "vpunpcklwd", v8i16, X86Punpcklwd,
+ bc_v8i16, 0>, VEX_4V;
+ defm VPUNPCKLDQ : sse2_unpack<0x62, "vpunpckldq", v4i32, X86Punpckldq,
+ bc_v4i32, 0>, VEX_4V;
/// FIXME: we could eliminate this and use sse2_unpack instead if tblgen
/// knew to collapse (bitconvert VT to VT) into its operand.
def VPUNPCKLQDQrr : PDI<0x6C, MRMSrcReg,
- (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
- "vpunpcklqdq\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- [(set VR128:$dst,
- (v2i64 (unpckl VR128:$src1, VR128:$src2)))]>, VEX_4V;
+ (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
+ "vpunpcklqdq\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ [(set VR128:$dst, (v2i64 (X86Punpcklqdq VR128:$src1,
+ VR128:$src2)))]>, VEX_4V;
def VPUNPCKLQDQrm : PDI<0x6C, MRMSrcMem,
- (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2),
- "vpunpcklqdq\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- [(set VR128:$dst,
- (v2i64 (unpckl VR128:$src1,
- (memopv2i64 addr:$src2))))]>, VEX_4V;
-
- defm VPUNPCKHBW : sse2_unpack<0x68, "vpunpckhbw", v16i8, unpckh, bc_v16i8,
- 0>, VEX_4V;
- defm VPUNPCKHWD : sse2_unpack<0x69, "vpunpckhwd", v8i16, unpckh, bc_v8i16,
- 0>, VEX_4V;
- defm VPUNPCKHDQ : sse2_unpack<0x6A, "vpunpckhdq", v4i32, unpckh, bc_v4i32,
- 0>, VEX_4V;
+ (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2),
+ "vpunpcklqdq\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ [(set VR128:$dst, (v2i64 (X86Punpcklqdq VR128:$src1,
+ (memopv2i64 addr:$src2))))]>, VEX_4V;
+
+ defm VPUNPCKHBW : sse2_unpack<0x68, "vpunpckhbw", v16i8, X86Punpckhbw,
+ bc_v16i8, 0>, VEX_4V;
+ defm VPUNPCKHWD : sse2_unpack<0x69, "vpunpckhwd", v8i16, X86Punpckhwd,
+ bc_v8i16, 0>, VEX_4V;
+ defm VPUNPCKHDQ : sse2_unpack<0x6A, "vpunpckhdq", v4i32, X86Punpckhdq,
+ bc_v4i32, 0>, VEX_4V;
/// FIXME: we could eliminate this and use sse2_unpack instead if tblgen
/// knew to collapse (bitconvert VT to VT) into its operand.
def VPUNPCKHQDQrr : PDI<0x6D, MRMSrcReg,
- (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
- "vpunpckhqdq\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- [(set VR128:$dst,
- (v2i64 (unpckh VR128:$src1, VR128:$src2)))]>, VEX_4V;
+ (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
+ "vpunpckhqdq\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ [(set VR128:$dst, (v2i64 (X86Punpckhqdq VR128:$src1,
+ VR128:$src2)))]>, VEX_4V;
def VPUNPCKHQDQrm : PDI<0x6D, MRMSrcMem,
- (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2),
- "vpunpckhqdq\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- [(set VR128:$dst,
- (v2i64 (unpckh VR128:$src1,
- (memopv2i64 addr:$src2))))]>, VEX_4V;
+ (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2),
+ "vpunpckhqdq\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ [(set VR128:$dst, (v2i64 (X86Punpckhqdq VR128:$src1,
+ (memopv2i64 addr:$src2))))]>, VEX_4V;
}
let Constraints = "$src1 = $dst" in {
- defm PUNPCKLBW : sse2_unpack<0x60, "punpcklbw", v16i8, unpckl, bc_v16i8>;
- defm PUNPCKLWD : sse2_unpack<0x61, "punpcklwd", v8i16, unpckl, bc_v8i16>;
- defm PUNPCKLDQ : sse2_unpack<0x62, "punpckldq", v4i32, unpckl, bc_v4i32>;
+ defm PUNPCKLBW : sse2_unpack<0x60, "punpcklbw", v16i8, X86Punpcklbw, bc_v16i8>;
+ defm PUNPCKLWD : sse2_unpack<0x61, "punpcklwd", v8i16, X86Punpcklwd, bc_v8i16>;
+ defm PUNPCKLDQ : sse2_unpack<0x62, "punpckldq", v4i32, X86Punpckldq, bc_v4i32>;
/// FIXME: we could eliminate this and use sse2_unpack instead if tblgen
/// knew to collapse (bitconvert VT to VT) into its operand.
@@ -2709,17 +3942,17 @@ let Constraints = "$src1 = $dst" in {
(outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
"punpcklqdq\t{$src2, $dst|$dst, $src2}",
[(set VR128:$dst,
- (v2i64 (unpckl VR128:$src1, VR128:$src2)))]>;
+ (v2i64 (X86Punpcklqdq VR128:$src1, VR128:$src2)))]>;
def PUNPCKLQDQrm : PDI<0x6C, MRMSrcMem,
(outs VR128:$dst), (ins VR128:$src1, i128mem:$src2),
"punpcklqdq\t{$src2, $dst|$dst, $src2}",
[(set VR128:$dst,
- (v2i64 (unpckl VR128:$src1,
+ (v2i64 (X86Punpcklqdq VR128:$src1,
(memopv2i64 addr:$src2))))]>;
- defm PUNPCKHBW : sse2_unpack<0x68, "punpckhbw", v16i8, unpckh, bc_v16i8>;
- defm PUNPCKHWD : sse2_unpack<0x69, "punpckhwd", v8i16, unpckh, bc_v8i16>;
- defm PUNPCKHDQ : sse2_unpack<0x6A, "punpckhdq", v4i32, unpckh, bc_v4i32>;
+ defm PUNPCKHBW : sse2_unpack<0x68, "punpckhbw", v16i8, X86Punpckhbw, bc_v16i8>;
+ defm PUNPCKHWD : sse2_unpack<0x69, "punpckhwd", v8i16, X86Punpckhwd, bc_v8i16>;
+ defm PUNPCKHDQ : sse2_unpack<0x6A, "punpckhdq", v4i32, X86Punpckhdq, bc_v4i32>;
/// FIXME: we could eliminate this and use sse2_unpack instead if tblgen
/// knew to collapse (bitconvert VT to VT) into its operand.
@@ -2727,17 +3960,24 @@ let Constraints = "$src1 = $dst" in {
(outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
"punpckhqdq\t{$src2, $dst|$dst, $src2}",
[(set VR128:$dst,
- (v2i64 (unpckh VR128:$src1, VR128:$src2)))]>;
+ (v2i64 (X86Punpckhqdq VR128:$src1, VR128:$src2)))]>;
def PUNPCKHQDQrm : PDI<0x6D, MRMSrcMem,
(outs VR128:$dst), (ins VR128:$src1, i128mem:$src2),
"punpckhqdq\t{$src2, $dst|$dst, $src2}",
[(set VR128:$dst,
- (v2i64 (unpckh VR128:$src1,
+ (v2i64 (X86Punpckhqdq VR128:$src1,
(memopv2i64 addr:$src2))))]>;
}
-
} // ExeDomain = SSEPackedInt
+// Splat v2i64 (v2f64 splats are handled with UNPCKLPD patterns above)
+let AddedComplexity = 10 in {
+ def : Pat<(splat_lo (v2i64 VR128:$src), (undef)),
+ (PUNPCKLQDQrr VR128:$src, VR128:$src)>, Requires<[HasSSE2]>;
+ def : Pat<(splat_lo (v2i64 VR128:$src), (undef)),
+ (VPUNPCKLQDQrr VR128:$src, VR128:$src)>, Requires<[HasAVX]>;
+}
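+// Worked example: with x = <x0, x1>, punpcklqdq x, x interleaves the low
+// quadwords of the two (identical) sources, producing the <x0, x0> splat
+// the patterns above select.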
+
//===---------------------------------------------------------------------===//
// SSE2 - Packed Integer Extract and Insert
//===---------------------------------------------------------------------===//
@@ -2769,7 +4009,7 @@ def VPEXTRWri : Ii8<0xC5, MRMSrcReg,
(outs GR32:$dst), (ins VR128:$src1, i32i8imm:$src2),
"vpextrw\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set GR32:$dst, (X86pextrw (v8i16 VR128:$src1),
- imm:$src2))]>, OpSize, VEX;
+ imm:$src2))]>, TB, OpSize, VEX;
def PEXTRWri : PDIi8<0xC5, MRMSrcReg,
(outs GR32:$dst), (ins VR128:$src1, i32i8imm:$src2),
"pextrw\t{$src2, $src1, $dst|$dst, $src1, $src2}",
@@ -2778,11 +4018,11 @@ def PEXTRWri : PDIi8<0xC5, MRMSrcReg,
// Insert
let Predicates = [HasAVX] in {
- defm VPINSRW : sse2_pinsrw<0>, OpSize, VEX_4V;
+ defm VPINSRW : sse2_pinsrw<0>, TB, OpSize, VEX_4V;
def VPINSRWrr64i : Ii8<0xC4, MRMSrcReg, (outs VR128:$dst),
(ins VR128:$src1, GR64:$src2, i32i8imm:$src3),
"vpinsrw\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
- []>, OpSize, VEX_4V;
+ []>, TB, OpSize, VEX_4V;
}
let Constraints = "$src1 = $dst" in
@@ -2839,7 +4079,9 @@ def MASKMOVDQU64 : PDI<0xF7, MRMSrcReg, (outs), (ins VR128:$src, VR128:$mask),
// SSE2 - Move Doubleword
//===---------------------------------------------------------------------===//
+//===---------------------------------------------------------------------===//
// Move Int Doubleword to Packed Double Int
+//
def VMOVDI2PDIrr : VPDI<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR32:$src),
"movd\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
@@ -2849,6 +4091,14 @@ def VMOVDI2PDIrm : VPDI<0x6E, MRMSrcMem, (outs VR128:$dst), (ins i32mem:$src),
[(set VR128:$dst,
(v4i32 (scalar_to_vector (loadi32 addr:$src))))]>,
VEX;
+def VMOV64toPQIrr : VRPDI<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR64:$src),
+ "mov{d|q}\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst,
+ (v2i64 (scalar_to_vector GR64:$src)))]>, VEX;
+def VMOV64toSDrr : VRPDI<0x6E, MRMSrcReg, (outs FR64:$dst), (ins GR64:$src),
+ "mov{d|q}\t{$src, $dst|$dst, $src}",
+ [(set FR64:$dst, (bitconvert GR64:$src))]>, VEX;
+
def MOVDI2PDIrr : PDI<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR32:$src),
"movd\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
@@ -2865,8 +4115,9 @@ def MOV64toSDrr : RPDI<0x6E, MRMSrcReg, (outs FR64:$dst), (ins GR64:$src),
"mov{d|q}\t{$src, $dst|$dst, $src}",
[(set FR64:$dst, (bitconvert GR64:$src))]>;
-
+//===---------------------------------------------------------------------===//
// Move Int Doubleword to Single Scalar
+//
def VMOVDI2SSrr : VPDI<0x6E, MRMSrcReg, (outs FR32:$dst), (ins GR32:$src),
"movd\t{$src, $dst|$dst, $src}",
[(set FR32:$dst, (bitconvert GR32:$src))]>, VEX;
@@ -2883,7 +4134,9 @@ def MOVDI2SSrm : PDI<0x6E, MRMSrcMem, (outs FR32:$dst), (ins i32mem:$src),
"movd\t{$src, $dst|$dst, $src}",
[(set FR32:$dst, (bitconvert (loadi32 addr:$src)))]>;
+//===---------------------------------------------------------------------===//
// Move Packed Doubleword Int to Packed Double Int
+//
def VMOVPDI2DIrr : VPDI<0x7E, MRMDestReg, (outs GR32:$dst), (ins VR128:$src),
"movd\t{$src, $dst|$dst, $src}",
[(set GR32:$dst, (vector_extract (v4i32 VR128:$src),
@@ -2902,22 +4155,48 @@ def MOVPDI2DImr : PDI<0x7E, MRMDestMem, (outs), (ins i32mem:$dst, VR128:$src),
[(store (i32 (vector_extract (v4i32 VR128:$src),
(iPTR 0))), addr:$dst)]>;
-def MOVPQIto64rr : RPDI<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128:$src),
+//===---------------------------------------------------------------------===//
+// Move the first element of a Packed Quadword Int to a Quadword Int
+//
+def VMOVPQIto64rr : I<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128:$src),
+ "mov{d|q}\t{$src, $dst|$dst, $src}",
+ [(set GR64:$dst, (vector_extract (v2i64 VR128:$src),
+ (iPTR 0)))]>,
+ TB, OpSize, VEX, VEX_W, Requires<[HasAVX, In64BitMode]>;
+
+def MOVPQIto64rr : RPDI<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128:$src),
+ "mov{d|q}\t{$src, $dst|$dst, $src}",
+ [(set GR64:$dst, (vector_extract (v2i64 VR128:$src),
+ (iPTR 0)))]>;
+
+//===---------------------------------------------------------------------===//
+// Bitcast FR64 <-> GR64
+//
+let Predicates = [HasAVX] in
+def VMOV64toSDrm : S3SI<0x7E, MRMSrcMem, (outs FR64:$dst), (ins i64mem:$src),
+ "vmovq\t{$src, $dst|$dst, $src}",
+ [(set FR64:$dst, (bitconvert (loadi64 addr:$src)))]>,
+ VEX;
+def VMOVSDto64rr : VRPDI<0x7E, MRMDestReg, (outs GR64:$dst), (ins FR64:$src),
"mov{d|q}\t{$src, $dst|$dst, $src}",
- [(set GR64:$dst, (vector_extract (v2i64 VR128:$src),
- (iPTR 0)))]>;
+ [(set GR64:$dst, (bitconvert FR64:$src))]>;
+def VMOVSDto64mr : VRPDI<0x7E, MRMDestMem, (outs), (ins i64mem:$dst, FR64:$src),
+ "movq\t{$src, $dst|$dst, $src}",
+ [(store (i64 (bitconvert FR64:$src)), addr:$dst)]>;
+
def MOV64toSDrm : S3SI<0x7E, MRMSrcMem, (outs FR64:$dst), (ins i64mem:$src),
"movq\t{$src, $dst|$dst, $src}",
[(set FR64:$dst, (bitconvert (loadi64 addr:$src)))]>;
+def MOVSDto64rr : RPDI<0x7E, MRMDestReg, (outs GR64:$dst), (ins FR64:$src),
+ "mov{d|q}\t{$src, $dst|$dst, $src}",
+ [(set GR64:$dst, (bitconvert FR64:$src))]>;
+def MOVSDto64mr : RPDI<0x7E, MRMDestMem, (outs), (ins i64mem:$dst, FR64:$src),
+ "movq\t{$src, $dst|$dst, $src}",
+ [(store (i64 (bitconvert FR64:$src)), addr:$dst)]>;
-def MOVSDto64rr : RPDI<0x7E, MRMDestReg, (outs GR64:$dst), (ins FR64:$src),
- "mov{d|q}\t{$src, $dst|$dst, $src}",
- [(set GR64:$dst, (bitconvert FR64:$src))]>;
-def MOVSDto64mr : RPDI<0x7E, MRMDestMem, (outs), (ins i64mem:$dst, FR64:$src),
- "movq\t{$src, $dst|$dst, $src}",
- [(store (i64 (bitconvert FR64:$src)), addr:$dst)]>;
-
+//===---------------------------------------------------------------------===//
// Move Scalar Single to Double Int
+//
def VMOVSS2DIrr : VPDI<0x7E, MRMDestReg, (outs GR32:$dst), (ins FR32:$src),
"movd\t{$src, $dst|$dst, $src}",
[(set GR32:$dst, (bitconvert FR32:$src))]>, VEX;
@@ -2931,7 +4210,9 @@ def MOVSS2DImr : PDI<0x7E, MRMDestMem, (outs), (ins i32mem:$dst, FR32:$src),
"movd\t{$src, $dst|$dst, $src}",
[(store (i32 (bitconvert FR32:$src)), addr:$dst)]>;
-// movd / movq to XMM register zero-extends
+//===---------------------------------------------------------------------===//
+// Patterns and instructions for zero-extending movd/movq into an XMM register
+//
let AddedComplexity = 15 in {
def VMOVZDI2PDIrr : VPDI<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR32:$src),
"movd\t{$src, $dst|$dst, $src}",
@@ -2967,15 +4248,36 @@ def MOVZDI2PDIrm : PDI<0x6E, MRMSrcMem, (outs VR128:$dst), (ins i32mem:$src),
[(set VR128:$dst,
(v4i32 (X86vzmovl (v4i32 (scalar_to_vector
(loadi32 addr:$src))))))]>;
+}
-def : Pat<(v4i32 (X86vzmovl (loadv4i32 addr:$src))),
+let Predicates = [HasSSE2], AddedComplexity = 20 in {
+ def : Pat<(v4i32 (X86vzmovl (loadv4i32 addr:$src))),
(MOVZDI2PDIrm addr:$src)>;
-def : Pat<(v4i32 (X86vzmovl (bc_v4i32 (loadv4f32 addr:$src)))),
+ def : Pat<(v4i32 (X86vzmovl (bc_v4i32 (loadv4f32 addr:$src)))),
(MOVZDI2PDIrm addr:$src)>;
-def : Pat<(v4i32 (X86vzmovl (bc_v4i32 (loadv2i64 addr:$src)))),
+ def : Pat<(v4i32 (X86vzmovl (bc_v4i32 (loadv2i64 addr:$src)))),
(MOVZDI2PDIrm addr:$src)>;
}
+let Predicates = [HasAVX] in {
+ // AVX 128-bit movd/movq instructions write zeros in the high 128-bit part.
+ let AddedComplexity = 20 in {
+ def : Pat<(v4i32 (X86vzmovl (loadv4i32 addr:$src))),
+ (VMOVZDI2PDIrm addr:$src)>;
+ def : Pat<(v4i32 (X86vzmovl (bc_v4i32 (loadv4f32 addr:$src)))),
+ (VMOVZDI2PDIrm addr:$src)>;
+ def : Pat<(v4i32 (X86vzmovl (bc_v4i32 (loadv2i64 addr:$src)))),
+ (VMOVZDI2PDIrm addr:$src)>;
+ }
+ // Use regular 128-bit instructions to match 256-bit scalar_to_vec+zext.
+ def : Pat<(v8i32 (X86vzmovl (insert_subvector undef,
+ (v4i32 (scalar_to_vector GR32:$src)),(i32 0)))),
+ (SUBREG_TO_REG (i32 0), (VMOVZDI2PDIrr GR32:$src), sub_xmm)>;
+ def : Pat<(v4i64 (X86vzmovl (insert_subvector undef,
+ (v2i64 (scalar_to_vector GR64:$src)),(i32 0)))),
+ (SUBREG_TO_REG (i64 0), (VMOVZQI2PQIrr GR64:$src), sub_xmm)>;
+}
+
// These are the correct encodings of the instructions so that we know how to
// read correct assembly, even though we continue to emit the wrong ones for
// compatibility with Darwin's buggy assembler.
@@ -2996,7 +4298,9 @@ def : InstAlias<"movq\t{$src, $dst|$dst, $src}",
// SSE2 - Move Quadword
//===---------------------------------------------------------------------===//
+//===---------------------------------------------------------------------===//
// Move Quadword Int to Packed Quadword Int
+//
def VMOVQI2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
"vmovq\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
@@ -3008,7 +4312,9 @@ def MOVQI2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
(v2i64 (scalar_to_vector (loadi64 addr:$src))))]>, XS,
Requires<[HasSSE2]>; // SSE2 instruction with XS Prefix
+//===---------------------------------------------------------------------===//
// Move Packed Quadword Int to Quadword Int
+//
def VMOVPQI2QImr : VPDI<0xD6, MRMDestMem, (outs), (ins i64mem:$dst, VR128:$src),
"movq\t{$src, $dst|$dst, $src}",
[(store (i64 (vector_extract (v2i64 VR128:$src),
@@ -3018,10 +4324,9 @@ def MOVPQI2QImr : PDI<0xD6, MRMDestMem, (outs), (ins i64mem:$dst, VR128:$src),
[(store (i64 (vector_extract (v2i64 VR128:$src),
(iPTR 0))), addr:$dst)]>;
-def : Pat<(f64 (vector_extract (v2f64 VR128:$src), (iPTR 0))),
- (f64 (EXTRACT_SUBREG (v2f64 VR128:$src), sub_sd))>;
-
+//===---------------------------------------------------------------------===//
// Store / copy lower 64-bits of a XMM register.
+//
def VMOVLQ128mr : VPDI<0xD6, MRMDestMem, (outs), (ins i64mem:$dst, VR128:$src),
"movq\t{$src, $dst|$dst, $src}",
[(int_x86_sse2_storel_dq addr:$dst, VR128:$src)]>, VEX;
@@ -3037,7 +4342,7 @@ def VMOVZQI2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
(loadi64 addr:$src))))))]>,
XS, VEX, Requires<[HasAVX]>;
-let AddedComplexity = 20 in {
+let AddedComplexity = 20 in
def MOVZQI2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
"movq\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
@@ -3045,15 +4350,27 @@ def MOVZQI2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
(loadi64 addr:$src))))))]>,
XS, Requires<[HasSSE2]>;
-def : Pat<(v2i64 (X86vzmovl (loadv2i64 addr:$src))),
+let Predicates = [HasSSE2], AddedComplexity = 20 in {
+ def : Pat<(v2i64 (X86vzmovl (loadv2i64 addr:$src))),
(MOVZQI2PQIrm addr:$src)>;
-def : Pat<(v2i64 (X86vzmovl (bc_v2i64 (loadv4f32 addr:$src)))),
+ def : Pat<(v2i64 (X86vzmovl (bc_v2i64 (loadv4f32 addr:$src)))),
(MOVZQI2PQIrm addr:$src)>;
-def : Pat<(v2i64 (X86vzload addr:$src)), (MOVZQI2PQIrm addr:$src)>;
+ def : Pat<(v2i64 (X86vzload addr:$src)), (MOVZQI2PQIrm addr:$src)>;
+}
+
+let Predicates = [HasAVX], AddedComplexity = 20 in {
+ def : Pat<(v2i64 (X86vzmovl (loadv2i64 addr:$src))),
+ (VMOVZQI2PQIrm addr:$src)>;
+ def : Pat<(v2i64 (X86vzmovl (bc_v2i64 (loadv4f32 addr:$src)))),
+ (VMOVZQI2PQIrm addr:$src)>;
+ def : Pat<(v2i64 (X86vzload addr:$src)),
+ (VMOVZQI2PQIrm addr:$src)>;
}
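// A note on the predicate split used throughout this section: the same DAG
// patterns are duplicated under HasSSE2 and HasAVX so that AVX-capable
// targets always select the VEX-encoded forms and never mix legacy SSE and
// VEX encodings. The shape of each pair is:
//
//   let Predicates = [HasSSE2], AddedComplexity = 20 in
//     def : Pat<(v2i64 (X86vzload addr:$src)), (MOVZQI2PQIrm addr:$src)>;
//   let Predicates = [HasAVX], AddedComplexity = 20 in
//     def : Pat<(v2i64 (X86vzload addr:$src)), (VMOVZQI2PQIrm addr:$src)>;
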
+//===---------------------------------------------------------------------===//
// Moving from XMM to XMM and clearing the upper 64 bits. Note: there is a bug
// in the IA-32 documentation; movq xmm1, xmm2 does clear the high bits.
+//
let AddedComplexity = 15 in
def VMOVZPQILo2PQIrr : I<0x7E, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"vmovq\t{$src, $dst|$dst, $src}",
@@ -3077,9 +4394,21 @@ def MOVZPQILo2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
[(set VR128:$dst, (v2i64 (X86vzmovl
(loadv2i64 addr:$src))))]>,
XS, Requires<[HasSSE2]>;
+}
-def : Pat<(v2i64 (X86vzmovl (bc_v2i64 (loadv4i32 addr:$src)))),
- (MOVZPQILo2PQIrm addr:$src)>;
+let AddedComplexity = 20 in {
+ let Predicates = [HasSSE2] in {
+ def : Pat<(v2i64 (X86vzmovl (bc_v2i64 (loadv4i32 addr:$src)))),
+ (MOVZPQILo2PQIrm addr:$src)>;
+ def : Pat<(v2f64 (X86vzmovl (v2f64 VR128:$src))),
+ (MOVZPQILo2PQIrr VR128:$src)>;
+ }
+ let Predicates = [HasAVX] in {
+ def : Pat<(v2i64 (X86vzmovl (bc_v2i64 (loadv4i32 addr:$src)))),
+ (VMOVZPQILo2PQIrm addr:$src)>;
+ def : Pat<(v2f64 (X86vzmovl (v2f64 VR128:$src))),
+ (VMOVZPQILo2PQIrr VR128:$src)>;
+ }
}
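// AddedComplexity biases matching order: higher values are tried first, so
// the complexity-20 load patterns in this block take precedence over the
// plain register-to-register forms (complexity 15) whenever the source is a
// load, e.g.:
//
//   (v2i64 (X86vzmovl (bc_v2i64 (loadv4i32 addr:$src))))
//     --> MOVZPQILo2PQIrm addr:$src
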
// Instructions to match in the assembler
@@ -3102,37 +4431,6 @@ def MOVQxrxr : I<0x7E, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"movq\t{$src, $dst|$dst, $src}", []>, XS;
//===---------------------------------------------------------------------===//
-// SSE2 - Misc Instructions
-//===---------------------------------------------------------------------===//
-
-// Flush cache
-def CLFLUSH : I<0xAE, MRM7m, (outs), (ins i8mem:$src),
- "clflush\t$src", [(int_x86_sse2_clflush addr:$src)]>,
- TB, Requires<[HasSSE2]>;
-
-// Load, store, and memory fence
-def LFENCE : I<0xAE, MRM_E8, (outs), (ins),
- "lfence", [(int_x86_sse2_lfence)]>, TB, Requires<[HasSSE2]>;
-def MFENCE : I<0xAE, MRM_F0, (outs), (ins),
- "mfence", [(int_x86_sse2_mfence)]>, TB, Requires<[HasSSE2]>;
-def : Pat<(X86LFence), (LFENCE)>;
-def : Pat<(X86MFence), (MFENCE)>;
-
-
-// Pause. This "instruction" is encoded as "rep; nop", so even though it
-// was introduced with SSE2, it's backward compatible.
-def PAUSE : I<0x90, RawFrm, (outs), (ins), "pause", []>, REP;
-
-// Alias instructions that map zero vector to pxor / xorp* for sse.
-// We set canFoldAsLoad because this can be converted to a constant-pool
-// load of an all-ones value if folding it would be beneficial.
-let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
- isCodeGenOnly = 1, ExeDomain = SSEPackedInt in
- // FIXME: Change encoding to pseudo.
- def V_SETALLONES : PDI<0x76, MRMInitReg, (outs VR128:$dst), (ins), "",
- [(set VR128:$dst, (v4i32 immAllOnesV))]>;
-
-//===---------------------------------------------------------------------===//
// SSE3 - Conversion Instructions
//===---------------------------------------------------------------------===//
@@ -3164,6 +4462,11 @@ def CVTPD2DQrm : S3DI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
def CVTPD2DQrr : S3DI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"cvtpd2dq\t{$src, $dst|$dst, $src}", []>;
+def : Pat<(v4i32 (fp_to_sint (v4f64 VR256:$src))),
+ (VCVTPD2DQYrr VR256:$src)>;
+def : Pat<(v4i32 (fp_to_sint (memopv4f64 addr:$src))),
+ (VCVTPD2DQYrm addr:$src)>;
+
// Convert Packed DW Integers to Packed Double FP
let Predicates = [HasAVX] in {
def VCVTDQ2PDrm : S3SI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
@@ -3192,41 +4495,74 @@ def : Pat<(int_x86_avx_cvt_pd2dq_256 VR256:$src),
def : Pat<(int_x86_avx_cvt_pd2dq_256 (memopv4f64 addr:$src)),
(VCVTPD2DQYrm addr:$src)>;
+def : Pat<(v4f64 (sint_to_fp (v4i32 VR128:$src))),
+ (VCVTDQ2PDYrr VR128:$src)>;
+def : Pat<(v4f64 (sint_to_fp (memopv4i32 addr:$src))),
+ (VCVTDQ2PDYrm addr:$src)>;
+
//===---------------------------------------------------------------------===//
-// SSE3 - Move Instructions
+// SSE3 - Replicate Single FP - MOVSHDUP and MOVSLDUP
//===---------------------------------------------------------------------===//
-
-// Replicate Single FP
-multiclass sse3_replicate_sfp<bits<8> op, PatFrag rep_frag, string OpcodeStr> {
-def rr : S3SI<op, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+multiclass sse3_replicate_sfp<bits<8> op, SDNode OpNode, string OpcodeStr,
+ ValueType vt, RegisterClass RC, PatFrag mem_frag,
+ X86MemOperand x86memop> {
+def rr : S3SI<op, MRMSrcReg, (outs RC:$dst), (ins RC:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
- [(set VR128:$dst, (v4f32 (rep_frag
- VR128:$src, (undef))))]>;
-def rm : S3SI<op, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
+ [(set RC:$dst, (vt (OpNode RC:$src)))]>;
+def rm : S3SI<op, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
- [(set VR128:$dst, (rep_frag
- (memopv4f32 addr:$src), (undef)))]>;
+ [(set RC:$dst, (OpNode (mem_frag addr:$src)))]>;
}
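// With the OpNode/vt/RC/mem_frag/x86memop parameters, one multiclass now
// covers both the 128-bit and 256-bit forms. For instance, the VMOVSHDUPY
// defm below expands to roughly (a sketch, VEX adornment omitted):
//
//   def VMOVSHDUPYrr : S3SI<0x16, MRMSrcReg, (outs VR256:$dst),
//                           (ins VR256:$src),
//                           "vmovshdup\t{$src, $dst|$dst, $src}",
//                           [(set VR256:$dst,
//                                 (v8f32 (X86Movshdup VR256:$src)))]>;
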
-multiclass sse3_replicate_sfp_y<bits<8> op, PatFrag rep_frag,
- string OpcodeStr> {
-def rr : S3SI<op, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
- !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), []>;
-def rm : S3SI<op, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src),
- !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), []>;
+let Predicates = [HasAVX] in {
+ defm VMOVSHDUP : sse3_replicate_sfp<0x16, X86Movshdup, "vmovshdup",
+ v4f32, VR128, memopv4f32, f128mem>, VEX;
+ defm VMOVSLDUP : sse3_replicate_sfp<0x12, X86Movsldup, "vmovsldup",
+ v4f32, VR128, memopv4f32, f128mem>, VEX;
+ defm VMOVSHDUPY : sse3_replicate_sfp<0x16, X86Movshdup, "vmovshdup",
+ v8f32, VR256, memopv8f32, f256mem>, VEX;
+ defm VMOVSLDUPY : sse3_replicate_sfp<0x12, X86Movsldup, "vmovsldup",
+ v8f32, VR256, memopv8f32, f256mem>, VEX;
+}
+defm MOVSHDUP : sse3_replicate_sfp<0x16, X86Movshdup, "movshdup", v4f32, VR128,
+ memopv4f32, f128mem>;
+defm MOVSLDUP : sse3_replicate_sfp<0x12, X86Movsldup, "movsldup", v4f32, VR128,
+ memopv4f32, f128mem>;
+
+let Predicates = [HasSSE3] in {
+ def : Pat<(v4i32 (X86Movshdup VR128:$src)),
+ (MOVSHDUPrr VR128:$src)>;
+ def : Pat<(v4i32 (X86Movshdup (bc_v4i32 (memopv2i64 addr:$src)))),
+ (MOVSHDUPrm addr:$src)>;
+ def : Pat<(v4i32 (X86Movsldup VR128:$src)),
+ (MOVSLDUPrr VR128:$src)>;
+ def : Pat<(v4i32 (X86Movsldup (bc_v4i32 (memopv2i64 addr:$src)))),
+ (MOVSLDUPrm addr:$src)>;
}
let Predicates = [HasAVX] in {
- // FIXME: Merge above classes when we have patterns for the ymm version
- defm VMOVSHDUP : sse3_replicate_sfp<0x16, movshdup, "vmovshdup">, VEX;
- defm VMOVSLDUP : sse3_replicate_sfp<0x12, movsldup, "vmovsldup">, VEX;
- defm VMOVSHDUPY : sse3_replicate_sfp_y<0x16, movshdup, "vmovshdup">, VEX;
- defm VMOVSLDUPY : sse3_replicate_sfp_y<0x12, movsldup, "vmovsldup">, VEX;
+ def : Pat<(v4i32 (X86Movshdup VR128:$src)),
+ (VMOVSHDUPrr VR128:$src)>;
+ def : Pat<(v4i32 (X86Movshdup (bc_v4i32 (memopv2i64 addr:$src)))),
+ (VMOVSHDUPrm addr:$src)>;
+ def : Pat<(v4i32 (X86Movsldup VR128:$src)),
+ (VMOVSLDUPrr VR128:$src)>;
+ def : Pat<(v4i32 (X86Movsldup (bc_v4i32 (memopv2i64 addr:$src)))),
+ (VMOVSLDUPrm addr:$src)>;
+ def : Pat<(v8i32 (X86Movshdup VR256:$src)),
+ (VMOVSHDUPYrr VR256:$src)>;
+ def : Pat<(v8i32 (X86Movshdup (bc_v8i32 (memopv4i64 addr:$src)))),
+ (VMOVSHDUPYrm addr:$src)>;
+ def : Pat<(v8i32 (X86Movsldup VR256:$src)),
+ (VMOVSLDUPYrr VR256:$src)>;
+ def : Pat<(v8i32 (X86Movsldup (bc_v8i32 (memopv4i64 addr:$src)))),
+ (VMOVSLDUPYrm addr:$src)>;
}
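// The movshdup/movsldup instructions are defined on f32 vector types, so the
// integer forms of the shuffle are matched with the explicit patterns above;
// the load cases fold the memory operand directly, e.g.:
//
//   (v4i32 (X86Movshdup (bc_v4i32 (memopv2i64 addr:$src))))
//     --> MOVSHDUPrm addr:$src
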
-defm MOVSHDUP : sse3_replicate_sfp<0x16, movshdup, "movshdup">;
-defm MOVSLDUP : sse3_replicate_sfp<0x12, movsldup, "movsldup">;
-// Replicate Double FP
+//===---------------------------------------------------------------------===//
+// SSE3 - Replicate Double FP - MOVDDUP
+//===---------------------------------------------------------------------===//
+
multiclass sse3_replicate_dfp<string OpcodeStr> {
def rr : S3DI<0x12, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
@@ -3238,23 +4574,90 @@ def rm : S3DI<0x12, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src),
(undef))))]>;
}
+// FIXME: Merge with the above class when there are patterns for the ymm version
multiclass sse3_replicate_dfp_y<string OpcodeStr> {
-def rr : S3DI<0x12, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
- !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
- []>;
-def rm : S3DI<0x12, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src),
- !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
- []>;
+let Predicates = [HasAVX] in {
+ def rr : S3DI<0x12, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
+ !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
+ []>;
+ def rm : S3DI<0x12, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src),
+ !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
+ []>;
+ }
+}
+
+defm MOVDDUP : sse3_replicate_dfp<"movddup">;
+defm VMOVDDUP : sse3_replicate_dfp<"vmovddup">, VEX;
+defm VMOVDDUPY : sse3_replicate_dfp_y<"vmovddup">, VEX;
+
+let Predicates = [HasSSE3] in {
+ def : Pat<(movddup (bc_v2f64 (v2i64 (scalar_to_vector (loadi64 addr:$src)))),
+ (undef)),
+ (MOVDDUPrm addr:$src)>;
+ let AddedComplexity = 5 in {
+ def : Pat<(movddup (memopv2f64 addr:$src), (undef)), (MOVDDUPrm addr:$src)>;
+ def : Pat<(movddup (bc_v4f32 (memopv2f64 addr:$src)), (undef)),
+ (MOVDDUPrm addr:$src)>;
+ def : Pat<(movddup (memopv2i64 addr:$src), (undef)), (MOVDDUPrm addr:$src)>;
+ def : Pat<(movddup (bc_v4i32 (memopv2i64 addr:$src)), (undef)),
+ (MOVDDUPrm addr:$src)>;
+ }
+ def : Pat<(X86Movddup (memopv2f64 addr:$src)),
+ (MOVDDUPrm addr:$src)>;
+ def : Pat<(X86Movddup (bc_v2f64 (memopv4f32 addr:$src))),
+ (MOVDDUPrm addr:$src)>;
+ def : Pat<(X86Movddup (bc_v2f64 (memopv2i64 addr:$src))),
+ (MOVDDUPrm addr:$src)>;
+ def : Pat<(X86Movddup (v2f64 (scalar_to_vector (loadf64 addr:$src)))),
+ (MOVDDUPrm addr:$src)>;
+ def : Pat<(X86Movddup (bc_v2f64
+ (v2i64 (scalar_to_vector (loadi64 addr:$src))))),
+ (MOVDDUPrm addr:$src)>;
}
let Predicates = [HasAVX] in {
- // FIXME: Merge above classes when we have patterns for the ymm version
- defm VMOVDDUP : sse3_replicate_dfp<"vmovddup">, VEX;
- defm VMOVDDUPY : sse3_replicate_dfp_y<"vmovddup">, VEX;
+ def : Pat<(movddup (bc_v2f64 (v2i64 (scalar_to_vector (loadi64 addr:$src)))),
+ (undef)),
+ (VMOVDDUPrm addr:$src)>;
+ let AddedComplexity = 5 in {
+ def : Pat<(movddup (memopv2f64 addr:$src), (undef)), (VMOVDDUPrm addr:$src)>;
+ def : Pat<(movddup (bc_v4f32 (memopv2f64 addr:$src)), (undef)),
+ (VMOVDDUPrm addr:$src)>;
+ def : Pat<(movddup (memopv2i64 addr:$src), (undef)), (VMOVDDUPrm addr:$src)>;
+ def : Pat<(movddup (bc_v4i32 (memopv2i64 addr:$src)), (undef)),
+ (VMOVDDUPrm addr:$src)>;
+ }
+ def : Pat<(X86Movddup (memopv2f64 addr:$src)),
+ (VMOVDDUPrm addr:$src)>, Requires<[HasAVX]>;
+ def : Pat<(X86Movddup (bc_v2f64 (memopv4f32 addr:$src))),
+ (VMOVDDUPrm addr:$src)>, Requires<[HasAVX]>;
+ def : Pat<(X86Movddup (bc_v2f64 (memopv2i64 addr:$src))),
+ (VMOVDDUPrm addr:$src)>, Requires<[HasAVX]>;
+ def : Pat<(X86Movddup (v2f64 (scalar_to_vector (loadf64 addr:$src)))),
+ (VMOVDDUPrm addr:$src)>, Requires<[HasAVX]>;
+ def : Pat<(X86Movddup (bc_v2f64
+ (v2i64 (scalar_to_vector (loadi64 addr:$src))))),
+ (VMOVDDUPrm addr:$src)>, Requires<[HasAVX]>;
+
+ // 256-bit version
+ def : Pat<(X86Movddup (memopv4f64 addr:$src)),
+ (VMOVDDUPYrm addr:$src)>;
+ def : Pat<(X86Movddup (memopv4i64 addr:$src)),
+ (VMOVDDUPYrm addr:$src)>;
+ def : Pat<(X86Movddup (v4f64 (scalar_to_vector (loadf64 addr:$src)))),
+ (VMOVDDUPYrm addr:$src)>;
+ def : Pat<(X86Movddup (v4i64 (scalar_to_vector (loadi64 addr:$src)))),
+ (VMOVDDUPYrm addr:$src)>;
+ def : Pat<(X86Movddup (v4f64 VR256:$src)),
+ (VMOVDDUPYrr VR256:$src)>;
+ def : Pat<(X86Movddup (v4i64 VR256:$src)),
+ (VMOVDDUPYrr VR256:$src)>;
}
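// All of the memory patterns above fold the load into the duplicate, so a
// scalar f64/i64 load that is immediately splatted never goes through a
// separate load instruction, e.g.:
//
//   (X86Movddup (v2f64 (scalar_to_vector (loadf64 addr:$src))))
//     --> VMOVDDUPrm addr:$src
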
-defm MOVDDUP : sse3_replicate_dfp<"movddup">;
-// Move Unaligned Integer
+//===---------------------------------------------------------------------===//
+// SSE3 - Move Unaligned Integer
+//===---------------------------------------------------------------------===//
+
let Predicates = [HasAVX] in {
def VLDDQUrm : S3DI<0xF0, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
"vlddqu\t{$src, $dst|$dst, $src}",
@@ -3267,38 +4670,6 @@ def LDDQUrm : S3DI<0xF0, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
"lddqu\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (int_x86_sse3_ldu_dq addr:$src))]>;
-def : Pat<(movddup (bc_v2f64 (v2i64 (scalar_to_vector (loadi64 addr:$src)))),
- (undef)),
- (MOVDDUPrm addr:$src)>, Requires<[HasSSE3]>;
-
-// Several Move patterns
-let AddedComplexity = 5 in {
-def : Pat<(movddup (memopv2f64 addr:$src), (undef)),
- (MOVDDUPrm addr:$src)>, Requires<[HasSSE3]>;
-def : Pat<(movddup (bc_v4f32 (memopv2f64 addr:$src)), (undef)),
- (MOVDDUPrm addr:$src)>, Requires<[HasSSE3]>;
-def : Pat<(movddup (memopv2i64 addr:$src), (undef)),
- (MOVDDUPrm addr:$src)>, Requires<[HasSSE3]>;
-def : Pat<(movddup (bc_v4i32 (memopv2i64 addr:$src)), (undef)),
- (MOVDDUPrm addr:$src)>, Requires<[HasSSE3]>;
-}
-
-// vector_shuffle v1, <undef> <1, 1, 3, 3>
-let AddedComplexity = 15 in
-def : Pat<(v4i32 (movshdup VR128:$src, (undef))),
- (MOVSHDUPrr VR128:$src)>, Requires<[HasSSE3]>;
-let AddedComplexity = 20 in
-def : Pat<(v4i32 (movshdup (bc_v4i32 (memopv2i64 addr:$src)), (undef))),
- (MOVSHDUPrm addr:$src)>, Requires<[HasSSE3]>;
-
-// vector_shuffle v1, <undef> <0, 0, 2, 2>
-let AddedComplexity = 15 in
- def : Pat<(v4i32 (movsldup VR128:$src, (undef))),
- (MOVSLDUPrr VR128:$src)>, Requires<[HasSSE3]>;
-let AddedComplexity = 20 in
- def : Pat<(v4i32 (movsldup (bc_v4i32 (memopv2i64 addr:$src)), (undef))),
- (MOVSLDUPrm addr:$src)>, Requires<[HasSSE3]>;
-
//===---------------------------------------------------------------------===//
// SSE3 - Arithmetic
//===---------------------------------------------------------------------===//
@@ -3344,62 +4715,58 @@ let Constraints = "$src1 = $dst", Predicates = [HasSSE3],
// Horizontal ops
multiclass S3D_Int<bits<8> o, string OpcodeStr, ValueType vt, RegisterClass RC,
- X86MemOperand x86memop, Intrinsic IntId, bit Is2Addr = 1> {
+ X86MemOperand x86memop, SDNode OpNode, bit Is2Addr = 1> {
def rr : S3DI<o, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2),
!if(Is2Addr,
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
- [(set RC:$dst, (vt (IntId RC:$src1, RC:$src2)))]>;
+ [(set RC:$dst, (vt (OpNode RC:$src1, RC:$src2)))]>;
def rm : S3DI<o, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2),
!if(Is2Addr,
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
- [(set RC:$dst, (vt (IntId RC:$src1, (memop addr:$src2))))]>;
+ [(set RC:$dst, (vt (OpNode RC:$src1, (memop addr:$src2))))]>;
}
multiclass S3_Int<bits<8> o, string OpcodeStr, ValueType vt, RegisterClass RC,
- X86MemOperand x86memop, Intrinsic IntId, bit Is2Addr = 1> {
+ X86MemOperand x86memop, SDNode OpNode, bit Is2Addr = 1> {
def rr : S3I<o, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2),
!if(Is2Addr,
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
- [(set RC:$dst, (vt (IntId RC:$src1, RC:$src2)))]>;
+ [(set RC:$dst, (vt (OpNode RC:$src1, RC:$src2)))]>;
def rm : S3I<o, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2),
!if(Is2Addr,
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
- [(set RC:$dst, (vt (IntId RC:$src1, (memop addr:$src2))))]>;
+ [(set RC:$dst, (vt (OpNode RC:$src1, (memop addr:$src2))))]>;
}
let Predicates = [HasAVX] in {
defm VHADDPS : S3D_Int<0x7C, "vhaddps", v4f32, VR128, f128mem,
- int_x86_sse3_hadd_ps, 0>, VEX_4V;
+ X86fhadd, 0>, VEX_4V;
defm VHADDPD : S3_Int <0x7C, "vhaddpd", v2f64, VR128, f128mem,
- int_x86_sse3_hadd_pd, 0>, VEX_4V;
+ X86fhadd, 0>, VEX_4V;
defm VHSUBPS : S3D_Int<0x7D, "vhsubps", v4f32, VR128, f128mem,
- int_x86_sse3_hsub_ps, 0>, VEX_4V;
+ X86fhsub, 0>, VEX_4V;
defm VHSUBPD : S3_Int <0x7D, "vhsubpd", v2f64, VR128, f128mem,
- int_x86_sse3_hsub_pd, 0>, VEX_4V;
+ X86fhsub, 0>, VEX_4V;
defm VHADDPSY : S3D_Int<0x7C, "vhaddps", v8f32, VR256, f256mem,
- int_x86_avx_hadd_ps_256, 0>, VEX_4V;
+ X86fhadd, 0>, VEX_4V;
defm VHADDPDY : S3_Int <0x7C, "vhaddpd", v4f64, VR256, f256mem,
- int_x86_avx_hadd_pd_256, 0>, VEX_4V;
+ X86fhadd, 0>, VEX_4V;
defm VHSUBPSY : S3D_Int<0x7D, "vhsubps", v8f32, VR256, f256mem,
- int_x86_avx_hsub_ps_256, 0>, VEX_4V;
+ X86fhsub, 0>, VEX_4V;
defm VHSUBPDY : S3_Int <0x7D, "vhsubpd", v4f64, VR256, f256mem,
- int_x86_avx_hsub_pd_256, 0>, VEX_4V;
+ X86fhsub, 0>, VEX_4V;
}
let Constraints = "$src1 = $dst" in {
- defm HADDPS : S3D_Int<0x7C, "haddps", v4f32, VR128, f128mem,
- int_x86_sse3_hadd_ps>;
- defm HADDPD : S3_Int<0x7C, "haddpd", v2f64, VR128, f128mem,
- int_x86_sse3_hadd_pd>;
- defm HSUBPS : S3D_Int<0x7D, "hsubps", v4f32, VR128, f128mem,
- int_x86_sse3_hsub_ps>;
- defm HSUBPD : S3_Int<0x7D, "hsubpd", v2f64, VR128, f128mem,
- int_x86_sse3_hsub_pd>;
+ defm HADDPS : S3D_Int<0x7C, "haddps", v4f32, VR128, f128mem, X86fhadd>;
+ defm HADDPD : S3_Int<0x7C, "haddpd", v2f64, VR128, f128mem, X86fhadd>;
+ defm HSUBPS : S3D_Int<0x7D, "hsubps", v4f32, VR128, f128mem, X86fhsub>;
+ defm HSUBPD : S3_Int<0x7D, "hsubpd", v2f64, VR128, f128mem, X86fhsub>;
}
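// Switching these horizontal ops from per-width intrinsics to the X86fhadd
// and X86fhsub target nodes lets one pattern serve every register class.
// The nodes are presumably declared along these lines (a sketch, following
// the usual X86InstrFragmentsSIMD.td conventions):
//
//   def X86fhadd : SDNode<"X86ISD::FHADD", SDTFPBinOp>;
//   def X86fhsub : SDNode<"X86ISD::FHSUB", SDTFPBinOp>;
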
//===---------------------------------------------------------------------===//
@@ -3466,7 +4833,7 @@ multiclass SS3I_binop_rm_int<bits<8> opc, string OpcodeStr,
(bitconvert (memopv16i8 addr:$src2))))]>, OpSize;
}
-let Predicates = [HasAVX] in {
+let ImmT = NoImm, Predicates = [HasAVX] in {
let isCommutable = 0 in {
defm VPHADDW : SS3I_binop_rm_int<0x01, "vphaddw", memopv8i16,
int_x86_ssse3_phadd_w_128, 0>, VEX_4V;
@@ -3525,17 +4892,33 @@ defm PMULHRSW : SS3I_binop_rm_int<0x0B, "pmulhrsw", memopv8i16,
int_x86_ssse3_pmul_hr_sw_128>;
}
-def : Pat<(X86pshufb VR128:$src, VR128:$mask),
- (PSHUFBrr128 VR128:$src, VR128:$mask)>, Requires<[HasSSSE3]>;
-def : Pat<(X86pshufb VR128:$src, (bc_v16i8 (memopv2i64 addr:$mask))),
- (PSHUFBrm128 VR128:$src, addr:$mask)>, Requires<[HasSSSE3]>;
+let Predicates = [HasSSSE3] in {
+ def : Pat<(X86pshufb VR128:$src, VR128:$mask),
+ (PSHUFBrr128 VR128:$src, VR128:$mask)>;
+ def : Pat<(X86pshufb VR128:$src, (bc_v16i8 (memopv2i64 addr:$mask))),
+ (PSHUFBrm128 VR128:$src, addr:$mask)>;
+
+ def : Pat<(X86psignb VR128:$src1, VR128:$src2),
+ (PSIGNBrr128 VR128:$src1, VR128:$src2)>;
+ def : Pat<(X86psignw VR128:$src1, VR128:$src2),
+ (PSIGNWrr128 VR128:$src1, VR128:$src2)>;
+ def : Pat<(X86psignd VR128:$src1, VR128:$src2),
+ (PSIGNDrr128 VR128:$src1, VR128:$src2)>;
+}
+
+let Predicates = [HasAVX] in {
+ def : Pat<(X86pshufb VR128:$src, VR128:$mask),
+ (VPSHUFBrr128 VR128:$src, VR128:$mask)>;
+ def : Pat<(X86pshufb VR128:$src, (bc_v16i8 (memopv2i64 addr:$mask))),
+ (VPSHUFBrm128 VR128:$src, addr:$mask)>;
-def : Pat<(X86psignb VR128:$src1, VR128:$src2),
- (PSIGNBrr128 VR128:$src1, VR128:$src2)>, Requires<[HasSSSE3]>;
-def : Pat<(X86psignw VR128:$src1, VR128:$src2),
- (PSIGNWrr128 VR128:$src1, VR128:$src2)>, Requires<[HasSSSE3]>;
-def : Pat<(X86psignd VR128:$src1, VR128:$src2),
- (PSIGNDrr128 VR128:$src1, VR128:$src2)>, Requires<[HasSSSE3]>;
+ def : Pat<(X86psignb VR128:$src1, VR128:$src2),
+ (VPSIGNBrr128 VR128:$src1, VR128:$src2)>;
+ def : Pat<(X86psignw VR128:$src1, VR128:$src2),
+ (VPSIGNWrr128 VR128:$src1, VR128:$src2)>;
+ def : Pat<(X86psignd VR128:$src1, VR128:$src2),
+ (VPSIGNDrr128 VR128:$src1, VR128:$src2)>;
+}
//===---------------------------------------------------------------------===//
// SSSE3 - Packed Align Instruction Patterns
@@ -3560,33 +4943,35 @@ multiclass ssse3_palign<string asm, bit Is2Addr = 1> {
let Predicates = [HasAVX] in
defm VPALIGN : ssse3_palign<"vpalignr", 0>, VEX_4V;
-let Constraints = "$src1 = $dst" in
+let Constraints = "$src1 = $dst", Predicates = [HasSSSE3] in
defm PALIGN : ssse3_palign<"palignr">;
-let AddedComplexity = 5 in {
-def : Pat<(v4i32 (palign:$src3 VR128:$src1, VR128:$src2)),
- (PALIGNR128rr VR128:$src2, VR128:$src1,
- (SHUFFLE_get_palign_imm VR128:$src3))>,
- Requires<[HasSSSE3]>;
-def : Pat<(v4f32 (palign:$src3 VR128:$src1, VR128:$src2)),
- (PALIGNR128rr VR128:$src2, VR128:$src1,
- (SHUFFLE_get_palign_imm VR128:$src3))>,
- Requires<[HasSSSE3]>;
-def : Pat<(v8i16 (palign:$src3 VR128:$src1, VR128:$src2)),
- (PALIGNR128rr VR128:$src2, VR128:$src1,
- (SHUFFLE_get_palign_imm VR128:$src3))>,
- Requires<[HasSSSE3]>;
-def : Pat<(v16i8 (palign:$src3 VR128:$src1, VR128:$src2)),
- (PALIGNR128rr VR128:$src2, VR128:$src1,
- (SHUFFLE_get_palign_imm VR128:$src3))>,
- Requires<[HasSSSE3]>;
+let Predicates = [HasSSSE3] in {
+def : Pat<(v4i32 (X86PAlign VR128:$src1, VR128:$src2, (i8 imm:$imm))),
+ (PALIGNR128rr VR128:$src2, VR128:$src1, imm:$imm)>;
+def : Pat<(v4f32 (X86PAlign VR128:$src1, VR128:$src2, (i8 imm:$imm))),
+ (PALIGNR128rr VR128:$src2, VR128:$src1, imm:$imm)>;
+def : Pat<(v8i16 (X86PAlign VR128:$src1, VR128:$src2, (i8 imm:$imm))),
+ (PALIGNR128rr VR128:$src2, VR128:$src1, imm:$imm)>;
+def : Pat<(v16i8 (X86PAlign VR128:$src1, VR128:$src2, (i8 imm:$imm))),
+ (PALIGNR128rr VR128:$src2, VR128:$src1, imm:$imm)>;
+}
+
+let Predicates = [HasAVX] in {
+def : Pat<(v4i32 (X86PAlign VR128:$src1, VR128:$src2, (i8 imm:$imm))),
+ (VPALIGNR128rr VR128:$src2, VR128:$src1, imm:$imm)>;
+def : Pat<(v4f32 (X86PAlign VR128:$src1, VR128:$src2, (i8 imm:$imm))),
+ (VPALIGNR128rr VR128:$src2, VR128:$src1, imm:$imm)>;
+def : Pat<(v8i16 (X86PAlign VR128:$src1, VR128:$src2, (i8 imm:$imm))),
+ (VPALIGNR128rr VR128:$src2, VR128:$src1, imm:$imm)>;
+def : Pat<(v16i8 (X86PAlign VR128:$src1, VR128:$src2, (i8 imm:$imm))),
+ (VPALIGNR128rr VR128:$src2, VR128:$src1, imm:$imm)>;
}
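// PALIGNR concatenates its destination (high half) with its source (low
// half) and shifts the pair right by imm bytes, so the DAG operands are
// deliberately swapped when building the instruction:
//
//   (X86PAlign VR128:$src1, VR128:$src2, (i8 imm:$imm))
//     --> PALIGNR128rr VR128:$src2, VR128:$src1, imm:$imm
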
//===---------------------------------------------------------------------===//
-// SSSE3 Misc Instructions
+// SSSE3 - Thread synchronization
//===---------------------------------------------------------------------===//
-// Thread synchronization
let usesCustomInserter = 1 in {
def MONITOR : PseudoI<(outs), (ins i32mem:$src1, GR32:$src2, GR32:$src3),
[(int_x86_sse3_monitor addr:$src1, GR32:$src2, GR32:$src3)]>;
@@ -3609,338 +4994,6 @@ def : InstAlias<"monitor %eax, %ecx, %edx", (MONITORrrr)>,
def : InstAlias<"monitor %rax, %rcx, %rdx", (MONITORrrr)>,
Requires<[In64BitMode]>;
-//===---------------------------------------------------------------------===//
-// Non-Instruction Patterns
-//===---------------------------------------------------------------------===//
-
-// extload f32 -> f64. This matches load+fextend because we have a hack in
-// the isel (PreprocessForFPConvert) that can introduce loads after dag
-// combine.
-// Since these loads aren't folded into the fextend, we have to match it
-// explicitly here.
-let Predicates = [HasSSE2] in
- def : Pat<(fextend (loadf32 addr:$src)),
- (CVTSS2SDrm addr:$src)>;
-
-// FIXME: According to the intel manual, DEST[127:64] <- SRC1[127:64], while
-// in the non-AVX version bits 127:64 aren't touched. Find a better way to
-// represent this instead of always zeroing SRC1. One possible solution is
-// to represent the instruction w/ something similar as the "$src1 = $dst"
-// constraint but without the tied operands.
-let Predicates = [HasAVX] in
- def : Pat<(fextend (loadf32 addr:$src)),
- (VCVTSS2SDrm (f32 (EXTRACT_SUBREG (AVX_SET0PS), sub_ss)),
- addr:$src)>;
-
-// bit_convert
-let Predicates = [HasXMMInt] in {
- def : Pat<(v2i64 (bitconvert (v4i32 VR128:$src))), (v2i64 VR128:$src)>;
- def : Pat<(v2i64 (bitconvert (v8i16 VR128:$src))), (v2i64 VR128:$src)>;
- def : Pat<(v2i64 (bitconvert (v16i8 VR128:$src))), (v2i64 VR128:$src)>;
- def : Pat<(v2i64 (bitconvert (v2f64 VR128:$src))), (v2i64 VR128:$src)>;
- def : Pat<(v2i64 (bitconvert (v4f32 VR128:$src))), (v2i64 VR128:$src)>;
- def : Pat<(v4i32 (bitconvert (v2i64 VR128:$src))), (v4i32 VR128:$src)>;
- def : Pat<(v4i32 (bitconvert (v8i16 VR128:$src))), (v4i32 VR128:$src)>;
- def : Pat<(v4i32 (bitconvert (v16i8 VR128:$src))), (v4i32 VR128:$src)>;
- def : Pat<(v4i32 (bitconvert (v2f64 VR128:$src))), (v4i32 VR128:$src)>;
- def : Pat<(v4i32 (bitconvert (v4f32 VR128:$src))), (v4i32 VR128:$src)>;
- def : Pat<(v8i16 (bitconvert (v2i64 VR128:$src))), (v8i16 VR128:$src)>;
- def : Pat<(v8i16 (bitconvert (v4i32 VR128:$src))), (v8i16 VR128:$src)>;
- def : Pat<(v8i16 (bitconvert (v16i8 VR128:$src))), (v8i16 VR128:$src)>;
- def : Pat<(v8i16 (bitconvert (v2f64 VR128:$src))), (v8i16 VR128:$src)>;
- def : Pat<(v8i16 (bitconvert (v4f32 VR128:$src))), (v8i16 VR128:$src)>;
- def : Pat<(v16i8 (bitconvert (v2i64 VR128:$src))), (v16i8 VR128:$src)>;
- def : Pat<(v16i8 (bitconvert (v4i32 VR128:$src))), (v16i8 VR128:$src)>;
- def : Pat<(v16i8 (bitconvert (v8i16 VR128:$src))), (v16i8 VR128:$src)>;
- def : Pat<(v16i8 (bitconvert (v2f64 VR128:$src))), (v16i8 VR128:$src)>;
- def : Pat<(v16i8 (bitconvert (v4f32 VR128:$src))), (v16i8 VR128:$src)>;
- def : Pat<(v4f32 (bitconvert (v2i64 VR128:$src))), (v4f32 VR128:$src)>;
- def : Pat<(v4f32 (bitconvert (v4i32 VR128:$src))), (v4f32 VR128:$src)>;
- def : Pat<(v4f32 (bitconvert (v8i16 VR128:$src))), (v4f32 VR128:$src)>;
- def : Pat<(v4f32 (bitconvert (v16i8 VR128:$src))), (v4f32 VR128:$src)>;
- def : Pat<(v4f32 (bitconvert (v2f64 VR128:$src))), (v4f32 VR128:$src)>;
- def : Pat<(v2f64 (bitconvert (v2i64 VR128:$src))), (v2f64 VR128:$src)>;
- def : Pat<(v2f64 (bitconvert (v4i32 VR128:$src))), (v2f64 VR128:$src)>;
- def : Pat<(v2f64 (bitconvert (v8i16 VR128:$src))), (v2f64 VR128:$src)>;
- def : Pat<(v2f64 (bitconvert (v16i8 VR128:$src))), (v2f64 VR128:$src)>;
- def : Pat<(v2f64 (bitconvert (v4f32 VR128:$src))), (v2f64 VR128:$src)>;
-}
-
-let Predicates = [HasAVX] in {
- def : Pat<(v4f64 (bitconvert (v8f32 VR256:$src))), (v4f64 VR256:$src)>;
- def : Pat<(v4f64 (bitconvert (v4i64 VR256:$src))), (v4f64 VR256:$src)>;
- def : Pat<(v4f64 (bitconvert (v32i8 VR256:$src))), (v4f64 VR256:$src)>;
- def : Pat<(v8f32 (bitconvert (v4i64 VR256:$src))), (v8f32 VR256:$src)>;
- def : Pat<(v8f32 (bitconvert (v4f64 VR256:$src))), (v8f32 VR256:$src)>;
- def : Pat<(v8f32 (bitconvert (v32i8 VR256:$src))), (v8f32 VR256:$src)>;
- def : Pat<(v4i64 (bitconvert (v8f32 VR256:$src))), (v4i64 VR256:$src)>;
- def : Pat<(v4i64 (bitconvert (v4f64 VR256:$src))), (v4i64 VR256:$src)>;
- def : Pat<(v4i64 (bitconvert (v32i8 VR256:$src))), (v4i64 VR256:$src)>;
- def : Pat<(v32i8 (bitconvert (v4f64 VR256:$src))), (v32i8 VR256:$src)>;
- def : Pat<(v32i8 (bitconvert (v4i64 VR256:$src))), (v32i8 VR256:$src)>;
- def : Pat<(v32i8 (bitconvert (v8f32 VR256:$src))), (v32i8 VR256:$src)>;
- def : Pat<(v32i8 (bitconvert (v8i32 VR256:$src))), (v32i8 VR256:$src)>;
- def : Pat<(v8i32 (bitconvert (v32i8 VR256:$src))), (v8i32 VR256:$src)>;
-}
-
-// Move scalar to XMM zero-extended
-// movd to XMM register zero-extends
-let AddedComplexity = 15 in {
-// Zeroing a VR128 then do a MOVS{S|D} to the lower bits.
-def : Pat<(v2f64 (X86vzmovl (v2f64 (scalar_to_vector FR64:$src)))),
- (MOVSDrr (v2f64 (V_SET0PS)), FR64:$src)>;
-def : Pat<(v4f32 (X86vzmovl (v4f32 (scalar_to_vector FR32:$src)))),
- (MOVSSrr (v4f32 (V_SET0PS)), FR32:$src)>;
-def : Pat<(v4f32 (X86vzmovl (v4f32 VR128:$src))),
- (MOVSSrr (v4f32 (V_SET0PS)),
- (f32 (EXTRACT_SUBREG (v4f32 VR128:$src), sub_ss)))>;
-def : Pat<(v4i32 (X86vzmovl (v4i32 VR128:$src))),
- (MOVSSrr (v4i32 (V_SET0PI)),
- (EXTRACT_SUBREG (v4i32 VR128:$src), sub_ss))>;
-}
-
-// Splat v2f64 / v2i64
-let AddedComplexity = 10 in {
-def : Pat<(splat_lo (v2f64 VR128:$src), (undef)),
- (UNPCKLPDrr VR128:$src, VR128:$src)>, Requires<[HasSSE2]>;
-def : Pat<(unpckh (v2f64 VR128:$src), (undef)),
- (UNPCKHPDrr VR128:$src, VR128:$src)>, Requires<[HasSSE2]>;
-def : Pat<(splat_lo (v2i64 VR128:$src), (undef)),
- (PUNPCKLQDQrr VR128:$src, VR128:$src)>, Requires<[HasSSE2]>;
-def : Pat<(unpckh (v2i64 VR128:$src), (undef)),
- (PUNPCKHQDQrr VR128:$src, VR128:$src)>, Requires<[HasSSE2]>;
-}
-
-// Special unary SHUFPSrri case.
-def : Pat<(v4f32 (pshufd:$src3 VR128:$src1, (undef))),
- (SHUFPSrri VR128:$src1, VR128:$src1,
- (SHUFFLE_get_shuf_imm VR128:$src3))>;
-let AddedComplexity = 5 in
-def : Pat<(v4f32 (pshufd:$src2 VR128:$src1, (undef))),
- (PSHUFDri VR128:$src1, (SHUFFLE_get_shuf_imm VR128:$src2))>,
- Requires<[HasSSE2]>;
-// Special unary SHUFPDrri case.
-def : Pat<(v2i64 (pshufd:$src3 VR128:$src1, (undef))),
- (SHUFPDrri VR128:$src1, VR128:$src1,
- (SHUFFLE_get_shuf_imm VR128:$src3))>,
- Requires<[HasSSE2]>;
-// Special unary SHUFPDrri case.
-def : Pat<(v2f64 (pshufd:$src3 VR128:$src1, (undef))),
- (SHUFPDrri VR128:$src1, VR128:$src1,
- (SHUFFLE_get_shuf_imm VR128:$src3))>,
- Requires<[HasSSE2]>;
-// Unary v4f32 shuffle with PSHUF* in order to fold a load.
-def : Pat<(pshufd:$src2 (bc_v4i32 (memopv4f32 addr:$src1)), (undef)),
- (PSHUFDmi addr:$src1, (SHUFFLE_get_shuf_imm VR128:$src2))>,
- Requires<[HasSSE2]>;
-
-// Special binary v4i32 shuffle cases with SHUFPS.
-def : Pat<(v4i32 (shufp:$src3 VR128:$src1, (v4i32 VR128:$src2))),
- (SHUFPSrri VR128:$src1, VR128:$src2,
- (SHUFFLE_get_shuf_imm VR128:$src3))>,
- Requires<[HasSSE2]>;
-def : Pat<(v4i32 (shufp:$src3 VR128:$src1, (bc_v4i32 (memopv2i64 addr:$src2)))),
- (SHUFPSrmi VR128:$src1, addr:$src2,
- (SHUFFLE_get_shuf_imm VR128:$src3))>,
- Requires<[HasSSE2]>;
-// Special binary v2i64 shuffle cases using SHUFPDrri.
-def : Pat<(v2i64 (shufp:$src3 VR128:$src1, VR128:$src2)),
- (SHUFPDrri VR128:$src1, VR128:$src2,
- (SHUFFLE_get_shuf_imm VR128:$src3))>,
- Requires<[HasSSE2]>;
-
-// vector_shuffle v1, <undef>, <0, 0, 1, 1, ...>
-let AddedComplexity = 15 in {
-def : Pat<(v4i32 (unpckl_undef:$src2 VR128:$src, (undef))),
- (PSHUFDri VR128:$src, (SHUFFLE_get_shuf_imm VR128:$src2))>,
- Requires<[OptForSpeed, HasSSE2]>;
-def : Pat<(v4f32 (unpckl_undef:$src2 VR128:$src, (undef))),
- (PSHUFDri VR128:$src, (SHUFFLE_get_shuf_imm VR128:$src2))>,
- Requires<[OptForSpeed, HasSSE2]>;
-}
-let AddedComplexity = 10 in {
-def : Pat<(v4f32 (unpckl_undef VR128:$src, (undef))),
- (UNPCKLPSrr VR128:$src, VR128:$src)>;
-def : Pat<(v16i8 (unpckl_undef VR128:$src, (undef))),
- (PUNPCKLBWrr VR128:$src, VR128:$src)>;
-def : Pat<(v8i16 (unpckl_undef VR128:$src, (undef))),
- (PUNPCKLWDrr VR128:$src, VR128:$src)>;
-def : Pat<(v4i32 (unpckl_undef VR128:$src, (undef))),
- (PUNPCKLDQrr VR128:$src, VR128:$src)>;
-}
-
-// vector_shuffle v1, <undef>, <2, 2, 3, 3, ...>
-let AddedComplexity = 15 in {
-def : Pat<(v4i32 (unpckh_undef:$src2 VR128:$src, (undef))),
- (PSHUFDri VR128:$src, (SHUFFLE_get_shuf_imm VR128:$src2))>,
- Requires<[OptForSpeed, HasSSE2]>;
-def : Pat<(v4f32 (unpckh_undef:$src2 VR128:$src, (undef))),
- (PSHUFDri VR128:$src, (SHUFFLE_get_shuf_imm VR128:$src2))>,
- Requires<[OptForSpeed, HasSSE2]>;
-}
-let AddedComplexity = 10 in {
-def : Pat<(v4f32 (unpckh_undef VR128:$src, (undef))),
- (UNPCKHPSrr VR128:$src, VR128:$src)>;
-def : Pat<(v16i8 (unpckh_undef VR128:$src, (undef))),
- (PUNPCKHBWrr VR128:$src, VR128:$src)>;
-def : Pat<(v8i16 (unpckh_undef VR128:$src, (undef))),
- (PUNPCKHWDrr VR128:$src, VR128:$src)>;
-def : Pat<(v4i32 (unpckh_undef VR128:$src, (undef))),
- (PUNPCKHDQrr VR128:$src, VR128:$src)>;
-}
-
-let AddedComplexity = 20 in {
-// vector_shuffle v1, v2 <0, 1, 4, 5> using MOVLHPS
-def : Pat<(v4i32 (movlhps VR128:$src1, VR128:$src2)),
- (MOVLHPSrr VR128:$src1, VR128:$src2)>;
-
-// vector_shuffle v1, v2 <6, 7, 2, 3> using MOVHLPS
-def : Pat<(v4i32 (movhlps VR128:$src1, VR128:$src2)),
- (MOVHLPSrr VR128:$src1, VR128:$src2)>;
-
-// vector_shuffle v1, undef <2, ?, ?, ?> using MOVHLPS
-def : Pat<(v4f32 (movhlps_undef VR128:$src1, (undef))),
- (MOVHLPSrr VR128:$src1, VR128:$src1)>;
-def : Pat<(v4i32 (movhlps_undef VR128:$src1, (undef))),
- (MOVHLPSrr VR128:$src1, VR128:$src1)>;
-}
-
-let AddedComplexity = 20 in {
-// vector_shuffle v1, (load v2) <4, 5, 2, 3> using MOVLPS
-def : Pat<(v4f32 (movlp VR128:$src1, (load addr:$src2))),
- (MOVLPSrm VR128:$src1, addr:$src2)>;
-def : Pat<(v2f64 (movlp VR128:$src1, (load addr:$src2))),
- (MOVLPDrm VR128:$src1, addr:$src2)>;
-def : Pat<(v4i32 (movlp VR128:$src1, (load addr:$src2))),
- (MOVLPSrm VR128:$src1, addr:$src2)>;
-def : Pat<(v2i64 (movlp VR128:$src1, (load addr:$src2))),
- (MOVLPDrm VR128:$src1, addr:$src2)>;
-}
-
-// (store (vector_shuffle (load addr), v2, <4, 5, 2, 3>), addr) using MOVLPS
-def : Pat<(store (v4f32 (movlp (load addr:$src1), VR128:$src2)), addr:$src1),
- (MOVLPSmr addr:$src1, VR128:$src2)>;
-def : Pat<(store (v2f64 (movlp (load addr:$src1), VR128:$src2)), addr:$src1),
- (MOVLPDmr addr:$src1, VR128:$src2)>;
-def : Pat<(store (v4i32 (movlp (bc_v4i32 (loadv2i64 addr:$src1)), VR128:$src2)),
- addr:$src1),
- (MOVLPSmr addr:$src1, VR128:$src2)>;
-def : Pat<(store (v2i64 (movlp (load addr:$src1), VR128:$src2)), addr:$src1),
- (MOVLPDmr addr:$src1, VR128:$src2)>;
-
-let AddedComplexity = 15 in {
-// Setting the lowest element in the vector.
-def : Pat<(v4i32 (movl VR128:$src1, VR128:$src2)),
- (MOVSSrr (v4i32 VR128:$src1),
- (EXTRACT_SUBREG (v4i32 VR128:$src2), sub_ss))>;
-def : Pat<(v2i64 (movl VR128:$src1, VR128:$src2)),
- (MOVSDrr (v2i64 VR128:$src1),
- (EXTRACT_SUBREG (v2i64 VR128:$src2), sub_sd))>;
-
-// vector_shuffle v1, v2 <4, 5, 2, 3> using movsd
-def : Pat<(v4f32 (movlp VR128:$src1, VR128:$src2)),
- (MOVSDrr VR128:$src1, (EXTRACT_SUBREG VR128:$src2, sub_sd))>,
- Requires<[HasSSE2]>;
-def : Pat<(v4i32 (movlp VR128:$src1, VR128:$src2)),
- (MOVSDrr VR128:$src1, (EXTRACT_SUBREG VR128:$src2, sub_sd))>,
- Requires<[HasSSE2]>;
-}
-
-// vector_shuffle v1, v2 <4, 5, 2, 3> using SHUFPSrri (we prefer movsd, but
-// fall back to this for SSE1)
-def : Pat<(v4f32 (movlp:$src3 VR128:$src1, (v4f32 VR128:$src2))),
- (SHUFPSrri VR128:$src2, VR128:$src1,
- (SHUFFLE_get_shuf_imm VR128:$src3))>;
-
-// Set lowest element and zero upper elements.
-def : Pat<(v2f64 (X86vzmovl (v2f64 VR128:$src))),
- (MOVZPQILo2PQIrr VR128:$src)>, Requires<[HasSSE2]>;
-
-// vector -> vector casts
-def : Pat<(v4f32 (sint_to_fp (v4i32 VR128:$src))),
- (Int_CVTDQ2PSrr VR128:$src)>, Requires<[HasSSE2]>;
-def : Pat<(v4i32 (fp_to_sint (v4f32 VR128:$src))),
- (CVTTPS2DQrr VR128:$src)>, Requires<[HasSSE2]>;
-
-// Use movaps / movups for SSE integer load / store (one byte shorter).
-// The instructions selected below are then converted to MOVDQA/MOVDQU
-// during the SSE domain pass.
-let Predicates = [HasSSE1] in {
- def : Pat<(alignedloadv4i32 addr:$src),
- (MOVAPSrm addr:$src)>;
- def : Pat<(loadv4i32 addr:$src),
- (MOVUPSrm addr:$src)>;
- def : Pat<(alignedloadv2i64 addr:$src),
- (MOVAPSrm addr:$src)>;
- def : Pat<(loadv2i64 addr:$src),
- (MOVUPSrm addr:$src)>;
-
- def : Pat<(alignedstore (v2i64 VR128:$src), addr:$dst),
- (MOVAPSmr addr:$dst, VR128:$src)>;
- def : Pat<(alignedstore (v4i32 VR128:$src), addr:$dst),
- (MOVAPSmr addr:$dst, VR128:$src)>;
- def : Pat<(alignedstore (v8i16 VR128:$src), addr:$dst),
- (MOVAPSmr addr:$dst, VR128:$src)>;
- def : Pat<(alignedstore (v16i8 VR128:$src), addr:$dst),
- (MOVAPSmr addr:$dst, VR128:$src)>;
- def : Pat<(store (v2i64 VR128:$src), addr:$dst),
- (MOVUPSmr addr:$dst, VR128:$src)>;
- def : Pat<(store (v4i32 VR128:$src), addr:$dst),
- (MOVUPSmr addr:$dst, VR128:$src)>;
- def : Pat<(store (v8i16 VR128:$src), addr:$dst),
- (MOVUPSmr addr:$dst, VR128:$src)>;
- def : Pat<(store (v16i8 VR128:$src), addr:$dst),
- (MOVUPSmr addr:$dst, VR128:$src)>;
-}
-
-// Use vmovaps/vmovups for AVX integer load/store.
-let Predicates = [HasAVX] in {
- // 128-bit load/store
- def : Pat<(alignedloadv4i32 addr:$src),
- (VMOVAPSrm addr:$src)>;
- def : Pat<(loadv4i32 addr:$src),
- (VMOVUPSrm addr:$src)>;
- def : Pat<(alignedloadv2i64 addr:$src),
- (VMOVAPSrm addr:$src)>;
- def : Pat<(loadv2i64 addr:$src),
- (VMOVUPSrm addr:$src)>;
-
- def : Pat<(alignedstore (v2i64 VR128:$src), addr:$dst),
- (VMOVAPSmr addr:$dst, VR128:$src)>;
- def : Pat<(alignedstore (v4i32 VR128:$src), addr:$dst),
- (VMOVAPSmr addr:$dst, VR128:$src)>;
- def : Pat<(alignedstore (v8i16 VR128:$src), addr:$dst),
- (VMOVAPSmr addr:$dst, VR128:$src)>;
- def : Pat<(alignedstore (v16i8 VR128:$src), addr:$dst),
- (VMOVAPSmr addr:$dst, VR128:$src)>;
- def : Pat<(store (v2i64 VR128:$src), addr:$dst),
- (VMOVUPSmr addr:$dst, VR128:$src)>;
- def : Pat<(store (v4i32 VR128:$src), addr:$dst),
- (VMOVUPSmr addr:$dst, VR128:$src)>;
- def : Pat<(store (v8i16 VR128:$src), addr:$dst),
- (VMOVUPSmr addr:$dst, VR128:$src)>;
- def : Pat<(store (v16i8 VR128:$src), addr:$dst),
- (VMOVUPSmr addr:$dst, VR128:$src)>;
-
- // 256-bit load/store
- def : Pat<(alignedloadv4i64 addr:$src),
- (VMOVAPSYrm addr:$src)>;
- def : Pat<(loadv4i64 addr:$src),
- (VMOVUPSYrm addr:$src)>;
- def : Pat<(alignedloadv8i32 addr:$src),
- (VMOVAPSYrm addr:$src)>;
- def : Pat<(loadv8i32 addr:$src),
- (VMOVUPSYrm addr:$src)>;
- def : Pat<(alignedstore (v4i64 VR256:$src), addr:$dst),
- (VMOVAPSYmr addr:$dst, VR256:$src)>;
- def : Pat<(alignedstore (v8i32 VR256:$src), addr:$dst),
- (VMOVAPSYmr addr:$dst, VR256:$src)>;
- def : Pat<(store (v4i64 VR256:$src), addr:$dst),
- (VMOVUPSYmr addr:$dst, VR256:$src)>;
- def : Pat<(store (v8i32 VR256:$src), addr:$dst),
- (VMOVUPSYmr addr:$dst, VR256:$src)>;
-}
-
//===----------------------------------------------------------------------===//
// SSE4.1 - Packed Move with Sign/Zero Extend
//===----------------------------------------------------------------------===//
@@ -3979,36 +5032,71 @@ defm PMOVZXBW : SS41I_binop_rm_int8<0x30, "pmovzxbw", int_x86_sse41_pmovzxbw>;
defm PMOVZXWD : SS41I_binop_rm_int8<0x33, "pmovzxwd", int_x86_sse41_pmovzxwd>;
defm PMOVZXDQ : SS41I_binop_rm_int8<0x35, "pmovzxdq", int_x86_sse41_pmovzxdq>;
-// Common patterns involving scalar load.
-def : Pat<(int_x86_sse41_pmovsxbw (vzmovl_v2i64 addr:$src)),
- (PMOVSXBWrm addr:$src)>, Requires<[HasSSE41]>;
-def : Pat<(int_x86_sse41_pmovsxbw (vzload_v2i64 addr:$src)),
- (PMOVSXBWrm addr:$src)>, Requires<[HasSSE41]>;
+let Predicates = [HasSSE41] in {
+ // Common patterns involving scalar load.
+ def : Pat<(int_x86_sse41_pmovsxbw (vzmovl_v2i64 addr:$src)),
+ (PMOVSXBWrm addr:$src)>;
+ def : Pat<(int_x86_sse41_pmovsxbw (vzload_v2i64 addr:$src)),
+ (PMOVSXBWrm addr:$src)>;
+
+ def : Pat<(int_x86_sse41_pmovsxwd (vzmovl_v2i64 addr:$src)),
+ (PMOVSXWDrm addr:$src)>;
+ def : Pat<(int_x86_sse41_pmovsxwd (vzload_v2i64 addr:$src)),
+ (PMOVSXWDrm addr:$src)>;
+
+ def : Pat<(int_x86_sse41_pmovsxdq (vzmovl_v2i64 addr:$src)),
+ (PMOVSXDQrm addr:$src)>;
+ def : Pat<(int_x86_sse41_pmovsxdq (vzload_v2i64 addr:$src)),
+ (PMOVSXDQrm addr:$src)>;
-def : Pat<(int_x86_sse41_pmovsxwd (vzmovl_v2i64 addr:$src)),
- (PMOVSXWDrm addr:$src)>, Requires<[HasSSE41]>;
-def : Pat<(int_x86_sse41_pmovsxwd (vzload_v2i64 addr:$src)),
- (PMOVSXWDrm addr:$src)>, Requires<[HasSSE41]>;
+ def : Pat<(int_x86_sse41_pmovzxbw (vzmovl_v2i64 addr:$src)),
+ (PMOVZXBWrm addr:$src)>;
+ def : Pat<(int_x86_sse41_pmovzxbw (vzload_v2i64 addr:$src)),
+ (PMOVZXBWrm addr:$src)>;
-def : Pat<(int_x86_sse41_pmovsxdq (vzmovl_v2i64 addr:$src)),
- (PMOVSXDQrm addr:$src)>, Requires<[HasSSE41]>;
-def : Pat<(int_x86_sse41_pmovsxdq (vzload_v2i64 addr:$src)),
- (PMOVSXDQrm addr:$src)>, Requires<[HasSSE41]>;
+ def : Pat<(int_x86_sse41_pmovzxwd (vzmovl_v2i64 addr:$src)),
+ (PMOVZXWDrm addr:$src)>;
+ def : Pat<(int_x86_sse41_pmovzxwd (vzload_v2i64 addr:$src)),
+ (PMOVZXWDrm addr:$src)>;
-def : Pat<(int_x86_sse41_pmovzxbw (vzmovl_v2i64 addr:$src)),
- (PMOVZXBWrm addr:$src)>, Requires<[HasSSE41]>;
-def : Pat<(int_x86_sse41_pmovzxbw (vzload_v2i64 addr:$src)),
- (PMOVZXBWrm addr:$src)>, Requires<[HasSSE41]>;
+ def : Pat<(int_x86_sse41_pmovzxdq (vzmovl_v2i64 addr:$src)),
+ (PMOVZXDQrm addr:$src)>;
+ def : Pat<(int_x86_sse41_pmovzxdq (vzload_v2i64 addr:$src)),
+ (PMOVZXDQrm addr:$src)>;
+}
+
+let Predicates = [HasAVX] in {
+ // Common patterns involving scalar load.
+ def : Pat<(int_x86_sse41_pmovsxbw (vzmovl_v2i64 addr:$src)),
+ (VPMOVSXBWrm addr:$src)>;
+ def : Pat<(int_x86_sse41_pmovsxbw (vzload_v2i64 addr:$src)),
+ (VPMOVSXBWrm addr:$src)>;
+
+ def : Pat<(int_x86_sse41_pmovsxwd (vzmovl_v2i64 addr:$src)),
+ (VPMOVSXWDrm addr:$src)>;
+ def : Pat<(int_x86_sse41_pmovsxwd (vzload_v2i64 addr:$src)),
+ (VPMOVSXWDrm addr:$src)>;
-def : Pat<(int_x86_sse41_pmovzxwd (vzmovl_v2i64 addr:$src)),
- (PMOVZXWDrm addr:$src)>, Requires<[HasSSE41]>;
-def : Pat<(int_x86_sse41_pmovzxwd (vzload_v2i64 addr:$src)),
- (PMOVZXWDrm addr:$src)>, Requires<[HasSSE41]>;
+ def : Pat<(int_x86_sse41_pmovsxdq (vzmovl_v2i64 addr:$src)),
+ (VPMOVSXDQrm addr:$src)>;
+ def : Pat<(int_x86_sse41_pmovsxdq (vzload_v2i64 addr:$src)),
+ (VPMOVSXDQrm addr:$src)>;
-def : Pat<(int_x86_sse41_pmovzxdq (vzmovl_v2i64 addr:$src)),
- (PMOVZXDQrm addr:$src)>, Requires<[HasSSE41]>;
-def : Pat<(int_x86_sse41_pmovzxdq (vzload_v2i64 addr:$src)),
- (PMOVZXDQrm addr:$src)>, Requires<[HasSSE41]>;
+ def : Pat<(int_x86_sse41_pmovzxbw (vzmovl_v2i64 addr:$src)),
+ (VPMOVZXBWrm addr:$src)>;
+ def : Pat<(int_x86_sse41_pmovzxbw (vzload_v2i64 addr:$src)),
+ (VPMOVZXBWrm addr:$src)>;
+
+ def : Pat<(int_x86_sse41_pmovzxwd (vzmovl_v2i64 addr:$src)),
+ (VPMOVZXWDrm addr:$src)>;
+ def : Pat<(int_x86_sse41_pmovzxwd (vzload_v2i64 addr:$src)),
+ (VPMOVZXWDrm addr:$src)>;
+
+ def : Pat<(int_x86_sse41_pmovzxdq (vzmovl_v2i64 addr:$src)),
+ (VPMOVZXDQrm addr:$src)>;
+ def : Pat<(int_x86_sse41_pmovzxdq (vzload_v2i64 addr:$src)),
+ (VPMOVZXDQrm addr:$src)>;
+}
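// The vzmovl_v2i64/vzload_v2i64 fragments match a scalar load that has been
// zero-extended into a vector, so a pmovsx/pmovzx fed from memory selects a
// single extending load instead of a movq followed by an extend, e.g.:
//
//   (int_x86_sse41_pmovsxbw (vzload_v2i64 addr:$src))
//     --> VPMOVSXBWrm addr:$src      // under HasAVX
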
multiclass SS41I_binop_rm_int4<bits<8> opc, string OpcodeStr, Intrinsic IntId> {
@@ -4039,17 +5127,31 @@ defm PMOVSXWQ : SS41I_binop_rm_int4<0x24, "pmovsxwq", int_x86_sse41_pmovsxwq>;
defm PMOVZXBD : SS41I_binop_rm_int4<0x31, "pmovzxbd", int_x86_sse41_pmovzxbd>;
defm PMOVZXWQ : SS41I_binop_rm_int4<0x34, "pmovzxwq", int_x86_sse41_pmovzxwq>;
-// Common patterns involving scalar load
-def : Pat<(int_x86_sse41_pmovsxbd (vzmovl_v4i32 addr:$src)),
- (PMOVSXBDrm addr:$src)>, Requires<[HasSSE41]>;
-def : Pat<(int_x86_sse41_pmovsxwq (vzmovl_v4i32 addr:$src)),
- (PMOVSXWQrm addr:$src)>, Requires<[HasSSE41]>;
+let Predicates = [HasSSE41] in {
+ // Common patterns involving scalar load
+ def : Pat<(int_x86_sse41_pmovsxbd (vzmovl_v4i32 addr:$src)),
+ (PMOVSXBDrm addr:$src)>;
+ def : Pat<(int_x86_sse41_pmovsxwq (vzmovl_v4i32 addr:$src)),
+ (PMOVSXWQrm addr:$src)>;
+
+ def : Pat<(int_x86_sse41_pmovzxbd (vzmovl_v4i32 addr:$src)),
+ (PMOVZXBDrm addr:$src)>;
+ def : Pat<(int_x86_sse41_pmovzxwq (vzmovl_v4i32 addr:$src)),
+ (PMOVZXWQrm addr:$src)>;
+}
-def : Pat<(int_x86_sse41_pmovzxbd (vzmovl_v4i32 addr:$src)),
- (PMOVZXBDrm addr:$src)>, Requires<[HasSSE41]>;
-def : Pat<(int_x86_sse41_pmovzxwq (vzmovl_v4i32 addr:$src)),
- (PMOVZXWQrm addr:$src)>, Requires<[HasSSE41]>;
+let Predicates = [HasAVX] in {
+ // Common patterns involving scalar load
+ def : Pat<(int_x86_sse41_pmovsxbd (vzmovl_v4i32 addr:$src)),
+ (VPMOVSXBDrm addr:$src)>;
+ def : Pat<(int_x86_sse41_pmovsxwq (vzmovl_v4i32 addr:$src)),
+ (VPMOVSXWQrm addr:$src)>;
+ def : Pat<(int_x86_sse41_pmovzxbd (vzmovl_v4i32 addr:$src)),
+ (VPMOVZXBDrm addr:$src)>;
+ def : Pat<(int_x86_sse41_pmovzxwq (vzmovl_v4i32 addr:$src)),
+ (VPMOVZXWQrm addr:$src)>;
+}
multiclass SS41I_binop_rm_int2<bits<8> opc, string OpcodeStr, Intrinsic IntId> {
def rr : SS48I<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
@@ -4073,16 +5175,31 @@ defm VPMOVZXBQ : SS41I_binop_rm_int2<0x32, "vpmovzxbq", int_x86_sse41_pmovzxbq>,
defm PMOVSXBQ : SS41I_binop_rm_int2<0x22, "pmovsxbq", int_x86_sse41_pmovsxbq>;
defm PMOVZXBQ : SS41I_binop_rm_int2<0x32, "pmovzxbq", int_x86_sse41_pmovzxbq>;
-// Common patterns involving scalar load
-def : Pat<(int_x86_sse41_pmovsxbq
- (bitconvert (v4i32 (X86vzmovl
- (v4i32 (scalar_to_vector (loadi32 addr:$src))))))),
- (PMOVSXBQrm addr:$src)>, Requires<[HasSSE41]>;
+let Predicates = [HasSSE41] in {
+ // Common patterns involving scalar load
+ def : Pat<(int_x86_sse41_pmovsxbq
+ (bitconvert (v4i32 (X86vzmovl
+ (v4i32 (scalar_to_vector (loadi32 addr:$src))))))),
+ (PMOVSXBQrm addr:$src)>;
+
+ def : Pat<(int_x86_sse41_pmovzxbq
+ (bitconvert (v4i32 (X86vzmovl
+ (v4i32 (scalar_to_vector (loadi32 addr:$src))))))),
+ (PMOVZXBQrm addr:$src)>;
+}
+
+let Predicates = [HasAVX] in {
+ // Common patterns involving scalar load
+ def : Pat<(int_x86_sse41_pmovsxbq
+ (bitconvert (v4i32 (X86vzmovl
+ (v4i32 (scalar_to_vector (loadi32 addr:$src))))))),
+ (VPMOVSXBQrm addr:$src)>;
-def : Pat<(int_x86_sse41_pmovzxbq
- (bitconvert (v4i32 (X86vzmovl
- (v4i32 (scalar_to_vector (loadi32 addr:$src))))))),
- (PMOVZXBQrm addr:$src)>, Requires<[HasSSE41]>;
+ def : Pat<(int_x86_sse41_pmovzxbq
+ (bitconvert (v4i32 (X86vzmovl
+ (v4i32 (scalar_to_vector (loadi32 addr:$src))))))),
+ (VPMOVZXBQrm addr:$src)>;
+}
//===----------------------------------------------------------------------===//
// SSE4.1 - Extract Instructions
@@ -4208,7 +5325,12 @@ def : Pat<(store (f32 (bitconvert (extractelt (bc_v4i32 (v4f32 VR128:$src1)),
imm:$src2))),
addr:$dst),
(EXTRACTPSmr addr:$dst, VR128:$src1, imm:$src2)>,
- Requires<[HasSSE41]>;
+ Requires<[HasSSE41]>;
+def : Pat<(store (f32 (bitconvert (extractelt (bc_v4i32 (v4f32 VR128:$src1)),
+ imm:$src2))),
+ addr:$dst),
+ (VEXTRACTPSmr addr:$dst, VR128:$src1, imm:$src2)>,
+ Requires<[HasAVX]>;
//===----------------------------------------------------------------------===//
// SSE4.1 - Insert Instructions
@@ -4297,7 +5419,7 @@ let Constraints = "$src1 = $dst" in
// in the target vector.
multiclass SS41I_insertf32<bits<8> opc, string asm, bit Is2Addr = 1> {
def rr : SS4AIi8<opc, MRMSrcReg, (outs VR128:$dst),
- (ins VR128:$src1, VR128:$src2, i32i8imm:$src3),
+ (ins VR128:$src1, VR128:$src2, u32u8imm:$src3),
!if(Is2Addr,
!strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
!strconcat(asm,
@@ -4306,7 +5428,7 @@ multiclass SS41I_insertf32<bits<8> opc, string asm, bit Is2Addr = 1> {
(X86insrtps VR128:$src1, VR128:$src2, imm:$src3))]>,
OpSize;
def rm : SS4AIi8<opc, MRMSrcMem, (outs VR128:$dst),
- (ins VR128:$src1, f32mem:$src2, i32i8imm:$src3),
+ (ins VR128:$src1, f32mem:$src2, u32u8imm:$src3),
!if(Is2Addr,
!strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
!strconcat(asm,
@@ -4348,7 +5470,7 @@ multiclass sse41_fp_unop_rm<bits<8> opcps, bits<8> opcpd, string OpcodeStr,
// Vector intrinsic operation, mem
def PSm : Ii8<opcps, MRMSrcMem,
- (outs RC:$dst), (ins f256mem:$src1, i32i8imm:$src2),
+ (outs RC:$dst), (ins x86memop:$src1, i32i8imm:$src2),
!strconcat(OpcodeStr,
"ps\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set RC:$dst,
@@ -4366,7 +5488,7 @@ multiclass sse41_fp_unop_rm<bits<8> opcps, bits<8> opcpd, string OpcodeStr,
// Vector intrinsic operation, mem
def PDm : SS4AIi8<opcpd, MRMSrcMem,
- (outs RC:$dst), (ins f256mem:$src1, i32i8imm:$src2),
+ (outs RC:$dst), (ins x86memop:$src1, i32i8imm:$src2),
!strconcat(OpcodeStr,
"pd\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set RC:$dst,
@@ -4501,14 +5623,14 @@ let Predicates = [HasAVX] in {
int_x86_avx_round_pd_256>, VEX;
defm VROUND : sse41_fp_binop_rm<0x0A, 0x0B, "vround",
int_x86_sse41_round_ss,
- int_x86_sse41_round_sd, 0>, VEX_4V;
+ int_x86_sse41_round_sd, 0>, VEX_4V, VEX_LIG;
// Instructions for the assembler
defm VROUND : sse41_fp_unop_rm_avx_p<0x08, 0x09, VR128, f128mem, "vround">,
VEX;
defm VROUNDY : sse41_fp_unop_rm_avx_p<0x08, 0x09, VR256, f256mem, "vround">,
VEX;
- defm VROUND : sse41_fp_binop_rm_avx_s<0x0A, 0x0B, "vround">, VEX_4V;
+ defm VROUND : sse41_fp_binop_rm_avx_s<0x0A, 0x0B, "vround">, VEX_4V, VEX_LIG;
}
defm ROUND : sse41_fp_unop_rm<0x08, 0x09, "round", f128mem, VR128,
@@ -4578,26 +5700,34 @@ defm VTESTPDY : avx_bittest<0x0F, "vtestpd", VR256, f256mem, memopv4f64, v4f64>;
// SSE4.1 - Misc Instructions
//===----------------------------------------------------------------------===//
-def POPCNT16rr : I<0xB8, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src),
- "popcnt{w}\t{$src, $dst|$dst, $src}",
- [(set GR16:$dst, (ctpop GR16:$src))]>, OpSize, XS;
-def POPCNT16rm : I<0xB8, MRMSrcMem, (outs GR16:$dst), (ins i16mem:$src),
- "popcnt{w}\t{$src, $dst|$dst, $src}",
- [(set GR16:$dst, (ctpop (loadi16 addr:$src)))]>, OpSize, XS;
-
-def POPCNT32rr : I<0xB8, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src),
- "popcnt{l}\t{$src, $dst|$dst, $src}",
- [(set GR32:$dst, (ctpop GR32:$src))]>, XS;
-def POPCNT32rm : I<0xB8, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src),
- "popcnt{l}\t{$src, $dst|$dst, $src}",
- [(set GR32:$dst, (ctpop (loadi32 addr:$src)))]>, XS;
-
-def POPCNT64rr : RI<0xB8, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src),
- "popcnt{q}\t{$src, $dst|$dst, $src}",
- [(set GR64:$dst, (ctpop GR64:$src))]>, XS;
-def POPCNT64rm : RI<0xB8, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src),
- "popcnt{q}\t{$src, $dst|$dst, $src}",
- [(set GR64:$dst, (ctpop (loadi64 addr:$src)))]>, XS;
+let Defs = [EFLAGS], Predicates = [HasPOPCNT] in {
+ def POPCNT16rr : I<0xB8, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src),
+ "popcnt{w}\t{$src, $dst|$dst, $src}",
+ [(set GR16:$dst, (ctpop GR16:$src)), (implicit EFLAGS)]>,
+ OpSize, XS;
+ def POPCNT16rm : I<0xB8, MRMSrcMem, (outs GR16:$dst), (ins i16mem:$src),
+ "popcnt{w}\t{$src, $dst|$dst, $src}",
+ [(set GR16:$dst, (ctpop (loadi16 addr:$src))),
+ (implicit EFLAGS)]>, OpSize, XS;
+
+ def POPCNT32rr : I<0xB8, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src),
+ "popcnt{l}\t{$src, $dst|$dst, $src}",
+ [(set GR32:$dst, (ctpop GR32:$src)), (implicit EFLAGS)]>,
+ XS;
+ def POPCNT32rm : I<0xB8, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src),
+ "popcnt{l}\t{$src, $dst|$dst, $src}",
+ [(set GR32:$dst, (ctpop (loadi32 addr:$src))),
+ (implicit EFLAGS)]>, XS;
+
+ def POPCNT64rr : RI<0xB8, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src),
+ "popcnt{q}\t{$src, $dst|$dst, $src}",
+ [(set GR64:$dst, (ctpop GR64:$src)), (implicit EFLAGS)]>,
+ XS;
+ def POPCNT64rm : RI<0xB8, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src),
+ "popcnt{q}\t{$src, $dst|$dst, $src}",
+ [(set GR64:$dst, (ctpop (loadi64 addr:$src))),
+ (implicit EFLAGS)]>, XS;
+}
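// Defs = [EFLAGS] together with the (implicit EFLAGS) result records that
// popcnt clobbers the flags, so later passes cannot reorder a live compare
// across it. The general idiom for a flag-writing instruction is the
// following sketch (FOOrr and its opcode are hypothetical):
//
//   let Defs = [EFLAGS] in
//   def FOOrr : I<0x00, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src), "foo",
//                 [(set GR32:$dst, (ctpop GR32:$src)), (implicit EFLAGS)]>;
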
@@ -4666,6 +5796,11 @@ let Predicates = [HasAVX] in {
0>, VEX_4V;
defm VPMULDQ : SS41I_binop_rm_int<0x28, "vpmuldq", int_x86_sse41_pmuldq,
0>, VEX_4V;
+
+ def : Pat<(v2i64 (X86pcmpeqq VR128:$src1, VR128:$src2)),
+ (VPCMPEQQrr VR128:$src1, VR128:$src2)>;
+ def : Pat<(v2i64 (X86pcmpeqq VR128:$src1, (memop addr:$src2))),
+ (VPCMPEQQrm VR128:$src1, addr:$src2)>;
}
let Constraints = "$src1 = $dst" in {
@@ -4720,7 +5855,7 @@ multiclass SS41I_binop_rmi_int<bits<8> opc, string OpcodeStr,
X86MemOperand x86memop, bit Is2Addr = 1> {
let isCommutable = 1 in
def rri : SS4AIi8<opc, MRMSrcReg, (outs RC:$dst),
- (ins RC:$src1, RC:$src2, i32i8imm:$src3),
+ (ins RC:$src1, RC:$src2, u32u8imm:$src3),
!if(Is2Addr,
!strconcat(OpcodeStr,
"\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
@@ -4729,7 +5864,7 @@ multiclass SS41I_binop_rmi_int<bits<8> opc, string OpcodeStr,
[(set RC:$dst, (IntId RC:$src1, RC:$src2, imm:$src3))]>,
OpSize;
def rmi : SS4AIi8<opc, MRMSrcMem, (outs RC:$dst),
- (ins RC:$src1, x86memop:$src2, i32i8imm:$src3),
+ (ins RC:$src1, x86memop:$src2, u32u8imm:$src3),
!if(Is2Addr,
!strconcat(OpcodeStr,
"\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
@@ -4815,6 +5950,36 @@ defm VBLENDVPDY : SS41I_quaternary_int_avx<0x4B, "vblendvpd", VR256, i256mem,
defm VBLENDVPSY : SS41I_quaternary_int_avx<0x4A, "vblendvps", VR256, i256mem,
memopv32i8, int_x86_avx_blendv_ps_256>;
+let Predicates = [HasAVX] in {
+ def : Pat<(v16i8 (vselect (v16i8 VR128:$mask), (v16i8 VR128:$src1),
+ (v16i8 VR128:$src2))),
+ (VPBLENDVBrr VR128:$src2, VR128:$src1, VR128:$mask)>;
+ def : Pat<(v4i32 (vselect (v4i32 VR128:$mask), (v4i32 VR128:$src1),
+ (v4i32 VR128:$src2))),
+ (VBLENDVPSrr VR128:$src2, VR128:$src1, VR128:$mask)>;
+ def : Pat<(v4f32 (vselect (v4i32 VR128:$mask), (v4f32 VR128:$src1),
+ (v4f32 VR128:$src2))),
+ (VBLENDVPSrr VR128:$src2, VR128:$src1, VR128:$mask)>;
+ def : Pat<(v2i64 (vselect (v2i64 VR128:$mask), (v2i64 VR128:$src1),
+ (v2i64 VR128:$src2))),
+ (VBLENDVPDrr VR128:$src2, VR128:$src1, VR128:$mask)>;
+ def : Pat<(v2f64 (vselect (v2i64 VR128:$mask), (v2f64 VR128:$src1),
+ (v2f64 VR128:$src2))),
+ (VBLENDVPDrr VR128:$src2, VR128:$src1, VR128:$mask)>;
+ def : Pat<(v8i32 (vselect (v8i32 VR256:$mask), (v8i32 VR256:$src1),
+ (v8i32 VR256:$src2))),
+ (VBLENDVPSYrr VR256:$src2, VR256:$src1, VR256:$mask)>;
+ def : Pat<(v8f32 (vselect (v8i32 VR256:$mask), (v8f32 VR256:$src1),
+ (v8f32 VR256:$src2))),
+ (VBLENDVPSYrr VR256:$src2, VR256:$src1, VR256:$mask)>;
+ def : Pat<(v4i64 (vselect (v4i64 VR256:$mask), (v4i64 VR256:$src1),
+ (v4i64 VR256:$src2))),
+ (VBLENDVPDYrr VR256:$src2, VR256:$src1, VR256:$mask)>;
+ def : Pat<(v4f64 (vselect (v4i64 VR256:$mask), (v4f64 VR256:$src1),
+ (v4f64 VR256:$src2))),
+ (VBLENDVPDYrr VR256:$src2, VR256:$src1, VR256:$mask)>;
+}
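// vselect yields $src1 where the mask is set, while BLENDV takes its result
// from the second source operand where the mask is set, so the sources are
// swapped when building the instruction:
//
//   (vselect VR128:$mask, VR128:$src1, VR128:$src2)
//     --> VBLENDVPSrr VR128:$src2, VR128:$src1, VR128:$mask
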
+
/// SS41I_ternary_int - SSE 4.1 ternary operator
let Uses = [XMM0], Constraints = "$src1 = $dst" in {
multiclass SS41I_ternary_int<bits<8> opc, string OpcodeStr, Intrinsic IntId> {
@@ -4835,12 +6000,27 @@ let Uses = [XMM0], Constraints = "$src1 = $dst" in {
}
}
-defm BLENDVPD : SS41I_ternary_int<0x15, "blendvpd", int_x86_sse41_blendvpd>;
-defm BLENDVPS : SS41I_ternary_int<0x14, "blendvps", int_x86_sse41_blendvps>;
-defm PBLENDVB : SS41I_ternary_int<0x10, "pblendvb", int_x86_sse41_pblendvb>;
-
-def : Pat<(X86pblendv VR128:$src1, VR128:$src2, XMM0),
- (PBLENDVBrr0 VR128:$src1, VR128:$src2)>;
+defm BLENDVPD : SS41I_ternary_int<0x15, "blendvpd", int_x86_sse41_blendvpd>;
+defm BLENDVPS : SS41I_ternary_int<0x14, "blendvps", int_x86_sse41_blendvps>;
+defm PBLENDVB : SS41I_ternary_int<0x10, "pblendvb", int_x86_sse41_pblendvb>;
+
+let Predicates = [HasSSE41] in {
+ def : Pat<(v16i8 (vselect (v16i8 XMM0), (v16i8 VR128:$src1),
+ (v16i8 VR128:$src2))),
+ (PBLENDVBrr0 VR128:$src2, VR128:$src1)>;
+ def : Pat<(v4i32 (vselect (v4i32 XMM0), (v4i32 VR128:$src1),
+ (v4i32 VR128:$src2))),
+ (BLENDVPSrr0 VR128:$src2, VR128:$src1)>;
+ def : Pat<(v4f32 (vselect (v4i32 XMM0), (v4f32 VR128:$src1),
+ (v4f32 VR128:$src2))),
+ (BLENDVPSrr0 VR128:$src2, VR128:$src1)>;
+ def : Pat<(v2i64 (vselect (v2i64 XMM0), (v2i64 VR128:$src1),
+ (v2i64 VR128:$src2))),
+ (BLENDVPDrr0 VR128:$src2, VR128:$src1)>;
+ def : Pat<(v2f64 (vselect (v2i64 XMM0), (v2f64 VR128:$src1),
+ (v2f64 VR128:$src2))),
+ (BLENDVPDrr0 VR128:$src2, VR128:$src1)>;
+}
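// The non-VEX blendv forms read their mask implicitly from XMM0 (hence the
// Uses = [XMM0] on the defining multiclass and the rr0 opcode names); the
// same source swap as in the AVX patterns applies:
//
//   (vselect (v4i32 XMM0), VR128:$src1, VR128:$src2)
//     --> BLENDVPSrr0 VR128:$src2, VR128:$src1
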
let Predicates = [HasAVX] in
def VMOVNTDQArm : SS48I<0x2A, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
@@ -4876,9 +6056,16 @@ multiclass SS42I_binop_rm_int<bits<8> opc, string OpcodeStr,
(bitconvert (memopv16i8 addr:$src2))))]>, OpSize;
}
-let Predicates = [HasAVX] in
+let Predicates = [HasAVX] in {
defm VPCMPGTQ : SS42I_binop_rm_int<0x37, "vpcmpgtq", int_x86_sse42_pcmpgtq,
0>, VEX_4V;
+
+ def : Pat<(v2i64 (X86pcmpgtq VR128:$src1, VR128:$src2)),
+ (VPCMPGTQrr VR128:$src1, VR128:$src2)>;
+ def : Pat<(v2i64 (X86pcmpgtq VR128:$src1, (memop addr:$src2))),
+ (VPCMPGTQrm VR128:$src1, addr:$src2)>;
+}
+
let Constraints = "$src1 = $dst" in
defm PCMPGTQ : SS42I_binop_rm_int<0x37, "pcmpgtq", int_x86_sse42_pcmpgtq>;
@@ -5158,22 +6345,43 @@ let Constraints = "$src1 = $dst" in {
int_x86_aesni_aesdeclast>;
}
-def : Pat<(v2i64 (int_x86_aesni_aesenc VR128:$src1, VR128:$src2)),
- (AESENCrr VR128:$src1, VR128:$src2)>;
-def : Pat<(v2i64 (int_x86_aesni_aesenc VR128:$src1, (memop addr:$src2))),
- (AESENCrm VR128:$src1, addr:$src2)>;
-def : Pat<(v2i64 (int_x86_aesni_aesenclast VR128:$src1, VR128:$src2)),
- (AESENCLASTrr VR128:$src1, VR128:$src2)>;
-def : Pat<(v2i64 (int_x86_aesni_aesenclast VR128:$src1, (memop addr:$src2))),
- (AESENCLASTrm VR128:$src1, addr:$src2)>;
-def : Pat<(v2i64 (int_x86_aesni_aesdec VR128:$src1, VR128:$src2)),
- (AESDECrr VR128:$src1, VR128:$src2)>;
-def : Pat<(v2i64 (int_x86_aesni_aesdec VR128:$src1, (memop addr:$src2))),
- (AESDECrm VR128:$src1, addr:$src2)>;
-def : Pat<(v2i64 (int_x86_aesni_aesdeclast VR128:$src1, VR128:$src2)),
- (AESDECLASTrr VR128:$src1, VR128:$src2)>;
-def : Pat<(v2i64 (int_x86_aesni_aesdeclast VR128:$src1, (memop addr:$src2))),
- (AESDECLASTrm VR128:$src1, addr:$src2)>;
+let Predicates = [HasAES] in {
+ def : Pat<(v2i64 (int_x86_aesni_aesenc VR128:$src1, VR128:$src2)),
+ (AESENCrr VR128:$src1, VR128:$src2)>;
+ def : Pat<(v2i64 (int_x86_aesni_aesenc VR128:$src1, (memop addr:$src2))),
+ (AESENCrm VR128:$src1, addr:$src2)>;
+ def : Pat<(v2i64 (int_x86_aesni_aesenclast VR128:$src1, VR128:$src2)),
+ (AESENCLASTrr VR128:$src1, VR128:$src2)>;
+ def : Pat<(v2i64 (int_x86_aesni_aesenclast VR128:$src1, (memop addr:$src2))),
+ (AESENCLASTrm VR128:$src1, addr:$src2)>;
+ def : Pat<(v2i64 (int_x86_aesni_aesdec VR128:$src1, VR128:$src2)),
+ (AESDECrr VR128:$src1, VR128:$src2)>;
+ def : Pat<(v2i64 (int_x86_aesni_aesdec VR128:$src1, (memop addr:$src2))),
+ (AESDECrm VR128:$src1, addr:$src2)>;
+ def : Pat<(v2i64 (int_x86_aesni_aesdeclast VR128:$src1, VR128:$src2)),
+ (AESDECLASTrr VR128:$src1, VR128:$src2)>;
+ def : Pat<(v2i64 (int_x86_aesni_aesdeclast VR128:$src1, (memop addr:$src2))),
+ (AESDECLASTrm VR128:$src1, addr:$src2)>;
+}
+
+let Predicates = [HasAVX, HasAES], AddedComplexity = 20 in {
+ def : Pat<(v2i64 (int_x86_aesni_aesenc VR128:$src1, VR128:$src2)),
+ (VAESENCrr VR128:$src1, VR128:$src2)>;
+ def : Pat<(v2i64 (int_x86_aesni_aesenc VR128:$src1, (memop addr:$src2))),
+ (VAESENCrm VR128:$src1, addr:$src2)>;
+ def : Pat<(v2i64 (int_x86_aesni_aesenclast VR128:$src1, VR128:$src2)),
+ (VAESENCLASTrr VR128:$src1, VR128:$src2)>;
+ def : Pat<(v2i64 (int_x86_aesni_aesenclast VR128:$src1, (memop addr:$src2))),
+ (VAESENCLASTrm VR128:$src1, addr:$src2)>;
+ def : Pat<(v2i64 (int_x86_aesni_aesdec VR128:$src1, VR128:$src2)),
+ (VAESDECrr VR128:$src1, VR128:$src2)>;
+ def : Pat<(v2i64 (int_x86_aesni_aesdec VR128:$src1, (memop addr:$src2))),
+ (VAESDECrm VR128:$src1, addr:$src2)>;
+ def : Pat<(v2i64 (int_x86_aesni_aesdeclast VR128:$src1, VR128:$src2)),
+ (VAESDECLASTrr VR128:$src1, VR128:$src2)>;
+ def : Pat<(v2i64 (int_x86_aesni_aesdeclast VR128:$src1, (memop addr:$src2))),
+ (VAESDECLASTrm VR128:$src1, addr:$src2)>;
+}
// Perform the AES InvMixColumn Transformation
let Predicates = [HasAVX, HasAES] in {
@@ -5288,8 +6496,10 @@ defm : pclmul_alias<"lqlq", 0x00>;
// AVX Instructions
//===----------------------------------------------------------------------===//
-
-// Load from memory and broadcast to all elements of the destination operand
+//===----------------------------------------------------------------------===//
+// VBROADCAST - Load from memory and broadcast to all elements of the
+// destination operand
+//
class avx_broadcast<bits<8> opc, string OpcodeStr, RegisterClass RC,
X86MemOperand x86memop, Intrinsic Int> :
AVX8I<opc, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src),
@@ -5305,7 +6515,26 @@ def VBROADCASTSD : avx_broadcast<0x19, "vbroadcastsd", VR256, f64mem,
def VBROADCASTF128 : avx_broadcast<0x1A, "vbroadcastf128", VR256, f128mem,
int_x86_avx_vbroadcastf128_pd_256>;
-// Insert packed floating-point values
+def : Pat<(int_x86_avx_vbroadcastf128_ps_256 addr:$src),
+ (VBROADCASTF128 addr:$src)>;
+
+def : Pat<(v8i32 (X86VBroadcast (loadi32 addr:$src))),
+ (VBROADCASTSSY addr:$src)>;
+def : Pat<(v4i64 (X86VBroadcast (loadi64 addr:$src))),
+ (VBROADCASTSD addr:$src)>;
+def : Pat<(v8f32 (X86VBroadcast (loadf32 addr:$src))),
+ (VBROADCASTSSY addr:$src)>;
+def : Pat<(v4f64 (X86VBroadcast (loadf64 addr:$src))),
+ (VBROADCASTSD addr:$src)>;
+
+def : Pat<(v4f32 (X86VBroadcast (loadf32 addr:$src))),
+ (VBROADCASTSS addr:$src)>;
+def : Pat<(v4i32 (X86VBroadcast (loadi32 addr:$src))),
+ (VBROADCASTSS addr:$src)>;
+
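For illustration (not part of this patch): these load-and-broadcast patterns back the AVX broadcast intrinsics. A minimal sketch, assuming an AVX-capable compiler (-mavx); the name is illustrative:

    #include <immintrin.h>

    // Load one float and replicate it into all eight lanes (vbroadcastss).
    __m256 splat8(const float *p) {
        return _mm256_broadcast_ss(p);
    }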
+//===----------------------------------------------------------------------===//
+// VINSERTF128 - Insert packed floating-point values
+//
def VINSERTF128rr : AVXAIi8<0x18, MRMSrcReg, (outs VR256:$dst),
(ins VR256:$src1, VR128:$src2, i8imm:$src3),
"vinsertf128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
@@ -5315,7 +6544,41 @@ def VINSERTF128rm : AVXAIi8<0x18, MRMSrcMem, (outs VR256:$dst),
"vinsertf128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
[]>, VEX_4V;
-// Extract packed floating-point values
+def : Pat<(int_x86_avx_vinsertf128_pd_256 VR256:$src1, VR128:$src2, imm:$src3),
+ (VINSERTF128rr VR256:$src1, VR128:$src2, imm:$src3)>;
+def : Pat<(int_x86_avx_vinsertf128_ps_256 VR256:$src1, VR128:$src2, imm:$src3),
+ (VINSERTF128rr VR256:$src1, VR128:$src2, imm:$src3)>;
+def : Pat<(int_x86_avx_vinsertf128_si_256 VR256:$src1, VR128:$src2, imm:$src3),
+ (VINSERTF128rr VR256:$src1, VR128:$src2, imm:$src3)>;
+
+def : Pat<(vinsertf128_insert:$ins (v8f32 VR256:$src1), (v4f32 VR128:$src2),
+ (i32 imm)),
+ (VINSERTF128rr VR256:$src1, VR128:$src2,
+ (INSERT_get_vinsertf128_imm VR256:$ins))>;
+def : Pat<(vinsertf128_insert:$ins (v4f64 VR256:$src1), (v2f64 VR128:$src2),
+ (i32 imm)),
+ (VINSERTF128rr VR256:$src1, VR128:$src2,
+ (INSERT_get_vinsertf128_imm VR256:$ins))>;
+def : Pat<(vinsertf128_insert:$ins (v8i32 VR256:$src1), (v4i32 VR128:$src2),
+ (i32 imm)),
+ (VINSERTF128rr VR256:$src1, VR128:$src2,
+ (INSERT_get_vinsertf128_imm VR256:$ins))>;
+def : Pat<(vinsertf128_insert:$ins (v4i64 VR256:$src1), (v2i64 VR128:$src2),
+ (i32 imm)),
+ (VINSERTF128rr VR256:$src1, VR128:$src2,
+ (INSERT_get_vinsertf128_imm VR256:$ins))>;
+def : Pat<(vinsertf128_insert:$ins (v32i8 VR256:$src1), (v16i8 VR128:$src2),
+ (i32 imm)),
+ (VINSERTF128rr VR256:$src1, VR128:$src2,
+ (INSERT_get_vinsertf128_imm VR256:$ins))>;
+def : Pat<(vinsertf128_insert:$ins (v16i16 VR256:$src1), (v8i16 VR128:$src2),
+ (i32 imm)),
+ (VINSERTF128rr VR256:$src1, VR128:$src2,
+ (INSERT_get_vinsertf128_imm VR256:$ins))>;
+
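Again for illustration only, a sketch of the intrinsic these insert patterns serve, assuming -mavx; the name is illustrative:

    #include <immintrin.h>

    // Replace the upper 128-bit half of a 256-bit vector (vinsertf128).
    __m256 insert_high(__m256 a, __m128 b) {
        return _mm256_insertf128_ps(a, b, 1);
    }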
+//===----------------------------------------------------------------------===//
+// VEXTRACTF128 - Extract packed floating-point values
+//
def VEXTRACTF128rr : AVXAIi8<0x19, MRMDestReg, (outs VR128:$dst),
(ins VR256:$src1, i8imm:$src2),
"vextractf128\t{$src2, $src1, $dst|$dst, $src1, $src2}",
@@ -5325,7 +6588,41 @@ def VEXTRACTF128mr : AVXAIi8<0x19, MRMDestMem, (outs),
"vextractf128\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[]>, VEX;
-// Conditional SIMD Packed Loads and Stores
+def : Pat<(int_x86_avx_vextractf128_pd_256 VR256:$src1, imm:$src2),
+ (VEXTRACTF128rr VR256:$src1, imm:$src2)>;
+def : Pat<(int_x86_avx_vextractf128_ps_256 VR256:$src1, imm:$src2),
+ (VEXTRACTF128rr VR256:$src1, imm:$src2)>;
+def : Pat<(int_x86_avx_vextractf128_si_256 VR256:$src1, imm:$src2),
+ (VEXTRACTF128rr VR256:$src1, imm:$src2)>;
+
+def : Pat<(vextractf128_extract:$ext VR256:$src1, (i32 imm)),
+ (v4f32 (VEXTRACTF128rr
+ (v8f32 VR256:$src1),
+ (EXTRACT_get_vextractf128_imm VR128:$ext)))>;
+def : Pat<(vextractf128_extract:$ext VR256:$src1, (i32 imm)),
+ (v2f64 (VEXTRACTF128rr
+ (v4f64 VR256:$src1),
+ (EXTRACT_get_vextractf128_imm VR128:$ext)))>;
+def : Pat<(vextractf128_extract:$ext VR256:$src1, (i32 imm)),
+ (v4i32 (VEXTRACTF128rr
+ (v8i32 VR256:$src1),
+ (EXTRACT_get_vextractf128_imm VR128:$ext)))>;
+def : Pat<(vextractf128_extract:$ext VR256:$src1, (i32 imm)),
+ (v2i64 (VEXTRACTF128rr
+ (v4i64 VR256:$src1),
+ (EXTRACT_get_vextractf128_imm VR128:$ext)))>;
+def : Pat<(vextractf128_extract:$ext VR256:$src1, (i32 imm)),
+ (v8i16 (VEXTRACTF128rr
+ (v16i16 VR256:$src1),
+ (EXTRACT_get_vextractf128_imm VR128:$ext)))>;
+def : Pat<(vextractf128_extract:$ext VR256:$src1, (i32 imm)),
+ (v16i8 (VEXTRACTF128rr
+ (v32i8 VR256:$src1),
+ (EXTRACT_get_vextractf128_imm VR128:$ext)))>;
+
+//===----------------------------------------------------------------------===//
+// VMASKMOV - Conditional SIMD Packed Loads and Stores
+//
multiclass avx_movmask_rm<bits<8> opc_rm, bits<8> opc_mr, string OpcodeStr,
Intrinsic IntLd, Intrinsic IntLd256,
Intrinsic IntSt, Intrinsic IntSt256,
@@ -5363,7 +6660,9 @@ defm VMASKMOVPD : avx_movmask_rm<0x2D, 0x2F, "vmaskmovpd",
int_x86_avx_maskstore_pd_256,
memopv2f64, memopv4f64>;
-// Permute Floating-Point Values
+//===----------------------------------------------------------------------===//
+// VPERMIL - Permute Single and Double Floating-Point Values
+//
multiclass avx_permil<bits<8> opc_rm, bits<8> opc_rmi, string OpcodeStr,
RegisterClass RC, X86MemOperand x86memop_f,
X86MemOperand x86memop_i, PatFrag f_frag, PatFrag i_frag,
@@ -5404,6 +6703,18 @@ defm VPERMILPDY : avx_permil<0x0D, 0x05, "vpermilpd", VR256, f256mem, i256mem,
int_x86_avx_vpermilvar_pd_256,
int_x86_avx_vpermil_pd_256>;
+def : Pat<(v8f32 (X86VPermilpsy VR256:$src1, (i8 imm:$imm))),
+ (VPERMILPSYri VR256:$src1, imm:$imm)>;
+def : Pat<(v4f64 (X86VPermilpdy VR256:$src1, (i8 imm:$imm))),
+ (VPERMILPDYri VR256:$src1, imm:$imm)>;
+def : Pat<(v8i32 (X86VPermilpsy VR256:$src1, (i8 imm:$imm))),
+ (VPERMILPSYri VR256:$src1, imm:$imm)>;
+def : Pat<(v4i64 (X86VPermilpdy VR256:$src1, (i8 imm:$imm))),
+ (VPERMILPDYri VR256:$src1, imm:$imm)>;
+
+//===----------------------------------------------------------------------===//
+// VPERM2F128 - Permute Floating-Point Values in 128-bit chunks
+//
def VPERM2F128rr : AVXAIi8<0x06, MRMSrcReg, (outs VR256:$dst),
(ins VR256:$src1, VR256:$src2, i8imm:$src3),
"vperm2f128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
@@ -5413,65 +6724,6 @@ def VPERM2F128rm : AVXAIi8<0x06, MRMSrcMem, (outs VR256:$dst),
"vperm2f128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
[]>, VEX_4V;
-// Zero All YMM registers
-def VZEROALL : I<0x77, RawFrm, (outs), (ins), "vzeroall",
- [(int_x86_avx_vzeroall)]>, VEX, VEX_L, Requires<[HasAVX]>;
-
-// Zero Upper bits of YMM registers
-def VZEROUPPER : I<0x77, RawFrm, (outs), (ins), "vzeroupper",
- [(int_x86_avx_vzeroupper)]>, VEX, Requires<[HasAVX]>;
-
-def : Pat<(int_x86_avx_vinsertf128_pd_256 VR256:$src1, VR128:$src2, imm:$src3),
- (VINSERTF128rr VR256:$src1, VR128:$src2, imm:$src3)>;
-def : Pat<(int_x86_avx_vinsertf128_ps_256 VR256:$src1, VR128:$src2, imm:$src3),
- (VINSERTF128rr VR256:$src1, VR128:$src2, imm:$src3)>;
-def : Pat<(int_x86_avx_vinsertf128_si_256 VR256:$src1, VR128:$src2, imm:$src3),
- (VINSERTF128rr VR256:$src1, VR128:$src2, imm:$src3)>;
-
-def : Pat<(vinsertf128_insert:$ins (v8f32 VR256:$src1), (v4f32 VR128:$src2),
- (i32 imm)),
- (VINSERTF128rr VR256:$src1, VR128:$src2,
- (INSERT_get_vinsertf128_imm VR256:$ins))>;
-def : Pat<(vinsertf128_insert:$ins (v4f64 VR256:$src1), (v2f64 VR128:$src2),
- (i32 imm)),
- (VINSERTF128rr VR256:$src1, VR128:$src2,
- (INSERT_get_vinsertf128_imm VR256:$ins))>;
-def : Pat<(vinsertf128_insert:$ins (v8i32 VR256:$src1), (v4i32 VR128:$src2),
- (i32 imm)),
- (VINSERTF128rr VR256:$src1, VR128:$src2,
- (INSERT_get_vinsertf128_imm VR256:$ins))>;
-def : Pat<(vinsertf128_insert:$ins (v4i64 VR256:$src1), (v2i64 VR128:$src2),
- (i32 imm)),
- (VINSERTF128rr VR256:$src1, VR128:$src2,
- (INSERT_get_vinsertf128_imm VR256:$ins))>;
-
-def : Pat<(int_x86_avx_vextractf128_pd_256 VR256:$src1, imm:$src2),
- (VEXTRACTF128rr VR256:$src1, imm:$src2)>;
-def : Pat<(int_x86_avx_vextractf128_ps_256 VR256:$src1, imm:$src2),
- (VEXTRACTF128rr VR256:$src1, imm:$src2)>;
-def : Pat<(int_x86_avx_vextractf128_si_256 VR256:$src1, imm:$src2),
- (VEXTRACTF128rr VR256:$src1, imm:$src2)>;
-
-def : Pat<(vextractf128_extract:$ext VR256:$src1, (i32 imm)),
- (v4f32 (VEXTRACTF128rr
- (v8f32 VR256:$src1),
- (EXTRACT_get_vextractf128_imm VR128:$ext)))>;
-def : Pat<(vextractf128_extract:$ext VR256:$src1, (i32 imm)),
- (v2f64 (VEXTRACTF128rr
- (v4f64 VR256:$src1),
- (EXTRACT_get_vextractf128_imm VR128:$ext)))>;
-def : Pat<(vextractf128_extract:$ext VR256:$src1, (i32 imm)),
- (v4i32 (VEXTRACTF128rr
- (v8i32 VR256:$src1),
- (EXTRACT_get_vextractf128_imm VR128:$ext)))>;
-def : Pat<(vextractf128_extract:$ext VR256:$src1, (i32 imm)),
- (v2i64 (VEXTRACTF128rr
- (v4i64 VR256:$src1),
- (EXTRACT_get_vextractf128_imm VR128:$ext)))>;
-
-def : Pat<(int_x86_avx_vbroadcastf128_ps_256 addr:$src),
- (VBROADCASTF128 addr:$src)>;
-
def : Pat<(int_x86_avx_vperm2f128_ps_256 VR256:$src1, VR256:$src2, imm:$src3),
(VPERM2F128rr VR256:$src1, VR256:$src2, imm:$src3)>;
def : Pat<(int_x86_avx_vperm2f128_pd_256 VR256:$src1, VR256:$src2, imm:$src3),
@@ -5489,377 +6741,59 @@ def : Pat<(int_x86_avx_vperm2f128_si_256
VR256:$src1, (memopv8i32 addr:$src2), imm:$src3),
(VPERM2F128rm VR256:$src1, addr:$src2, imm:$src3)>;
+def : Pat<(v8f32 (X86VPerm2f128 VR256:$src1, VR256:$src2, (i8 imm:$imm))),
+ (VPERM2F128rr VR256:$src1, VR256:$src2, imm:$imm)>;
+def : Pat<(v8i32 (X86VPerm2f128 VR256:$src1, VR256:$src2, (i8 imm:$imm))),
+ (VPERM2F128rr VR256:$src1, VR256:$src2, imm:$imm)>;
+def : Pat<(v4i64 (X86VPerm2f128 VR256:$src1, VR256:$src2, (i8 imm:$imm))),
+ (VPERM2F128rr VR256:$src1, VR256:$src2, imm:$imm)>;
+def : Pat<(v4f64 (X86VPerm2f128 VR256:$src1, VR256:$src2, (i8 imm:$imm))),
+ (VPERM2F128rr VR256:$src1, VR256:$src2, imm:$imm)>;
+def : Pat<(v32i8 (X86VPerm2f128 VR256:$src1, VR256:$src2, (i8 imm:$imm))),
+ (VPERM2F128rr VR256:$src1, VR256:$src2, imm:$imm)>;
+def : Pat<(v16i16 (X86VPerm2f128 VR256:$src1, VR256:$src2, (i8 imm:$imm))),
+ (VPERM2F128rr VR256:$src1, VR256:$src2, imm:$imm)>;
+
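Not part of the patch; a minimal sketch of what these vperm2f128 patterns enable at the C level, assuming -mavx:

    #include <immintrin.h>

    // Swap the two 128-bit halves of a 256-bit vector: selector 0x01
    // picks src1's high half for the low lane and src1's low half for
    // the high lane.
    __m256 swap_halves(__m256 a) {
        return _mm256_permute2f128_ps(a, a, 0x01);
    }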
//===----------------------------------------------------------------------===//
-// SSE Shuffle pattern fragments
-//===----------------------------------------------------------------------===//
+// VZERO - Zero YMM registers
+//
+let Defs = [YMM0, YMM1, YMM2, YMM3, YMM4, YMM5, YMM6, YMM7,
+ YMM8, YMM9, YMM10, YMM11, YMM12, YMM13, YMM14, YMM15] in {
+ // Zero All YMM registers
+ def VZEROALL : I<0x77, RawFrm, (outs), (ins), "vzeroall",
+ [(int_x86_avx_vzeroall)]>, TB, VEX, VEX_L, Requires<[HasAVX]>;
-// This is part of a "work in progress" refactoring. The idea is that all
-// vector shuffles are going to be translated into target specific nodes and
-// directly matched by the patterns below (which can be changed along the way)
-// The AVX version of some but not all of them are described here, and more
-// should come in a near future.
-
-// Shuffle with PSHUFD instruction folding loads. The first two patterns match
-// SSE2 loads, which are always promoted to v2i64. The last one should match
-// the SSE1 case, where the only legal load is v4f32, but there is no PSHUFD
-// in SSE2, how does it ever worked? Anyway, the pattern will remain here until
-// we investigate further.
-def : Pat<(v4i32 (X86PShufd (bc_v4i32 (memopv2i64 addr:$src1)),
- (i8 imm:$imm))),
- (VPSHUFDmi addr:$src1, imm:$imm)>, Requires<[HasAVX]>;
-def : Pat<(v4i32 (X86PShufd (bc_v4i32 (memopv2i64 addr:$src1)),
- (i8 imm:$imm))),
- (PSHUFDmi addr:$src1, imm:$imm)>;
-def : Pat<(v4i32 (X86PShufd (bc_v4i32 (memopv4f32 addr:$src1)),
- (i8 imm:$imm))),
- (PSHUFDmi addr:$src1, imm:$imm)>; // FIXME: has this ever worked?
-
-// Shuffle with PSHUFD instruction.
-def : Pat<(v4f32 (X86PShufd VR128:$src1, (i8 imm:$imm))),
- (VPSHUFDri VR128:$src1, imm:$imm)>, Requires<[HasAVX]>;
-def : Pat<(v4f32 (X86PShufd VR128:$src1, (i8 imm:$imm))),
- (PSHUFDri VR128:$src1, imm:$imm)>;
-
-def : Pat<(v4i32 (X86PShufd VR128:$src1, (i8 imm:$imm))),
- (VPSHUFDri VR128:$src1, imm:$imm)>, Requires<[HasAVX]>;
-def : Pat<(v4i32 (X86PShufd VR128:$src1, (i8 imm:$imm))),
- (PSHUFDri VR128:$src1, imm:$imm)>;
-
-// Shuffle with SHUFPD instruction.
-def : Pat<(v2f64 (X86Shufps VR128:$src1,
- (memopv2f64 addr:$src2), (i8 imm:$imm))),
- (VSHUFPDrmi VR128:$src1, addr:$src2, imm:$imm)>, Requires<[HasAVX]>;
-def : Pat<(v2f64 (X86Shufps VR128:$src1,
- (memopv2f64 addr:$src2), (i8 imm:$imm))),
- (SHUFPDrmi VR128:$src1, addr:$src2, imm:$imm)>;
-
-def : Pat<(v2i64 (X86Shufpd VR128:$src1, VR128:$src2, (i8 imm:$imm))),
- (VSHUFPDrri VR128:$src1, VR128:$src2, imm:$imm)>, Requires<[HasAVX]>;
-def : Pat<(v2i64 (X86Shufpd VR128:$src1, VR128:$src2, (i8 imm:$imm))),
- (SHUFPDrri VR128:$src1, VR128:$src2, imm:$imm)>;
-
-def : Pat<(v2f64 (X86Shufpd VR128:$src1, VR128:$src2, (i8 imm:$imm))),
- (VSHUFPDrri VR128:$src1, VR128:$src2, imm:$imm)>, Requires<[HasAVX]>;
-def : Pat<(v2f64 (X86Shufpd VR128:$src1, VR128:$src2, (i8 imm:$imm))),
- (SHUFPDrri VR128:$src1, VR128:$src2, imm:$imm)>;
-
-// Shuffle with SHUFPS instruction.
-def : Pat<(v4f32 (X86Shufps VR128:$src1,
- (memopv4f32 addr:$src2), (i8 imm:$imm))),
- (VSHUFPSrmi VR128:$src1, addr:$src2, imm:$imm)>, Requires<[HasAVX]>;
-def : Pat<(v4f32 (X86Shufps VR128:$src1,
- (memopv4f32 addr:$src2), (i8 imm:$imm))),
- (SHUFPSrmi VR128:$src1, addr:$src2, imm:$imm)>;
-
-def : Pat<(v4f32 (X86Shufps VR128:$src1, VR128:$src2, (i8 imm:$imm))),
- (VSHUFPSrri VR128:$src1, VR128:$src2, imm:$imm)>, Requires<[HasAVX]>;
-def : Pat<(v4f32 (X86Shufps VR128:$src1, VR128:$src2, (i8 imm:$imm))),
- (SHUFPSrri VR128:$src1, VR128:$src2, imm:$imm)>;
-
-def : Pat<(v4i32 (X86Shufps VR128:$src1,
- (bc_v4i32 (memopv2i64 addr:$src2)), (i8 imm:$imm))),
- (VSHUFPSrmi VR128:$src1, addr:$src2, imm:$imm)>, Requires<[HasAVX]>;
-def : Pat<(v4i32 (X86Shufps VR128:$src1,
- (bc_v4i32 (memopv2i64 addr:$src2)), (i8 imm:$imm))),
- (SHUFPSrmi VR128:$src1, addr:$src2, imm:$imm)>;
-
-def : Pat<(v4i32 (X86Shufps VR128:$src1, VR128:$src2, (i8 imm:$imm))),
- (VSHUFPSrri VR128:$src1, VR128:$src2, imm:$imm)>, Requires<[HasAVX]>;
-def : Pat<(v4i32 (X86Shufps VR128:$src1, VR128:$src2, (i8 imm:$imm))),
- (SHUFPSrri VR128:$src1, VR128:$src2, imm:$imm)>;
-
-// Shuffle with MOVHLPS instruction
-def : Pat<(v4f32 (X86Movhlps VR128:$src1, VR128:$src2)),
- (MOVHLPSrr VR128:$src1, VR128:$src2)>;
-def : Pat<(v4i32 (X86Movhlps VR128:$src1, VR128:$src2)),
- (MOVHLPSrr VR128:$src1, VR128:$src2)>;
-
-// Shuffle with MOVDDUP instruction
-def : Pat<(X86Movddup (memopv2f64 addr:$src)),
- (VMOVDDUPrm addr:$src)>, Requires<[HasAVX]>;
-def : Pat<(X86Movddup (memopv2f64 addr:$src)),
- (MOVDDUPrm addr:$src)>;
-
-def : Pat<(X86Movddup (bc_v2f64 (memopv4f32 addr:$src))),
- (VMOVDDUPrm addr:$src)>, Requires<[HasAVX]>;
-def : Pat<(X86Movddup (bc_v2f64 (memopv4f32 addr:$src))),
- (MOVDDUPrm addr:$src)>;
-
-def : Pat<(X86Movddup (bc_v2f64 (memopv2i64 addr:$src))),
- (VMOVDDUPrm addr:$src)>, Requires<[HasAVX]>;
-def : Pat<(X86Movddup (bc_v2f64 (memopv2i64 addr:$src))),
- (MOVDDUPrm addr:$src)>;
-
-def : Pat<(X86Movddup (v2f64 (scalar_to_vector (loadf64 addr:$src)))),
- (VMOVDDUPrm addr:$src)>, Requires<[HasAVX]>;
-def : Pat<(X86Movddup (v2f64 (scalar_to_vector (loadf64 addr:$src)))),
- (MOVDDUPrm addr:$src)>;
-
-def : Pat<(X86Movddup (bc_v2f64
- (v2i64 (scalar_to_vector (loadi64 addr:$src))))),
- (VMOVDDUPrm addr:$src)>, Requires<[HasAVX]>;
-def : Pat<(X86Movddup (bc_v2f64
- (v2i64 (scalar_to_vector (loadi64 addr:$src))))),
- (MOVDDUPrm addr:$src)>;
-
-
-// Shuffle with UNPCKLPS
-def : Pat<(v4f32 (X86Unpcklps VR128:$src1, (memopv4f32 addr:$src2))),
- (VUNPCKLPSrm VR128:$src1, addr:$src2)>, Requires<[HasAVX]>;
-def : Pat<(v8f32 (X86Unpcklpsy VR256:$src1, (memopv8f32 addr:$src2))),
- (VUNPCKLPSYrm VR256:$src1, addr:$src2)>, Requires<[HasAVX]>;
-def : Pat<(v4f32 (X86Unpcklps VR128:$src1, (memopv4f32 addr:$src2))),
- (UNPCKLPSrm VR128:$src1, addr:$src2)>;
-
-def : Pat<(v4f32 (X86Unpcklps VR128:$src1, VR128:$src2)),
- (VUNPCKLPSrr VR128:$src1, VR128:$src2)>, Requires<[HasAVX]>;
-def : Pat<(v8f32 (X86Unpcklpsy VR256:$src1, VR256:$src2)),
- (VUNPCKLPSYrr VR256:$src1, VR256:$src2)>, Requires<[HasAVX]>;
-def : Pat<(v4f32 (X86Unpcklps VR128:$src1, VR128:$src2)),
- (UNPCKLPSrr VR128:$src1, VR128:$src2)>;
-
-// Shuffle with UNPCKHPS
-def : Pat<(v4f32 (X86Unpckhps VR128:$src1, (memopv4f32 addr:$src2))),
- (VUNPCKHPSrm VR128:$src1, addr:$src2)>, Requires<[HasAVX]>;
-def : Pat<(v4f32 (X86Unpckhps VR128:$src1, (memopv4f32 addr:$src2))),
- (UNPCKHPSrm VR128:$src1, addr:$src2)>;
-
-def : Pat<(v4f32 (X86Unpckhps VR128:$src1, VR128:$src2)),
- (VUNPCKHPSrr VR128:$src1, VR128:$src2)>, Requires<[HasAVX]>;
-def : Pat<(v4f32 (X86Unpckhps VR128:$src1, VR128:$src2)),
- (UNPCKHPSrr VR128:$src1, VR128:$src2)>;
-
-// Shuffle with UNPCKLPD
-def : Pat<(v2f64 (X86Unpcklpd VR128:$src1, (memopv2f64 addr:$src2))),
- (VUNPCKLPDrm VR128:$src1, addr:$src2)>, Requires<[HasAVX]>;
-def : Pat<(v4f64 (X86Unpcklpdy VR256:$src1, (memopv4f64 addr:$src2))),
- (VUNPCKLPDYrm VR256:$src1, addr:$src2)>, Requires<[HasAVX]>;
-def : Pat<(v2f64 (X86Unpcklpd VR128:$src1, (memopv2f64 addr:$src2))),
- (UNPCKLPDrm VR128:$src1, addr:$src2)>;
-
-def : Pat<(v2f64 (X86Unpcklpd VR128:$src1, VR128:$src2)),
- (VUNPCKLPDrr VR128:$src1, VR128:$src2)>, Requires<[HasAVX]>;
-def : Pat<(v4f64 (X86Unpcklpdy VR256:$src1, VR256:$src2)),
- (VUNPCKLPDYrr VR256:$src1, VR256:$src2)>, Requires<[HasAVX]>;
-def : Pat<(v2f64 (X86Unpcklpd VR128:$src1, VR128:$src2)),
- (UNPCKLPDrr VR128:$src1, VR128:$src2)>;
-
-// Shuffle with UNPCKHPD
-def : Pat<(v2f64 (X86Unpckhpd VR128:$src1, (memopv2f64 addr:$src2))),
- (VUNPCKHPDrm VR128:$src1, addr:$src2)>, Requires<[HasAVX]>;
-def : Pat<(v2f64 (X86Unpckhpd VR128:$src1, (memopv2f64 addr:$src2))),
- (UNPCKHPDrm VR128:$src1, addr:$src2)>;
-
-def : Pat<(v2f64 (X86Unpckhpd VR128:$src1, VR128:$src2)),
- (VUNPCKHPDrr VR128:$src1, VR128:$src2)>, Requires<[HasAVX]>;
-def : Pat<(v2f64 (X86Unpckhpd VR128:$src1, VR128:$src2)),
- (UNPCKHPDrr VR128:$src1, VR128:$src2)>;
-
-// Shuffle with PUNPCKLBW
-def : Pat<(v16i8 (X86Punpcklbw VR128:$src1,
- (bc_v16i8 (memopv2i64 addr:$src2)))),
- (PUNPCKLBWrm VR128:$src1, addr:$src2)>;
-def : Pat<(v16i8 (X86Punpcklbw VR128:$src1, VR128:$src2)),
- (PUNPCKLBWrr VR128:$src1, VR128:$src2)>;
-
-// Shuffle with PUNPCKLWD
-def : Pat<(v8i16 (X86Punpcklwd VR128:$src1,
- (bc_v8i16 (memopv2i64 addr:$src2)))),
- (PUNPCKLWDrm VR128:$src1, addr:$src2)>;
-def : Pat<(v8i16 (X86Punpcklwd VR128:$src1, VR128:$src2)),
- (PUNPCKLWDrr VR128:$src1, VR128:$src2)>;
-
-// Shuffle with PUNPCKLDQ
-def : Pat<(v4i32 (X86Punpckldq VR128:$src1,
- (bc_v4i32 (memopv2i64 addr:$src2)))),
- (PUNPCKLDQrm VR128:$src1, addr:$src2)>;
-def : Pat<(v4i32 (X86Punpckldq VR128:$src1, VR128:$src2)),
- (PUNPCKLDQrr VR128:$src1, VR128:$src2)>;
-
-// Shuffle with PUNPCKLQDQ
-def : Pat<(v2i64 (X86Punpcklqdq VR128:$src1, (memopv2i64 addr:$src2))),
- (PUNPCKLQDQrm VR128:$src1, addr:$src2)>;
-def : Pat<(v2i64 (X86Punpcklqdq VR128:$src1, VR128:$src2)),
- (PUNPCKLQDQrr VR128:$src1, VR128:$src2)>;
-
-// Shuffle with PUNPCKHBW
-def : Pat<(v16i8 (X86Punpckhbw VR128:$src1,
- (bc_v16i8 (memopv2i64 addr:$src2)))),
- (PUNPCKHBWrm VR128:$src1, addr:$src2)>;
-def : Pat<(v16i8 (X86Punpckhbw VR128:$src1, VR128:$src2)),
- (PUNPCKHBWrr VR128:$src1, VR128:$src2)>;
-
-// Shuffle with PUNPCKHWD
-def : Pat<(v8i16 (X86Punpckhwd VR128:$src1,
- (bc_v8i16 (memopv2i64 addr:$src2)))),
- (PUNPCKHWDrm VR128:$src1, addr:$src2)>;
-def : Pat<(v8i16 (X86Punpckhwd VR128:$src1, VR128:$src2)),
- (PUNPCKHWDrr VR128:$src1, VR128:$src2)>;
-
-// Shuffle with PUNPCKHDQ
-def : Pat<(v4i32 (X86Punpckhdq VR128:$src1,
- (bc_v4i32 (memopv2i64 addr:$src2)))),
- (PUNPCKHDQrm VR128:$src1, addr:$src2)>;
-def : Pat<(v4i32 (X86Punpckhdq VR128:$src1, VR128:$src2)),
- (PUNPCKHDQrr VR128:$src1, VR128:$src2)>;
-
-// Shuffle with PUNPCKHQDQ
-def : Pat<(v2i64 (X86Punpckhqdq VR128:$src1, (memopv2i64 addr:$src2))),
- (PUNPCKHQDQrm VR128:$src1, addr:$src2)>;
-def : Pat<(v2i64 (X86Punpckhqdq VR128:$src1, VR128:$src2)),
- (PUNPCKHQDQrr VR128:$src1, VR128:$src2)>;
-
-// Shuffle with MOVLHPS
-def : Pat<(X86Movlhps VR128:$src1,
- (bc_v4f32 (v2f64 (scalar_to_vector (loadf64 addr:$src2))))),
- (MOVHPSrm VR128:$src1, addr:$src2)>;
-def : Pat<(X86Movlhps VR128:$src1,
- (bc_v4i32 (v2i64 (X86vzload addr:$src2)))),
- (MOVHPSrm VR128:$src1, addr:$src2)>;
-def : Pat<(v4f32 (X86Movlhps VR128:$src1, VR128:$src2)),
- (MOVLHPSrr VR128:$src1, VR128:$src2)>;
-def : Pat<(v4i32 (X86Movlhps VR128:$src1, VR128:$src2)),
- (MOVLHPSrr VR128:$src1, VR128:$src2)>;
-def : Pat<(v2i64 (X86Movlhps VR128:$src1, VR128:$src2)),
- (MOVLHPSrr (v2i64 VR128:$src1), VR128:$src2)>;
-
-// FIXME: Instead of X86Movddup, there should be a X86Unpcklpd here, the problem
-// is during lowering, where it's not possible to recognize the load fold cause
-// it has two uses through a bitcast. One use disappears at isel time and the
-// fold opportunity reappears.
-def : Pat<(v2f64 (X86Movddup VR128:$src)),
- (UNPCKLPDrr VR128:$src, VR128:$src)>;
-
-// Shuffle with MOVLHPD
-def : Pat<(v2f64 (X86Movlhpd VR128:$src1,
- (scalar_to_vector (loadf64 addr:$src2)))),
- (MOVHPDrm VR128:$src1, addr:$src2)>;
-
-// FIXME: Instead of X86Unpcklpd, there should be a X86Movlhpd here, the problem
-// is during lowering, where it's not possible to recognize the load fold cause
-// it has two uses through a bitcast. One use disappears at isel time and the
-// fold opportunity reappears.
-def : Pat<(v2f64 (X86Unpcklpd VR128:$src1,
- (scalar_to_vector (loadf64 addr:$src2)))),
- (MOVHPDrm VR128:$src1, addr:$src2)>;
-
-// Shuffle with MOVSS
-def : Pat<(v4f32 (X86Movss VR128:$src1, (scalar_to_vector FR32:$src2))),
- (MOVSSrr VR128:$src1, FR32:$src2)>;
-def : Pat<(v4i32 (X86Movss VR128:$src1, VR128:$src2)),
- (MOVSSrr (v4i32 VR128:$src1),
- (EXTRACT_SUBREG (v4i32 VR128:$src2), sub_ss))>;
-def : Pat<(v4f32 (X86Movss VR128:$src1, VR128:$src2)),
- (MOVSSrr (v4f32 VR128:$src1),
- (EXTRACT_SUBREG (v4f32 VR128:$src2), sub_ss))>;
-// FIXME: Instead of a X86Movss there should be a X86Movlps here, the problem
-// is during lowering, where it's not possible to recognize the load fold cause
-// it has two uses through a bitcast. One use disappears at isel time and the
-// fold opportunity reappears.
-def : Pat<(X86Movss VR128:$src1,
- (bc_v4i32 (v2i64 (load addr:$src2)))),
- (MOVLPSrm VR128:$src1, addr:$src2)>;
-
-// Shuffle with MOVSD
-def : Pat<(v2f64 (X86Movsd VR128:$src1, (scalar_to_vector FR64:$src2))),
- (MOVSDrr VR128:$src1, FR64:$src2)>;
-def : Pat<(v2i64 (X86Movsd VR128:$src1, VR128:$src2)),
- (MOVSDrr (v2i64 VR128:$src1),
- (EXTRACT_SUBREG (v2i64 VR128:$src2), sub_sd))>;
-def : Pat<(v2f64 (X86Movsd VR128:$src1, VR128:$src2)),
- (MOVSDrr (v2f64 VR128:$src1),
- (EXTRACT_SUBREG (v2f64 VR128:$src2), sub_sd))>;
-def : Pat<(v4f32 (X86Movsd VR128:$src1, VR128:$src2)),
- (MOVSDrr VR128:$src1, (EXTRACT_SUBREG (v4f32 VR128:$src2), sub_sd))>;
-def : Pat<(v4i32 (X86Movsd VR128:$src1, VR128:$src2)),
- (MOVSDrr VR128:$src1, (EXTRACT_SUBREG (v4i32 VR128:$src2), sub_sd))>;
-
-// Shuffle with MOVSHDUP
-def : Pat<(v4i32 (X86Movshdup VR128:$src)),
- (MOVSHDUPrr VR128:$src)>;
-def : Pat<(X86Movshdup (bc_v4i32 (memopv2i64 addr:$src))),
- (MOVSHDUPrm addr:$src)>;
-
-def : Pat<(v4f32 (X86Movshdup VR128:$src)),
- (MOVSHDUPrr VR128:$src)>;
-def : Pat<(X86Movshdup (memopv4f32 addr:$src)),
- (MOVSHDUPrm addr:$src)>;
-
-// Shuffle with MOVSLDUP
-def : Pat<(v4i32 (X86Movsldup VR128:$src)),
- (MOVSLDUPrr VR128:$src)>;
-def : Pat<(X86Movsldup (bc_v4i32 (memopv2i64 addr:$src))),
- (MOVSLDUPrm addr:$src)>;
-
-def : Pat<(v4f32 (X86Movsldup VR128:$src)),
- (MOVSLDUPrr VR128:$src)>;
-def : Pat<(X86Movsldup (memopv4f32 addr:$src)),
- (MOVSLDUPrm addr:$src)>;
-
-// Shuffle with PSHUFHW
-def : Pat<(v8i16 (X86PShufhw VR128:$src, (i8 imm:$imm))),
- (PSHUFHWri VR128:$src, imm:$imm)>;
-def : Pat<(v8i16 (X86PShufhw (bc_v8i16 (memopv2i64 addr:$src)), (i8 imm:$imm))),
- (PSHUFHWmi addr:$src, imm:$imm)>;
-
-// Shuffle with PSHUFLW
-def : Pat<(v8i16 (X86PShuflw VR128:$src, (i8 imm:$imm))),
- (PSHUFLWri VR128:$src, imm:$imm)>;
-def : Pat<(v8i16 (X86PShuflw (bc_v8i16 (memopv2i64 addr:$src)), (i8 imm:$imm))),
- (PSHUFLWmi addr:$src, imm:$imm)>;
-
-// Shuffle with PALIGN
-def : Pat<(v4i32 (X86PAlign VR128:$src1, VR128:$src2, (i8 imm:$imm))),
- (PALIGNR128rr VR128:$src2, VR128:$src1, imm:$imm)>;
-def : Pat<(v4f32 (X86PAlign VR128:$src1, VR128:$src2, (i8 imm:$imm))),
- (PALIGNR128rr VR128:$src2, VR128:$src1, imm:$imm)>;
-def : Pat<(v8i16 (X86PAlign VR128:$src1, VR128:$src2, (i8 imm:$imm))),
- (PALIGNR128rr VR128:$src2, VR128:$src1, imm:$imm)>;
-def : Pat<(v16i8 (X86PAlign VR128:$src1, VR128:$src2, (i8 imm:$imm))),
- (PALIGNR128rr VR128:$src2, VR128:$src1, imm:$imm)>;
+ // Zero Upper bits of YMM registers
+ def VZEROUPPER : I<0x77, RawFrm, (outs), (ins), "vzeroupper",
+ [(int_x86_avx_vzeroupper)]>, TB, VEX, Requires<[HasAVX]>;
+}
-// Shuffle with MOVLPS
-def : Pat<(v4f32 (X86Movlps VR128:$src1, (load addr:$src2))),
- (MOVLPSrm VR128:$src1, addr:$src2)>;
-def : Pat<(v4i32 (X86Movlps VR128:$src1, (load addr:$src2))),
- (MOVLPSrm VR128:$src1, addr:$src2)>;
-def : Pat<(X86Movlps VR128:$src1,
- (bc_v4f32 (v2f64 (scalar_to_vector (loadf64 addr:$src2))))),
- (MOVLPSrm VR128:$src1, addr:$src2)>;
-// FIXME: Instead of a X86Movlps there should be a X86Movsd here, the problem
-// is during lowering, where it's not possible to recognize the load fold cause
-// it has two uses through a bitcast. One use disappears at isel time and the
-// fold opportunity reappears.
-def : Pat<(v4f32 (X86Movlps VR128:$src1, VR128:$src2)),
- (MOVSDrr VR128:$src1, (EXTRACT_SUBREG (v4f32 VR128:$src2), sub_sd))>;
-
-def : Pat<(v4i32 (X86Movlps VR128:$src1, VR128:$src2)),
- (MOVSDrr VR128:$src1, (EXTRACT_SUBREG (v4i32 VR128:$src2), sub_sd))>;
-
-// Shuffle with MOVLPD
-def : Pat<(v2f64 (X86Movlpd VR128:$src1, (load addr:$src2))),
- (MOVLPDrm VR128:$src1, addr:$src2)>;
-def : Pat<(v2i64 (X86Movlpd VR128:$src1, (load addr:$src2))),
- (MOVLPDrm VR128:$src1, addr:$src2)>;
-def : Pat<(v2f64 (X86Movlpd VR128:$src1,
- (scalar_to_vector (loadf64 addr:$src2)))),
- (MOVLPDrm VR128:$src1, addr:$src2)>;
-
-// Extra patterns to match stores with MOVHPS/PD and MOVLPS/PD
-def : Pat<(store (f64 (vector_extract
- (v2f64 (X86Unpckhps VR128:$src, (undef))), (iPTR 0))),addr:$dst),
- (MOVHPSmr addr:$dst, VR128:$src)>;
-def : Pat<(store (f64 (vector_extract
- (v2f64 (X86Unpckhpd VR128:$src, (undef))), (iPTR 0))),addr:$dst),
- (MOVHPDmr addr:$dst, VR128:$src)>;
-
-def : Pat<(store (v4f32 (X86Movlps (load addr:$src1), VR128:$src2)),addr:$src1),
- (MOVLPSmr addr:$src1, VR128:$src2)>;
-def : Pat<(store (v4i32 (X86Movlps
- (bc_v4i32 (loadv2i64 addr:$src1)), VR128:$src2)), addr:$src1),
- (MOVLPSmr addr:$src1, VR128:$src2)>;
-
-def : Pat<(store (v2f64 (X86Movlpd (load addr:$src1), VR128:$src2)),addr:$src1),
- (MOVLPDmr addr:$src1, VR128:$src2)>;
-def : Pat<(store (v2i64 (X86Movlpd (load addr:$src1), VR128:$src2)),addr:$src1),
- (MOVLPDmr addr:$src1, VR128:$src2)>;
+//===----------------------------------------------------------------------===//
+// Half precision conversion instructions
+//
+let Predicates = [HasAVX, HasF16C] in {
+ def VCVTPH2PSrm : I<0x13, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src),
+ "vcvtph2ps\t{$src, $dst|$dst, $src}", []>, T8, OpSize, VEX;
+ def VCVTPH2PSrr : I<0x13, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ "vcvtph2ps\t{$src, $dst|$dst, $src}", []>, T8, OpSize, VEX;
+ def VCVTPH2PSYrm : I<0x13, MRMSrcMem, (outs VR256:$dst), (ins f128mem:$src),
+ "vcvtph2ps\t{$src, $dst|$dst, $src}", []>, T8, OpSize, VEX;
+ def VCVTPH2PSYrr : I<0x13, MRMSrcReg, (outs VR256:$dst), (ins VR128:$src),
+ "vcvtph2ps\t{$src, $dst|$dst, $src}", []>, T8, OpSize, VEX;
+ def VCVTPS2PHmr : Ii8<0x1D, MRMDestMem, (outs f64mem:$dst),
+ (ins VR128:$src1, i32i8imm:$src2),
+ "vcvtps2ph\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
+ TA, OpSize, VEX;
+ def VCVTPS2PHrr : Ii8<0x1D, MRMDestReg, (outs VR128:$dst),
+ (ins VR128:$src1, i32i8imm:$src2),
+ "vcvtps2ph\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
+ TA, OpSize, VEX;
+ def VCVTPS2PHYmr : Ii8<0x1D, MRMDestMem, (outs f128mem:$dst),
+ (ins VR256:$src1, i32i8imm:$src2),
+ "vcvtps2ph\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
+ TA, OpSize, VEX;
+ def VCVTPS2PHYrr : Ii8<0x1D, MRMDestReg, (outs VR128:$dst),
+ (ins VR256:$src1, i32i8imm:$src2),
+ "vcvtps2ph\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
+ TA, OpSize, VEX;
+}
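For illustration (not part of this patch): the F16C instructions defined above back the half-precision conversion intrinsics. A minimal sketch, assuming an F16C-capable compiler (-mf16c); the name is illustrative:

    #include <immintrin.h>

    // Round four floats to half precision and widen them back
    // (vcvtps2ph followed by vcvtph2ps).
    __m128 roundtrip_half(__m128 x) {
        __m128i h = _mm_cvtps_ph(x, _MM_FROUND_TO_NEAREST_INT |
                                    _MM_FROUND_NO_EXC);
        return _mm_cvtph_ps(h);
    }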
diff --git a/lib/Target/X86/X86InstrSystem.td b/lib/Target/X86/X86InstrSystem.td
index 31de878..05a5b36 100644
--- a/lib/Target/X86/X86InstrSystem.td
+++ b/lib/Target/X86/X86InstrSystem.td
@@ -67,43 +67,43 @@ def IRET64 : RI<0xcf, RawFrm, (outs), (ins), "iretq", []>,
//
let Defs = [AL], Uses = [DX] in
def IN8rr : I<0xEC, RawFrm, (outs), (ins),
- "in{b}\t{%dx, %al|%AL, %DX}", []>;
+ "in{b}\t{%dx, %al|AL, DX}", []>;
let Defs = [AX], Uses = [DX] in
def IN16rr : I<0xED, RawFrm, (outs), (ins),
- "in{w}\t{%dx, %ax|%AX, %DX}", []>, OpSize;
+ "in{w}\t{%dx, %ax|AX, DX}", []>, OpSize;
let Defs = [EAX], Uses = [DX] in
def IN32rr : I<0xED, RawFrm, (outs), (ins),
- "in{l}\t{%dx, %eax|%EAX, %DX}", []>;
+ "in{l}\t{%dx, %eax|EAX, DX}", []>;
let Defs = [AL] in
def IN8ri : Ii8<0xE4, RawFrm, (outs), (ins i8imm:$port),
- "in{b}\t{$port, %al|%AL, $port}", []>;
+ "in{b}\t{$port, %al|AL, $port}", []>;
let Defs = [AX] in
def IN16ri : Ii8<0xE5, RawFrm, (outs), (ins i8imm:$port),
- "in{w}\t{$port, %ax|%AX, $port}", []>, OpSize;
+ "in{w}\t{$port, %ax|AX, $port}", []>, OpSize;
let Defs = [EAX] in
def IN32ri : Ii8<0xE5, RawFrm, (outs), (ins i8imm:$port),
- "in{l}\t{$port, %eax|%EAX, $port}", []>;
+ "in{l}\t{$port, %eax|EAX, $port}", []>;
let Uses = [DX, AL] in
def OUT8rr : I<0xEE, RawFrm, (outs), (ins),
- "out{b}\t{%al, %dx|%DX, %AL}", []>;
+ "out{b}\t{%al, %dx|DX, AL}", []>;
let Uses = [DX, AX] in
def OUT16rr : I<0xEF, RawFrm, (outs), (ins),
- "out{w}\t{%ax, %dx|%DX, %AX}", []>, OpSize;
+ "out{w}\t{%ax, %dx|DX, AX}", []>, OpSize;
let Uses = [DX, EAX] in
def OUT32rr : I<0xEF, RawFrm, (outs), (ins),
- "out{l}\t{%eax, %dx|%DX, %EAX}", []>;
+ "out{l}\t{%eax, %dx|DX, EAX}", []>;
let Uses = [AL] in
def OUT8ir : Ii8<0xE6, RawFrm, (outs), (ins i8imm:$port),
- "out{b}\t{%al, $port|$port, %AL}", []>;
+ "out{b}\t{%al, $port|$port, AL}", []>;
let Uses = [AX] in
def OUT16ir : Ii8<0xE7, RawFrm, (outs), (ins i8imm:$port),
- "out{w}\t{%ax, $port|$port, %AX}", []>, OpSize;
+ "out{w}\t{%ax, $port|$port, AX}", []>, OpSize;
let Uses = [EAX] in
def OUT32ir : Ii8<0xE7, RawFrm, (outs), (ins i8imm:$port),
- "out{l}\t{%eax, $port|$port, %EAX}", []>;
+ "out{l}\t{%eax, $port|$port, EAX}", []>;
def IN8 : I<0x6C, RawFrm, (outs), (ins), "ins{b}", []>;
def IN16 : I<0x6D, RawFrm, (outs), (ins), "ins{w}", []>, OpSize;
@@ -229,65 +229,65 @@ def LTRm : I<0x00, MRM3m, (outs), (ins i16mem:$src),
"ltr{w}\t{$src}", []>, TB;
def PUSHCS16 : I<0x0E, RawFrm, (outs), (ins),
- "push{w}\t%cs", []>, Requires<[In32BitMode]>, OpSize;
+ "push{w}\t{%cs|CS}", []>, Requires<[In32BitMode]>, OpSize;
def PUSHCS32 : I<0x0E, RawFrm, (outs), (ins),
- "push{l}\t%cs", []>, Requires<[In32BitMode]>;
+ "push{l}\t{%cs|CS}", []>, Requires<[In32BitMode]>;
def PUSHSS16 : I<0x16, RawFrm, (outs), (ins),
- "push{w}\t%ss", []>, Requires<[In32BitMode]>, OpSize;
+ "push{w}\t{%ss|SS}", []>, Requires<[In32BitMode]>, OpSize;
def PUSHSS32 : I<0x16, RawFrm, (outs), (ins),
- "push{l}\t%ss", []>, Requires<[In32BitMode]>;
+ "push{l}\t{%ss|SS}", []>, Requires<[In32BitMode]>;
def PUSHDS16 : I<0x1E, RawFrm, (outs), (ins),
- "push{w}\t%ds", []>, Requires<[In32BitMode]>, OpSize;
+ "push{w}\t{%ds|DS}", []>, Requires<[In32BitMode]>, OpSize;
def PUSHDS32 : I<0x1E, RawFrm, (outs), (ins),
- "push{l}\t%ds", []>, Requires<[In32BitMode]>;
+ "push{l}\t{%ds|DS}", []>, Requires<[In32BitMode]>;
def PUSHES16 : I<0x06, RawFrm, (outs), (ins),
- "push{w}\t%es", []>, Requires<[In32BitMode]>, OpSize;
+ "push{w}\t{%es|ES}", []>, Requires<[In32BitMode]>, OpSize;
def PUSHES32 : I<0x06, RawFrm, (outs), (ins),
- "push{l}\t%es", []>, Requires<[In32BitMode]>;
+ "push{l}\t{%es|ES}", []>, Requires<[In32BitMode]>;
def PUSHFS16 : I<0xa0, RawFrm, (outs), (ins),
- "push{w}\t%fs", []>, OpSize, TB;
+ "push{w}\t{%fs|FS}", []>, OpSize, TB;
def PUSHFS32 : I<0xa0, RawFrm, (outs), (ins),
- "push{l}\t%fs", []>, TB, Requires<[In32BitMode]>;
+ "push{l}\t{%fs|FS}", []>, TB, Requires<[In32BitMode]>;
def PUSHGS16 : I<0xa8, RawFrm, (outs), (ins),
- "push{w}\t%gs", []>, OpSize, TB;
+ "push{w}\t{%gs|GS}", []>, OpSize, TB;
def PUSHGS32 : I<0xa8, RawFrm, (outs), (ins),
- "push{l}\t%gs", []>, TB, Requires<[In32BitMode]>;
+ "push{l}\t{%gs|GS}", []>, TB, Requires<[In32BitMode]>;
def PUSHFS64 : I<0xa0, RawFrm, (outs), (ins),
- "push{q}\t%fs", []>, TB;
+ "push{q}\t{%fs|FS}", []>, TB;
def PUSHGS64 : I<0xa8, RawFrm, (outs), (ins),
- "push{q}\t%gs", []>, TB;
+ "push{q}\t{%gs|GS}", []>, TB;
// No "pop cs" instruction.
def POPSS16 : I<0x17, RawFrm, (outs), (ins),
- "pop{w}\t%ss", []>, OpSize, Requires<[In32BitMode]>;
+ "pop{w}\t{%ss|SS}", []>, OpSize, Requires<[In32BitMode]>;
def POPSS32 : I<0x17, RawFrm, (outs), (ins),
- "pop{l}\t%ss", []> , Requires<[In32BitMode]>;
+ "pop{l}\t{%ss|SS}", []> , Requires<[In32BitMode]>;
def POPDS16 : I<0x1F, RawFrm, (outs), (ins),
- "pop{w}\t%ds", []>, OpSize, Requires<[In32BitMode]>;
+ "pop{w}\t{%ds|DS}", []>, OpSize, Requires<[In32BitMode]>;
def POPDS32 : I<0x1F, RawFrm, (outs), (ins),
- "pop{l}\t%ds", []> , Requires<[In32BitMode]>;
+ "pop{l}\t{%ds|DS}", []> , Requires<[In32BitMode]>;
def POPES16 : I<0x07, RawFrm, (outs), (ins),
- "pop{w}\t%es", []>, OpSize, Requires<[In32BitMode]>;
+ "pop{w}\t{%es|ES}", []>, OpSize, Requires<[In32BitMode]>;
def POPES32 : I<0x07, RawFrm, (outs), (ins),
- "pop{l}\t%es", []> , Requires<[In32BitMode]>;
+ "pop{l}\t{%es|ES}", []> , Requires<[In32BitMode]>;
def POPFS16 : I<0xa1, RawFrm, (outs), (ins),
- "pop{w}\t%fs", []>, OpSize, TB;
+ "pop{w}\t{%fs|FS}", []>, OpSize, TB;
def POPFS32 : I<0xa1, RawFrm, (outs), (ins),
- "pop{l}\t%fs", []>, TB , Requires<[In32BitMode]>;
+ "pop{l}\t{%fs|FS}", []>, TB , Requires<[In32BitMode]>;
def POPFS64 : I<0xa1, RawFrm, (outs), (ins),
- "pop{q}\t%fs", []>, TB;
+ "pop{q}\t{%fs|FS}", []>, TB;
def POPGS16 : I<0xa9, RawFrm, (outs), (ins),
- "pop{w}\t%gs", []>, OpSize, TB;
+ "pop{w}\t{%gs|GS}", []>, OpSize, TB;
def POPGS32 : I<0xa9, RawFrm, (outs), (ins),
- "pop{l}\t%gs", []>, TB , Requires<[In32BitMode]>;
+ "pop{l}\t{%gs|GS}", []>, TB , Requires<[In32BitMode]>;
def POPGS64 : I<0xa9, RawFrm, (outs), (ins),
- "pop{q}\t%gs", []>, TB;
+ "pop{q}\t{%gs|GS}", []>, TB;
def LDS16rm : I<0xc5, MRMSrcMem, (outs GR16:$dst), (ins opaque32mem:$src),
@@ -400,12 +400,29 @@ def CPUID : I<0xA2, RawFrm, (outs), (ins), "cpuid", []>, TB;
def INVD : I<0x08, RawFrm, (outs), (ins), "invd", []>, TB;
def WBINVD : I<0x09, RawFrm, (outs), (ins), "wbinvd", []>, TB;
+//===----------------------------------------------------------------------===//
+// XSAVE instructions
let Defs = [RDX, RAX], Uses = [RCX] in
def XGETBV : I<0x01, MRM_D0, (outs), (ins), "xgetbv", []>, TB;
let Uses = [RDX, RAX, RCX] in
def XSETBV : I<0x01, MRM_D1, (outs), (ins), "xsetbv", []>, TB;
+let Uses = [RDX, RAX] in {
+ def XSAVE : I<0xAE, MRM4m, (outs opaque512mem:$dst), (ins),
+ "xsave\t$dst", []>, TB;
+ def XSAVE64 : I<0xAE, MRM4m, (outs opaque512mem:$dst), (ins),
+ "xsaveq\t$dst", []>, TB, REX_W, Requires<[In64BitMode]>;
+ def XRSTOR : I<0xAE, MRM5m, (outs), (ins opaque512mem:$dst),
+ "xrstor\t$dst", []>, TB;
+ def XRSTOR64 : I<0xAE, MRM5m, (outs), (ins opaque512mem:$dst),
+ "xrstorq\t$dst", []>, TB, REX_W, Requires<[In64BitMode]>;
+ def XSAVEOPT : I<0xAE, MRM6m, (outs opaque512mem:$dst), (ins),
+ "xsaveopt\t$dst", []>, TB;
+ def XSAVEOPT64 : I<0xAE, MRM6m, (outs opaque512mem:$dst), (ins),
+ "xsaveoptq\t$dst", []>, TB, REX_W, Requires<[In64BitMode]>;
+}
+
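Not part of the patch: the classic use of XGETBV is the runtime guard below. A minimal sketch, assuming a compiler that exposes _xgetbv (recent GCC/Clang with -mxsave, or MSVC):

    #include <immintrin.h>

    // Read XCR0 via xgetbv and check that the OS saves both XMM and YMM
    // state (bits 1 and 2) -- the usual guard before running AVX code.
    bool os_saves_avx_state() {
        unsigned long long xcr0 = _xgetbv(0);
        return (xcr0 & 0x6) == 0x6;
    }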
//===----------------------------------------------------------------------===//
// VIA PadLock crypto instructions
let Defs = [RAX, RDI], Uses = [RDX, RDI] in
@@ -427,3 +444,24 @@ let Defs = [RAX, RSI, RDI], Uses = [RAX, RSI, RDI] in {
}
let Defs = [RAX, RDX, RSI], Uses = [RAX, RSI] in
def MONTMUL : I<0xc0, RawFrm, (outs), (ins), "montmul", []>, A6;
+
+//===----------------------------------------------------------------------===//
+// FS/GS Base Instructions
+let Predicates = [In64BitMode] in {
+ def RDFSBASE : I<0xAE, MRM0r, (outs GR32:$dst), (ins),
+ "rdfsbase{l}\t$dst", []>, TB, XS;
+ def RDFSBASE64 : RI<0xAE, MRM0r, (outs GR64:$dst), (ins),
+ "rdfsbase{q}\t$dst", []>, TB, XS;
+ def RDGSBASE : I<0xAE, MRM1r, (outs GR32:$dst), (ins),
+ "rdgsbase{l}\t$dst", []>, TB, XS;
+ def RDGSBASE64 : RI<0xAE, MRM1r, (outs GR64:$dst), (ins),
+ "rdgsbase{q}\t$dst", []>, TB, XS;
+ def WRFSBASE : I<0xAE, MRM2r, (outs), (ins GR32:$dst),
+ "wrfsbase{l}\t$dst", []>, TB, XS;
+ def WRFSBASE64 : RI<0xAE, MRM2r, (outs), (ins GR64:$dst),
+ "wrfsbase{q}\t$dst", []>, TB, XS;
+ def WRGSBASE : I<0xAE, MRM3r, (outs), (ins GR32:$dst),
+ "wrgsbase{l}\t$dst", []>, TB, XS;
+ def WRGSBASE64 : RI<0xAE, MRM3r, (outs), (ins GR64:$dst),
+ "wrgsbase{q}\t$dst", []>, TB, XS;
+}
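For illustration only: these instructions surface as the fsgsbase intrinsics. A minimal sketch, assuming -mfsgsbase, a CPU with FSGSBASE, and an OS that has set CR4.FSGSBASE; the name is illustrative:

    #include <immintrin.h>

    // Read the FS segment base directly in user mode (rdfsbase).
    unsigned long long fs_base() {
        return _readfsbase_u64();
    }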
diff --git a/lib/Target/X86/X86InstrVMX.td b/lib/Target/X86/X86InstrVMX.td
index daf61e4..09a7a7d0c 100644
--- a/lib/Target/X86/X86InstrVMX.td
+++ b/lib/Target/X86/X86InstrVMX.td
@@ -16,9 +16,15 @@
// VMX instructions
// 66 0F 38 80
-def INVEPT : I<0x80, RawFrm, (outs), (ins), "invept", []>, OpSize, T8;
+def INVEPT32 : I<0x80, MRMSrcMem, (outs), (ins GR32:$src1, i128mem:$src2),
+ "invept {$src2, $src1|$src1, $src2}", []>, OpSize, T8;
+def INVEPT64 : I<0x80, MRMSrcMem, (outs), (ins GR64:$src1, i128mem:$src2),
+ "invept {$src2, $src1|$src1, $src2}", []>, OpSize, T8;
// 66 0F 38 81
-def INVVPID : I<0x81, RawFrm, (outs), (ins), "invvpid", []>, OpSize, T8;
+def INVVPID32 : I<0x81, MRMSrcMem, (outs), (ins GR32:$src1, i128mem:$src2),
+ "invvpid {$src2, $src1|$src1, $src2}", []>, OpSize, T8;
+def INVVPID64 : I<0x81, MRMSrcMem, (outs), (ins GR64:$src1, i128mem:$src2),
+ "invvpid {$src2, $src1|$src1, $src2}", []>, OpSize, T8;
// 0F 01 C1
def VMCALL : I<0x01, MRM_C1, (outs), (ins), "vmcall", []>, TB;
def VMCLEARm : I<0xC7, MRM6m, (outs), (ins i64mem:$vmcs),
diff --git a/lib/Target/X86/X86MCInstLower.cpp b/lib/Target/X86/X86MCInstLower.cpp
index e385335..50bc14d 100644
--- a/lib/Target/X86/X86MCInstLower.cpp
+++ b/lib/Target/X86/X86MCInstLower.cpp
@@ -372,15 +372,10 @@ ReSimplify:
case X86::FsFLD0SD: LowerUnaryToTwoAddr(OutMI, X86::PXORrr); break;
case X86::VFsFLD0SS: LowerUnaryToTwoAddr(OutMI, X86::VPXORrr); break;
case X86::VFsFLD0SD: LowerUnaryToTwoAddr(OutMI, X86::VPXORrr); break;
- case X86::V_SET0PS: LowerUnaryToTwoAddr(OutMI, X86::XORPSrr); break;
- case X86::V_SET0PD: LowerUnaryToTwoAddr(OutMI, X86::XORPDrr); break;
- case X86::V_SET0PI: LowerUnaryToTwoAddr(OutMI, X86::PXORrr); break;
case X86::V_SETALLONES: LowerUnaryToTwoAddr(OutMI, X86::PCMPEQDrr); break;
- case X86::AVX_SET0PS: LowerUnaryToTwoAddr(OutMI, X86::VXORPSrr); break;
case X86::AVX_SET0PSY: LowerUnaryToTwoAddr(OutMI, X86::VXORPSYrr); break;
- case X86::AVX_SET0PD: LowerUnaryToTwoAddr(OutMI, X86::VXORPDrr); break;
case X86::AVX_SET0PDY: LowerUnaryToTwoAddr(OutMI, X86::VXORPDYrr); break;
- case X86::AVX_SET0PI: LowerUnaryToTwoAddr(OutMI, X86::VPXORrr); break;
+ case X86::AVX_SETALLONES: LowerUnaryToTwoAddr(OutMI, X86::VPCMPEQDrr); break;
case X86::MOV16r0:
LowerSubReg32_Op0(OutMI, X86::MOV32r0); // MOV16r0 -> MOV32r0
@@ -468,6 +463,18 @@ ReSimplify:
case X86::JLE_4: OutMI.setOpcode(X86::JLE_1); break;
case X86::JG_4: OutMI.setOpcode(X86::JG_1); break;
+ // Atomic loads and stores require separate pseudo-instructions because
+ // Acquire implies mayStore and Release implies mayLoad; rewrite them into
+ // regular MOV instructions here. See the sketch after this list.
+ case X86::ACQUIRE_MOV8rm: OutMI.setOpcode(X86::MOV8rm); goto ReSimplify;
+ case X86::ACQUIRE_MOV16rm: OutMI.setOpcode(X86::MOV16rm); goto ReSimplify;
+ case X86::ACQUIRE_MOV32rm: OutMI.setOpcode(X86::MOV32rm); goto ReSimplify;
+ case X86::ACQUIRE_MOV64rm: OutMI.setOpcode(X86::MOV64rm); goto ReSimplify;
+ case X86::RELEASE_MOV8mr: OutMI.setOpcode(X86::MOV8mr); goto ReSimplify;
+ case X86::RELEASE_MOV16mr: OutMI.setOpcode(X86::MOV16mr); goto ReSimplify;
+ case X86::RELEASE_MOV32mr: OutMI.setOpcode(X86::MOV32mr); goto ReSimplify;
+ case X86::RELEASE_MOV64mr: OutMI.setOpcode(X86::MOV64mr); goto ReSimplify;
+
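Not part of the patch, but the reasoning behind it: under x86's TSO memory model an acquire load and a release store need no fences, so both compile to the plain movs these pseudo-instructions are rewritten into. A minimal sketch in standard C++11 atomics:

    #include <atomic>

    // On x86-64 this is a single mov load.
    int load_acquire(const std::atomic<int> &x) {
        return x.load(std::memory_order_acquire);
    }

    // On x86-64 this is a single mov store.
    void store_release(std::atomic<int> &x, int v) {
        x.store(v, std::memory_order_release);
    }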
// We don't currently select the correct instruction form for instructions
// which have a short %eax, etc. form. Handle this by custom lowering, for
// now.
@@ -585,6 +592,8 @@ static void LowerTlsAddr(MCStreamer &OutStreamer,
}
void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) {
+ OutStreamer.EmitCodeRegion();
+
X86MCInstLower MCInstLowering(Mang, *MF, *this);
switch (MI->getOpcode()) {
case TargetOpcode::DBG_VALUE:
@@ -601,7 +610,7 @@ void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) {
if (OutStreamer.hasRawTextSupport())
OutStreamer.EmitRawText(StringRef("\t#MEMBARRIER"));
return;
-
+
case X86::EH_RETURN:
case X86::EH_RETURN64: {
diff --git a/lib/Target/X86/X86MachineFunctionInfo.h b/lib/Target/X86/X86MachineFunctionInfo.h
index 06043ec..b0bb313 100644
--- a/lib/Target/X86/X86MachineFunctionInfo.h
+++ b/lib/Target/X86/X86MachineFunctionInfo.h
@@ -53,10 +53,6 @@ class X86MachineFunctionInfo : public MachineFunctionInfo {
/// relocation models.
unsigned GlobalBaseReg;
- /// ReserveFP - whether the function should reserve the frame pointer
- /// when allocating, even if there may not actually be a frame pointer used.
- bool ReserveFP;
-
/// VarArgsFrameIndex - FrameIndex for start of varargs area.
int VarArgsFrameIndex;
/// RegSaveFrameIndex - X86-64 vararg func register save area.
@@ -65,6 +61,9 @@ class X86MachineFunctionInfo : public MachineFunctionInfo {
unsigned VarArgsGPOffset;
/// VarArgsFPOffset - X86-64 vararg func fp reg offset.
unsigned VarArgsFPOffset;
+ /// ArgumentStackSize - The number of bytes of stack space consumed by the
+ /// arguments being passed.
+ unsigned ArgumentStackSize;
public:
X86MachineFunctionInfo() : ForceFramePointer(false),
@@ -77,7 +76,8 @@ public:
VarArgsFrameIndex(0),
RegSaveFrameIndex(0),
VarArgsGPOffset(0),
- VarArgsFPOffset(0) {}
+ VarArgsFPOffset(0),
+ ArgumentStackSize(0) {}
explicit X86MachineFunctionInfo(MachineFunction &MF)
: ForceFramePointer(false),
@@ -87,11 +87,11 @@ public:
TailCallReturnAddrDelta(0),
SRetReturnReg(0),
GlobalBaseReg(0),
- ReserveFP(false),
VarArgsFrameIndex(0),
RegSaveFrameIndex(0),
VarArgsGPOffset(0),
- VarArgsFPOffset(0) {}
+ VarArgsFPOffset(0),
+ ArgumentStackSize(0) {}
bool getForceFramePointer() const { return ForceFramePointer;}
void setForceFramePointer(bool forceFP) { ForceFramePointer = forceFP; }
@@ -114,9 +114,6 @@ public:
unsigned getGlobalBaseReg() const { return GlobalBaseReg; }
void setGlobalBaseReg(unsigned Reg) { GlobalBaseReg = Reg; }
- bool getReserveFP() const { return ReserveFP; }
- void setReserveFP(bool reserveFP) { ReserveFP = reserveFP; }
-
int getVarArgsFrameIndex() const { return VarArgsFrameIndex; }
void setVarArgsFrameIndex(int Idx) { VarArgsFrameIndex = Idx; }
@@ -128,6 +125,9 @@ public:
unsigned getVarArgsFPOffset() const { return VarArgsFPOffset; }
void setVarArgsFPOffset(unsigned Offset) { VarArgsFPOffset = Offset; }
+
+ unsigned getArgumentStackSize() const { return ArgumentStackSize; }
+ void setArgumentStackSize(unsigned size) { ArgumentStackSize = size; }
};
} // End llvm namespace
diff --git a/lib/Target/X86/X86RegisterInfo.cpp b/lib/Target/X86/X86RegisterInfo.cpp
index f2faf59..c1ac9f3 100644
--- a/lib/Target/X86/X86RegisterInfo.cpp
+++ b/lib/Target/X86/X86RegisterInfo.cpp
@@ -27,7 +27,6 @@
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
-#include "llvm/CodeGen/MachineLocation.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/MC/MCAsmInfo.h"
@@ -53,7 +52,13 @@ ForceStackAlign("force-align-stack",
X86RegisterInfo::X86RegisterInfo(X86TargetMachine &tm,
const TargetInstrInfo &tii)
- : X86GenRegisterInfo(), TM(tm), TII(tii) {
+ : X86GenRegisterInfo(tm.getSubtarget<X86Subtarget>().is64Bit()
+ ? X86::RIP : X86::EIP,
+ X86_MC::getDwarfRegFlavour(tm.getTargetTriple(), false),
+ X86_MC::getDwarfRegFlavour(tm.getTargetTriple(), true)),
+ TM(tm), TII(tii) {
+ X86_MC::InitLLVM2SEHRegisterMapping(this);
+
// Cache some information.
const X86Subtarget *Subtarget = &TM.getSubtarget<X86Subtarget>();
Is64Bit = Subtarget->is64Bit();
@@ -70,40 +75,6 @@ X86RegisterInfo::X86RegisterInfo(X86TargetMachine &tm,
}
}
-static unsigned getFlavour(const X86Subtarget *Subtarget, bool isEH) {
- if (!Subtarget->is64Bit()) {
- if (Subtarget->isTargetDarwin()) {
- if (isEH)
- return DWARFFlavour::X86_32_DarwinEH;
- else
- return DWARFFlavour::X86_32_Generic;
- } else if (Subtarget->isTargetCygMing()) {
- // Unsupported by now, just quick fallback
- return DWARFFlavour::X86_32_Generic;
- } else {
- return DWARFFlavour::X86_32_Generic;
- }
- }
- return DWARFFlavour::X86_64;
-}
-
-/// getDwarfRegNum - This function maps LLVM register identifiers to the DWARF
-/// specific numbering, used in debug info and exception tables.
-int X86RegisterInfo::getDwarfRegNum(unsigned RegNo, bool isEH) const {
- const X86Subtarget *Subtarget = &TM.getSubtarget<X86Subtarget>();
- unsigned Flavour = getFlavour(Subtarget, isEH);
-
- return X86GenRegisterInfo::getDwarfRegNumFull(RegNo, Flavour);
-}
-
-/// getLLVMRegNum - This function maps DWARF register numbers to LLVM register.
-int X86RegisterInfo::getLLVMRegNum(unsigned DwarfRegNo, bool isEH) const {
- const X86Subtarget *Subtarget = &TM.getSubtarget<X86Subtarget>();
- unsigned Flavour = getFlavour(Subtarget, isEH);
-
- return X86GenRegisterInfo::getLLVMRegNumFull(DwarfRegNo, Flavour);
-}
-
/// getCompactUnwindRegNum - This function maps the register to the number for
/// compact unwind encoding. Return -1 if the register isn't valid.
int X86RegisterInfo::getCompactUnwindRegNum(unsigned RegNum, bool isEH) const {
@@ -121,7 +92,7 @@ int X86RegisterInfo::getCompactUnwindRegNum(unsigned RegNum, bool isEH) const {
int
X86RegisterInfo::getSEHRegNum(unsigned i) const {
- int reg = getX86RegNum(i);
+ int reg = X86_MC::getX86RegNum(i);
switch (i) {
case X86::R8: case X86::R8D: case X86::R8W: case X86::R8B:
case X86::R9: case X86::R9D: case X86::R9W: case X86::R9B:
@@ -140,96 +111,16 @@ X86RegisterInfo::getSEHRegNum(unsigned i) const {
return reg;
}
-/// getX86RegNum - This function maps LLVM register identifiers to their X86
-/// specific numbering, which is used in various places encoding instructions.
-unsigned X86RegisterInfo::getX86RegNum(unsigned RegNo) {
- switch(RegNo) {
- case X86::RAX: case X86::EAX: case X86::AX: case X86::AL: return N86::EAX;
- case X86::RCX: case X86::ECX: case X86::CX: case X86::CL: return N86::ECX;
- case X86::RDX: case X86::EDX: case X86::DX: case X86::DL: return N86::EDX;
- case X86::RBX: case X86::EBX: case X86::BX: case X86::BL: return N86::EBX;
- case X86::RSP: case X86::ESP: case X86::SP: case X86::SPL: case X86::AH:
- return N86::ESP;
- case X86::RBP: case X86::EBP: case X86::BP: case X86::BPL: case X86::CH:
- return N86::EBP;
- case X86::RSI: case X86::ESI: case X86::SI: case X86::SIL: case X86::DH:
- return N86::ESI;
- case X86::RDI: case X86::EDI: case X86::DI: case X86::DIL: case X86::BH:
- return N86::EDI;
-
- case X86::R8: case X86::R8D: case X86::R8W: case X86::R8B:
- return N86::EAX;
- case X86::R9: case X86::R9D: case X86::R9W: case X86::R9B:
- return N86::ECX;
- case X86::R10: case X86::R10D: case X86::R10W: case X86::R10B:
- return N86::EDX;
- case X86::R11: case X86::R11D: case X86::R11W: case X86::R11B:
- return N86::EBX;
- case X86::R12: case X86::R12D: case X86::R12W: case X86::R12B:
- return N86::ESP;
- case X86::R13: case X86::R13D: case X86::R13W: case X86::R13B:
- return N86::EBP;
- case X86::R14: case X86::R14D: case X86::R14W: case X86::R14B:
- return N86::ESI;
- case X86::R15: case X86::R15D: case X86::R15W: case X86::R15B:
- return N86::EDI;
-
- case X86::ST0: case X86::ST1: case X86::ST2: case X86::ST3:
- case X86::ST4: case X86::ST5: case X86::ST6: case X86::ST7:
- return RegNo-X86::ST0;
-
- case X86::XMM0: case X86::XMM8:
- case X86::YMM0: case X86::YMM8: case X86::MM0:
- return 0;
- case X86::XMM1: case X86::XMM9:
- case X86::YMM1: case X86::YMM9: case X86::MM1:
- return 1;
- case X86::XMM2: case X86::XMM10:
- case X86::YMM2: case X86::YMM10: case X86::MM2:
- return 2;
- case X86::XMM3: case X86::XMM11:
- case X86::YMM3: case X86::YMM11: case X86::MM3:
- return 3;
- case X86::XMM4: case X86::XMM12:
- case X86::YMM4: case X86::YMM12: case X86::MM4:
- return 4;
- case X86::XMM5: case X86::XMM13:
- case X86::YMM5: case X86::YMM13: case X86::MM5:
- return 5;
- case X86::XMM6: case X86::XMM14:
- case X86::YMM6: case X86::YMM14: case X86::MM6:
- return 6;
- case X86::XMM7: case X86::XMM15:
- case X86::YMM7: case X86::YMM15: case X86::MM7:
- return 7;
-
- case X86::ES: return 0;
- case X86::CS: return 1;
- case X86::SS: return 2;
- case X86::DS: return 3;
- case X86::FS: return 4;
- case X86::GS: return 5;
-
- case X86::CR0: case X86::CR8 : case X86::DR0: return 0;
- case X86::CR1: case X86::CR9 : case X86::DR1: return 1;
- case X86::CR2: case X86::CR10: case X86::DR2: return 2;
- case X86::CR3: case X86::CR11: case X86::DR3: return 3;
- case X86::CR4: case X86::CR12: case X86::DR4: return 4;
- case X86::CR5: case X86::CR13: case X86::DR5: return 5;
- case X86::CR6: case X86::CR14: case X86::DR6: return 6;
- case X86::CR7: case X86::CR15: case X86::DR7: return 7;
-
- // Pseudo index registers are equivalent to a "none"
- // scaled index (See Intel Manual 2A, table 2-3)
- case X86::EIZ:
- case X86::RIZ:
- return 4;
-
- default:
- assert(isVirtualRegister(RegNo) && "Unknown physical register!");
- llvm_unreachable("Register allocator hasn't allocated reg correctly yet!");
- return 0;
- }
+const TargetRegisterClass *
+X86RegisterInfo::getSubClassWithSubReg(const TargetRegisterClass *RC,
+ unsigned Idx) const {
+ // The sub_8bit sub-register index is more constrained in 32-bit mode.
+ // It behaves just like the sub_8bit_hi index.
+ if (!Is64Bit && Idx == X86::sub_8bit)
+ Idx = X86::sub_8bit_hi;
+
+ // Forward to TableGen's default version.
+ return X86GenRegisterInfo::getSubClassWithSubReg(RC, Idx);
}
const TargetRegisterClass *
@@ -355,8 +246,19 @@ X86RegisterInfo::getMatchingSuperRegClass(const TargetRegisterClass *A,
const TargetRegisterClass*
X86RegisterInfo::getLargestLegalSuperClass(const TargetRegisterClass *RC) const{
+ // Don't allow super-classes of GR8_NOREX. This class is only used after
+ // extracting sub_8bit_hi sub-registers. The H sub-registers cannot be copied
+ // to the full GR8 register class in 64-bit mode, so we cannot allow
+ // register class inflation.
+ //
+ // The GR8_NOREX class is always used in a way that won't be constrained to a
+ // sub-class, so sub-classes like GR8_ABCD_L are allowed to expand to the
+ // full GR8 class.
+ if (RC == X86::GR8_NOREXRegisterClass)
+ return RC;
+
const TargetRegisterClass *Super = RC;
- TargetRegisterClass::sc_iterator I = RC->superclasses_begin();
+ TargetRegisterClass::sc_iterator I = RC->getSuperClasses();
do {
switch (Super->getID()) {
case X86::GR8RegClassID:
@@ -741,11 +643,6 @@ X86RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
}
}
-unsigned X86RegisterInfo::getRARegister() const {
- return Is64Bit ? X86::RIP // Should have dwarf #16.
- : X86::EIP; // Should have dwarf #8.
-}
-
unsigned X86RegisterInfo::getFrameRegister(const MachineFunction &MF) const {
const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
return TFI->hasFP(MF) ? FramePtr : StackPtr;
@@ -948,7 +845,7 @@ namespace {
for (unsigned i = 0, e = RI.getNumVirtRegs(); i != e; ++i) {
unsigned Reg = TargetRegisterInfo::index2VirtReg(i);
if (RI.getRegClass(Reg)->getAlignment() > StackAlignment) {
- FuncInfo->setReserveFP(true);
+ FuncInfo->setForceFramePointer(true);
return true;
}
}
diff --git a/lib/Target/X86/X86RegisterInfo.h b/lib/Target/X86/X86RegisterInfo.h
index a12eb12..7d39c68 100644
--- a/lib/Target/X86/X86RegisterInfo.h
+++ b/lib/Target/X86/X86RegisterInfo.h
@@ -24,22 +24,6 @@ namespace llvm {
class TargetInstrInfo;
class X86TargetMachine;
-/// N86 namespace - Native X86 register numbers
-///
-namespace N86 {
- enum {
- EAX = 0, ECX = 1, EDX = 2, EBX = 3, ESP = 4, EBP = 5, ESI = 6, EDI = 7
- };
-}
-
-/// DWARFFlavour - Flavour of dwarf regnumbers
-///
-namespace DWARFFlavour {
- enum {
- X86_64 = 0, X86_32_DarwinEH = 1, X86_32_Generic = 2
- };
-}
-
class X86RegisterInfo : public X86GenRegisterInfo {
public:
X86TargetMachine &TM;
@@ -73,11 +57,6 @@ public:
/// register identifier.
static unsigned getX86RegNum(unsigned RegNo);
- /// getDwarfRegNum - allows modification of X86GenRegisterInfo::getDwarfRegNum
- /// (created by TableGen) for target dependencies.
- int getDwarfRegNum(unsigned RegNum, bool isEH) const;
- int getLLVMRegNum(unsigned RegNum, bool isEH) const;
-
// FIXME: This should be tablegen'd like getDwarfRegNum is
int getSEHRegNum(unsigned i) const;
@@ -95,6 +74,9 @@ public:
getMatchingSuperRegClass(const TargetRegisterClass *A,
const TargetRegisterClass *B, unsigned Idx) const;
+ virtual const TargetRegisterClass *
+ getSubClassWithSubReg(const TargetRegisterClass *RC, unsigned Idx) const;
+
const TargetRegisterClass*
getLargestLegalSuperClass(const TargetRegisterClass *RC) const;
@@ -136,7 +118,6 @@ public:
int SPAdj, RegScavenger *RS = NULL) const;
// Debug information queries.
- unsigned getRARegister() const;
unsigned getFrameRegister(const MachineFunction &MF) const;
unsigned getStackRegister() const { return StackPtr; }
// FIXME: Move to FrameInfo
diff --git a/lib/Target/X86/X86RegisterInfo.td b/lib/Target/X86/X86RegisterInfo.td
index 203722a..9a7db36 100644
--- a/lib/Target/X86/X86RegisterInfo.td
+++ b/lib/Target/X86/X86RegisterInfo.td
@@ -390,6 +390,13 @@ def GR64_NOREX : RegisterClass<"X86", [i64], 64,
(GR32_NOREX sub_32bit)];
}
+// GR32_NOAX - GR32 registers except EAX. Used by AddRegFrm of XCHG32 in 64-bit
+// mode to keep the encoder from picking the one-byte 0x90 NOP encoding:
+// xchg %eax, %eax must clear the upper 32 bits of RAX, so it is not a NOP.
+def GR32_NOAX : RegisterClass<"X86", [i32], 32, (sub GR32, EAX)> {
+ let SubRegClasses = [(GR8 sub_8bit, sub_8bit_hi), (GR16 sub_16bit)];
+}
+
// GR32_NOSP - GR32 registers except ESP.
def GR32_NOSP : RegisterClass<"X86", [i32], 32, (sub GR32, ESP)> {
let SubRegClasses = [(GR8 sub_8bit, sub_8bit_hi), (GR16 sub_16bit)];
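For context on the 0x90 hazard behind GR32_NOAX, the two encodings involved (byte values per the Intel SDM; the array names are hypothetical):

    // In 64-bit mode the one-byte form is architecturally a NOP and does
    // NOT zero RAX[63:32]; any other 32-bit self-exchange writes its
    // destination register and therefore does zero the upper half.
    static const unsigned char XchgEaxEax[] = { 0x90 };       // NOP alias
    static const unsigned char XchgEcxEcx[] = { 0x87, 0xC9 }; // real xchg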
@@ -455,8 +462,8 @@ def VR128 : RegisterClass<"X86", [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],
let SubRegClasses = [(FR32 sub_ss), (FR64 sub_sd)];
}
-def VR256 : RegisterClass<"X86", [v32i8, v8i32, v4i64, v8f32, v4f64], 256,
- (sequence "YMM%u", 0, 15)> {
+def VR256 : RegisterClass<"X86", [v32i8, v16i16, v8i32, v4i64, v8f32, v4f64],
+ 256, (sequence "YMM%u", 0, 15)> {
let SubRegClasses = [(FR32 sub_ss), (FR64 sub_sd), (VR128 sub_xmm)];
}
diff --git a/lib/Target/X86/X86SelectionDAGInfo.cpp b/lib/Target/X86/X86SelectionDAGInfo.cpp
index 02754f9..6406bce 100644
--- a/lib/Target/X86/X86SelectionDAGInfo.cpp
+++ b/lib/Target/X86/X86SelectionDAGInfo.cpp
@@ -54,7 +54,7 @@ X86SelectionDAGInfo::EmitTargetCodeForMemset(SelectionDAG &DAG, DebugLoc dl,
if (const char *bzeroEntry = V &&
V->isNullValue() ? Subtarget->getBZeroEntry() : 0) {
EVT IntPtr = TLI.getPointerTy();
- const Type *IntPtrTy = getTargetData()->getIntPtrType(*DAG.getContext());
+ Type *IntPtrTy = getTargetData()->getIntPtrType(*DAG.getContext());
TargetLowering::ArgListTy Args;
TargetLowering::ArgListEntry Entry;
Entry.Node = Dst;
diff --git a/lib/Target/X86/X86Subtarget.cpp b/lib/Target/X86/X86Subtarget.cpp
index 5e6c659..7064dd0 100644
--- a/lib/Target/X86/X86Subtarget.cpp
+++ b/lib/Target/X86/X86Subtarget.cpp
@@ -16,9 +16,11 @@
#include "X86InstrInfo.h"
#include "llvm/GlobalValue.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Support/Host.h"
#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
#include "llvm/ADT/SmallVector.h"
#define GET_SUBTARGETINFO_TARGET_DESC
@@ -185,24 +187,53 @@ void X86Subtarget::AutoDetectSubtargetFeatures() {
X86_MC::GetCpuIDAndInfo(0x1, &EAX, &EBX, &ECX, &EDX);
- if ((EDX >> 15) & 1) HasCMov = true; ToggleFeature(X86::FeatureCMOV);
- if ((EDX >> 23) & 1) X86SSELevel = MMX; ToggleFeature(X86::FeatureMMX);
- if ((EDX >> 25) & 1) X86SSELevel = SSE1; ToggleFeature(X86::FeatureSSE1);
- if ((EDX >> 26) & 1) X86SSELevel = SSE2; ToggleFeature(X86::FeatureSSE2);
- if (ECX & 0x1) X86SSELevel = SSE3; ToggleFeature(X86::FeatureSSE3);
- if ((ECX >> 9) & 1) X86SSELevel = SSSE3; ToggleFeature(X86::FeatureSSSE3);
- if ((ECX >> 19) & 1) X86SSELevel = SSE41; ToggleFeature(X86::FeatureSSE41);
- if ((ECX >> 20) & 1) X86SSELevel = SSE42; ToggleFeature(X86::FeatureSSE42);
+ if ((EDX >> 15) & 1) { HasCMov = true; ToggleFeature(X86::FeatureCMOV); }
+ if ((EDX >> 23) & 1) { X86SSELevel = MMX; ToggleFeature(X86::FeatureMMX); }
+ if ((EDX >> 25) & 1) { X86SSELevel = SSE1; ToggleFeature(X86::FeatureSSE1); }
+ if ((EDX >> 26) & 1) { X86SSELevel = SSE2; ToggleFeature(X86::FeatureSSE2); }
+ if (ECX & 0x1) { X86SSELevel = SSE3; ToggleFeature(X86::FeatureSSE3); }
+ if ((ECX >> 9) & 1) { X86SSELevel = SSSE3; ToggleFeature(X86::FeatureSSSE3);}
+ if ((ECX >> 19) & 1) { X86SSELevel = SSE41; ToggleFeature(X86::FeatureSSE41);}
+ if ((ECX >> 20) & 1) { X86SSELevel = SSE42; ToggleFeature(X86::FeatureSSE42);}
// FIXME: AVX codegen support is not ready.
- //if ((ECX >> 28) & 1) { HasAVX = true; } ToggleFeature(X86::FeatureAVX);
+ //if ((ECX >> 28) & 1) { HasAVX = true; ToggleFeature(X86::FeatureAVX); }
bool IsIntel = memcmp(text.c, "GenuineIntel", 12) == 0;
bool IsAMD = !IsIntel && memcmp(text.c, "AuthenticAMD", 12) == 0;
- HasCLMUL = IsIntel && ((ECX >> 1) & 0x1); ToggleFeature(X86::FeatureCLMUL);
- HasFMA3 = IsIntel && ((ECX >> 12) & 0x1); ToggleFeature(X86::FeatureFMA3);
- HasPOPCNT = IsIntel && ((ECX >> 23) & 0x1); ToggleFeature(X86::FeaturePOPCNT);
- HasAES = IsIntel && ((ECX >> 25) & 0x1); ToggleFeature(X86::FeatureAES);
+ if (IsIntel && ((ECX >> 1) & 0x1)) {
+ HasCLMUL = true;
+ ToggleFeature(X86::FeatureCLMUL);
+ }
+ if (IsIntel && ((ECX >> 12) & 0x1)) {
+ HasFMA3 = true;
+ ToggleFeature(X86::FeatureFMA3);
+ }
+ if (IsIntel && ((ECX >> 22) & 0x1)) {
+ HasMOVBE = true;
+ ToggleFeature(X86::FeatureMOVBE);
+ }
+ if (IsIntel && ((ECX >> 23) & 0x1)) {
+ HasPOPCNT = true;
+ ToggleFeature(X86::FeaturePOPCNT);
+ }
+ if (IsIntel && ((ECX >> 25) & 0x1)) {
+ HasAES = true;
+ ToggleFeature(X86::FeatureAES);
+ }
+ if (IsIntel && ((ECX >> 29) & 0x1)) {
+ HasF16C = true;
+ ToggleFeature(X86::FeatureF16C);
+ }
+ if (IsIntel && ((ECX >> 30) & 0x1)) {
+ HasRDRAND = true;
+ ToggleFeature(X86::FeatureRDRAND);
+ }
+
+ if ((ECX >> 13) & 0x1) {
+ HasCmpxchg16b = true;
+ ToggleFeature(X86::FeatureCMPXCHG16B);
+ }
if (IsIntel || IsAMD) {
// Determine if bit test memory instructions are slow.
@@ -224,6 +255,10 @@ void X86Subtarget::AutoDetectSubtargetFeatures() {
HasX86_64 = true;
ToggleFeature(X86::Feature64Bit);
}
+ if ((ECX >> 5) & 0x1) {
+ HasLZCNT = true;
+ ToggleFeature(X86::FeatureLZCNT);
+ }
if (IsAMD && ((ECX >> 6) & 0x1)) {
HasSSE4A = true;
ToggleFeature(X86::FeatureSSE4A);
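The bit tests above follow the CPUID leaf-1 layout. As a self-contained illustration, one feature bit can be probed the same way (hypothetical helper; GCC/Clang inline asm and an x86 host assumed):

    #include <cstdint>

    // Probe CPUID leaf 1 and test ECX bit 23 (POPCNT), mirroring the
    // AutoDetectSubtargetFeatures logic above.
    static bool HostHasPOPCNT() {
      uint32_t EAX, EBX, ECX, EDX;
      __asm__ volatile("cpuid"
                       : "=a"(EAX), "=b"(EBX), "=c"(ECX), "=d"(EDX)
                       : "a"(1), "c"(0));
      return (ECX >> 23) & 1;
    }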
@@ -251,14 +286,21 @@ X86Subtarget::X86Subtarget(const std::string &TT, const std::string &CPU,
, HasCLMUL(false)
, HasFMA3(false)
, HasFMA4(false)
+ , HasMOVBE(false)
+ , HasRDRAND(false)
+ , HasF16C(false)
+ , HasLZCNT(false)
+ , HasBMI(false)
, IsBTMemSlow(false)
, IsUAMemFast(false)
, HasVectorUAMem(false)
+ , HasCmpxchg16b(false)
, stackAlignment(8)
// FIXME: this is a known good value for Yonah. How about others?
, MaxInlineSizeThreshold(128)
, TargetTriple(TT)
- , In64BitMode(is64Bit) {
+ , In64BitMode(is64Bit)
+ , InNaClMode(false) {
// Determine default and user specified characteristics
if (!FS.empty() || !CPU.empty()) {
std::string CPUName = CPU;
@@ -304,6 +346,11 @@ X86Subtarget::X86Subtarget(const std::string &TT, const std::string &CPU,
if (In64BitMode)
ToggleFeature(X86::Mode64Bit);
+ if (isTargetNaCl()) {
+ InNaClMode = true;
+ ToggleFeature(X86::ModeNaCl);
+ }
+
if (HasAVX)
X86SSELevel = NoMMXSSE;
@@ -313,6 +360,9 @@ X86Subtarget::X86Subtarget(const std::string &TT, const std::string &CPU,
assert((!In64BitMode || HasX86_64) &&
"64-bit code requested on a subtarget that doesn't support it!");
+ if(EnableSegmentedStacks && !isTargetELF())
+ report_fatal_error("Segmented stacks are only implemented on ELF.");
+
// Stack alignment is 16 bytes on Darwin, FreeBSD, Linux and Solaris (both
// 32 and 64 bit) and for all 64-bit targets.
if (StackAlignOverride)
diff --git a/lib/Target/X86/X86Subtarget.h b/lib/Target/X86/X86Subtarget.h
index 6d22027..3258d3d 100644
--- a/lib/Target/X86/X86Subtarget.h
+++ b/lib/Target/X86/X86Subtarget.h
@@ -90,6 +90,21 @@ protected:
/// HasFMA4 - Target has 4-operand fused multiply-add
bool HasFMA4;
+ /// HasMOVBE - True if the processor has the MOVBE instruction.
+ bool HasMOVBE;
+
+ /// HasRDRAND - True if the processor has the RDRAND instruction.
+ bool HasRDRAND;
+
+ /// HasF16C - Processor has F16C half-precision float conversion instructions.
+ bool HasF16C;
+
+ /// HasLZCNT - Processor has LZCNT instruction.
+ bool HasLZCNT;
+
+ /// HasBMI - Processor has BMI1 instructions.
+ bool HasBMI;
+
/// IsBTMemSlow - True if BT (bit test) of memory instructions are slow.
bool IsBTMemSlow;
@@ -100,6 +115,10 @@ protected:
/// operands. This may require setting a feature bit in the processor.
bool HasVectorUAMem;
+ /// HasCmpxchg16b - True if this processor has the CMPXCHG16B instruction;
+ /// this is true for most x86-64 chips, but not the first AMD chips.
+ bool HasCmpxchg16b;
+
/// stackAlignment - The minimum alignment known to hold of the stack frame on
/// entry to the function and which must be maintained by every function.
unsigned stackAlignment;
@@ -115,6 +134,9 @@ private:
/// In64BitMode - True if compiling for 64-bit, false for 32-bit.
bool In64BitMode;
+ /// InNaClMode - True if compiling for Native Client target.
+ bool InNaClMode;
+
public:
/// This constructor initializes the data members to match that
@@ -165,9 +187,15 @@ public:
bool hasCLMUL() const { return HasCLMUL; }
bool hasFMA3() const { return HasFMA3; }
bool hasFMA4() const { return HasFMA4; }
+ bool hasMOVBE() const { return HasMOVBE; }
+ bool hasRDRAND() const { return HasRDRAND; }
+ bool hasF16C() const { return HasF16C; }
+ bool hasLZCNT() const { return HasLZCNT; }
+ bool hasBMI() const { return HasBMI; }
bool isBTMemSlow() const { return IsBTMemSlow; }
bool isUnalignedMemAccessFast() const { return IsUAMemFast; }
bool hasVectorUAMem() const { return HasVectorUAMem; }
+ bool hasCmpxchg16b() const { return HasCmpxchg16b; }
const Triple &getTargetTriple() const { return TargetTriple; }
@@ -185,6 +213,11 @@ public:
return !isTargetDarwin() && !isTargetWindows() && !isTargetCygMing();
}
bool isTargetLinux() const { return TargetTriple.getOS() == Triple::Linux; }
+ bool isTargetNaCl() const {
+ return TargetTriple.getOS() == Triple::NativeClient;
+ }
+ bool isTargetNaCl32() const { return isTargetNaCl() && !is64Bit(); }
+ bool isTargetNaCl64() const { return isTargetNaCl() && is64Bit(); }
bool isTargetWindows() const { return TargetTriple.getOS() == Triple::Win32; }
bool isTargetMingw() const { return TargetTriple.getOS() == Triple::MinGW32; }
@@ -199,7 +232,8 @@ public:
}
bool isTargetWin64() const {
- return In64BitMode && (isTargetMingw() || isTargetWindows());
+ // FIXME: x86_64-cygwin has not been released yet.
+ return In64BitMode && (isTargetCygMing() || isTargetWindows());
}
bool isTargetEnvMacho() const {
diff --git a/lib/Target/X86/X86TargetMachine.cpp b/lib/Target/X86/X86TargetMachine.cpp
index 9cab0e0..15c6c4e 100644
--- a/lib/Target/X86/X86TargetMachine.cpp
+++ b/lib/Target/X86/X86TargetMachine.cpp
@@ -16,65 +16,32 @@
#include "llvm/PassManager.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/Passes.h"
-#include "llvm/MC/MCCodeEmitter.h"
-#include "llvm/MC/MCStreamer.h"
+#include "llvm/Support/CommandLine.h"
#include "llvm/Support/FormattedStream.h"
#include "llvm/Target/TargetOptions.h"
-#include "llvm/Target/TargetRegistry.h"
+#include "llvm/Support/TargetRegistry.h"
using namespace llvm;
-static MCStreamer *createMCStreamer(const Target &T, const std::string &TT,
- MCContext &Ctx, TargetAsmBackend &TAB,
- raw_ostream &_OS,
- MCCodeEmitter *_Emitter,
- bool RelaxAll,
- bool NoExecStack) {
- Triple TheTriple(TT);
-
- if (TheTriple.isOSDarwin() || TheTriple.getEnvironment() == Triple::MachO)
- return createMachOStreamer(Ctx, TAB, _OS, _Emitter, RelaxAll);
-
- if (TheTriple.isOSWindows())
- return createWinCOFFStreamer(Ctx, TAB, *_Emitter, _OS, RelaxAll);
-
- return createELFStreamer(Ctx, TAB, _OS, _Emitter, RelaxAll, NoExecStack);
-}
-
extern "C" void LLVMInitializeX86Target() {
// Register the target.
RegisterTargetMachine<X86_32TargetMachine> X(TheX86_32Target);
RegisterTargetMachine<X86_64TargetMachine> Y(TheX86_64Target);
-
- // Register the code emitter.
- TargetRegistry::RegisterCodeEmitter(TheX86_32Target,
- createX86MCCodeEmitter);
- TargetRegistry::RegisterCodeEmitter(TheX86_64Target,
- createX86MCCodeEmitter);
-
- // Register the asm backend.
- TargetRegistry::RegisterAsmBackend(TheX86_32Target,
- createX86_32AsmBackend);
- TargetRegistry::RegisterAsmBackend(TheX86_64Target,
- createX86_64AsmBackend);
-
- // Register the object streamer.
- TargetRegistry::RegisterObjectStreamer(TheX86_32Target,
- createMCStreamer);
- TargetRegistry::RegisterObjectStreamer(TheX86_64Target,
- createMCStreamer);
}
-X86_32TargetMachine::X86_32TargetMachine(const Target &T, const std::string &TT,
- const std::string &CPU,
- const std::string &FS)
- : X86TargetMachine(T, TT, CPU, FS, false),
+X86_32TargetMachine::X86_32TargetMachine(const Target &T, StringRef TT,
+ StringRef CPU, StringRef FS,
+ Reloc::Model RM, CodeModel::Model CM)
+ : X86TargetMachine(T, TT, CPU, FS, RM, CM, false),
DataLayout(getSubtargetImpl()->isTargetDarwin() ?
- "e-p:32:32-f64:32:64-i64:32:64-f80:128:128-f128:128:128-n8:16:32" :
+ "e-p:32:32-f64:32:64-i64:32:64-f80:128:128-f128:128:128-"
+ "n8:16:32-S128" :
(getSubtargetImpl()->isTargetCygMing() ||
getSubtargetImpl()->isTargetWindows()) ?
- "e-p:32:32-f64:64:64-i64:64:64-f80:32:32-f128:128:128-n8:16:32" :
- "e-p:32:32-f64:32:64-i64:32:64-f80:32:32-f128:128:128-n8:16:32"),
+ "e-p:32:32-f64:64:64-i64:64:64-f80:32:32-f128:128:128-"
+ "n8:16:32-S32" :
+ "e-p:32:32-f64:32:64-i64:32:64-f80:32:32-f128:128:128-"
+ "n8:16:32-S128"),
InstrInfo(*this),
TSInfo(*this),
TLInfo(*this),
@@ -82,11 +49,12 @@ X86_32TargetMachine::X86_32TargetMachine(const Target &T, const std::string &TT,
}
-X86_64TargetMachine::X86_64TargetMachine(const Target &T, const std::string &TT,
- const std::string &CPU,
- const std::string &FS)
- : X86TargetMachine(T, TT, CPU, FS, true),
- DataLayout("e-p:64:64-s:64-f64:64:64-i64:64:64-f80:128:128-f128:128:128-n8:16:32:64"),
+X86_64TargetMachine::X86_64TargetMachine(const Target &T, StringRef TT,
+ StringRef CPU, StringRef FS,
+ Reloc::Model RM, CodeModel::Model CM)
+ : X86TargetMachine(T, TT, CPU, FS, RM, CM, true),
+ DataLayout("e-p:64:64-s:64-f64:64:64-i64:64:64-f80:128:128-f128:128:128-"
+ "n8:16:32:64-S128"),
InstrInfo(*this),
TSInfo(*this),
TLInfo(*this),
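The substantive change to the data layout strings above is the new trailing S field, which records the natural stack alignment in bits. Reading the 64-bit string field by field (per the LLVM data layout grammar):

    // e            little-endian
    // p:64:64      64-bit pointers, 64-bit ABI alignment
    // s:64         stack objects aligned to 64 bits
    // f80:128:128  x86_fp80: 128-bit ABI and preferred alignment
    // n8:16:32:64  native integer widths
    // S128         natural stack alignment of 128 bits (16 bytes);
    //              the 32-bit Windows/CygMing string uses S32 instead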
@@ -95,52 +63,14 @@ X86_64TargetMachine::X86_64TargetMachine(const Target &T, const std::string &TT,
/// X86TargetMachine ctor - Create an X86 target.
///
-X86TargetMachine::X86TargetMachine(const Target &T, const std::string &TT,
- const std::string &CPU,
- const std::string &FS, bool is64Bit)
- : LLVMTargetMachine(T, TT, CPU, FS),
+X86TargetMachine::X86TargetMachine(const Target &T, StringRef TT,
+ StringRef CPU, StringRef FS,
+ Reloc::Model RM, CodeModel::Model CM,
+ bool is64Bit)
+ : LLVMTargetMachine(T, TT, CPU, FS, RM, CM),
Subtarget(TT, CPU, FS, StackAlignmentOverride, is64Bit),
FrameLowering(*this, Subtarget),
ELFWriterInfo(is64Bit, true) {
- DefRelocModel = getRelocationModel();
-
- // If no relocation model was picked, default as appropriate for the target.
- if (getRelocationModel() == Reloc::Default) {
- // Darwin defaults to PIC in 64 bit mode and dynamic-no-pic in 32 bit mode.
- // Win64 requires rip-rel addressing, thus we force it to PIC. Otherwise we
- // use static relocation model by default.
- if (Subtarget.isTargetDarwin()) {
- if (Subtarget.is64Bit())
- setRelocationModel(Reloc::PIC_);
- else
- setRelocationModel(Reloc::DynamicNoPIC);
- } else if (Subtarget.isTargetWin64())
- setRelocationModel(Reloc::PIC_);
- else
- setRelocationModel(Reloc::Static);
- }
-
- assert(getRelocationModel() != Reloc::Default &&
- "Relocation mode not picked");
-
- // ELF and X86-64 don't have a distinct DynamicNoPIC model. DynamicNoPIC
- // is defined as a model for code which may be used in static or dynamic
- // executables but not necessarily a shared library. On X86-32 we just
- // compile in -static mode, in x86-64 we use PIC.
- if (getRelocationModel() == Reloc::DynamicNoPIC) {
- if (is64Bit)
- setRelocationModel(Reloc::PIC_);
- else if (!Subtarget.isTargetDarwin())
- setRelocationModel(Reloc::Static);
- }
-
- // If we are on Darwin, disallow static relocation model in X86-64 mode, since
- // the Mach-O file format doesn't support it.
- if (getRelocationModel() == Reloc::Static &&
- Subtarget.isTargetDarwin() &&
- is64Bit)
- setRelocationModel(Reloc::PIC_);
-
// Determine the PICStyle based on the target selected.
if (getRelocationModel() == Reloc::Static) {
// Unless we're in PIC or DynamicNoPIC mode, set the PIC style to None.
@@ -161,16 +91,20 @@ X86TargetMachine::X86TargetMachine(const Target &T, const std::string &TT,
Subtarget.setPICStyle(PICStyles::GOT);
}
- // Finally, if we have "none" as our PIC style, force to static mode.
- if (Subtarget.getPICStyle() == PICStyles::None)
- setRelocationModel(Reloc::Static);
-
// default to hard float ABI
if (FloatABIType == FloatABI::Default)
FloatABIType = FloatABI::Hard;
}
//===----------------------------------------------------------------------===//
+// Command line options for x86
+//===----------------------------------------------------------------------===//
+static cl::opt<bool>
+UseVZeroUpper("x86-use-vzeroupper",
+ cl::desc("Minimize AVX to SSE transition penalty"),
+ cl::init(false));
+
+//===----------------------------------------------------------------------===//
// Pass Pipeline Configuration
//===----------------------------------------------------------------------===//
@@ -200,46 +134,25 @@ bool X86TargetMachine::addPostRegAlloc(PassManagerBase &PM,
bool X86TargetMachine::addPreEmitPass(PassManagerBase &PM,
CodeGenOpt::Level OptLevel) {
- if (OptLevel != CodeGenOpt::None && Subtarget.hasSSE2()) {
- PM.add(createSSEDomainFixPass());
- return true;
+ bool ShouldPrint = false;
+ if (OptLevel != CodeGenOpt::None &&
+ (Subtarget.hasSSE2() || Subtarget.hasAVX())) {
+ PM.add(createExecutionDependencyFixPass(&X86::VR128RegClass));
+ ShouldPrint = true;
}
- return false;
+
+ if (Subtarget.hasAVX() && UseVZeroUpper) {
+ PM.add(createX86IssueVZeroUpperPass());
+ ShouldPrint = true;
+ }
+
+ return ShouldPrint;
}
bool X86TargetMachine::addCodeEmitter(PassManagerBase &PM,
CodeGenOpt::Level OptLevel,
JITCodeEmitter &JCE) {
- // FIXME: Move this to TargetJITInfo!
- // On Darwin, do not override 64-bit setting made in X86TargetMachine().
- if (DefRelocModel == Reloc::Default &&
- (!Subtarget.isTargetDarwin() || !Subtarget.is64Bit())) {
- setRelocationModel(Reloc::Static);
- Subtarget.setPICStyle(PICStyles::None);
- }
-
-
PM.add(createX86JITCodeEmitterPass(*this, JCE));
return false;
}
-
-void X86TargetMachine::setCodeModelForStatic() {
-
- if (getCodeModel() != CodeModel::Default) return;
-
- // For static codegen, if we're not already set, use Small codegen.
- setCodeModel(CodeModel::Small);
-}
-
-
-void X86TargetMachine::setCodeModelForJIT() {
-
- if (getCodeModel() != CodeModel::Default) return;
-
- // 64-bit JIT places everything in the same buffer except external functions.
- if (Subtarget.is64Bit())
- setCodeModel(CodeModel::Large);
- else
- setCodeModel(CodeModel::Small);
-}
diff --git a/lib/Target/X86/X86TargetMachine.h b/lib/Target/X86/X86TargetMachine.h
index 885334a..d1569aa 100644
--- a/lib/Target/X86/X86TargetMachine.h
+++ b/lib/Target/X86/X86TargetMachine.h
@@ -29,21 +29,17 @@
namespace llvm {
class formatted_raw_ostream;
+class StringRef;
class X86TargetMachine : public LLVMTargetMachine {
X86Subtarget Subtarget;
X86FrameLowering FrameLowering;
X86ELFWriterInfo ELFWriterInfo;
- Reloc::Model DefRelocModel; // Reloc model before it's overridden.
-private:
- // We have specific defaults for X86.
- virtual void setCodeModelForJIT();
- virtual void setCodeModelForStatic();
-
public:
- X86TargetMachine(const Target &T, const std::string &TT,
- const std::string &CPU, const std::string &FS,
+ X86TargetMachine(const Target &T, StringRef TT,
+ StringRef CPU, StringRef FS,
+ Reloc::Model RM, CodeModel::Model CM,
bool is64Bit);
virtual const X86InstrInfo *getInstrInfo() const {
@@ -87,8 +83,9 @@ class X86_32TargetMachine : public X86TargetMachine {
X86TargetLowering TLInfo;
X86JITInfo JITInfo;
public:
- X86_32TargetMachine(const Target &T, const std::string &M,
- const std::string &CPU, const std::string &FS);
+ X86_32TargetMachine(const Target &T, StringRef TT,
+ StringRef CPU, StringRef FS,
+ Reloc::Model RM, CodeModel::Model CM);
virtual const TargetData *getTargetData() const { return &DataLayout; }
virtual const X86TargetLowering *getTargetLowering() const {
return &TLInfo;
@@ -113,8 +110,9 @@ class X86_64TargetMachine : public X86TargetMachine {
X86TargetLowering TLInfo;
X86JITInfo JITInfo;
public:
- X86_64TargetMachine(const Target &T, const std::string &TT,
- const std::string &CPU, const std::string &FS);
+ X86_64TargetMachine(const Target &T, StringRef TT,
+ StringRef CPU, StringRef FS,
+ Reloc::Model RM, CodeModel::Model CM);
virtual const TargetData *getTargetData() const { return &DataLayout; }
virtual const X86TargetLowering *getTargetLowering() const {
return &TLInfo;
diff --git a/lib/Target/X86/X86TargetObjectFile.cpp b/lib/Target/X86/X86TargetObjectFile.cpp
index 1231798..991f322 100644
--- a/lib/Target/X86/X86TargetObjectFile.cpp
+++ b/lib/Target/X86/X86TargetObjectFile.cpp
@@ -43,79 +43,3 @@ getCFIPersonalitySymbol(const GlobalValue *GV, Mangler *Mang,
MachineModuleInfo *MMI) const {
return Mang->getSymbol(GV);
}
-
-unsigned X8632_ELFTargetObjectFile::getPersonalityEncoding() const {
- if (TM.getRelocationModel() == Reloc::PIC_)
- return DW_EH_PE_indirect | DW_EH_PE_pcrel | DW_EH_PE_sdata4;
- else
- return DW_EH_PE_absptr;
-}
-
-unsigned X8632_ELFTargetObjectFile::getLSDAEncoding() const {
- if (TM.getRelocationModel() == Reloc::PIC_)
- return DW_EH_PE_pcrel | DW_EH_PE_sdata4;
- else
- return DW_EH_PE_absptr;
-}
-
-unsigned X8632_ELFTargetObjectFile::getFDEEncoding(bool FDE) const {
- if (TM.getRelocationModel() == Reloc::PIC_)
- return DW_EH_PE_pcrel | DW_EH_PE_sdata4;
- else
- return DW_EH_PE_absptr;
-}
-
-unsigned X8632_ELFTargetObjectFile::getTTypeEncoding() const {
- if (TM.getRelocationModel() == Reloc::PIC_)
- return DW_EH_PE_indirect | DW_EH_PE_pcrel | DW_EH_PE_sdata4;
- else
- return DW_EH_PE_absptr;
-}
-
-unsigned X8664_ELFTargetObjectFile::getPersonalityEncoding() const {
- CodeModel::Model Model = TM.getCodeModel();
- if (TM.getRelocationModel() == Reloc::PIC_)
- return DW_EH_PE_indirect | DW_EH_PE_pcrel | (Model == CodeModel::Small ||
- Model == CodeModel::Medium ?
- DW_EH_PE_sdata4 : DW_EH_PE_sdata8);
-
- if (Model == CodeModel::Small || Model == CodeModel::Medium)
- return DW_EH_PE_udata4;
-
- return DW_EH_PE_absptr;
-}
-
-unsigned X8664_ELFTargetObjectFile::getLSDAEncoding() const {
- CodeModel::Model Model = TM.getCodeModel();
- if (TM.getRelocationModel() == Reloc::PIC_)
- return DW_EH_PE_pcrel | (Model == CodeModel::Small ?
- DW_EH_PE_sdata4 : DW_EH_PE_sdata8);
-
- if (Model == CodeModel::Small)
- return DW_EH_PE_udata4;
-
- return DW_EH_PE_absptr;
-}
-
-unsigned X8664_ELFTargetObjectFile::getFDEEncoding(bool CFI) const {
- if (CFI)
- return DW_EH_PE_pcrel | DW_EH_PE_sdata4;
-
- if (TM.getRelocationModel() == Reloc::PIC_)
- return DW_EH_PE_pcrel | DW_EH_PE_sdata4;
-
- return DW_EH_PE_udata4;
-}
-
-unsigned X8664_ELFTargetObjectFile::getTTypeEncoding() const {
- CodeModel::Model Model = TM.getCodeModel();
- if (TM.getRelocationModel() == Reloc::PIC_)
- return DW_EH_PE_indirect | DW_EH_PE_pcrel | (Model == CodeModel::Small ||
- Model == CodeModel::Medium ?
- DW_EH_PE_sdata4 : DW_EH_PE_sdata8);
-
- if (Model == CodeModel::Small)
- return DW_EH_PE_udata4;
-
- return DW_EH_PE_absptr;
-}
diff --git a/lib/Target/X86/X86TargetObjectFile.h b/lib/Target/X86/X86TargetObjectFile.h
index e21b5bf..d7adf27 100644
--- a/lib/Target/X86/X86TargetObjectFile.h
+++ b/lib/Target/X86/X86TargetObjectFile.h
@@ -33,28 +33,6 @@ namespace llvm {
MachineModuleInfo *MMI) const;
};
- class X8632_ELFTargetObjectFile : public TargetLoweringObjectFileELF {
- const X86TargetMachine &TM;
- public:
- X8632_ELFTargetObjectFile(const X86TargetMachine &tm)
- :TM(tm) { }
- virtual unsigned getPersonalityEncoding() const;
- virtual unsigned getLSDAEncoding() const;
- virtual unsigned getFDEEncoding(bool CFI) const;
- virtual unsigned getTTypeEncoding() const;
- };
-
- class X8664_ELFTargetObjectFile : public TargetLoweringObjectFileELF {
- const X86TargetMachine &TM;
- public:
- X8664_ELFTargetObjectFile(const X86TargetMachine &tm)
- :TM(tm) { }
- virtual unsigned getPersonalityEncoding() const;
- virtual unsigned getLSDAEncoding() const;
- virtual unsigned getFDEEncoding(bool CFI) const;
- virtual unsigned getTTypeEncoding() const;
- };
-
} // end namespace llvm
#endif
diff --git a/lib/Target/X86/X86VZeroUpper.cpp b/lib/Target/X86/X86VZeroUpper.cpp
new file mode 100644
index 0000000..3958494
--- /dev/null
+++ b/lib/Target/X86/X86VZeroUpper.cpp
@@ -0,0 +1,105 @@
+//===-- X86VZeroUpper.cpp - AVX vzeroupper instruction inserter -----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the pass which inserts x86 AVX vzeroupper instructions
+// before calls to SSE-encoded functions. This avoids the transition latency
+// penalty when transferring control between AVX-encoded instructions and
+// legacy SSE encoding mode.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "x86-codegen"
+#include "X86.h"
+#include "X86InstrInfo.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/GlobalValue.h"
+#include "llvm/Target/TargetInstrInfo.h"
+using namespace llvm;
+
+STATISTIC(NumVZU, "Number of vzeroupper instructions inserted");
+
+namespace {
+ struct VZeroUpperInserter : public MachineFunctionPass {
+ static char ID;
+ VZeroUpperInserter() : MachineFunctionPass(ID) {}
+
+ virtual bool runOnMachineFunction(MachineFunction &MF);
+
+ bool processBasicBlock(MachineFunction &MF, MachineBasicBlock &MBB);
+
+ virtual const char *getPassName() const { return "X86 vzeroupper inserter";}
+
+ private:
+ const TargetInstrInfo *TII; // Machine instruction info.
+ MachineBasicBlock *MBB; // Current basic block
+ };
+ char VZeroUpperInserter::ID = 0;
+}
+
+FunctionPass *llvm::createX86IssueVZeroUpperPass() {
+ return new VZeroUpperInserter();
+}
+
+/// runOnMachineFunction - Loop over all of the basic blocks, inserting
+/// vzeroupper instructions before function calls.
+bool VZeroUpperInserter::runOnMachineFunction(MachineFunction &MF) {
+ TII = MF.getTarget().getInstrInfo();
+ bool Changed = false;
+
+ // Process every basic block in the function.
+ for (MachineFunction::iterator BB = MF.begin(), E = MF.end(); BB != E; ++BB)
+ Changed |= processBasicBlock(MF, *BB);
+
+ return Changed;
+}
+
+static bool isCallToModuleFn(const MachineInstr *MI) {
+ assert(MI->getDesc().isCall() && "Isn't a call instruction");
+
+ for (int i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+
+ if (!MO.isGlobal())
+ continue;
+
+ const GlobalValue *GV = MO.getGlobal();
+ GlobalValue::LinkageTypes LT = GV->getLinkage();
+ if (GV->isInternalLinkage(LT) || GV->isPrivateLinkage(LT) ||
+ (GV->isExternalLinkage(LT) && !GV->isDeclaration()))
+ return true;
+
+ return false;
+ }
+ return false;
+}
+
+/// processBasicBlock - Loop over all of the instructions in the basic block,
+/// inserting vzeroupper instructions before function calls.
+bool VZeroUpperInserter::processBasicBlock(MachineFunction &MF,
+ MachineBasicBlock &BB) {
+ bool Changed = false;
+ MBB = &BB;
+
+ for (MachineBasicBlock::iterator I = BB.begin(); I != BB.end(); ++I) {
+ MachineInstr *MI = I;
+ DebugLoc dl = I->getDebugLoc();
+
+ // Insert a vzeroupper instruction before each control transfer
+ // to functions outside this module
+ if (MI->getDesc().isCall() && !isCallToModuleFn(MI)) {
+ BuildMI(*MBB, I, dl, TII->get(X86::VZEROUPPER));
+ ++NumVZU;
+ }
+ }
+
+ return Changed;
+}
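To make the pass concrete, a hypothetical source-level scenario (the function is illustrative, not from the patch): AVX code dirties the upper ymm state and then calls out of the module, which is exactly where processBasicBlock above inserts a vzeroupper. With this patch the pass is opt-in, enabled via the -x86-use-vzeroupper flag registered in X86TargetMachine.cpp.

    // If the loop vectorizes to 256-bit AVX ops, the upper ymm halves are
    // dirty at the call. sinf() may live in an SSE-compiled libm, so a
    // vzeroupper before the call avoids the AVX<->SSE transition stall.
    extern "C" float sinf(float);   // external: not a "module" function

    void axpy_then_call(float *out, const float *a, const float *b, float *s) {
      for (int i = 0; i < 256; ++i)
        out[i] = a[i] + b[i];
      *s = sinf(out[0]);            // vzeroupper inserted before this call
    }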
diff --git a/lib/Target/XCore/CMakeLists.txt b/lib/Target/XCore/CMakeLists.txt
index a1d73c6..3dc51e1 100644
--- a/lib/Target/XCore/CMakeLists.txt
+++ b/lib/Target/XCore/CMakeLists.txt
@@ -1,11 +1,12 @@
set(LLVM_TARGET_DEFINITIONS XCore.td)
-tablegen(XCoreGenRegisterInfo.inc -gen-register-info)
-tablegen(XCoreGenInstrInfo.inc -gen-instr-info)
-tablegen(XCoreGenAsmWriter.inc -gen-asm-writer)
-tablegen(XCoreGenDAGISel.inc -gen-dag-isel)
-tablegen(XCoreGenCallingConv.inc -gen-callingconv)
-tablegen(XCoreGenSubtargetInfo.inc -gen-subtarget)
+llvm_tablegen(XCoreGenRegisterInfo.inc -gen-register-info)
+llvm_tablegen(XCoreGenInstrInfo.inc -gen-instr-info)
+llvm_tablegen(XCoreGenAsmWriter.inc -gen-asm-writer)
+llvm_tablegen(XCoreGenDAGISel.inc -gen-dag-isel)
+llvm_tablegen(XCoreGenCallingConv.inc -gen-callingconv)
+llvm_tablegen(XCoreGenSubtargetInfo.inc -gen-subtarget)
+add_public_tablegen_target(XCoreCommonTableGen)
add_llvm_target(XCoreCodeGen
XCoreAsmPrinter.cpp
@@ -20,5 +21,17 @@ add_llvm_target(XCoreCodeGen
XCoreSelectionDAGInfo.cpp
)
+add_llvm_library_dependencies(LLVMXCoreCodeGen
+ LLVMAsmPrinter
+ LLVMCodeGen
+ LLVMCore
+ LLVMMC
+ LLVMSelectionDAG
+ LLVMSupport
+ LLVMTarget
+ LLVMXCoreDesc
+ LLVMXCoreInfo
+ )
+
add_subdirectory(TargetInfo)
add_subdirectory(MCTargetDesc)
diff --git a/lib/Target/XCore/MCTargetDesc/CMakeLists.txt b/lib/Target/XCore/MCTargetDesc/CMakeLists.txt
index c3b3dc9..269822d 100644
--- a/lib/Target/XCore/MCTargetDesc/CMakeLists.txt
+++ b/lib/Target/XCore/MCTargetDesc/CMakeLists.txt
@@ -3,5 +3,12 @@ add_llvm_library(LLVMXCoreDesc
XCoreMCAsmInfo.cpp
)
+add_llvm_library_dependencies(LLVMXCoreDesc
+ LLVMMC
+ LLVMXCoreInfo
+ )
+
+add_dependencies(LLVMXCoreDesc XCoreCommonTableGen)
+
# Hack: we need to include 'main' target directory to grab private headers
include_directories(${CMAKE_CURRENT_SOURCE_DIR}/.. ${CMAKE_CURRENT_BINARY_DIR}/..)
diff --git a/lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.cpp b/lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.cpp
index 939d97c..276e841 100644
--- a/lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.cpp
+++ b/lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.cpp
@@ -13,10 +13,11 @@
#include "XCoreMCTargetDesc.h"
#include "XCoreMCAsmInfo.h"
+#include "llvm/MC/MCCodeGenInfo.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSubtargetInfo.h"
-#include "llvm/Target/TargetRegistry.h"
+#include "llvm/Support/TargetRegistry.h"
#define GET_INSTRINFO_MC_DESC
#include "XCoreGenInstrInfo.inc"
@@ -35,8 +36,10 @@ static MCInstrInfo *createXCoreMCInstrInfo() {
return X;
}
-extern "C" void LLVMInitializeXCoreMCInstrInfo() {
- TargetRegistry::RegisterMCInstrInfo(TheXCoreTarget, createXCoreMCInstrInfo);
+static MCRegisterInfo *createXCoreMCRegisterInfo(StringRef TT) {
+ MCRegisterInfo *X = new MCRegisterInfo();
+ InitXCoreMCRegisterInfo(X, XCore::LR);
+ return X;
}
static MCSubtargetInfo *createXCoreMCSubtargetInfo(StringRef TT, StringRef CPU,
@@ -46,11 +49,40 @@ static MCSubtargetInfo *createXCoreMCSubtargetInfo(StringRef TT, StringRef CPU,
return X;
}
-extern "C" void LLVMInitializeXCoreMCSubtargetInfo() {
- TargetRegistry::RegisterMCSubtargetInfo(TheXCoreTarget,
- createXCoreMCSubtargetInfo);
+static MCAsmInfo *createXCoreMCAsmInfo(const Target &T, StringRef TT) {
+ MCAsmInfo *MAI = new XCoreMCAsmInfo(T, TT);
+
+ // Initial state of the frame pointer is SP.
+ MachineLocation Dst(MachineLocation::VirtualFP);
+ MachineLocation Src(XCore::SP, 0);
+ MAI->addInitialFrameState(0, Dst, Src);
+
+ return MAI;
+}
+
+static MCCodeGenInfo *createXCoreMCCodeGenInfo(StringRef TT, Reloc::Model RM,
+ CodeModel::Model CM) {
+ MCCodeGenInfo *X = new MCCodeGenInfo();
+ X->InitMCCodeGenInfo(RM, CM);
+ return X;
}
-extern "C" void LLVMInitializeXCoreMCAsmInfo() {
- RegisterMCAsmInfo<XCoreMCAsmInfo> X(TheXCoreTarget);
+// Force static initialization.
+extern "C" void LLVMInitializeXCoreTargetMC() {
+ // Register the MC asm info.
+ RegisterMCAsmInfoFn X(TheXCoreTarget, createXCoreMCAsmInfo);
+
+ // Register the MC codegen info.
+ TargetRegistry::RegisterMCCodeGenInfo(TheXCoreTarget,
+ createXCoreMCCodeGenInfo);
+
+ // Register the MC instruction info.
+ TargetRegistry::RegisterMCInstrInfo(TheXCoreTarget, createXCoreMCInstrInfo);
+
+ // Register the MC register info.
+ TargetRegistry::RegisterMCRegInfo(TheXCoreTarget, createXCoreMCRegisterInfo);
+
+ // Register the MC subtarget info.
+ TargetRegistry::RegisterMCSubtargetInfo(TheXCoreTarget,
+ createXCoreMCSubtargetInfo);
}
diff --git a/lib/Target/XCore/TargetInfo/CMakeLists.txt b/lib/Target/XCore/TargetInfo/CMakeLists.txt
index c147b8a..7f84f69 100644
--- a/lib/Target/XCore/TargetInfo/CMakeLists.txt
+++ b/lib/Target/XCore/TargetInfo/CMakeLists.txt
@@ -4,4 +4,10 @@ add_llvm_library(LLVMXCoreInfo
XCoreTargetInfo.cpp
)
-add_dependencies(LLVMXCoreInfo XCoreCodeGenTable_gen)
+add_llvm_library_dependencies(LLVMXCoreInfo
+ LLVMMC
+ LLVMSupport
+ LLVMTarget
+ )
+
+add_dependencies(LLVMXCoreInfo XCoreCommonTableGen)
diff --git a/lib/Target/XCore/TargetInfo/XCoreTargetInfo.cpp b/lib/Target/XCore/TargetInfo/XCoreTargetInfo.cpp
index 7aa8965..9a0971d 100644
--- a/lib/Target/XCore/TargetInfo/XCoreTargetInfo.cpp
+++ b/lib/Target/XCore/TargetInfo/XCoreTargetInfo.cpp
@@ -9,7 +9,7 @@
#include "XCore.h"
#include "llvm/Module.h"
-#include "llvm/Target/TargetRegistry.h"
+#include "llvm/Support/TargetRegistry.h"
using namespace llvm;
Target llvm::TheXCoreTarget;
diff --git a/lib/Target/XCore/XCoreAsmPrinter.cpp b/lib/Target/XCore/XCoreAsmPrinter.cpp
index 1a43714..8906b24 100644
--- a/lib/Target/XCore/XCoreAsmPrinter.cpp
+++ b/lib/Target/XCore/XCoreAsmPrinter.cpp
@@ -20,6 +20,7 @@
#include "llvm/Constants.h"
#include "llvm/DerivedTypes.h"
#include "llvm/Module.h"
+#include "llvm/Analysis/DebugInfo.h"
#include "llvm/CodeGen/AsmPrinter.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
@@ -32,11 +33,11 @@
#include "llvm/Target/Mangler.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Target/TargetLoweringObjectFile.h"
-#include "llvm/Target/TargetRegistry.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <cctype>
@@ -51,6 +52,7 @@ static cl::opt<unsigned> MaxThreads("xcore-max-threads", cl::Optional,
namespace {
class XCoreAsmPrinter : public AsmPrinter {
const XCoreSubtarget &Subtarget;
+ void PrintDebugValueComment(const MachineInstr *MI, raw_ostream &OS);
public:
explicit XCoreAsmPrinter(TargetMachine &TM, MCStreamer &Streamer)
: AsmPrinter(TM, Streamer), Subtarget(TM.getSubtarget<XCoreSubtarget>()){}
@@ -79,6 +81,7 @@ namespace {
void EmitFunctionEntryLabel();
void EmitInstruction(const MachineInstr *MI);
void EmitFunctionBodyEnd();
+ virtual MachineLocation getDebugValueLocation(const MachineInstr *MI) const;
};
} // end of anonymous namespace
@@ -88,7 +91,7 @@ void XCoreAsmPrinter::emitArrayBound(MCSymbol *Sym, const GlobalVariable *GV) {
assert(((GV->hasExternalLinkage() ||
GV->hasWeakLinkage()) ||
GV->hasLinkOnceLinkage()) && "Unexpected linkage");
- if (const ArrayType *ATy = dyn_cast<ArrayType>(
+ if (ArrayType *ATy = dyn_cast<ArrayType>(
cast<PointerType>(GV->getType())->getElementType())) {
OutStreamer.EmitSymbolAttribute(Sym, MCSA_Global);
// FIXME: MCStreamerize.
@@ -261,16 +264,57 @@ bool XCoreAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
return false;
}
+void XCoreAsmPrinter::PrintDebugValueComment(const MachineInstr *MI,
+ raw_ostream &OS) {
+ unsigned NOps = MI->getNumOperands();
+ assert(NOps == 4);
+ OS << '\t' << MAI->getCommentString() << "DEBUG_VALUE: ";
+ // cast away const; DIetc do not take const operands for some reason.
+ DIVariable V(const_cast<MDNode *>(MI->getOperand(NOps-1).getMetadata()));
+ OS << V.getName();
+ OS << " <- ";
+ // Frame address. Currently handles register +- offset only.
+ assert(MI->getOperand(0).isReg() && MI->getOperand(1).isImm());
+ OS << '['; printOperand(MI, 0, OS); OS << '+'; printOperand(MI, 1, OS);
+ OS << ']';
+ OS << "+";
+ printOperand(MI, NOps-2, OS);
+}
+
+MachineLocation XCoreAsmPrinter::
+getDebugValueLocation(const MachineInstr *MI) const {
+ // Handles frame addresses emitted in XCoreInstrInfo::emitFrameIndexDebugValue.
+ assert(MI->getNumOperands() == 4 && "Invalid no. of machine operands!");
+ assert(MI->getOperand(0).isReg() && MI->getOperand(1).isImm() &&
+ "Unexpected MachineOperand types");
+ return MachineLocation(MI->getOperand(0).getReg(),
+ MI->getOperand(1).getImm());
+}
+
void XCoreAsmPrinter::EmitInstruction(const MachineInstr *MI) {
SmallString<128> Str;
raw_svector_ostream O(Str);
- // Check for mov mnemonic
- if (MI->getOpcode() == XCore::ADD_2rus && !MI->getOperand(2).getImm())
- O << "\tmov " << getRegisterName(MI->getOperand(0).getReg()) << ", "
- << getRegisterName(MI->getOperand(1).getReg());
- else
- printInstruction(MI, O);
+ switch (MI->getOpcode()) {
+ case XCore::DBG_VALUE: {
+ if (isVerbose() && OutStreamer.hasRawTextSupport()) {
+ SmallString<128> TmpStr;
+ raw_svector_ostream OS(TmpStr);
+ PrintDebugValueComment(MI, OS);
+ OutStreamer.EmitRawText(StringRef(OS.str()));
+ }
+ return;
+ }
+ case XCore::ADD_2rus:
+ if (MI->getOperand(2).getImm() == 0) {
+ O << "\tmov " << getRegisterName(MI->getOperand(0).getReg()) << ", "
+ << getRegisterName(MI->getOperand(1).getReg());
+ OutStreamer.EmitRawText(O.str());
+ return;
+ }
+ break;
+ }
+ printInstruction(MI, O);
OutStreamer.EmitRawText(O.str());
}
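Both XCore debug hooks above assume the four-operand DBG_VALUE built by XCoreInstrInfo::emitFrameIndexDebugValue later in this patch. A sketch of that layout, with hypothetical names:

    // Operand layout of the frame-index DBG_VALUE as built in
    // XCoreInstrInfo::emitFrameIndexDebugValue (enum is illustrative):
    enum XCoreDbgValueOperand {
      DV_Addr   = 0, // frame index; a register after frame-index elimination
      DV_Zero   = 1, // immediate 0, selecting the reg+offset form
      DV_Offset = 2, // byte offset from the base
      DV_Var    = 3  // MDNode for the source variable (wrapped as DIVariable)
    };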
diff --git a/lib/Target/XCore/XCoreFrameLowering.cpp b/lib/Target/XCore/XCoreFrameLowering.cpp
index 0578220..7f8b169 100644
--- a/lib/Target/XCore/XCoreFrameLowering.cpp
+++ b/lib/Target/XCore/XCoreFrameLowering.cpp
@@ -100,7 +100,8 @@ void XCoreFrameLowering::emitPrologue(MachineFunction &MF) const {
DebugLoc dl = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc();
bool FP = hasFP(MF);
- bool Nested = MF.getFunction()->getAttributes().hasAttrSomewhere(Attribute::Nest);
+ bool Nested = MF.getFunction()->
+ getAttributes().hasAttrSomewhere(Attribute::Nest);
if (Nested) {
loadFromStack(MBB, MBBI, XCore::R11, 0, dl, TII);
@@ -270,14 +271,6 @@ void XCoreFrameLowering::emitEpilogue(MachineFunction &MF,
}
}
-void XCoreFrameLowering::getInitialFrameState(std::vector<MachineMove> &Moves)
- const {
- // Initial state of the frame pointer is SP.
- MachineLocation Dst(MachineLocation::VirtualFP);
- MachineLocation Src(XCore::SP, 0);
- Moves.push_back(MachineMove(0, Dst, Src));
-}
-
bool XCoreFrameLowering::spillCalleeSavedRegisters(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI,
const std::vector<CalleeSavedInfo> &CSI,
diff --git a/lib/Target/XCore/XCoreFrameLowering.h b/lib/Target/XCore/XCoreFrameLowering.h
index 7da19f0..c591e93 100644
--- a/lib/Target/XCore/XCoreFrameLowering.h
+++ b/lib/Target/XCore/XCoreFrameLowering.h
@@ -42,8 +42,6 @@ namespace llvm {
bool hasFP(const MachineFunction &MF) const;
- void getInitialFrameState(std::vector<MachineMove> &Moves) const;
-
void processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
RegScavenger *RS = NULL) const;
diff --git a/lib/Target/XCore/XCoreISelDAGToDAG.cpp b/lib/Target/XCore/XCoreISelDAGToDAG.cpp
index a8dd847..4dac1ce 100644
--- a/lib/Target/XCore/XCoreISelDAGToDAG.cpp
+++ b/lib/Target/XCore/XCoreISelDAGToDAG.cpp
@@ -169,9 +169,14 @@ SDNode *XCoreDAGToDAGISel::Select(SDNode *N) {
CurDAG->getTargetConstantPool(ConstantInt::get(
Type::getInt32Ty(*CurDAG->getContext()), Val),
TLI.getPointerTy());
- return CurDAG->getMachineNode(XCore::LDWCP_lru6, dl, MVT::i32,
- MVT::Other, CPIdx,
- CurDAG->getEntryNode());
+ SDNode *node = CurDAG->getMachineNode(XCore::LDWCP_lru6, dl, MVT::i32,
+ MVT::Other, CPIdx,
+ CurDAG->getEntryNode());
+ MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
+ MemOp[0] = MF->getMachineMemOperand(
+ MachinePointerInfo::getConstantPool(), MachineMemOperand::MOLoad, 4, 4);
+ cast<MachineSDNode>(node)->setMemRefs(MemOp, MemOp + 1);
+ return node;
}
break;
}
diff --git a/lib/Target/XCore/XCoreISelLowering.cpp b/lib/Target/XCore/XCoreISelLowering.cpp
index 6d040e0..2afe0e3 100644
--- a/lib/Target/XCore/XCoreISelLowering.cpp
+++ b/lib/Target/XCore/XCoreISelLowering.cpp
@@ -81,6 +81,7 @@ XCoreTargetLowering::XCoreTargetLowering(XCoreTargetMachine &XTM)
// Use i32 for setcc operations results (slt, sgt, ...).
setBooleanContents(ZeroOrOneBooleanContent);
+ setBooleanVectorContents(ZeroOrOneBooleanContent); // FIXME: Is this correct?
// XCore does not have the NodeTypes below.
setOperationAction(ISD::BR_CC, MVT::Other, Expand);
@@ -147,7 +148,8 @@ XCoreTargetLowering::XCoreTargetLowering(XCoreTargetMachine &XTM)
setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Expand);
// TRAMPOLINE is custom lowered.
- setOperationAction(ISD::TRAMPOLINE, MVT::Other, Custom);
+ setOperationAction(ISD::INIT_TRAMPOLINE, MVT::Other, Custom);
+ setOperationAction(ISD::ADJUST_TRAMPOLINE, MVT::Other, Custom);
maxStoresPerMemset = maxStoresPerMemsetOptSize = 4;
maxStoresPerMemmove = maxStoresPerMemmoveOptSize
@@ -180,7 +182,8 @@ LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::ADD:
case ISD::SUB: return ExpandADDSUB(Op.getNode(), DAG);
case ISD::FRAMEADDR: return LowerFRAMEADDR(Op, DAG);
- case ISD::TRAMPOLINE: return LowerTRAMPOLINE(Op, DAG);
+ case ISD::INIT_TRAMPOLINE: return LowerINIT_TRAMPOLINE(Op, DAG);
+ case ISD::ADJUST_TRAMPOLINE: return LowerADJUST_TRAMPOLINE(Op, DAG);
default:
llvm_unreachable("unimplemented operand");
return SDValue();
@@ -252,8 +255,8 @@ static inline SDValue BuildGetId(SelectionDAG &DAG, DebugLoc dl) {
DAG.getConstant(Intrinsic::xcore_getid, MVT::i32));
}
-static inline bool isZeroLengthArray(const Type *Ty) {
- const ArrayType *AT = dyn_cast_or_null<ArrayType>(Ty);
+static inline bool isZeroLengthArray(Type *Ty) {
+ ArrayType *AT = dyn_cast_or_null<ArrayType>(Ty);
return AT && (AT->getNumElements() == 0);
}
@@ -275,7 +278,7 @@ LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const
llvm_unreachable("Thread local object not a GlobalVariable?");
return SDValue();
}
- const Type *Ty = cast<PointerType>(GV->getType())->getElementType();
+ Type *Ty = cast<PointerType>(GV->getType())->getElementType();
if (!Ty->isSized() || isZeroLengthArray(Ty)) {
#ifndef NDEBUG
errs() << "Size of thread local object " << GVar->getName()
@@ -465,7 +468,7 @@ LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
}
// Lower to a call to __misaligned_load(BasePtr).
- const Type *IntPtrTy = getTargetData()->getIntPtrType(*DAG.getContext());
+ Type *IntPtrTy = getTargetData()->getIntPtrType(*DAG.getContext());
TargetLowering::ArgListTy Args;
TargetLowering::ArgListEntry Entry;
@@ -524,7 +527,7 @@ LowerSTORE(SDValue Op, SelectionDAG &DAG) const
}
// Lower to a call to __misaligned_store(BasePtr, Value).
- const Type *IntPtrTy = getTargetData()->getIntPtrType(*DAG.getContext());
+ Type *IntPtrTy = getTargetData()->getIntPtrType(*DAG.getContext());
TargetLowering::ArgListTy Args;
TargetLowering::ArgListEntry Entry;
@@ -789,7 +792,12 @@ SDValue XCoreTargetLowering::LowerFRAMEADDR(SDValue Op,
}
SDValue XCoreTargetLowering::
-LowerTRAMPOLINE(SDValue Op, SelectionDAG &DAG) const {
+LowerADJUST_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const {
+ return Op.getOperand(0);
+}
+
+SDValue XCoreTargetLowering::
+LowerINIT_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const {
SDValue Chain = Op.getOperand(0);
SDValue Trmp = Op.getOperand(1); // trampoline
SDValue FPtr = Op.getOperand(2); // nested function
@@ -841,9 +849,7 @@ LowerTRAMPOLINE(SDValue Op, SelectionDAG &DAG) const {
MachinePointerInfo(TrmpAddr, 16), false, false,
0);
- SDValue Ops[] =
- { Trmp, DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OutChains, 5) };
- return DAG.getMergeValues(Ops, 2, dl);
+ return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OutChains, 5);
}
//===----------------------------------------------------------------------===//
@@ -1148,10 +1154,10 @@ XCoreTargetLowering::LowerCCCArguments(SDValue Chain,
int offset = 0;
// Save remaining registers, storing higher register numbers at a higher
// address
- for (unsigned i = array_lengthof(ArgRegs) - 1; i >= FirstVAReg; --i) {
+ for (int i = array_lengthof(ArgRegs) - 1; i >= (int)FirstVAReg; --i) {
// Create a stack slot
int FI = MFI->CreateFixedObject(4, offset, true);
- if (i == FirstVAReg) {
+ if (i == (int)FirstVAReg) {
XFI->setVarArgsFrameIndex(FI);
}
offset -= StackSlotSize;
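The signedness change above guards against unsigned wrap-around: when FirstVAReg is 0, the old loop condition could never become false. A minimal reproduction of the bug pattern (illustrative only):

    // With unsigned i, "i >= 0" is a tautology: once i reaches 0, --i
    // wraps to UINT_MAX and the loop never terminates. Using int, as the
    // fix does, makes the descending bound check meaningful.
    for (unsigned i = 3; i >= 0u; --i) { /* never terminates */ }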
@@ -1409,7 +1415,8 @@ SDValue XCoreTargetLowering::PerformDAGCombine(SDNode *N,
// operands are constant canonicalize smallest to RHS.
if ((N0C && !N1C) ||
(N0C && N1C && N0C->getZExtValue() < N1C->getZExtValue()))
- return DAG.getNode(XCoreISD::LMUL, dl, DAG.getVTList(VT, VT), N1, N0, N2, N3);
+ return DAG.getNode(XCoreISD::LMUL, dl, DAG.getVTList(VT, VT),
+ N1, N0, N2, N3);
// lmul(x, 0, a, b)
if (N1C && N1C->isNullValue()) {
@@ -1548,7 +1555,7 @@ static inline bool isImmUs4(int64_t val)
/// by AM is legal for this target, for a load/store of the specified type.
bool
XCoreTargetLowering::isLegalAddressingMode(const AddrMode &AM,
- const Type *Ty) const {
+ Type *Ty) const {
if (Ty->getTypeID() == Type::VoidTyID)
return AM.Scale == 0 && isImmUs(AM.BaseOffs) && isImmUs4(AM.BaseOffs);
diff --git a/lib/Target/XCore/XCoreISelLowering.h b/lib/Target/XCore/XCoreISelLowering.h
index 9c803be..d6c5b32 100644
--- a/lib/Target/XCore/XCoreISelLowering.h
+++ b/lib/Target/XCore/XCoreISelLowering.h
@@ -101,7 +101,7 @@ namespace llvm {
MachineBasicBlock *MBB) const;
virtual bool isLegalAddressingMode(const AddrMode &AM,
- const Type *Ty) const;
+ Type *Ty) const;
private:
const XCoreTargetMachine &TM;
@@ -145,7 +145,8 @@ namespace llvm {
SDValue LowerUMUL_LOHI(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerSMUL_LOHI(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerTRAMPOLINE(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerINIT_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerADJUST_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const;
// Inline asm support
std::pair<unsigned, const TargetRegisterClass*>
diff --git a/lib/Target/XCore/XCoreInstrInfo.cpp b/lib/Target/XCore/XCoreInstrInfo.cpp
index f90481f..a0946a1 100644
--- a/lib/Target/XCore/XCoreInstrInfo.cpp
+++ b/lib/Target/XCore/XCoreInstrInfo.cpp
@@ -17,11 +17,10 @@
#include "llvm/MC/MCContext.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
-#include "llvm/CodeGen/MachineLocation.h"
-#include "llvm/Target/TargetRegistry.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/TargetRegistry.h"
#define GET_INSTRINFO_CTOR
#include "XCoreGenInstrInfo.inc"
@@ -387,6 +386,15 @@ void XCoreInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
.addImm(0);
}
+MachineInstr*
+XCoreInstrInfo::emitFrameIndexDebugValue(MachineFunction &MF, int FrameIx,
+ uint64_t Offset, const MDNode *MDPtr,
+ DebugLoc DL) const {
+ MachineInstrBuilder MIB = BuildMI(MF, DL, get(XCore::DBG_VALUE))
+ .addFrameIndex(FrameIx).addImm(0).addImm(Offset).addMetadata(MDPtr);
+ return &*MIB;
+}
+
/// ReverseBranchCondition - Return the inverse opcode of the
/// specified Branch instruction.
bool XCoreInstrInfo::
diff --git a/lib/Target/XCore/XCoreInstrInfo.h b/lib/Target/XCore/XCoreInstrInfo.h
index 840b1e1..d354802 100644
--- a/lib/Target/XCore/XCoreInstrInfo.h
+++ b/lib/Target/XCore/XCoreInstrInfo.h
@@ -78,6 +78,11 @@ public:
const TargetRegisterClass *RC,
const TargetRegisterInfo *TRI) const;
+ virtual MachineInstr *emitFrameIndexDebugValue(MachineFunction &MF,
+ int FrameIx,
+ uint64_t Offset,
+ const MDNode *MDPtr,
+ DebugLoc DL) const;
virtual bool ReverseBranchCondition(
SmallVectorImpl<MachineOperand> &Cond) const;
diff --git a/lib/Target/XCore/XCoreInstrInfo.td b/lib/Target/XCore/XCoreInstrInfo.td
index 55c7527..4d2e93b 100644
--- a/lib/Target/XCore/XCoreInstrInfo.td
+++ b/lib/Target/XCore/XCoreInstrInfo.td
@@ -572,7 +572,7 @@ def STWDP_lru6 : _FLRU6<(outs), (ins GRRegs:$val, MEMii:$addr),
[(store GRRegs:$val, ADDRdpii:$addr)]>;
//let Uses = [CP] in ..
-let mayLoad = 1, isReMaterializable = 1 in
+let mayLoad = 1, isReMaterializable = 1, neverHasSideEffects = 1 in
defm LDWCP : FRU6_LRU6_cp<"ldw">;
let Uses = [SP] in {
@@ -739,7 +739,7 @@ def LDAP_lu10_ba : _FLU10<(outs),
let isCall=1,
// All calls clobber the link register and the non-callee-saved registers:
-Defs = [R0, R1, R2, R3, R11, LR] in {
+Defs = [R0, R1, R2, R3, R11, LR], Uses = [SP] in {
def BL_u10 : _FU10<
(outs),
(ins calltarget:$target, variable_ops),
@@ -754,7 +754,7 @@ def BL_lu10 : _FLU10<
}
// Two operand short
-// TODO eet, eef, testwct, tsetmr, sext (reg), zext (reg)
+// TODO eet, eef, tsetmr
def NOT : _F2R<(outs GRRegs:$dst), (ins GRRegs:$b),
"not $dst, $b",
[(set GRRegs:$dst, (not GRRegs:$b))]>;
@@ -764,15 +764,25 @@ def NEG : _F2R<(outs GRRegs:$dst), (ins GRRegs:$b),
[(set GRRegs:$dst, (ineg GRRegs:$b))]>;
let Constraints = "$src1 = $dst" in {
-let neverHasSideEffects = 1 in
def SEXT_rus : _FRUS<(outs GRRegs:$dst), (ins GRRegs:$src1, i32imm:$src2),
- "sext $dst, $src2",
- []>;
+ "sext $dst, $src2",
+ [(set GRRegs:$dst, (int_xcore_sext GRRegs:$src1,
+ immBitp:$src2))]>;
+
+def SEXT_2r : _FRUS<(outs GRRegs:$dst), (ins GRRegs:$src1, GRRegs:$src2),
+ "sext $dst, $src2",
+ [(set GRRegs:$dst, (int_xcore_sext GRRegs:$src1,
+ GRRegs:$src2))]>;
-let neverHasSideEffects = 1 in
def ZEXT_rus : _FRUS<(outs GRRegs:$dst), (ins GRRegs:$src1, i32imm:$src2),
- "zext $dst, $src2",
- []>;
+ "zext $dst, $src2",
+ [(set GRRegs:$dst, (int_xcore_zext GRRegs:$src1,
+ immBitp:$src2))]>;
+
+def ZEXT_2r : _FRUS<(outs GRRegs:$dst), (ins GRRegs:$src1, GRRegs:$src2),
+ "zext $dst, $src2",
+ [(set GRRegs:$dst, (int_xcore_zext GRRegs:$src1,
+ GRRegs:$src2))]>;
def ANDNOT_2r : _F2R<(outs GRRegs:$dst), (ins GRRegs:$src1, GRRegs:$src2),
"andnot $dst, $src2",
@@ -819,7 +829,8 @@ def OUT_2r : _F2R<(outs), (ins GRRegs:$r, GRRegs:$val),
let Constraints = "$src = $dst" in
def OUTSHR_2r : _F2R<(outs GRRegs:$dst), (ins GRRegs:$r, GRRegs:$src),
"outshr res[$r], $src",
- [(set GRRegs:$dst, (int_xcore_outshr GRRegs:$r, GRRegs:$src))]>;
+ [(set GRRegs:$dst, (int_xcore_outshr GRRegs:$r,
+ GRRegs:$src))]>;
def INCT_2r : _F2R<(outs GRRegs:$dst), (ins GRRegs:$r),
"inct $dst, res[$r]",
@@ -836,7 +847,8 @@ def IN_2r : _F2R<(outs GRRegs:$dst), (ins GRRegs:$r),
let Constraints = "$src = $dst" in
def INSHR_2r : _F2R<(outs GRRegs:$dst), (ins GRRegs:$r, GRRegs:$src),
"inshr $dst, res[$r]",
- [(set GRRegs:$dst, (int_xcore_inshr GRRegs:$r, GRRegs:$src))]>;
+ [(set GRRegs:$dst, (int_xcore_inshr GRRegs:$r,
+ GRRegs:$src))]>;
def CHKCT_2r : _F2R<(outs), (ins GRRegs:$r, GRRegs:$val),
"chkct res[$r], $val",
@@ -846,6 +858,14 @@ def CHKCT_rus : _F2R<(outs), (ins GRRegs:$r, i32imm:$val),
"chkct res[$r], $val",
[(int_xcore_chkct GRRegs:$r, immUs:$val)]>;
+def TESTCT_2r : _F2R<(outs GRRegs:$dst), (ins GRRegs:$src),
+ "testct $dst, res[$src]",
+ [(set GRRegs:$dst, (int_xcore_testct GRRegs:$src))]>;
+
+def TESTWCT_2r : _F2R<(outs GRRegs:$dst), (ins GRRegs:$src),
+ "testwct $dst, res[$src]",
+ [(set GRRegs:$dst, (int_xcore_testwct GRRegs:$src))]>;
+
def SETD_2r : _F2R<(outs), (ins GRRegs:$r, GRRegs:$val),
"setd res[$r], $val",
[(int_xcore_setd GRRegs:$r, GRRegs:$val)]>;
@@ -871,7 +891,6 @@ def INITDP_2r : _F2R<(outs), (ins GRRegs:$t, GRRegs:$src),
[(int_xcore_initdp GRRegs:$t, GRRegs:$src)]>;
// Two operand long
-// TODO endin, peek,
// getd, testlcl
def BITREV_l2r : _FL2R<(outs GRRegs:$dst), (ins GRRegs:$src),
"bitrev $dst, $src",
@@ -917,6 +936,14 @@ def SETPSC_l2r : _FL2R<(outs), (ins GRRegs:$src1, GRRegs:$src2),
"setpsc res[$src1], $src2",
[(int_xcore_setpsc GRRegs:$src1, GRRegs:$src2)]>;
+def PEEK_l2r : _FL2R<(outs GRRegs:$dst), (ins GRRegs:$src),
+ "peek $dst, res[$src]",
+ [(set GRRegs:$dst, (int_xcore_peek GRRegs:$src))]>;
+
+def ENDIN_l2r : _FL2R<(outs GRRegs:$dst), (ins GRRegs:$src),
+ "endin $dst, res[$src]",
+ [(set GRRegs:$dst, (int_xcore_endin GRRegs:$src))]>;
+
// One operand short
// TODO edu, eeu, waitet, waitef, tstart, clrtp
// setdp, setcp, setev, kcall
@@ -960,7 +987,7 @@ def ECALLF_1r : _F1R<(outs), (ins GRRegs:$src),
let isCall=1,
// All calls clobber the link register and the non-callee-saved registers:
-Defs = [R0, R1, R2, R3, R11, LR] in {
+Defs = [R0, R1, R2, R3, R11, LR], Uses = [SP] in {
def BLA_1r : _F1R<(outs), (ins GRRegs:$addr, variable_ops),
"bla $addr",
[(XCoreBranchLink GRRegs:$addr)]>;
@@ -974,10 +1001,15 @@ def FREER_1r : _F1R<(outs), (ins GRRegs:$r),
"freer res[$r]",
[(int_xcore_freer GRRegs:$r)]>;
-let Uses=[R11] in
+let Uses=[R11] in {
def SETV_1r : _F1R<(outs), (ins GRRegs:$r),
- "setv res[$r], r11",
- [(int_xcore_setv GRRegs:$r, R11)]>;
+ "setv res[$r], r11",
+ [(int_xcore_setv GRRegs:$r, R11)]>;
+
+def SETEV_1r : _F1R<(outs), (ins GRRegs:$r),
+ "setev res[$r], r11",
+ [(int_xcore_setev GRRegs:$r, R11)]>;
+}
def EEU_1r : _F1R<(outs), (ins GRRegs:$r),
"eeu res[$r]",
@@ -985,15 +1017,24 @@ def EEU_1r : _F1R<(outs), (ins GRRegs:$r),
// Zero operand short
// TODO freet, ldspc, stspc, ldssr, stssr, ldsed, stsed,
-// stet, geted, getet, getkep, getksp, setkep, getid, kret, dcall, dret,
+// stet, getkep, getksp, setkep, getid, kret, dcall, dret,
// dentsp, drestsp
def CLRE_0R : _F0R<(outs), (ins), "clre", [(int_xcore_clre)]>;
-let Defs = [R11] in
+let Defs = [R11] in {
def GETID_0R : _F0R<(outs), (ins),
- "get r11, id",
- [(set R11, (int_xcore_getid))]>;
+ "get r11, id",
+ [(set R11, (int_xcore_getid))]>;
+
+def GETED_0R : _F0R<(outs), (ins),
+ "get r11, ed",
+ [(set R11, (int_xcore_geted))]>;
+
+def GETET_0R : _F0R<(outs), (ins),
+ "get r11, et",
+ [(set R11, (int_xcore_getet))]>;
+}
def SSYNC_0r : _F0R<(outs), (ins),
"ssync",
diff --git a/lib/Target/XCore/XCoreRegisterInfo.cpp b/lib/Target/XCore/XCoreRegisterInfo.cpp
index 357a4a0..1b78b37 100644
--- a/lib/Target/XCore/XCoreRegisterInfo.cpp
+++ b/lib/Target/XCore/XCoreRegisterInfo.cpp
@@ -17,7 +17,6 @@
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
-#include "llvm/CodeGen/MachineLocation.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RegisterScavenging.h"
@@ -39,7 +38,7 @@
using namespace llvm;
XCoreRegisterInfo::XCoreRegisterInfo(const TargetInstrInfo &tii)
- : XCoreGenRegisterInfo(), TII(tii) {
+ : XCoreGenRegisterInfo(XCore::LR), TII(tii) {
}
// helper functions
@@ -321,20 +320,8 @@ loadConstant(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
BuildMI(MBB, I, dl, TII.get(Opcode), DstReg).addImm(Value);
}
-int XCoreRegisterInfo::getDwarfRegNum(unsigned RegNum, bool isEH) const {
- return XCoreGenRegisterInfo::getDwarfRegNumFull(RegNum, 0);
-}
-
-int XCoreRegisterInfo::getLLVMRegNum(unsigned DwarfRegNo, bool isEH) const {
- return XCoreGenRegisterInfo::getLLVMRegNumFull(DwarfRegNo,0);
-}
-
unsigned XCoreRegisterInfo::getFrameRegister(const MachineFunction &MF) const {
const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
return TFI->hasFP(MF) ? XCore::R10 : XCore::SP;
}
-
-unsigned XCoreRegisterInfo::getRARegister() const {
- return XCore::LR;
-}
diff --git a/lib/Target/XCore/XCoreRegisterInfo.h b/lib/Target/XCore/XCoreRegisterInfo.h
index 801d9eb..5c28f39 100644
--- a/lib/Target/XCore/XCoreRegisterInfo.h
+++ b/lib/Target/XCore/XCoreRegisterInfo.h
@@ -60,7 +60,6 @@ public:
int SPAdj, RegScavenger *RS = NULL) const;
// Debug information queries.
- unsigned getRARegister() const;
unsigned getFrameRegister(const MachineFunction &MF) const;
//! Return the array of argument passing registers
@@ -74,10 +73,6 @@ public:
//! Return whether to emit frame moves
static bool needsFrameMoves(const MachineFunction &MF);
-
- //! Get DWARF debugging register number
- int getDwarfRegNum(unsigned RegNum, bool isEH) const;
- int getLLVMRegNum(unsigned RegNum, bool isEH) const;
};
} // end namespace llvm
diff --git a/lib/Target/XCore/XCoreSubtarget.cpp b/lib/Target/XCore/XCoreSubtarget.cpp
index ad069bf..b4e9927 100644
--- a/lib/Target/XCore/XCoreSubtarget.cpp
+++ b/lib/Target/XCore/XCoreSubtarget.cpp
@@ -13,7 +13,7 @@
#include "XCoreSubtarget.h"
#include "XCore.h"
-#include "llvm/Target/TargetRegistry.h"
+#include "llvm/Support/TargetRegistry.h"
#define GET_SUBTARGETINFO_TARGET_DESC
#define GET_SUBTARGETINFO_CTOR
diff --git a/lib/Target/XCore/XCoreTargetMachine.cpp b/lib/Target/XCore/XCoreTargetMachine.cpp
index 342966a..fdc5d35 100644
--- a/lib/Target/XCore/XCoreTargetMachine.cpp
+++ b/lib/Target/XCore/XCoreTargetMachine.cpp
@@ -14,15 +14,15 @@
#include "XCore.h"
#include "llvm/Module.h"
#include "llvm/PassManager.h"
-#include "llvm/Target/TargetRegistry.h"
+#include "llvm/Support/TargetRegistry.h"
using namespace llvm;
/// XCoreTargetMachine ctor - Create an ILP32 architecture model
///
-XCoreTargetMachine::XCoreTargetMachine(const Target &T, const std::string &TT,
- const std::string &CPU,
- const std::string &FS)
- : LLVMTargetMachine(T, TT, CPU, FS),
+XCoreTargetMachine::XCoreTargetMachine(const Target &T, StringRef TT,
+ StringRef CPU, StringRef FS,
+ Reloc::Model RM, CodeModel::Model CM)
+ : LLVMTargetMachine(T, TT, CPU, FS, RM, CM),
Subtarget(TT, CPU, FS),
DataLayout("e-p:32:32:32-a0:0:32-f32:32:32-f64:32:32-i1:8:32-i8:8:32-"
"i16:16:32-i32:32:32-i64:32:32-n32"),
diff --git a/lib/Target/XCore/XCoreTargetMachine.h b/lib/Target/XCore/XCoreTargetMachine.h
index 6235ac3..83d09d6d 100644
--- a/lib/Target/XCore/XCoreTargetMachine.h
+++ b/lib/Target/XCore/XCoreTargetMachine.h
@@ -32,8 +32,9 @@ class XCoreTargetMachine : public LLVMTargetMachine {
XCoreTargetLowering TLInfo;
XCoreSelectionDAGInfo TSInfo;
public:
- XCoreTargetMachine(const Target &T, const std::string &TT,
- const std::string &CPU, const std::string &FS);
+ XCoreTargetMachine(const Target &T, StringRef TT,
+ StringRef CPU, StringRef FS,
+ Reloc::Model RM, CodeModel::Model CM);
virtual const XCoreInstrInfo *getInstrInfo() const { return &InstrInfo; }
virtual const XCoreFrameLowering *getFrameLowering() const {
diff --git a/lib/Transforms/IPO/ArgumentPromotion.cpp b/lib/Transforms/IPO/ArgumentPromotion.cpp
index fa007cf..e160f63 100644
--- a/lib/Transforms/IPO/ArgumentPromotion.cpp
+++ b/lib/Transforms/IPO/ArgumentPromotion.cpp
@@ -155,12 +155,12 @@ CallGraphNode *ArgPromotion::PromoteArguments(CallGraphNode *CGN) {
for (unsigned i = 0; i != PointerArgs.size(); ++i) {
bool isByVal = F->paramHasAttr(PointerArgs[i].second+1, Attribute::ByVal);
Argument *PtrArg = PointerArgs[i].first;
- const Type *AgTy = cast<PointerType>(PtrArg->getType())->getElementType();
+ Type *AgTy = cast<PointerType>(PtrArg->getType())->getElementType();
// If this is a byval argument, and if the aggregate type is small, just
// pass the elements, which is always safe.
if (isByVal) {
- if (const StructType *STy = dyn_cast<StructType>(AgTy)) {
+ if (StructType *STy = dyn_cast<StructType>(AgTy)) {
if (maxElements > 0 && STy->getNumElements() > maxElements) {
DEBUG(dbgs() << "argpromotion disable promoting argument '"
<< PtrArg->getName() << "' because it would require adding more"
@@ -190,7 +190,7 @@ CallGraphNode *ArgPromotion::PromoteArguments(CallGraphNode *CGN) {
// If the argument is a recursive type and we're in a recursive
// function, we could end up infinitely peeling the function argument.
if (isSelfRecursive) {
- if (const StructType *STy = dyn_cast<StructType>(AgTy)) {
+ if (StructType *STy = dyn_cast<StructType>(AgTy)) {
bool RecursiveType = false;
for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
if (STy->getElementType(i) == PtrArg->getType()) {
@@ -382,7 +382,8 @@ bool ArgPromotion::isSafeToPromoteArgument(Argument *Arg, bool isByVal) const {
User *U = *UI;
Operands.clear();
if (LoadInst *LI = dyn_cast<LoadInst>(U)) {
- if (LI->isVolatile()) return false; // Don't hack volatile loads
+ // Don't hack volatile/atomic loads
+ if (!LI->isSimple()) return false;
Loads.push_back(LI);
// Direct loads are equivalent to a GEP with a zero index and then a load.
Operands.push_back(0);
@@ -410,7 +411,8 @@ bool ArgPromotion::isSafeToPromoteArgument(Argument *Arg, bool isByVal) const {
for (Value::use_iterator UI = GEP->use_begin(), E = GEP->use_end();
UI != E; ++UI)
if (LoadInst *LI = dyn_cast<LoadInst>(*UI)) {
- if (LI->isVolatile()) return false; // Don't hack volatile loads
+ // Don't hack volatile/atomic loads
+ if (!LI->isSimple()) return false;
Loads.push_back(LI);
} else {
// Other uses than load?
@@ -492,7 +494,7 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F,
// Start by computing a new prototype for the function, which is the same as
// the old function, but has modified arguments.
- const FunctionType *FTy = F->getFunctionType();
+ FunctionType *FTy = F->getFunctionType();
std::vector<Type*> Params;
typedef std::set<IndicesVector> ScalarizeTable;
@@ -527,8 +529,8 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F,
++I, ++ArgIndex) {
if (ByValArgsToTransform.count(I)) {
// Simple byval argument? Just add all the struct element types.
- const Type *AgTy = cast<PointerType>(I->getType())->getElementType();
- const StructType *STy = cast<StructType>(AgTy);
+ Type *AgTy = cast<PointerType>(I->getType())->getElementType();
+ StructType *STy = cast<StructType>(AgTy);
for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i)
Params.push_back(STy->getElementType(i));
++NumByValArgsPromoted;
@@ -576,9 +578,7 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F,
for (ScalarizeTable::iterator SI = ArgIndices.begin(),
E = ArgIndices.end(); SI != E; ++SI) {
// not allowed to dereference ->begin() if size() is 0
- Params.push_back(GetElementPtrInst::getIndexedType(I->getType(),
- SI->begin(),
- SI->end()));
+ Params.push_back(GetElementPtrInst::getIndexedType(I->getType(), *SI));
assert(Params.back());
}
@@ -593,7 +593,7 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F,
if (Attributes attrs = PAL.getFnAttributes())
AttributesVec.push_back(AttributeWithIndex::get(~0, attrs));
- const Type *RetTy = FTy->getReturnType();
+ Type *RetTy = FTy->getReturnType();
// Work around LLVM bug PR56: the CWriter cannot emit varargs functions which
// have zero fixed arguments.
@@ -662,13 +662,13 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F,
} else if (ByValArgsToTransform.count(I)) {
// Emit a GEP and load for each element of the struct.
- const Type *AgTy = cast<PointerType>(I->getType())->getElementType();
- const StructType *STy = cast<StructType>(AgTy);
+ Type *AgTy = cast<PointerType>(I->getType())->getElementType();
+ StructType *STy = cast<StructType>(AgTy);
Value *Idxs[2] = {
ConstantInt::get(Type::getInt32Ty(F->getContext()), 0), 0 };
for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
Idxs[1] = ConstantInt::get(Type::getInt32Ty(F->getContext()), i);
- Value *Idx = GetElementPtrInst::Create(*AI, Idxs, Idxs+2,
+ Value *Idx = GetElementPtrInst::Create(*AI, Idxs,
(*AI)->getName()+"."+utostr(i),
Call);
// TODO: Tell AA about the new values?
@@ -686,12 +686,12 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F,
LoadInst *OrigLoad = OriginalLoads[*SI];
if (!SI->empty()) {
Ops.reserve(SI->size());
- const Type *ElTy = V->getType();
+ Type *ElTy = V->getType();
for (IndicesVector::const_iterator II = SI->begin(),
IE = SI->end(); II != IE; ++II) {
// Use i32 to index structs, and i64 for others (pointers/arrays).
// This satisfies GEP constraints.
- const Type *IdxTy = (ElTy->isStructTy() ?
+ Type *IdxTy = (ElTy->isStructTy() ?
Type::getInt32Ty(F->getContext()) :
Type::getInt64Ty(F->getContext()));
Ops.push_back(ConstantInt::get(IdxTy, *II));
@@ -699,8 +699,7 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F,
ElTy = cast<CompositeType>(ElTy)->getTypeAtIndex(*II);
}
// And create a GEP to extract those indices.
- V = GetElementPtrInst::Create(V, Ops.begin(), Ops.end(),
- V->getName()+".idx", Call);
+ V = GetElementPtrInst::Create(V, Ops, V->getName()+".idx", Call);
Ops.clear();
AA.copyValue(OrigLoad->getOperand(0), V);
}
@@ -792,16 +791,16 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F,
Instruction *InsertPt = NF->begin()->begin();
// Just add all the struct element types.
- const Type *AgTy = cast<PointerType>(I->getType())->getElementType();
+ Type *AgTy = cast<PointerType>(I->getType())->getElementType();
Value *TheAlloca = new AllocaInst(AgTy, 0, "", InsertPt);
- const StructType *STy = cast<StructType>(AgTy);
+ StructType *STy = cast<StructType>(AgTy);
Value *Idxs[2] = {
ConstantInt::get(Type::getInt32Ty(F->getContext()), 0), 0 };
for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
Idxs[1] = ConstantInt::get(Type::getInt32Ty(F->getContext()), i);
Value *Idx =
- GetElementPtrInst::Create(TheAlloca, Idxs, Idxs+2,
+ GetElementPtrInst::Create(TheAlloca, Idxs,
TheAlloca->getName()+"."+Twine(i),
InsertPt);
I2->setName(I->getName()+"."+Twine(i));
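
The recurring isVolatile() to isSimple() substitution in this file tracks the new atomic load/store support; roughly, a sketch (helper name assumed):

#include "llvm/Instructions.h"
using namespace llvm;

// Sketch: with atomics in the IR, legality checks that used to reject only
// volatile accesses now require "simple" ones. isSimple() is
// !isVolatile() && !isAtomic(), so atomic loads are excluded as well.
static bool safeToPromote(const LoadInst *LI) {
  return LI->isSimple();
}
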
diff --git a/lib/Transforms/IPO/CMakeLists.txt b/lib/Transforms/IPO/CMakeLists.txt
index 3de7bfc..4d8dbc2 100644
--- a/lib/Transforms/IPO/CMakeLists.txt
+++ b/lib/Transforms/IPO/CMakeLists.txt
@@ -13,10 +13,20 @@ add_llvm_library(LLVMipo
Inliner.cpp
Internalize.cpp
LoopExtractor.cpp
- LowerSetJmp.cpp
MergeFunctions.cpp
PartialInlining.cpp
+ PassManagerBuilder.cpp
PruneEH.cpp
StripDeadPrototypes.cpp
StripSymbols.cpp
)
+
+add_llvm_library_dependencies(LLVMipo
+ LLVMAnalysis
+ LLVMCore
+ LLVMScalarOpts
+ LLVMSupport
+ LLVMTarget
+ LLVMTransformUtils
+ LLVMipa
+ )
diff --git a/lib/Transforms/IPO/ConstantMerge.cpp b/lib/Transforms/IPO/ConstantMerge.cpp
index a21efce..c3ecb7a 100644
--- a/lib/Transforms/IPO/ConstantMerge.cpp
+++ b/lib/Transforms/IPO/ConstantMerge.cpp
@@ -23,7 +23,9 @@
#include "llvm/DerivedTypes.h"
#include "llvm/Module.h"
#include "llvm/Pass.h"
+#include "llvm/Target/TargetData.h"
#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/PointerIntPair.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/Statistic.h"
using namespace llvm;
@@ -37,10 +39,18 @@ namespace {
initializeConstantMergePass(*PassRegistry::getPassRegistry());
}
- // run - For this pass, process all of the globals in the module,
- // eliminating duplicate constants.
- //
+ // For this pass, process all of the globals in the module, eliminating
+ // duplicate constants.
bool runOnModule(Module &M);
+
+ // Return true iff we can determine the alignment of this global variable.
+ bool hasKnownAlignment(GlobalVariable *GV) const;
+
+ // Return the alignment of the global, including converting the default
+ // alignment to a concrete value.
+ unsigned getAlignment(GlobalVariable *GV) const;
+
+ const TargetData *TD;
};
}
@@ -77,15 +87,28 @@ static bool IsBetterCannonical(const GlobalVariable &A,
return A.hasUnnamedAddr();
}
+bool ConstantMerge::hasKnownAlignment(GlobalVariable *GV) const {
+ return TD || GV->getAlignment() != 0;
+}
+
+unsigned ConstantMerge::getAlignment(GlobalVariable *GV) const {
+ if (TD)
+ return TD->getPreferredAlignment(GV);
+ return GV->getAlignment();
+}
+
bool ConstantMerge::runOnModule(Module &M) {
+ TD = getAnalysisIfAvailable<TargetData>();
+
// Find all the globals that are marked "used". These cannot be merged.
SmallPtrSet<const GlobalValue*, 8> UsedGlobals;
FindUsedValues(M.getGlobalVariable("llvm.used"), UsedGlobals);
FindUsedValues(M.getGlobalVariable("llvm.compiler.used"), UsedGlobals);
- // Map unique constant/section pairs to globals. We don't want to merge
- // globals in different sections.
- DenseMap<Constant*, GlobalVariable*> CMap;
+  // Map unique <constant, has-unknown-alignment> pairs to globals. We don't
+ // want to merge globals of unknown alignment with those of explicit
+ // alignment. If we have TargetData, we always know the alignment.
+ DenseMap<PointerIntPair<Constant*, 1, bool>, GlobalVariable*> CMap;
// Replacements - This vector contains a list of replacements to perform.
SmallVector<std::pair<GlobalVariable*, GlobalVariable*>, 32> Replacements;
@@ -120,7 +143,8 @@ bool ConstantMerge::runOnModule(Module &M) {
Constant *Init = GV->getInitializer();
// Check to see if the initializer is already known.
- GlobalVariable *&Slot = CMap[Init];
+ PointerIntPair<Constant*, 1, bool> Pair(Init, hasKnownAlignment(GV));
+ GlobalVariable *&Slot = CMap[Pair];
// If this is the first constant we find or if the old one is local,
// replace with the current one. If the current is externally visible
@@ -152,7 +176,8 @@ bool ConstantMerge::runOnModule(Module &M) {
Constant *Init = GV->getInitializer();
// Check to see if the initializer is already known.
- GlobalVariable *Slot = CMap[Init];
+ PointerIntPair<Constant*, 1, bool> Pair(Init, hasKnownAlignment(GV));
+ GlobalVariable *Slot = CMap[Pair];
if (!Slot || Slot == GV)
continue;
@@ -175,6 +200,14 @@ bool ConstantMerge::runOnModule(Module &M) {
// now. This avoids invalidating the pointers in CMap, which are unneeded
// now.
for (unsigned i = 0, e = Replacements.size(); i != e; ++i) {
+ // Bump the alignment if necessary.
+ if (Replacements[i].first->getAlignment() ||
+ Replacements[i].second->getAlignment()) {
+ Replacements[i].second->setAlignment(std::max(
+ Replacements[i].first->getAlignment(),
+ Replacements[i].second->getAlignment()));
+ }
+
// Eliminate any uses of the dead global.
Replacements[i].first->replaceAllUsesWith(Replacements[i].second);
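
A compact illustration of the new map key (identifier names assumed): PointerIntPair packs the flag into the low, alignment-guaranteed bits of the pointer, so keying on an <initializer, known-alignment> pair costs no extra space.

#include "llvm/Constants.h"
#include "llvm/GlobalVariable.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/PointerIntPair.h"
using namespace llvm;

// Sketch: globals of unknown alignment land in different slots from globals
// with a known alignment, even when their initializers are identical.
typedef PointerIntPair<Constant*, 1, bool> InitAlignKey;

void recordCanonical(DenseMap<InitAlignKey, GlobalVariable*> &CMap,
                     Constant *Init, bool KnownAlign, GlobalVariable *GV) {
  CMap[InitAlignKey(Init, KnownAlign)] = GV;
}
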
diff --git a/lib/Transforms/IPO/DeadArgumentElimination.cpp b/lib/Transforms/IPO/DeadArgumentElimination.cpp
index 1517765..4bb6f7a 100644
--- a/lib/Transforms/IPO/DeadArgumentElimination.cpp
+++ b/lib/Transforms/IPO/DeadArgumentElimination.cpp
@@ -206,7 +206,7 @@ bool DAE::DeleteDeadVarargs(Function &Fn) {
// Start by computing a new prototype for the function, which is the same as
// the old function, but doesn't have isVarArg set.
- const FunctionType *FTy = Fn.getFunctionType();
+ FunctionType *FTy = Fn.getFunctionType();
std::vector<Type*> Params(FTy->param_begin(), FTy->param_end());
FunctionType *NFTy = FunctionType::get(FTy->getReturnType(),
@@ -344,7 +344,7 @@ bool DAE::RemoveDeadArgumentsFromCallers(Function &Fn)
static unsigned NumRetVals(const Function *F) {
if (F->getReturnType()->isVoidTy())
return 0;
- else if (const StructType *STy = dyn_cast<StructType>(F->getReturnType()))
+ else if (StructType *STy = dyn_cast<StructType>(F->getReturnType()))
return STy->getNumElements();
else
return 1;
@@ -491,7 +491,7 @@ void DAE::SurveyFunction(const Function &F) {
// Keep track of the number of live retvals, so we can skip checks once all
// of them turn out to be live.
unsigned NumLiveRetVals = 0;
- const Type *STy = dyn_cast<StructType>(F.getReturnType());
+ Type *STy = dyn_cast<StructType>(F.getReturnType());
// Loop all uses of the function.
for (Value::const_use_iterator I = F.use_begin(), E = F.use_end();
I != E; ++I) {
@@ -646,7 +646,7 @@ bool DAE::RemoveDeadStuffFromFunction(Function *F) {
// Start by computing a new prototype for the function, which is the same as
// the old function, but has fewer arguments and a different return type.
- const FunctionType *FTy = F->getFunctionType();
+ FunctionType *FTy = F->getFunctionType();
std::vector<Type*> Params;
// Set up to build a new list of parameter attributes.
@@ -660,7 +660,7 @@ bool DAE::RemoveDeadStuffFromFunction(Function *F) {
// Find out the new return value.
Type *RetTy = FTy->getReturnType();
- const Type *NRetTy = NULL;
+ Type *NRetTy = NULL;
unsigned RetCount = NumRetVals(F);
// -1 means unused, other numbers are the new index
@@ -669,7 +669,7 @@ bool DAE::RemoveDeadStuffFromFunction(Function *F) {
if (RetTy->isVoidTy()) {
NRetTy = RetTy;
} else {
- const StructType *STy = dyn_cast<StructType>(RetTy);
+ StructType *STy = dyn_cast<StructType>(RetTy);
if (STy)
// Look at each of the original return values individually.
for (unsigned i = 0; i != RetCount; ++i) {
diff --git a/lib/Transforms/IPO/FunctionAttrs.cpp b/lib/Transforms/IPO/FunctionAttrs.cpp
index 95decec..0edf342 100644
--- a/lib/Transforms/IPO/FunctionAttrs.cpp
+++ b/lib/Transforms/IPO/FunctionAttrs.cpp
@@ -163,14 +163,14 @@ bool FunctionAttrs::AddReadAttrs(const CallGraphSCC &SCC) {
ReadsMemory = true;
continue;
} else if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
- // Ignore non-volatile loads from local memory.
+ // Ignore non-volatile loads from local memory. (Atomic is okay here.)
if (!LI->isVolatile()) {
AliasAnalysis::Location Loc = AA->getLocation(LI);
if (AA->pointsToConstantMemory(Loc, /*OrLocal=*/true))
continue;
}
} else if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
- // Ignore non-volatile stores to local memory.
+ // Ignore non-volatile stores to local memory. (Atomic is okay here.)
if (!SI->isVolatile()) {
AliasAnalysis::Location Loc = AA->getLocation(SI);
if (AA->pointsToConstantMemory(Loc, /*OrLocal=*/true))
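
The "(Atomic is okay here.)" remarks record a deliberate asymmetry; approximately (helper name assumed):

#include "llvm/Instructions.h"
#include "llvm/Analysis/AliasAnalysis.h"
using namespace llvm;

// Sketch: for readonly/readnone inference only volatility is disqualifying;
// an atomic but non-volatile load of constant or provably-local memory still
// does not count as reading externally visible state.
static bool countsAsRead(AliasAnalysis &AA, LoadInst *LI) {
  if (LI->isVolatile())
    return true;
  AliasAnalysis::Location Loc = AA.getLocation(LI);
  return !AA.pointsToConstantMemory(Loc, /*OrLocal=*/true);
}
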
diff --git a/lib/Transforms/IPO/GlobalOpt.cpp b/lib/Transforms/IPO/GlobalOpt.cpp
index 4ac721d..3552d03 100644
--- a/lib/Transforms/IPO/GlobalOpt.cpp
+++ b/lib/Transforms/IPO/GlobalOpt.cpp
@@ -195,12 +195,14 @@ static bool AnalyzeGlobal(const Value *V, GlobalStatus &GS,
}
if (const LoadInst *LI = dyn_cast<LoadInst>(I)) {
GS.isLoaded = true;
- if (LI->isVolatile()) return true; // Don't hack on volatile loads.
+ // Don't hack on volatile/atomic loads.
+ if (!LI->isSimple()) return true;
} else if (const StoreInst *SI = dyn_cast<StoreInst>(I)) {
// Don't allow a store OF the address, only stores TO the address.
if (SI->getOperand(0) == V) return true;
- if (SI->isVolatile()) return true; // Don't hack on volatile stores.
+ // Don't hack on volatile/atomic stores.
+ if (!SI->isSimple()) return true;
// If this is a direct store to the global (i.e., the global is a scalar
// value, not an aggregate), keep more specific information about
@@ -281,18 +283,18 @@ static Constant *getAggregateConstantElement(Constant *Agg, Constant *Idx) {
} else if (ConstantVector *CP = dyn_cast<ConstantVector>(Agg)) {
if (IdxV < CP->getNumOperands()) return CP->getOperand(IdxV);
} else if (isa<ConstantAggregateZero>(Agg)) {
- if (const StructType *STy = dyn_cast<StructType>(Agg->getType())) {
+ if (StructType *STy = dyn_cast<StructType>(Agg->getType())) {
if (IdxV < STy->getNumElements())
return Constant::getNullValue(STy->getElementType(IdxV));
- } else if (const SequentialType *STy =
+ } else if (SequentialType *STy =
dyn_cast<SequentialType>(Agg->getType())) {
return Constant::getNullValue(STy->getElementType());
}
} else if (isa<UndefValue>(Agg)) {
- if (const StructType *STy = dyn_cast<StructType>(Agg->getType())) {
+ if (StructType *STy = dyn_cast<StructType>(Agg->getType())) {
if (IdxV < STy->getNumElements())
return UndefValue::get(STy->getElementType(IdxV));
- } else if (const SequentialType *STy =
+ } else if (SequentialType *STy =
dyn_cast<SequentialType>(Agg->getType())) {
return UndefValue::get(STy->getElementType());
}
@@ -430,7 +432,7 @@ static bool IsUserOfGlobalSafeForSRA(User *U, GlobalValue *GV) {
++GEPI; // Skip over the pointer index.
// If this is a use of an array allocation, do a bit more checking for sanity.
- if (const ArrayType *AT = dyn_cast<ArrayType>(*GEPI)) {
+ if (ArrayType *AT = dyn_cast<ArrayType>(*GEPI)) {
uint64_t NumElements = AT->getNumElements();
ConstantInt *Idx = cast<ConstantInt>(U->getOperand(2));
@@ -451,9 +453,9 @@ static bool IsUserOfGlobalSafeForSRA(User *U, GlobalValue *GV) {
GEPI != E;
++GEPI) {
uint64_t NumElements;
- if (const ArrayType *SubArrayTy = dyn_cast<ArrayType>(*GEPI))
+ if (ArrayType *SubArrayTy = dyn_cast<ArrayType>(*GEPI))
NumElements = SubArrayTy->getNumElements();
- else if (const VectorType *SubVectorTy = dyn_cast<VectorType>(*GEPI))
+ else if (VectorType *SubVectorTy = dyn_cast<VectorType>(*GEPI))
NumElements = SubVectorTy->getNumElements();
else {
assert((*GEPI)->isStructTy() &&
@@ -498,7 +500,7 @@ static GlobalVariable *SRAGlobal(GlobalVariable *GV, const TargetData &TD) {
assert(GV->hasLocalLinkage() && !GV->isConstant());
Constant *Init = GV->getInitializer();
- const Type *Ty = Init->getType();
+ Type *Ty = Init->getType();
std::vector<GlobalVariable*> NewGlobals;
Module::GlobalListType &Globals = GV->getParent()->getGlobalList();
@@ -508,7 +510,7 @@ static GlobalVariable *SRAGlobal(GlobalVariable *GV, const TargetData &TD) {
if (StartAlignment == 0)
StartAlignment = TD.getABITypeAlignment(GV->getType());
- if (const StructType *STy = dyn_cast<StructType>(Ty)) {
+ if (StructType *STy = dyn_cast<StructType>(Ty)) {
NewGlobals.reserve(STy->getNumElements());
const StructLayout &Layout = *TD.getStructLayout(STy);
for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
@@ -531,9 +533,9 @@ static GlobalVariable *SRAGlobal(GlobalVariable *GV, const TargetData &TD) {
if (NewAlign > TD.getABITypeAlignment(STy->getElementType(i)))
NGV->setAlignment(NewAlign);
}
- } else if (const SequentialType *STy = dyn_cast<SequentialType>(Ty)) {
+ } else if (SequentialType *STy = dyn_cast<SequentialType>(Ty)) {
unsigned NumElements = 0;
- if (const ArrayType *ATy = dyn_cast<ArrayType>(STy))
+ if (ArrayType *ATy = dyn_cast<ArrayType>(STy))
NumElements = ATy->getNumElements();
else
NumElements = cast<VectorType>(STy)->getNumElements();
@@ -596,15 +598,14 @@ static GlobalVariable *SRAGlobal(GlobalVariable *GV, const TargetData &TD) {
Idxs.push_back(NullInt);
for (unsigned i = 3, e = CE->getNumOperands(); i != e; ++i)
Idxs.push_back(CE->getOperand(i));
- NewPtr = ConstantExpr::getGetElementPtr(cast<Constant>(NewPtr),
- &Idxs[0], Idxs.size());
+ NewPtr = ConstantExpr::getGetElementPtr(cast<Constant>(NewPtr), Idxs);
} else {
GetElementPtrInst *GEPI = cast<GetElementPtrInst>(GEP);
SmallVector<Value*, 8> Idxs;
Idxs.push_back(NullInt);
for (unsigned i = 3, e = GEPI->getNumOperands(); i != e; ++i)
Idxs.push_back(GEPI->getOperand(i));
- NewPtr = GetElementPtrInst::Create(NewPtr, Idxs.begin(), Idxs.end(),
+ NewPtr = GetElementPtrInst::Create(NewPtr, Idxs,
GEPI->getName()+"."+Twine(Val),GEPI);
}
}
@@ -753,8 +754,7 @@ static bool OptimizeAwayTrappingUsesOfValue(Value *V, Constant *NewV) {
break;
if (Idxs.size() == GEPI->getNumOperands()-1)
Changed |= OptimizeAwayTrappingUsesOfValue(GEPI,
- ConstantExpr::getGetElementPtr(NewV, &Idxs[0],
- Idxs.size()));
+ ConstantExpr::getGetElementPtr(NewV, Idxs));
if (GEPI->use_empty()) {
Changed = true;
GEPI->eraseFromParent();
@@ -846,12 +846,12 @@ static void ConstantPropUsersOf(Value *V) {
/// malloc into a global, and any loads of GV as uses of the new global.
static GlobalVariable *OptimizeGlobalAddressOfMalloc(GlobalVariable *GV,
CallInst *CI,
- const Type *AllocTy,
+ Type *AllocTy,
ConstantInt *NElements,
TargetData* TD) {
DEBUG(errs() << "PROMOTING GLOBAL: " << *GV << " CALL = " << *CI << '\n');
- const Type *GlobalType;
+ Type *GlobalType;
if (NElements->getZExtValue() == 1)
GlobalType = AllocTy;
else
@@ -1192,7 +1192,7 @@ static Value *GetHeapSROAValue(Value *V, unsigned FieldNo,
} else if (PHINode *PN = dyn_cast<PHINode>(V)) {
// PN's type is pointer to struct. Make a new PHI of pointer to struct
// field.
- const StructType *ST =
+ StructType *ST =
cast<StructType>(cast<PointerType>(PN->getType())->getElementType());
PHINode *NewPN =
@@ -1245,8 +1245,7 @@ static void RewriteHeapSROALoadUser(Instruction *LoadUser,
GEPIdx.push_back(GEPI->getOperand(1));
GEPIdx.append(GEPI->op_begin()+3, GEPI->op_end());
- Value *NGEPI = GetElementPtrInst::Create(NewPtr,
- GEPIdx.begin(), GEPIdx.end(),
+ Value *NGEPI = GetElementPtrInst::Create(NewPtr, GEPIdx,
GEPI->getName(), GEPI);
GEPI->replaceAllUsesWith(NGEPI);
GEPI->eraseFromParent();
@@ -1260,11 +1259,9 @@ static void RewriteHeapSROALoadUser(Instruction *LoadUser,
// already been seen first by another load, so its uses have already been
// processed.
PHINode *PN = cast<PHINode>(LoadUser);
- bool Inserted;
- DenseMap<Value*, std::vector<Value*> >::iterator InsertPos;
- tie(InsertPos, Inserted) =
- InsertedScalarizedValues.insert(std::make_pair(PN, std::vector<Value*>()));
- if (!Inserted) return;
+ if (!InsertedScalarizedValues.insert(std::make_pair(PN,
+ std::vector<Value*>())).second)
+ return;
// If this is the first time we've seen this PHI, recursively process all
// users.
@@ -1298,8 +1295,8 @@ static void RewriteUsesOfLoadForHeapSRoA(LoadInst *Load,
static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV, CallInst *CI,
Value* NElems, TargetData *TD) {
DEBUG(dbgs() << "SROA HEAP ALLOC: " << *GV << " MALLOC = " << *CI << '\n');
- const Type* MAT = getMallocAllocatedType(CI);
- const StructType *STy = cast<StructType>(MAT);
+ Type* MAT = getMallocAllocatedType(CI);
+ StructType *STy = cast<StructType>(MAT);
// There is guaranteed to be at least one use of the malloc (storing
// it into GV). If there are other uses, change them to be uses of
@@ -1313,8 +1310,8 @@ static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV, CallInst *CI,
std::vector<Value*> FieldMallocs;
for (unsigned FieldNo = 0, e = STy->getNumElements(); FieldNo != e;++FieldNo){
- const Type *FieldTy = STy->getElementType(FieldNo);
- const PointerType *PFieldTy = PointerType::getUnqual(FieldTy);
+ Type *FieldTy = STy->getElementType(FieldNo);
+ PointerType *PFieldTy = PointerType::getUnqual(FieldTy);
GlobalVariable *NGV =
new GlobalVariable(*GV->getParent(),
@@ -1325,9 +1322,9 @@ static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV, CallInst *CI,
FieldGlobals.push_back(NGV);
unsigned TypeSize = TD->getTypeAllocSize(FieldTy);
- if (const StructType *ST = dyn_cast<StructType>(FieldTy))
+ if (StructType *ST = dyn_cast<StructType>(FieldTy))
TypeSize = TD->getStructLayout(ST)->getSizeInBytes();
- const Type *IntPtrTy = TD->getIntPtrType(CI->getContext());
+ Type *IntPtrTy = TD->getIntPtrType(CI->getContext());
Value *NMI = CallInst::CreateMalloc(CI, IntPtrTy, FieldTy,
ConstantInt::get(IntPtrTy, TypeSize),
NElems, 0,
@@ -1379,8 +1376,7 @@ static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV, CallInst *CI,
for (unsigned i = 0, e = FieldGlobals.size(); i != e; ++i) {
Value *GVVal = new LoadInst(FieldGlobals[i], "tmp", NullPtrBlock);
Value *Cmp = new ICmpInst(*NullPtrBlock, ICmpInst::ICMP_NE, GVVal,
- Constant::getNullValue(GVVal->getType()),
- "tmp");
+ Constant::getNullValue(GVVal->getType()));
BasicBlock *FreeBlock = BasicBlock::Create(Cmp->getContext(), "free_it",
OrigBB->getParent());
BasicBlock *NextBlock = BasicBlock::Create(Cmp->getContext(), "next",
@@ -1428,7 +1424,7 @@ static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV, CallInst *CI,
// Insert a store of null into each global.
for (unsigned i = 0, e = FieldGlobals.size(); i != e; ++i) {
- const PointerType *PT = cast<PointerType>(FieldGlobals[i]->getType());
+ PointerType *PT = cast<PointerType>(FieldGlobals[i]->getType());
Constant *Null = Constant::getNullValue(PT->getElementType());
new StoreInst(Null, FieldGlobals[i], SI);
}
@@ -1485,7 +1481,7 @@ static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV, CallInst *CI,
/// cast of malloc.
static bool TryToOptimizeStoreOfMallocToGlobal(GlobalVariable *GV,
CallInst *CI,
- const Type *AllocTy,
+ Type *AllocTy,
Module::global_iterator &GVI,
TargetData *TD) {
if (!TD)
@@ -1538,10 +1534,10 @@ static bool TryToOptimizeStoreOfMallocToGlobal(GlobalVariable *GV,
// If this is an allocation of a fixed size array of structs, analyze as a
// variable size array. malloc [100 x struct],1 -> malloc struct, 100
if (NElems == ConstantInt::get(CI->getArgOperand(0)->getType(), 1))
- if (const ArrayType *AT = dyn_cast<ArrayType>(AllocTy))
+ if (ArrayType *AT = dyn_cast<ArrayType>(AllocTy))
AllocTy = AT->getElementType();
- const StructType *AllocSTy = dyn_cast<StructType>(AllocTy);
+ StructType *AllocSTy = dyn_cast<StructType>(AllocTy);
if (!AllocSTy)
return false;
@@ -1552,8 +1548,8 @@ static bool TryToOptimizeStoreOfMallocToGlobal(GlobalVariable *GV,
// If this is a fixed size array, transform the Malloc to be an alloc of
// structs. malloc [100 x struct],1 -> malloc struct, 100
- if (const ArrayType *AT = dyn_cast<ArrayType>(getMallocAllocatedType(CI))) {
- const Type *IntPtrTy = TD->getIntPtrType(CI->getContext());
+ if (ArrayType *AT = dyn_cast<ArrayType>(getMallocAllocatedType(CI))) {
+ Type *IntPtrTy = TD->getIntPtrType(CI->getContext());
unsigned TypeSize = TD->getStructLayout(AllocSTy)->getSizeInBytes();
Value *AllocSize = ConstantInt::get(IntPtrTy, TypeSize);
Value *NumElements = ConstantInt::get(IntPtrTy, AT->getNumElements());
@@ -1596,7 +1592,7 @@ static bool OptimizeOnceStoredGlobal(GlobalVariable *GV, Value *StoredOnceVal,
if (OptimizeAwayTrappingUsesOfLoads(GV, SOVC))
return true;
} else if (CallInst *CI = extractMallocCall(StoredOnceVal)) {
- const Type* MallocType = getMallocAllocatedType(CI);
+ Type* MallocType = getMallocAllocatedType(CI);
if (MallocType && TryToOptimizeStoreOfMallocToGlobal(GV, CI, MallocType,
GVI, TD))
return true;
@@ -1611,7 +1607,7 @@ static bool OptimizeOnceStoredGlobal(GlobalVariable *GV, Value *StoredOnceVal,
/// can shrink the global into a boolean and select between the two values
/// whenever it is used. This exposes the values to other scalar optimizations.
static bool TryToShrinkGlobalToBoolean(GlobalVariable *GV, Constant *OtherVal) {
- const Type *GVElType = GV->getType()->getElementType();
+ Type *GVElType = GV->getType()->getElementType();
// If GVElType is already i1, it is already shrunk. If the type of the GV is
// an FP value, pointer or vector, don't do this optimization because a select
@@ -1761,7 +1757,7 @@ bool GlobalOpt::ProcessInternalGlobal(GlobalVariable *GV,
DEBUG(dbgs() << "LOCALIZING GLOBAL: " << *GV);
Instruction& FirstI = const_cast<Instruction&>(*GS.AccessingFunction
->getEntryBlock().begin());
- const Type* ElemTy = GV->getType()->getElementType();
+ Type* ElemTy = GV->getType()->getElementType();
// FIXME: Pass Global's alignment when globals have alignment
AllocaInst* Alloca = new AllocaInst(ElemTy, NULL, GV->getName(), &FirstI);
if (!isa<UndefValue>(GV->getInitializer()))
@@ -2003,7 +1999,7 @@ static GlobalVariable *InstallGlobalCtors(GlobalVariable *GCL,
CSVals[0] = ConstantInt::get(Type::getInt32Ty(GCL->getContext()), 65535);
CSVals[1] = 0;
- const StructType *StructTy =
+ StructType *StructTy =
cast <StructType>(
cast<ArrayType>(GCL->getType()->getElementType())->getElementType());
@@ -2013,9 +2009,9 @@ static GlobalVariable *InstallGlobalCtors(GlobalVariable *GCL,
if (Ctors[i]) {
CSVals[1] = Ctors[i];
} else {
- const Type *FTy = FunctionType::get(Type::getVoidTy(GCL->getContext()),
+ Type *FTy = FunctionType::get(Type::getVoidTy(GCL->getContext()),
false);
- const PointerType *PFTy = PointerType::getUnqual(FTy);
+ PointerType *PFTy = PointerType::getUnqual(FTy);
CSVals[1] = Constant::getNullValue(PFTy);
CSVals[0] = ConstantInt::get(Type::getInt32Ty(GCL->getContext()),
0x7fffffff);
@@ -2196,7 +2192,7 @@ static Constant *EvaluateStoreInto(Constant *Init, Constant *Val,
}
std::vector<Constant*> Elts;
- if (const StructType *STy = dyn_cast<StructType>(Init->getType())) {
+ if (StructType *STy = dyn_cast<StructType>(Init->getType())) {
// Break up the constant into its elements.
if (ConstantStruct *CS = dyn_cast<ConstantStruct>(Init)) {
@@ -2224,10 +2220,10 @@ static Constant *EvaluateStoreInto(Constant *Init, Constant *Val,
}
ConstantInt *CI = cast<ConstantInt>(Addr->getOperand(OpNo));
- const SequentialType *InitTy = cast<SequentialType>(Init->getType());
+ SequentialType *InitTy = cast<SequentialType>(Init->getType());
uint64_t NumElts;
- if (const ArrayType *ATy = dyn_cast<ArrayType>(InitTy))
+ if (ArrayType *ATy = dyn_cast<ArrayType>(InitTy))
NumElts = ATy->getNumElements();
else
NumElts = cast<VectorType>(InitTy)->getNumElements();
@@ -2338,7 +2334,7 @@ static bool EvaluateFunction(Function *F, Constant *&RetVal,
Constant *InstResult = 0;
if (StoreInst *SI = dyn_cast<StoreInst>(CurInst)) {
- if (SI->isVolatile()) return false; // no volatile accesses.
+ if (!SI->isSimple()) return false; // no volatile/atomic accesses.
Constant *Ptr = getVal(Values, SI->getOperand(1));
if (!isSimpleEnoughPointerToCommit(Ptr))
// If this is too complex for us to commit, reject it.
@@ -2358,7 +2354,7 @@ static bool EvaluateFunction(Function *F, Constant *&RetVal,
// stored value.
Ptr = CE->getOperand(0);
- const Type *NewTy=cast<PointerType>(Ptr->getType())->getElementType();
+ Type *NewTy=cast<PointerType>(Ptr->getType())->getElementType();
// In order to push the bitcast onto the stored value, a bitcast
// from NewTy to Val's type must be legal. If it's not, we can try
@@ -2367,14 +2363,14 @@ static bool EvaluateFunction(Function *F, Constant *&RetVal,
// If NewTy is a struct, we can convert the pointer to the struct
// into a pointer to its first member.
// FIXME: This could be extended to support arrays as well.
- if (const StructType *STy = dyn_cast<StructType>(NewTy)) {
+ if (StructType *STy = dyn_cast<StructType>(NewTy)) {
NewTy = STy->getTypeAtIndex(0U);
- const IntegerType *IdxTy =IntegerType::get(NewTy->getContext(), 32);
+ IntegerType *IdxTy =IntegerType::get(NewTy->getContext(), 32);
Constant *IdxZero = ConstantInt::get(IdxTy, 0, false);
Constant * const IdxList[] = {IdxZero, IdxZero};
- Ptr = ConstantExpr::getGetElementPtr(Ptr, IdxList, 2);
+ Ptr = ConstantExpr::getGetElementPtr(Ptr, IdxList);
// If we can't improve the situation by introspecting NewTy,
// we have to give up.
@@ -2411,17 +2407,17 @@ static bool EvaluateFunction(Function *F, Constant *&RetVal,
for (User::op_iterator i = GEP->op_begin() + 1, e = GEP->op_end();
i != e; ++i)
GEPOps.push_back(getVal(Values, *i));
- InstResult = cast<GEPOperator>(GEP)->isInBounds() ?
- ConstantExpr::getInBoundsGetElementPtr(P, &GEPOps[0], GEPOps.size()) :
- ConstantExpr::getGetElementPtr(P, &GEPOps[0], GEPOps.size());
+ InstResult =
+ ConstantExpr::getGetElementPtr(P, GEPOps,
+ cast<GEPOperator>(GEP)->isInBounds());
} else if (LoadInst *LI = dyn_cast<LoadInst>(CurInst)) {
- if (LI->isVolatile()) return false; // no volatile accesses.
+ if (!LI->isSimple()) return false; // no volatile/atomic accesses.
InstResult = ComputeLoadResult(getVal(Values, LI->getOperand(0)),
MutatedMemory);
if (InstResult == 0) return false; // Could not evaluate load.
} else if (AllocaInst *AI = dyn_cast<AllocaInst>(CurInst)) {
if (AI->isArrayAllocation()) return false; // Cannot handle array allocs.
- const Type *Ty = AI->getType()->getElementType();
+ Type *Ty = AI->getType()->getElementType();
AllocaTmps.push_back(new GlobalVariable(Ty, false,
GlobalValue::InternalLinkage,
UndefValue::get(Ty),
@@ -2465,8 +2461,7 @@ static bool EvaluateFunction(Function *F, Constant *&RetVal,
if (Callee->isDeclaration()) {
// If this is a function we can constant fold, do it.
- if (Constant *C = ConstantFoldCall(Callee, Formals.data(),
- Formals.size())) {
+ if (Constant *C = ConstantFoldCall(Callee, Formals)) {
InstResult = C;
} else {
return false;
@@ -2512,7 +2507,7 @@ static bool EvaluateFunction(Function *F, Constant *&RetVal,
CallStack.pop_back(); // return from fn.
return true; // We succeeded at evaluating this ctor!
} else {
- // invoke, unwind, unreachable.
+ // invoke, unwind, resume, unreachable.
return false; // Cannot handle this terminator.
}
@@ -2711,7 +2706,7 @@ static Function *FindCXAAtExit(Module &M) {
if (!Fn)
return 0;
- const FunctionType *FTy = Fn->getFunctionType();
+ FunctionType *FTy = Fn->getFunctionType();
// Checking that the function has the right return type, the right number of
// parameters and that they all have pointer types should be enough.
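
The GEP-related churn in this file is the 3.0 migration from (pointer, count) index arrays to ArrayRef, with in-bounds folded into a flag; a sketch (helper name assumed):

#include "llvm/Constants.h"
using namespace llvm;

// Sketch: one entry point now covers both plain and in-bounds constant GEPs.
static Constant *firstField(Constant *Ptr, Constant *Zero, bool InBounds) {
  Constant *Idxs[] = { Zero, Zero };  // ArrayRef binds directly to C arrays
  return ConstantExpr::getGetElementPtr(Ptr, Idxs, InBounds);
}
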
diff --git a/lib/Transforms/IPO/IPConstantPropagation.cpp b/lib/Transforms/IPO/IPConstantPropagation.cpp
index 25c0134..d757e1f 100644
--- a/lib/Transforms/IPO/IPConstantPropagation.cpp
+++ b/lib/Transforms/IPO/IPConstantPropagation.cpp
@@ -167,7 +167,7 @@ bool IPCP::PropagateConstantReturn(Function &F) {
// Check to see if this function returns a constant.
SmallVector<Value *,4> RetVals;
- const StructType *STy = dyn_cast<StructType>(F.getReturnType());
+ StructType *STy = dyn_cast<StructType>(F.getReturnType());
if (STy)
for (unsigned i = 0, e = STy->getNumElements(); i < e; ++i)
RetVals.push_back(UndefValue::get(STy->getElementType(i)));
diff --git a/lib/Transforms/IPO/IPO.cpp b/lib/Transforms/IPO/IPO.cpp
index 31ce95f..6233922 100644
--- a/lib/Transforms/IPO/IPO.cpp
+++ b/lib/Transforms/IPO/IPO.cpp
@@ -13,6 +13,7 @@
//
//===----------------------------------------------------------------------===//
+#include "llvm-c/Initialization.h"
#include "llvm-c/Transforms/IPO.h"
#include "llvm/InitializePasses.h"
#include "llvm/PassManager.h"
@@ -35,7 +36,6 @@ void llvm::initializeIPO(PassRegistry &Registry) {
initializeLoopExtractorPass(Registry);
initializeBlockExtractorPassPass(Registry);
initializeSingleLoopExtractorPass(Registry);
- initializeLowerSetJmpPass(Registry);
initializeMergeFunctionsPass(Registry);
initializePartialInlinerPass(Registry);
initializePruneEHPass(Registry);
@@ -70,6 +70,10 @@ void LLVMAddFunctionInliningPass(LLVMPassManagerRef PM) {
unwrap(PM)->add(createFunctionInliningPass());
}
+void LLVMAddAlwaysInlinerPass(LLVMPassManagerRef PM) {
+ unwrap(PM)->add(llvm::createAlwaysInlinerPass());
+}
+
void LLVMAddGlobalDCEPass(LLVMPassManagerRef PM) {
unwrap(PM)->add(createGlobalDCEPass());
}
@@ -82,10 +86,6 @@ void LLVMAddIPConstantPropagationPass(LLVMPassManagerRef PM) {
unwrap(PM)->add(createIPConstantPropagationPass());
}
-void LLVMAddLowerSetJmpPass(LLVMPassManagerRef PM) {
- unwrap(PM)->add(createLowerSetJmpPass());
-}
-
void LLVMAddPruneEHPass(LLVMPassManagerRef PM) {
unwrap(PM)->add(createPruneEHPass());
}
@@ -98,11 +98,6 @@ void LLVMAddInternalizePass(LLVMPassManagerRef PM, unsigned AllButMain) {
unwrap(PM)->add(createInternalizePass(AllButMain != 0));
}
-
-void LLVMAddRaiseAllocationsPass(LLVMPassManagerRef PM) {
- // FIXME: Remove in LLVM 3.0.
-}
-
void LLVMAddStripDeadPrototypesPass(LLVMPassManagerRef PM) {
unwrap(PM)->add(createStripDeadPrototypesPass());
}
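
Usage of the freshly exposed C binding, pass manager creation elided (LLVMAddFunctionInliningPass and LLVMAddGlobalDCEPass predate this import):

#include "llvm-c/Transforms/IPO.h"

/* Sketch: the new binding mirrors llvm::createAlwaysInlinerPass(), while the
   removed LLVMAddLowerSetJmpPass/LLVMAddRaiseAllocationsPass simply go away. */
void addIPOPasses(LLVMPassManagerRef PM) {
  LLVMAddAlwaysInlinerPass(PM);
  LLVMAddFunctionInliningPass(PM);
  LLVMAddGlobalDCEPass(PM);
}
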
diff --git a/lib/Transforms/IPO/InlineAlways.cpp b/lib/Transforms/IPO/InlineAlways.cpp
index ce795b7..c0426da 100644
--- a/lib/Transforms/IPO/InlineAlways.cpp
+++ b/lib/Transforms/IPO/InlineAlways.cpp
@@ -23,6 +23,7 @@
#include "llvm/Support/CallSite.h"
#include "llvm/Transforms/IPO.h"
#include "llvm/Transforms/IPO/InlinerPass.h"
+#include "llvm/Target/TargetData.h"
#include "llvm/ADT/SmallPtrSet.h"
using namespace llvm;
@@ -32,10 +33,10 @@ namespace {
// AlwaysInliner only inlines functions that are marked as "always inline".
class AlwaysInliner : public Inliner {
// Functions that are never inlined
- SmallPtrSet<const Function*, 16> NeverInline;
+ SmallPtrSet<const Function*, 16> NeverInline;
InlineCostAnalyzer CA;
public:
- // Use extremely low threshold.
+ // Use extremely low threshold.
AlwaysInliner() : Inliner(ID, -2000000000) {
initializeAlwaysInlinerPass(*PassRegistry::getPassRegistry());
}
@@ -52,8 +53,8 @@ namespace {
void growCachedCostInfo(Function* Caller, Function* Callee) {
CA.growCachedCostInfo(Caller, Callee);
}
- virtual bool doFinalization(CallGraph &CG) {
- return removeDeadFunctions(CG, &NeverInline);
+ virtual bool doFinalization(CallGraph &CG) {
+ return removeDeadFunctions(CG, &NeverInline);
}
virtual bool doInitialization(CallGraph &CG);
void releaseMemory() {
@@ -71,11 +72,13 @@ INITIALIZE_PASS_END(AlwaysInliner, "always-inline",
Pass *llvm::createAlwaysInlinerPass() { return new AlwaysInliner(); }
-// doInitialization - Initializes the vector of functions that have not
+// doInitialization - Initializes the vector of functions that have not
// been annotated with the "always inline" attribute.
bool AlwaysInliner::doInitialization(CallGraph &CG) {
+ CA.setTargetData(getAnalysisIfAvailable<TargetData>());
+
Module &M = CG.getModule();
-
+
for (Module::iterator I = M.begin(), E = M.end();
I != E; ++I)
if (!I->isDeclaration() && !I->hasFnAttr(Attribute::AlwaysInline))
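
Both inliners now seed the cost analyzer the same way; in sketch form (MyInliner is a hypothetical subclass, class context elided):

#include "llvm/Target/TargetData.h"
using namespace llvm;

// Sketch: TargetData is an optional immutable pass;
// getAnalysisIfAvailable<TargetData>() returns null when no data layout is
// registered, and the cost analyzer is expected to cope with that.
bool MyInliner::doInitialization(CallGraph &CG) {
  CA.setTargetData(getAnalysisIfAvailable<TargetData>());
  return false;
}
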
diff --git a/lib/Transforms/IPO/InlineSimple.cpp b/lib/Transforms/IPO/InlineSimple.cpp
index 0c5b3be..84dd4fd 100644
--- a/lib/Transforms/IPO/InlineSimple.cpp
+++ b/lib/Transforms/IPO/InlineSimple.cpp
@@ -22,6 +22,7 @@
#include "llvm/Support/CallSite.h"
#include "llvm/Transforms/IPO.h"
#include "llvm/Transforms/IPO/InlinerPass.h"
+#include "llvm/Target/TargetData.h"
#include "llvm/ADT/SmallPtrSet.h"
using namespace llvm;
@@ -30,7 +31,7 @@ namespace {
class SimpleInliner : public Inliner {
// Functions that are never inlined
- SmallPtrSet<const Function*, 16> NeverInline;
+ SmallPtrSet<const Function*, 16> NeverInline;
InlineCostAnalyzer CA;
public:
SimpleInliner() : Inliner(ID) {
@@ -68,16 +69,17 @@ INITIALIZE_PASS_END(SimpleInliner, "inline",
Pass *llvm::createFunctionInliningPass() { return new SimpleInliner(); }
-Pass *llvm::createFunctionInliningPass(int Threshold) {
+Pass *llvm::createFunctionInliningPass(int Threshold) {
return new SimpleInliner(Threshold);
}
// doInitialization - Initializes the vector of functions that have been
// annotated with the noinline attribute.
bool SimpleInliner::doInitialization(CallGraph &CG) {
-
+ CA.setTargetData(getAnalysisIfAvailable<TargetData>());
+
Module &M = CG.getModule();
-
+
for (Module::iterator I = M.begin(), E = M.end();
I != E; ++I)
if (!I->isDeclaration() && I->hasFnAttr(Attribute::NoInline))
@@ -85,34 +87,34 @@ bool SimpleInliner::doInitialization(CallGraph &CG) {
// Get llvm.noinline
GlobalVariable *GV = M.getNamedGlobal("llvm.noinline");
-
+
if (GV == 0)
return false;
// Don't crash on invalid code
if (!GV->hasDefinitiveInitializer())
return false;
-
+
const ConstantArray *InitList = dyn_cast<ConstantArray>(GV->getInitializer());
-
+
if (InitList == 0)
return false;
// Iterate over each element and add to the NeverInline set
for (unsigned i = 0, e = InitList->getNumOperands(); i != e; ++i) {
-
+
// Get Source
const Constant *Elt = InitList->getOperand(i);
-
+
if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(Elt))
- if (CE->getOpcode() == Instruction::BitCast)
+ if (CE->getOpcode() == Instruction::BitCast)
Elt = CE->getOperand(0);
-
+
// Insert into set of functions to never inline
if (const Function *F = dyn_cast<Function>(Elt))
NeverInline.insert(F);
}
-
+
return false;
}
diff --git a/lib/Transforms/IPO/Inliner.cpp b/lib/Transforms/IPO/Inliner.cpp
index 57f3e77..f00935b 100644
--- a/lib/Transforms/IPO/Inliner.cpp
+++ b/lib/Transforms/IPO/Inliner.cpp
@@ -62,7 +62,7 @@ void Inliner::getAnalysisUsage(AnalysisUsage &Info) const {
}
-typedef DenseMap<const ArrayType*, std::vector<AllocaInst*> >
+typedef DenseMap<ArrayType*, std::vector<AllocaInst*> >
InlinedArrayAllocasTy;
/// InlineCallIfPossible - If it is possible to inline the specified call site,
@@ -139,7 +139,7 @@ static bool InlineCallIfPossible(CallSite CS, InlineFunctionInfo &IFI,
// Don't bother trying to merge array allocations (they will usually be
// canonicalized to be an allocation *of* an array), or allocations whose
// type is not itself an array (because we're afraid of pessimizing SRoA).
- const ArrayType *ATy = dyn_cast<ArrayType>(AI->getAllocatedType());
+ ArrayType *ATy = dyn_cast<ArrayType>(AI->getAllocatedType());
if (ATy == 0 || AI->isArrayAllocation())
continue;
diff --git a/lib/Transforms/IPO/LoopExtractor.cpp b/lib/Transforms/IPO/LoopExtractor.cpp
index 848944d..4f96afe4 100644
--- a/lib/Transforms/IPO/LoopExtractor.cpp
+++ b/lib/Transforms/IPO/LoopExtractor.cpp
@@ -23,6 +23,7 @@
#include "llvm/Analysis/LoopPass.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/FunctionUtils.h"
#include "llvm/ADT/Statistic.h"
#include <fstream>
@@ -53,12 +54,12 @@ namespace {
char LoopExtractor::ID = 0;
INITIALIZE_PASS_BEGIN(LoopExtractor, "loop-extract",
- "Extract loops into new functions", false, false)
+ "Extract loops into new functions", false, false)
INITIALIZE_PASS_DEPENDENCY(BreakCriticalEdges)
INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
INITIALIZE_PASS_DEPENDENCY(DominatorTree)
INITIALIZE_PASS_END(LoopExtractor, "loop-extract",
- "Extract loops into new functions", false, false)
+ "Extract loops into new functions", false, false)
namespace {
/// SingleLoopExtractor - For bugpoint.
@@ -100,9 +101,9 @@ bool LoopExtractor::runOnLoop(Loop *L, LPPassManager &LPM) {
L->getHeader()->getParent()->getEntryBlock().getTerminator();
if (!isa<BranchInst>(EntryTI) ||
!cast<BranchInst>(EntryTI)->isUnconditional() ||
- EntryTI->getSuccessor(0) != L->getHeader())
+ EntryTI->getSuccessor(0) != L->getHeader()) {
ShouldExtractLoop = true;
- else {
+ } else {
// Check to see if any exits from the loop are more than just return
// blocks.
SmallVector<BasicBlock*, 8> ExitBlocks;
@@ -113,6 +114,21 @@ bool LoopExtractor::runOnLoop(Loop *L, LPPassManager &LPM) {
break;
}
}
+
+ if (ShouldExtractLoop) {
+      // We must omit landing pads, since a landing pad must accompany its
+      // invoke instruction. Extracting one here would create a loop in the
+      // extracted function, and an infinite cycle occurs when the extractor
+      // then tries to extract that loop as well.
+ SmallVector<BasicBlock*, 8> ExitBlocks;
+ L->getExitBlocks(ExitBlocks);
+ for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i)
+ if (ExitBlocks[i]->isLandingPad()) {
+ ShouldExtractLoop = false;
+ break;
+ }
+ }
+
if (ShouldExtractLoop) {
if (NumLoops == 0) return Changed;
--NumLoops;
@@ -149,6 +165,7 @@ namespace {
/// BlocksToNotExtract list.
class BlockExtractorPass : public ModulePass {
void LoadFile(const char *Filename);
+ void SplitLandingPadPreds(Function *F);
std::vector<BasicBlock*> BlocksToNotExtract;
std::vector<std::pair<std::string, std::string> > BlocksToNotExtractByName;
@@ -171,8 +188,7 @@ INITIALIZE_PASS(BlockExtractorPass, "extract-blocks",
// createBlockExtractorPass - This pass extracts all blocks (except those
// specified in the argument list) from the functions in the module.
//
-ModulePass *llvm::createBlockExtractorPass()
-{
+ModulePass *llvm::createBlockExtractorPass() {
return new BlockExtractorPass();
}
@@ -194,6 +210,37 @@ void BlockExtractorPass::LoadFile(const char *Filename) {
}
}
+/// SplitLandingPadPreds - The landing pad needs to be extracted with the invoke
+/// instruction. The critical edge breaker will refuse to break critical edges
+/// to a landing pad. So split them here. After this method runs, all landing pads
+/// should have only one predecessor.
+void BlockExtractorPass::SplitLandingPadPreds(Function *F) {
+ for (Function::iterator I = F->begin(), E = F->end(); I != E; ++I) {
+ InvokeInst *II = dyn_cast<InvokeInst>(I);
+ if (!II) continue;
+ BasicBlock *Parent = II->getParent();
+ BasicBlock *LPad = II->getUnwindDest();
+
+ // Look through the landing pad's predecessors. If one of them ends in an
+ // 'invoke', then we want to split the landing pad.
+ bool Split = false;
+ for (pred_iterator
+ PI = pred_begin(LPad), PE = pred_end(LPad); PI != PE; ++PI) {
+ BasicBlock *BB = *PI;
+ if (BB->isLandingPad() && BB != Parent &&
+ isa<InvokeInst>(Parent->getTerminator())) {
+ Split = true;
+ break;
+ }
+ }
+
+ if (!Split) continue;
+
+ SmallVector<BasicBlock*, 2> NewBBs;
+ SplitLandingPadPredecessors(LPad, Parent, ".1", ".2", 0, NewBBs);
+ }
+}
+
bool BlockExtractorPass::runOnModule(Module &M) {
std::set<BasicBlock*> TranslatedBlocksToNotExtract;
for (unsigned i = 0, e = BlocksToNotExtract.size(); i != e; ++i) {
@@ -236,13 +283,21 @@ bool BlockExtractorPass::runOnModule(Module &M) {
// Now that we know which blocks to not extract, figure out which ones we WANT
// to extract.
std::vector<BasicBlock*> BlocksToExtract;
- for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F)
+ for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F) {
+ SplitLandingPadPreds(&*F);
for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB)
if (!TranslatedBlocksToNotExtract.count(BB))
BlocksToExtract.push_back(BB);
+ }
- for (unsigned i = 0, e = BlocksToExtract.size(); i != e; ++i)
- ExtractBasicBlock(BlocksToExtract[i]);
+ for (unsigned i = 0, e = BlocksToExtract.size(); i != e; ++i) {
+ SmallVector<BasicBlock*, 2> BlocksToExtractVec;
+ BlocksToExtractVec.push_back(BlocksToExtract[i]);
+ if (const InvokeInst *II =
+ dyn_cast<InvokeInst>(BlocksToExtract[i]->getTerminator()))
+ BlocksToExtractVec.push_back(II->getUnwindDest());
+ ExtractBasicBlock(BlocksToExtractVec);
+ }
return !BlocksToExtract.empty();
}
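
The helper above leans on SplitLandingPadPredecessors from BasicBlockUtils; a standalone sketch of the call (wrapper name assumed):

#include "llvm/BasicBlock.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/ADT/SmallVector.h"
using namespace llvm;

// Sketch: rewire LPad so the predecessor ending in the invoke reaches it
// through its own new block, separate from all other predecessors; ".1"/".2"
// suffix the two new blocks, and no pass analysis is updated (P == 0).
static void isolate(BasicBlock *LPad, BasicBlock *InvokePred) {
  SmallVector<BasicBlock*, 2> NewBBs;
  SplitLandingPadPredecessors(LPad, InvokePred, ".1", ".2", /*P=*/0, NewBBs);
}
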
diff --git a/lib/Transforms/IPO/LowerSetJmp.cpp b/lib/Transforms/IPO/LowerSetJmp.cpp
deleted file mode 100644
index 659476b..0000000
--- a/lib/Transforms/IPO/LowerSetJmp.cpp
+++ /dev/null
@@ -1,547 +0,0 @@
-//===- LowerSetJmp.cpp - Code pertaining to lowering set/long jumps -------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements the lowering of setjmp and longjmp to use the
-// LLVM invoke and unwind instructions as necessary.
-//
-// Lowering of longjmp is fairly trivial. We replace the call with a
-// call to the LLVM library function "__llvm_sjljeh_throw_longjmp()".
-// This unwinds the stack for us calling all of the destructors for
-// objects allocated on the stack.
-//
-// At a setjmp call, the basic block is split and the setjmp removed.
-// The calls in a function that have a setjmp are converted to invoke
-// where the except part checks to see if it's a longjmp exception and,
-// if so, if it's handled in the function. If it is, then it gets the
-// value returned by the longjmp and goes to where the basic block was
-// split. Invoke instructions are handled in a similar fashion with the
-// original except block being executed if it isn't a longjmp except
-// that is handled by that function.
-//
-//===----------------------------------------------------------------------===//
-
-//===----------------------------------------------------------------------===//
-// FIXME: This pass doesn't deal with PHI statements just yet. That is,
-// we expect this to occur before SSAification is done. This would seem
-// to make sense, but in general, it might be a good idea to make this
-// pass invokable via the "opt" command at will.
-//===----------------------------------------------------------------------===//
-
-#define DEBUG_TYPE "lowersetjmp"
-#include "llvm/Transforms/IPO.h"
-#include "llvm/Constants.h"
-#include "llvm/DerivedTypes.h"
-#include "llvm/Instructions.h"
-#include "llvm/Intrinsics.h"
-#include "llvm/LLVMContext.h"
-#include "llvm/Module.h"
-#include "llvm/Pass.h"
-#include "llvm/Support/CallSite.h"
-#include "llvm/Support/CFG.h"
-#include "llvm/Support/InstVisitor.h"
-#include "llvm/Transforms/Utils/Local.h"
-#include "llvm/ADT/DepthFirstIterator.h"
-#include "llvm/ADT/Statistic.h"
-#include <map>
-using namespace llvm;
-
-STATISTIC(LongJmpsTransformed, "Number of longjmps transformed");
-STATISTIC(SetJmpsTransformed , "Number of setjmps transformed");
-STATISTIC(CallsTransformed , "Number of calls invokified");
-STATISTIC(InvokesTransformed , "Number of invokes modified");
-
-namespace {
- //===--------------------------------------------------------------------===//
- // LowerSetJmp pass implementation.
- class LowerSetJmp : public ModulePass, public InstVisitor<LowerSetJmp> {
- // LLVM library functions...
- Constant *InitSJMap; // __llvm_sjljeh_init_setjmpmap
- Constant *DestroySJMap; // __llvm_sjljeh_destroy_setjmpmap
- Constant *AddSJToMap; // __llvm_sjljeh_add_setjmp_to_map
- Constant *ThrowLongJmp; // __llvm_sjljeh_throw_longjmp
- Constant *TryCatchLJ; // __llvm_sjljeh_try_catching_longjmp_exception
- Constant *IsLJException; // __llvm_sjljeh_is_longjmp_exception
- Constant *GetLJValue; // __llvm_sjljeh_get_longjmp_value
-
- typedef std::pair<SwitchInst*, CallInst*> SwitchValuePair;
-
- // Keep track of those basic blocks reachable via a depth-first search of
- // the CFG from a setjmp call. We only need to transform those "call" and
- // "invoke" instructions that are reachable from the setjmp call site.
- std::set<BasicBlock*> DFSBlocks;
-
- // The setjmp map is going to hold information about which setjmps
- // were called (each setjmp gets its own number) and with which
- // buffer it was called.
- std::map<Function*, AllocaInst*> SJMap;
-
- // The rethrow basic block map holds the basic block to branch to if
- // the exception isn't handled in the current function and needs to
- // be rethrown.
- std::map<const Function*, BasicBlock*> RethrowBBMap;
-
- // The preliminary basic block map holds a basic block that grabs the
- // exception and determines if it's handled by the current function.
- std::map<const Function*, BasicBlock*> PrelimBBMap;
-
- // The switch/value map holds a switch inst/call inst pair. The
- // switch inst controls which handler (if any) gets called and the
- // value is the value returned to that handler by the call to
- // __llvm_sjljeh_get_longjmp_value.
- std::map<const Function*, SwitchValuePair> SwitchValMap;
-
- // A map of which setjmps we've seen so far in a function.
- std::map<const Function*, unsigned> SetJmpIDMap;
-
- AllocaInst* GetSetJmpMap(Function* Func);
- BasicBlock* GetRethrowBB(Function* Func);
- SwitchValuePair GetSJSwitch(Function* Func, BasicBlock* Rethrow);
-
- void TransformLongJmpCall(CallInst* Inst);
- void TransformSetJmpCall(CallInst* Inst);
-
- bool IsTransformableFunction(StringRef Name);
- public:
- static char ID; // Pass identification, replacement for typeid
- LowerSetJmp() : ModulePass(ID) {
- initializeLowerSetJmpPass(*PassRegistry::getPassRegistry());
- }
-
- void visitCallInst(CallInst& CI);
- void visitInvokeInst(InvokeInst& II);
- void visitReturnInst(ReturnInst& RI);
- void visitUnwindInst(UnwindInst& UI);
-
- bool runOnModule(Module& M);
- bool doInitialization(Module& M);
- };
-} // end anonymous namespace
-
-char LowerSetJmp::ID = 0;
-INITIALIZE_PASS(LowerSetJmp, "lowersetjmp", "Lower Set Jump", false, false)
-
-// run - Run the transformation on the program. We grab the function
-// prototypes for longjmp and setjmp. If they are used in the program,
-// then we can go directly to the places they're at and transform them.
-bool LowerSetJmp::runOnModule(Module& M) {
- bool Changed = false;
-
- // These are what the functions are called.
- Function* SetJmp = M.getFunction("llvm.setjmp");
- Function* LongJmp = M.getFunction("llvm.longjmp");
-
- // This program doesn't have longjmp and setjmp calls.
- if ((!LongJmp || LongJmp->use_empty()) &&
- (!SetJmp || SetJmp->use_empty())) return false;
-
- // Initialize some values and functions we'll need to transform the
- // setjmp/longjmp functions.
- doInitialization(M);
-
- if (SetJmp) {
- for (Value::use_iterator B = SetJmp->use_begin(), E = SetJmp->use_end();
- B != E; ++B) {
- BasicBlock* BB = cast<Instruction>(*B)->getParent();
- for (df_ext_iterator<BasicBlock*> I = df_ext_begin(BB, DFSBlocks),
- E = df_ext_end(BB, DFSBlocks); I != E; ++I)
- /* empty */;
- }
-
- while (!SetJmp->use_empty()) {
- assert(isa<CallInst>(SetJmp->use_back()) &&
- "User of setjmp intrinsic not a call?");
- TransformSetJmpCall(cast<CallInst>(SetJmp->use_back()));
- Changed = true;
- }
- }
-
- if (LongJmp)
- while (!LongJmp->use_empty()) {
- assert(isa<CallInst>(LongJmp->use_back()) &&
- "User of longjmp intrinsic not a call?");
- TransformLongJmpCall(cast<CallInst>(LongJmp->use_back()));
- Changed = true;
- }
-
- // Now go through the affected functions and convert calls and invokes
- // to new invokes...
- for (std::map<Function*, AllocaInst*>::iterator
- B = SJMap.begin(), E = SJMap.end(); B != E; ++B) {
- Function* F = B->first;
- for (Function::iterator BB = F->begin(), BE = F->end(); BB != BE; ++BB)
- for (BasicBlock::iterator IB = BB->begin(), IE = BB->end(); IB != IE; ) {
- visit(*IB++);
- if (IB != BB->end() && IB->getParent() != BB)
- break; // The next instruction got moved to a different block!
- }
- }
-
- DFSBlocks.clear();
- SJMap.clear();
- RethrowBBMap.clear();
- PrelimBBMap.clear();
- SwitchValMap.clear();
- SetJmpIDMap.clear();
-
- return Changed;
-}
-
-// doInitialization - For the lower long/setjmp pass, this ensures that a
-// module contains a declaration for the intrisic functions we are going
-// to call to convert longjmp and setjmp calls.
-//
-// This function is always successful, unless it isn't.
-bool LowerSetJmp::doInitialization(Module& M)
-{
- const Type *SBPTy = Type::getInt8PtrTy(M.getContext());
- const Type *SBPPTy = PointerType::getUnqual(SBPTy);
-
- // N.B. See llvm/runtime/GCCLibraries/libexception/SJLJ-Exception.h for
- // a description of the following library functions.
-
- // void __llvm_sjljeh_init_setjmpmap(void**)
- InitSJMap = M.getOrInsertFunction("__llvm_sjljeh_init_setjmpmap",
- Type::getVoidTy(M.getContext()),
- SBPPTy, (Type *)0);
- // void __llvm_sjljeh_destroy_setjmpmap(void**)
- DestroySJMap = M.getOrInsertFunction("__llvm_sjljeh_destroy_setjmpmap",
- Type::getVoidTy(M.getContext()),
- SBPPTy, (Type *)0);
-
- // void __llvm_sjljeh_add_setjmp_to_map(void**, void*, unsigned)
- AddSJToMap = M.getOrInsertFunction("__llvm_sjljeh_add_setjmp_to_map",
- Type::getVoidTy(M.getContext()),
- SBPPTy, SBPTy,
- Type::getInt32Ty(M.getContext()),
- (Type *)0);
-
- // void __llvm_sjljeh_throw_longjmp(int*, int)
- ThrowLongJmp = M.getOrInsertFunction("__llvm_sjljeh_throw_longjmp",
- Type::getVoidTy(M.getContext()), SBPTy,
- Type::getInt32Ty(M.getContext()),
- (Type *)0);
-
- // unsigned __llvm_sjljeh_try_catching_longjmp_exception(void **)
- TryCatchLJ =
- M.getOrInsertFunction("__llvm_sjljeh_try_catching_longjmp_exception",
- Type::getInt32Ty(M.getContext()), SBPPTy, (Type *)0);
-
- // bool __llvm_sjljeh_is_longjmp_exception()
- IsLJException = M.getOrInsertFunction("__llvm_sjljeh_is_longjmp_exception",
- Type::getInt1Ty(M.getContext()),
- (Type *)0);
-
- // int __llvm_sjljeh_get_longjmp_value()
- GetLJValue = M.getOrInsertFunction("__llvm_sjljeh_get_longjmp_value",
- Type::getInt32Ty(M.getContext()),
- (Type *)0);
- return true;
-}
-
-// IsTransformableFunction - Return true if the function is safe to transform.
-// Currently we don't transform the "llvm.{setjmp,longjmp}" intrinsics or any
-// of the setjmp/longjmp error handling functions (those beginning with
-// "__llvm_sjljeh_"; they don't throw exceptions).
-bool LowerSetJmp::IsTransformableFunction(StringRef Name) {
- return !Name.startswith("__llvm_sjljeh_");
-}
-
-// TransformLongJmpCall - Transform a longjmp call into a call to the
-// internal __llvm_sjljeh_throw_longjmp function. It then takes care of
-// throwing the exception for us.
-void LowerSetJmp::TransformLongJmpCall(CallInst* Inst)
-{
- const Type* SBPTy = Type::getInt8PtrTy(Inst->getContext());
-
- // Create the call to "__llvm_sjljeh_throw_longjmp". This takes the
- // same parameters as "longjmp", except that the buffer is cast to a
- // char*. It returns "void", so it doesn't need to replace any of
- // Inst's uses and doesn't get a name.
- CastInst* CI =
- new BitCastInst(Inst->getArgOperand(0), SBPTy, "LJBuf", Inst);
- Value *Args[] = { CI, Inst->getArgOperand(1) };
- CallInst::Create(ThrowLongJmp, Args, "", Inst);
-
- SwitchValuePair& SVP = SwitchValMap[Inst->getParent()->getParent()];
-
- // If the function has a setjmp call in it (they are transformed first),
- // we should branch to the basic block that determines if this longjmp
- // is applicable here. Otherwise, issue an unwind.
- if (SVP.first)
- BranchInst::Create(SVP.first->getParent(), Inst);
- else
- new UnwindInst(Inst->getContext(), Inst);
-
- // Remove all insts after the branch/unwind inst. Walk from back to front
- // so that users are deleted before their definitions, avoiding
- // replaceAllUsesWith where possible.
- BasicBlock *BB = Inst->getParent();
- Instruction *Removed;
- do {
- Removed = &BB->back();
- // If the removed instructions have any users, replace them now.
- if (!Removed->use_empty())
- Removed->replaceAllUsesWith(UndefValue::get(Removed->getType()));
- Removed->eraseFromParent();
- } while (Removed != Inst);
-
- ++LongJmpsTransformed;
-}
-
-// GetSetJmpMap - Retrieve (create and initialize, if necessary) the
-// setjmp map. This map holds information about which setjmps were called
-// (each setjmp gets its own number) and with which buffer each was called.
-// There is exactly one map per function.
-AllocaInst* LowerSetJmp::GetSetJmpMap(Function* Func)
-{
- if (SJMap[Func]) return SJMap[Func];
-
- // Insert the setjmp map initialization before the first instruction in
- // the function.
- Instruction* Inst = Func->getEntryBlock().begin();
- assert(Inst && "Couldn't find even ONE instruction in entry block!");
-
- // Fill in the alloca and call to initialize the SJ map.
- const Type *SBPTy =
- Type::getInt8PtrTy(Func->getContext());
- AllocaInst* Map = new AllocaInst(SBPTy, 0, "SJMap", Inst);
- CallInst::Create(InitSJMap, Map, "", Inst);
- return SJMap[Func] = Map;
-}
-
-// GetRethrowBB - Only one rethrow basic block is needed per function.
-// If a longjmp exception is not handled in this function, this BB
-// performs the rethrow.
-BasicBlock* LowerSetJmp::GetRethrowBB(Function* Func)
-{
- if (RethrowBBMap[Func]) return RethrowBBMap[Func];
-
- // The basic block we're going to jump to if we need to rethrow the
- // exception.
- BasicBlock* Rethrow =
- BasicBlock::Create(Func->getContext(), "RethrowExcept", Func);
-
- // Fill in the "Rethrow" BB with a call to rethrow the exception. This
- // is the last instruction in the BB since at this point the runtime
- // should exit this function and go to the next function.
- new UnwindInst(Func->getContext(), Rethrow);
- return RethrowBBMap[Func] = Rethrow;
-}
-
-// GetSJSwitch - Return the switch statement that controls which handler
-// (if any) gets called and the value returned to that handler.
-LowerSetJmp::SwitchValuePair LowerSetJmp::GetSJSwitch(Function* Func,
- BasicBlock* Rethrow)
-{
- if (SwitchValMap[Func].first) return SwitchValMap[Func];
-
- BasicBlock* LongJmpPre =
- BasicBlock::Create(Func->getContext(), "LongJmpBlkPre", Func);
-
- // Keep track of the preliminary basic block for some of the other
- // transformations.
- PrelimBBMap[Func] = LongJmpPre;
-
- // Grab the exception.
- CallInst* Cond = CallInst::Create(IsLJException, "IsLJExcept", LongJmpPre);
-
- // The "decision basic block" gets the number associated with the
- // setjmp call returning to switch on and the value returned by
- // longjmp.
- BasicBlock* DecisionBB =
- BasicBlock::Create(Func->getContext(), "LJDecisionBB", Func);
-
- BranchInst::Create(DecisionBB, Rethrow, Cond, LongJmpPre);
-
- // Fill in the "decision" basic block.
- CallInst* LJVal = CallInst::Create(GetLJValue, "LJVal", DecisionBB);
- CallInst* SJNum = CallInst::Create(TryCatchLJ, GetSetJmpMap(Func), "SJNum",
- DecisionBB);
-
- SwitchInst* SI = SwitchInst::Create(SJNum, Rethrow, 0, DecisionBB);
- return SwitchValMap[Func] = SwitchValuePair(SI, LJVal);
-}
-
-// TransformSetJmpCall - The setjmp call is a bit trickier to transform.
-// We're going to convert all setjmp calls to nops. Then all "call" and
-// "invoke" instructions in the function are converted to "invoke"
-// instructions whose exception edge is taken when returning from a longjmp.
-void LowerSetJmp::TransformSetJmpCall(CallInst* Inst)
-{
- BasicBlock* ABlock = Inst->getParent();
- Function* Func = ABlock->getParent();
-
- // Add this setjmp to the setjmp map.
- const Type* SBPTy =
- Type::getInt8PtrTy(Inst->getContext());
- CastInst* BufPtr =
- new BitCastInst(Inst->getArgOperand(0), SBPTy, "SBJmpBuf", Inst);
- Value *Args[] = {
- GetSetJmpMap(Func), BufPtr,
- ConstantInt::get(Type::getInt32Ty(Inst->getContext()), SetJmpIDMap[Func]++)
- };
- CallInst::Create(AddSJToMap, Args, "", Inst);
-
- // We are guaranteed that there are no values live across basic blocks
- // (because we are "not in SSA form" yet), but there can still be values live
- // in basic blocks. Because of this, splitting the setjmp block can cause
- // values above the setjmp to not dominate uses which are after the setjmp
- // call. In all of these cases, we must spill the value to the stack.
- //
- std::set<Instruction*> InstrsAfterCall;
-
- // The call is probably very close to the end of the basic block, given the
- // common usage pattern 'if (setjmp(...))', so keep track of the
- // instructions after the call.
- for (BasicBlock::iterator I = ++BasicBlock::iterator(Inst), E = ABlock->end();
- I != E; ++I)
- InstrsAfterCall.insert(I);
-
- for (BasicBlock::iterator II = ABlock->begin();
- II != BasicBlock::iterator(Inst); ++II)
- // Loop over all of the uses of instruction. If any of them are after the
- // call, "spill" the value to the stack.
- for (Value::use_iterator UI = II->use_begin(), E = II->use_end();
- UI != E; ++UI) {
- User *U = *UI;
- if (cast<Instruction>(U)->getParent() != ABlock ||
- InstrsAfterCall.count(cast<Instruction>(U))) {
- DemoteRegToStack(*II);
- break;
- }
- }
- InstrsAfterCall.clear();
-
- // Change the setjmp call into a branch statement. The setjmp call
- // itself is removed a little later, below.
- BasicBlock* SetJmpContBlock = ABlock->splitBasicBlock(Inst);
- assert(SetJmpContBlock && "Couldn't split setjmp BB!!");
-
- SetJmpContBlock->setName(ABlock->getName()+"SetJmpCont");
-
- // Add the SetJmpContBlock to the set of blocks reachable from a setjmp.
- DFSBlocks.insert(SetJmpContBlock);
-
- // This PHI node will be in the new block created from the
- // splitBasicBlock call.
- PHINode* PHI = PHINode::Create(Type::getInt32Ty(Inst->getContext()), 2,
- "SetJmpReturn", Inst);
-
- // Coming from a call to setjmp, the return is 0.
- PHI->addIncoming(Constant::getNullValue(Type::getInt32Ty(Inst->getContext())),
- ABlock);
-
- // Add the case for this setjmp's number...
- SwitchValuePair SVP = GetSJSwitch(Func, GetRethrowBB(Func));
- SVP.first->addCase(ConstantInt::get(Type::getInt32Ty(Inst->getContext()),
- SetJmpIDMap[Func] - 1),
- SetJmpContBlock);
-
- // Value coming from the handling of the exception.
- PHI->addIncoming(SVP.second, SVP.second->getParent());
-
- // Replace all uses of this instruction with the PHI node created by
- // the eradication of setjmp.
- Inst->replaceAllUsesWith(PHI);
- Inst->eraseFromParent();
-
- ++SetJmpsTransformed;
-}
-
-// visitCallInst - This converts all LLVM call instructions into invoke
-// instructions. The exception edge of the invoke goes to the "LongJmpBlkPre"
-// block, which grabs the exception and determines whether it is a longjmp
-// exception.
-void LowerSetJmp::visitCallInst(CallInst& CI)
-{
- if (CI.getCalledFunction())
- if (!IsTransformableFunction(CI.getCalledFunction()->getName()) ||
- CI.getCalledFunction()->isIntrinsic()) return;
-
- BasicBlock* OldBB = CI.getParent();
-
- // If not reachable from a setjmp call, don't transform.
- if (!DFSBlocks.count(OldBB)) return;
-
- BasicBlock* NewBB = OldBB->splitBasicBlock(CI);
- assert(NewBB && "Couldn't split BB of \"call\" instruction!!");
- DFSBlocks.insert(NewBB);
- NewBB->setName("Call2Invoke");
-
- Function* Func = OldBB->getParent();
-
- // Construct the new "invoke" instruction.
- TerminatorInst* Term = OldBB->getTerminator();
- CallSite CS(&CI);
- std::vector<Value*> Params(CS.arg_begin(), CS.arg_end());
- InvokeInst* II =
- InvokeInst::Create(CI.getCalledValue(), NewBB, PrelimBBMap[Func],
- Params, CI.getName(), Term);
- II->setCallingConv(CI.getCallingConv());
- II->setAttributes(CI.getAttributes());
-
- // Replace the old call inst with the invoke inst and remove the call.
- CI.replaceAllUsesWith(II);
- CI.eraseFromParent();
-
- // The old terminator is useless now that we have the invoke inst.
- Term->eraseFromParent();
- ++CallsTransformed;
-}
-
-// visitInvokeInst - Converting the "invoke" instruction is fairly
-// straightforward. The old exception destination is replaced by a query
-// asking if this is a longjmp exception. If it is, control goes to the
-// longjmp exception blocks. Otherwise, control passes to the old exception
-// destination.
-void LowerSetJmp::visitInvokeInst(InvokeInst& II)
-{
- if (II.getCalledFunction())
- if (!IsTransformableFunction(II.getCalledFunction()->getName()) ||
- II.getCalledFunction()->isIntrinsic()) return;
-
- BasicBlock* BB = II.getParent();
-
- // If not reachable from a setjmp call, don't transform.
- if (!DFSBlocks.count(BB)) return;
-
- BasicBlock* ExceptBB = II.getUnwindDest();
-
- Function* Func = BB->getParent();
- BasicBlock* NewExceptBB = BasicBlock::Create(II.getContext(),
- "InvokeExcept", Func);
-
- // If this is a longjmp exception, then branch to the preliminary BB of
- // the longjmp exception handling. Otherwise, go to the old exception.
- CallInst* IsLJExcept = CallInst::Create(IsLJException, "IsLJExcept",
- NewExceptBB);
-
- BranchInst::Create(PrelimBBMap[Func], ExceptBB, IsLJExcept, NewExceptBB);
-
- II.setUnwindDest(NewExceptBB);
- ++InvokesTransformed;
-}
-
-// visitReturnInst - We want to destroy the setjmp map upon exit from the
-// function.
-void LowerSetJmp::visitReturnInst(ReturnInst &RI) {
- Function* Func = RI.getParent()->getParent();
- CallInst::Create(DestroySJMap, GetSetJmpMap(Func), "", &RI);
-}
-
-// visitUnwindInst - We want to destroy the setjmp map upon exit from the
-// function.
-void LowerSetJmp::visitUnwindInst(UnwindInst &UI) {
- Function* Func = UI.getParent()->getParent();
- CallInst::Create(DestroySJMap, GetSetJmpMap(Func), "", &UI);
-}
-
-ModulePass *llvm::createLowerSetJmpPass() {
- return new LowerSetJmp();
-}
-
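
The pass deleted above was reached through the createLowerSetJmpPass entry
point kept at the end of the file. As a minimal sketch, assuming a pre-3.0
tree where llvm/Transforms/IPO.h still declares that function, a client
scheduled it like this (the Module M is hypothetical):

#include "llvm/Module.h"
#include "llvm/PassManager.h"
#include "llvm/Transforms/IPO.h"

void lowerSetJmp(llvm::Module &M) {
  llvm::PassManager PM;
  PM.add(llvm::createLowerSetJmpPass()); // rewrites llvm.setjmp/llvm.longjmp
  PM.run(M);
}
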
diff --git a/lib/Transforms/IPO/MergeFunctions.cpp b/lib/Transforms/IPO/MergeFunctions.cpp
index 7796d05..0b01c38 100644
--- a/lib/Transforms/IPO/MergeFunctions.cpp
+++ b/lib/Transforms/IPO/MergeFunctions.cpp
@@ -76,7 +76,7 @@ STATISTIC(NumDoubleWeak, "Number of new functions created");
/// functions that will compare equal, without looking at the instructions
/// inside the function.
static unsigned profileFunction(const Function *F) {
- const FunctionType *FTy = F->getFunctionType();
+ FunctionType *FTy = F->getFunctionType();
FoldingSetNodeID ID;
ID.AddInteger(F->size());
@@ -185,7 +185,7 @@ private:
}
/// Compare two Types, treating all pointer types as equal.
- bool isEquivalentType(const Type *Ty1, const Type *Ty2) const;
+ bool isEquivalentType(Type *Ty1, Type *Ty2) const;
// The two functions undergoing comparison.
const Function *F1, *F2;
@@ -200,8 +200,8 @@ private:
// Any two pointers in the same address space are equivalent; intptr_t and
// pointers are equivalent. Otherwise, standard type equivalence rules apply.
-bool FunctionComparator::isEquivalentType(const Type *Ty1,
- const Type *Ty2) const {
+bool FunctionComparator::isEquivalentType(Type *Ty1,
+ Type *Ty2) const {
if (Ty1 == Ty2)
return true;
if (Ty1->getTypeID() != Ty2->getTypeID()) {
@@ -233,14 +233,14 @@ bool FunctionComparator::isEquivalentType(const Type *Ty1,
return true;
case Type::PointerTyID: {
- const PointerType *PTy1 = cast<PointerType>(Ty1);
- const PointerType *PTy2 = cast<PointerType>(Ty2);
+ PointerType *PTy1 = cast<PointerType>(Ty1);
+ PointerType *PTy2 = cast<PointerType>(Ty2);
return PTy1->getAddressSpace() == PTy2->getAddressSpace();
}
case Type::StructTyID: {
- const StructType *STy1 = cast<StructType>(Ty1);
- const StructType *STy2 = cast<StructType>(Ty2);
+ StructType *STy1 = cast<StructType>(Ty1);
+ StructType *STy2 = cast<StructType>(Ty2);
if (STy1->getNumElements() != STy2->getNumElements())
return false;
@@ -255,8 +255,8 @@ bool FunctionComparator::isEquivalentType(const Type *Ty1,
}
case Type::FunctionTyID: {
- const FunctionType *FTy1 = cast<FunctionType>(Ty1);
- const FunctionType *FTy2 = cast<FunctionType>(Ty2);
+ FunctionType *FTy1 = cast<FunctionType>(Ty1);
+ FunctionType *FTy2 = cast<FunctionType>(Ty2);
if (FTy1->getNumParams() != FTy2->getNumParams() ||
FTy1->isVarArg() != FTy2->isVarArg())
return false;
@@ -272,8 +272,8 @@ bool FunctionComparator::isEquivalentType(const Type *Ty1,
}
case Type::ArrayTyID: {
- const ArrayType *ATy1 = cast<ArrayType>(Ty1);
- const ArrayType *ATy2 = cast<ArrayType>(Ty2);
+ ArrayType *ATy1 = cast<ArrayType>(Ty1);
+ ArrayType *ATy2 = cast<ArrayType>(Ty2);
return ATy1->getNumElements() == ATy2->getNumElements() &&
isEquivalentType(ATy1->getElementType(), ATy2->getElementType());
}
@@ -305,10 +305,14 @@ bool FunctionComparator::isEquivalentOperation(const Instruction *I1,
// Check special state that is a part of some instructions.
if (const LoadInst *LI = dyn_cast<LoadInst>(I1))
return LI->isVolatile() == cast<LoadInst>(I2)->isVolatile() &&
- LI->getAlignment() == cast<LoadInst>(I2)->getAlignment();
+ LI->getAlignment() == cast<LoadInst>(I2)->getAlignment() &&
+ LI->getOrdering() == cast<LoadInst>(I2)->getOrdering() &&
+ LI->getSynchScope() == cast<LoadInst>(I2)->getSynchScope();
if (const StoreInst *SI = dyn_cast<StoreInst>(I1))
return SI->isVolatile() == cast<StoreInst>(I2)->isVolatile() &&
- SI->getAlignment() == cast<StoreInst>(I2)->getAlignment();
+ SI->getAlignment() == cast<StoreInst>(I2)->getAlignment() &&
+ SI->getOrdering() == cast<StoreInst>(I2)->getOrdering() &&
+ SI->getSynchScope() == cast<StoreInst>(I2)->getSynchScope();
if (const CmpInst *CI = dyn_cast<CmpInst>(I1))
return CI->getPredicate() == cast<CmpInst>(I2)->getPredicate();
if (const CallInst *CI = dyn_cast<CallInst>(I1))
@@ -317,22 +321,22 @@ bool FunctionComparator::isEquivalentOperation(const Instruction *I1,
if (const InvokeInst *CI = dyn_cast<InvokeInst>(I1))
return CI->getCallingConv() == cast<InvokeInst>(I2)->getCallingConv() &&
CI->getAttributes() == cast<InvokeInst>(I2)->getAttributes();
- if (const InsertValueInst *IVI = dyn_cast<InsertValueInst>(I1)) {
- if (IVI->getNumIndices() != cast<InsertValueInst>(I2)->getNumIndices())
- return false;
- for (unsigned i = 0, e = IVI->getNumIndices(); i != e; ++i)
- if (IVI->idx_begin()[i] != cast<InsertValueInst>(I2)->idx_begin()[i])
- return false;
- return true;
- }
- if (const ExtractValueInst *EVI = dyn_cast<ExtractValueInst>(I1)) {
- if (EVI->getNumIndices() != cast<ExtractValueInst>(I2)->getNumIndices())
- return false;
- for (unsigned i = 0, e = EVI->getNumIndices(); i != e; ++i)
- if (EVI->idx_begin()[i] != cast<ExtractValueInst>(I2)->idx_begin()[i])
- return false;
- return true;
- }
+ if (const InsertValueInst *IVI = dyn_cast<InsertValueInst>(I1))
+ return IVI->getIndices() == cast<InsertValueInst>(I2)->getIndices();
+ if (const ExtractValueInst *EVI = dyn_cast<ExtractValueInst>(I1))
+ return EVI->getIndices() == cast<ExtractValueInst>(I2)->getIndices();
+ if (const FenceInst *FI = dyn_cast<FenceInst>(I1))
+ return FI->getOrdering() == cast<FenceInst>(I2)->getOrdering() &&
+ FI->getSynchScope() == cast<FenceInst>(I2)->getSynchScope();
+ if (const AtomicCmpXchgInst *CXI = dyn_cast<AtomicCmpXchgInst>(I1))
+ return CXI->isVolatile() == cast<AtomicCmpXchgInst>(I2)->isVolatile() &&
+ CXI->getOrdering() == cast<AtomicCmpXchgInst>(I2)->getOrdering() &&
+ CXI->getSynchScope() == cast<AtomicCmpXchgInst>(I2)->getSynchScope();
+ if (const AtomicRMWInst *RMWI = dyn_cast<AtomicRMWInst>(I1))
+ return RMWI->getOperation() == cast<AtomicRMWInst>(I2)->getOperation() &&
+ RMWI->isVolatile() == cast<AtomicRMWInst>(I2)->isVolatile() &&
+ RMWI->getOrdering() == cast<AtomicRMWInst>(I2)->getOrdering() &&
+ RMWI->getSynchScope() == cast<AtomicRMWInst>(I2)->getSynchScope();
return true;
}
@@ -346,9 +350,9 @@ bool FunctionComparator::isEquivalentGEP(const GEPOperator *GEP1,
SmallVector<Value *, 8> Indices1(GEP1->idx_begin(), GEP1->idx_end());
SmallVector<Value *, 8> Indices2(GEP2->idx_begin(), GEP2->idx_end());
uint64_t Offset1 = TD->getIndexedOffset(GEP1->getPointerOperandType(),
- Indices1.data(), Indices1.size());
+ Indices1);
uint64_t Offset2 = TD->getIndexedOffset(GEP2->getPointerOperandType(),
- Indices2.data(), Indices2.size());
+ Indices2);
return Offset1 == Offset2;
}
@@ -725,7 +729,7 @@ void MergeFunctions::writeThunk(Function *F, Function *G) {
SmallVector<Value *, 16> Args;
unsigned i = 0;
- const FunctionType *FFTy = F->getFunctionType();
+ FunctionType *FFTy = F->getFunctionType();
for (Function::arg_iterator AI = NewG->arg_begin(), AE = NewG->arg_end();
AI != AE; ++AI) {
Args.push_back(Builder.CreateBitCast(AI, FFTy->getParamType(i)));
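
For context on the first hunk above: profileFunction builds a cheap,
comparison-invariant hash so only functions with identical profiles are ever
compared instruction by instruction. A minimal sketch of the idea, using only
properties visible in the hunk (the real pass hashes more fields):

#include "llvm/ADT/FoldingSet.h"
#include "llvm/Function.h"

// Any two functions that could merge must hash to the same bucket here.
static unsigned profileSketch(const llvm::Function *F) {
  llvm::FoldingSetNodeID ID;
  ID.AddInteger(F->size());            // basic block count
  ID.AddInteger(F->getCallingConv());  // calling conventions must agree
  ID.AddInteger(F->arg_size());        // as must the arity
  return ID.ComputeHash();
}
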
diff --git a/lib/Transforms/IPO/PassManagerBuilder.cpp b/lib/Transforms/IPO/PassManagerBuilder.cpp
new file mode 100644
index 0000000..8fdfd72
--- /dev/null
+++ b/lib/Transforms/IPO/PassManagerBuilder.cpp
@@ -0,0 +1,343 @@
+//===- PassManagerBuilder.cpp - Build Standard Pass -----------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the PassManagerBuilder class, which is used to set up a
+// "standard" optimization sequence suitable for languages like C and C++.
+//
+//===----------------------------------------------------------------------===//
+
+
+#include "llvm/Transforms/IPO/PassManagerBuilder.h"
+
+#include "llvm-c/Transforms/PassManagerBuilder.h"
+
+#include "llvm/PassManager.h"
+#include "llvm/DefaultPasses.h"
+#include "llvm/PassManager.h"
+#include "llvm/Analysis/Passes.h"
+#include "llvm/Analysis/Verifier.h"
+#include "llvm/Target/TargetLibraryInfo.h"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/IPO.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/Support/ManagedStatic.h"
+
+using namespace llvm;
+
+PassManagerBuilder::PassManagerBuilder() {
+ OptLevel = 2;
+ SizeLevel = 0;
+ LibraryInfo = 0;
+ Inliner = 0;
+ DisableSimplifyLibCalls = false;
+ DisableUnitAtATime = false;
+ DisableUnrollLoops = false;
+}
+
+PassManagerBuilder::~PassManagerBuilder() {
+ delete LibraryInfo;
+ delete Inliner;
+}
+
+/// Set of global extensions, automatically added as part of the standard set.
+static ManagedStatic<SmallVector<std::pair<PassManagerBuilder::ExtensionPointTy,
+ PassManagerBuilder::ExtensionFn>, 8> > GlobalExtensions;
+
+void PassManagerBuilder::addGlobalExtension(
+ PassManagerBuilder::ExtensionPointTy Ty,
+ PassManagerBuilder::ExtensionFn Fn) {
+ GlobalExtensions->push_back(std::make_pair(Ty, Fn));
+}
+
+void PassManagerBuilder::addExtension(ExtensionPointTy Ty, ExtensionFn Fn) {
+ Extensions.push_back(std::make_pair(Ty, Fn));
+}
+
+void PassManagerBuilder::addExtensionsToPM(ExtensionPointTy ETy,
+ PassManagerBase &PM) const {
+ for (unsigned i = 0, e = GlobalExtensions->size(); i != e; ++i)
+ if ((*GlobalExtensions)[i].first == ETy)
+ (*GlobalExtensions)[i].second(*this, PM);
+ for (unsigned i = 0, e = Extensions.size(); i != e; ++i)
+ if (Extensions[i].first == ETy)
+ Extensions[i].second(*this, PM);
+}
+
+void
+PassManagerBuilder::addInitialAliasAnalysisPasses(PassManagerBase &PM) const {
+ // Add TypeBasedAliasAnalysis before BasicAliasAnalysis so that
+ // BasicAliasAnalysis wins if they disagree. This is intended to help
+ // support "obvious" type-punning idioms.
+ PM.add(createTypeBasedAliasAnalysisPass());
+ PM.add(createBasicAliasAnalysisPass());
+}
+
+void PassManagerBuilder::populateFunctionPassManager(FunctionPassManager &FPM) {
+ addExtensionsToPM(EP_EarlyAsPossible, FPM);
+
+ // Add LibraryInfo if we have some.
+ if (LibraryInfo) FPM.add(new TargetLibraryInfo(*LibraryInfo));
+
+ if (OptLevel == 0) return;
+
+ addInitialAliasAnalysisPasses(FPM);
+
+ FPM.add(createCFGSimplificationPass());
+ FPM.add(createScalarReplAggregatesPass());
+ FPM.add(createEarlyCSEPass());
+ FPM.add(createLowerExpectIntrinsicPass());
+}
+
+void PassManagerBuilder::populateModulePassManager(PassManagerBase &MPM) {
+ // If all optimizations are disabled, just run the always-inline pass.
+ if (OptLevel == 0) {
+ if (Inliner) {
+ MPM.add(Inliner);
+ Inliner = 0;
+ }
+ return;
+ }
+
+ // Add LibraryInfo if we have some.
+ if (LibraryInfo) MPM.add(new TargetLibraryInfo(*LibraryInfo));
+
+ addInitialAliasAnalysisPasses(MPM);
+
+ if (!DisableUnitAtATime) {
+ MPM.add(createGlobalOptimizerPass()); // Optimize out global vars
+
+ MPM.add(createIPSCCPPass()); // IP SCCP
+ MPM.add(createDeadArgEliminationPass()); // Dead argument elimination
+
+ MPM.add(createInstructionCombiningPass());// Clean up after IPCP & DAE
+ MPM.add(createCFGSimplificationPass()); // Clean up after IPCP & DAE
+ }
+
+ // Start of CallGraph SCC passes.
+ if (!DisableUnitAtATime)
+ MPM.add(createPruneEHPass()); // Remove dead EH info
+ if (Inliner) {
+ MPM.add(Inliner);
+ Inliner = 0;
+ }
+ if (!DisableUnitAtATime)
+ MPM.add(createFunctionAttrsPass()); // Set readonly/readnone attrs
+ if (OptLevel > 2)
+ MPM.add(createArgumentPromotionPass()); // Scalarize uninlined fn args
+
+ // Start of function pass.
+ // Break up aggregate allocas, using SSAUpdater.
+ MPM.add(createScalarReplAggregatesPass(-1, false));
+ MPM.add(createEarlyCSEPass()); // Catch trivial redundancies
+ if (!DisableSimplifyLibCalls)
+ MPM.add(createSimplifyLibCallsPass()); // Library Call Optimizations
+ MPM.add(createJumpThreadingPass()); // Thread jumps.
+ MPM.add(createCorrelatedValuePropagationPass()); // Propagate conditionals
+ MPM.add(createCFGSimplificationPass()); // Merge & remove BBs
+ MPM.add(createInstructionCombiningPass()); // Combine silly seq's
+
+ MPM.add(createTailCallEliminationPass()); // Eliminate tail calls
+ MPM.add(createCFGSimplificationPass()); // Merge & remove BBs
+ MPM.add(createReassociatePass()); // Reassociate expressions
+ MPM.add(createLoopRotatePass()); // Rotate Loop
+ MPM.add(createLICMPass()); // Hoist loop invariants
+ MPM.add(createLoopUnswitchPass(SizeLevel || OptLevel < 3));
+ MPM.add(createInstructionCombiningPass());
+ MPM.add(createIndVarSimplifyPass()); // Canonicalize indvars
+ MPM.add(createLoopIdiomPass()); // Recognize idioms like memset.
+ MPM.add(createLoopDeletionPass()); // Delete dead loops
+ if (!DisableUnrollLoops)
+ MPM.add(createLoopUnrollPass()); // Unroll small loops
+ addExtensionsToPM(EP_LoopOptimizerEnd, MPM);
+
+ if (OptLevel > 1)
+ MPM.add(createGVNPass()); // Remove redundancies
+ MPM.add(createMemCpyOptPass()); // Remove memcpy / form memset
+ MPM.add(createSCCPPass()); // Constant prop with SCCP
+
+ // Run instcombine after the redundancy elimination passes to exploit the
+ // opportunities they open up.
+ MPM.add(createInstructionCombiningPass());
+ MPM.add(createJumpThreadingPass()); // Thread jumps
+ MPM.add(createCorrelatedValuePropagationPass());
+ MPM.add(createDeadStoreEliminationPass()); // Delete dead stores
+
+ addExtensionsToPM(EP_ScalarOptimizerLate, MPM);
+
+ MPM.add(createAggressiveDCEPass()); // Delete dead instructions
+ MPM.add(createCFGSimplificationPass()); // Merge & remove BBs
+ MPM.add(createInstructionCombiningPass()); // Clean up after everything.
+
+ if (!DisableUnitAtATime) {
+ // FIXME: We shouldn't bother with this anymore.
+ MPM.add(createStripDeadPrototypesPass()); // Get rid of dead prototypes
+
+ // GlobalOpt already deletes dead functions and globals; at -O3, try a
+ // late pass of GlobalDCE. It is capable of deleting dead cycles.
+ if (OptLevel > 2)
+ MPM.add(createGlobalDCEPass()); // Remove dead fns and globals.
+
+ if (OptLevel > 1)
+ MPM.add(createConstantMergePass()); // Merge dup global constants
+ }
+}
+
+void PassManagerBuilder::populateLTOPassManager(PassManagerBase &PM,
+ bool Internalize,
+ bool RunInliner) {
+ // Provide AliasAnalysis services for optimizations.
+ addInitialAliasAnalysisPasses(PM);
+
+ // Now that the composite module has been compiled, scan through it, looking
+ // for a main function. If main is defined, mark all other functions
+ // internal.
+ if (Internalize)
+ PM.add(createInternalizePass(true));
+
+ // Propagate constants at call sites into the functions they call. This
+ // opens opportunities for globalopt (and inlining) by substituting function
+ // pointers passed as arguments to direct uses of functions.
+ PM.add(createIPSCCPPass());
+
+ // Now that we internalized some globals, see if we can hack on them!
+ PM.add(createGlobalOptimizerPass());
+
+ // Linking modules together can lead to duplicated global constants, only
+ // keep one copy of each constant.
+ PM.add(createConstantMergePass());
+
+ // Remove unused arguments from functions.
+ PM.add(createDeadArgEliminationPass());
+
+ // Reduce the code after globalopt and ipsccp. Both can open up significant
+ // simplification opportunities, and both can propagate functions through
+ // function pointers. When this happens, we often have to resolve varargs
+ // calls, etc, so let instcombine do this.
+ PM.add(createInstructionCombiningPass());
+
+ // Inline small functions
+ if (RunInliner)
+ PM.add(createFunctionInliningPass());
+
+ PM.add(createPruneEHPass()); // Remove dead EH info.
+
+ // Optimize globals again if we ran the inliner.
+ if (RunInliner)
+ PM.add(createGlobalOptimizerPass());
+ PM.add(createGlobalDCEPass()); // Remove dead functions.
+
+ // If we didn't decide to inline a function, check to see if we can
+ // transform it to pass arguments by value instead of by reference.
+ PM.add(createArgumentPromotionPass());
+
+ // The IPO passes may leave cruft around. Clean up after them.
+ PM.add(createInstructionCombiningPass());
+ PM.add(createJumpThreadingPass());
+ // Break up allocas
+ PM.add(createScalarReplAggregatesPass());
+
+ // Run a few AA driven optimizations here and now, to cleanup the code.
+ PM.add(createFunctionAttrsPass()); // Add nocapture.
+ PM.add(createGlobalsModRefPass()); // IP alias analysis.
+
+ PM.add(createLICMPass()); // Hoist loop invariants.
+ PM.add(createGVNPass()); // Remove redundancies.
+ PM.add(createMemCpyOptPass()); // Remove dead memcpys.
+ // Nuke dead stores.
+ PM.add(createDeadStoreEliminationPass());
+
+ // Cleanup and simplify the code after the scalar optimizations.
+ PM.add(createInstructionCombiningPass());
+
+ PM.add(createJumpThreadingPass());
+
+ // Delete basic blocks, which optimization passes may have killed.
+ PM.add(createCFGSimplificationPass());
+
+ // Now that we have optimized the program, discard unreachable functions.
+ PM.add(createGlobalDCEPass());
+}
+
+LLVMPassManagerBuilderRef LLVMPassManagerBuilderCreate(void) {
+ PassManagerBuilder *PMB = new PassManagerBuilder();
+ return wrap(PMB);
+}
+
+void LLVMPassManagerBuilderDispose(LLVMPassManagerBuilderRef PMB) {
+ PassManagerBuilder *Builder = unwrap(PMB);
+ delete Builder;
+}
+
+void
+LLVMPassManagerBuilderSetOptLevel(LLVMPassManagerBuilderRef PMB,
+ unsigned OptLevel) {
+ PassManagerBuilder *Builder = unwrap(PMB);
+ Builder->OptLevel = OptLevel;
+}
+
+void
+LLVMPassManagerBuilderSetSizeLevel(LLVMPassManagerBuilderRef PMB,
+ unsigned SizeLevel) {
+ PassManagerBuilder *Builder = unwrap(PMB);
+ Builder->SizeLevel = SizeLevel;
+}
+
+void
+LLVMPassManagerBuilderSetDisableUnitAtATime(LLVMPassManagerBuilderRef PMB,
+ LLVMBool Value) {
+ PassManagerBuilder *Builder = unwrap(PMB);
+ Builder->DisableUnitAtATime = Value;
+}
+
+void
+LLVMPassManagerBuilderSetDisableUnrollLoops(LLVMPassManagerBuilderRef PMB,
+ LLVMBool Value) {
+ PassManagerBuilder *Builder = unwrap(PMB);
+ Builder->DisableUnrollLoops = Value;
+}
+
+void
+LLVMPassManagerBuilderSetDisableSimplifyLibCalls(LLVMPassManagerBuilderRef PMB,
+ LLVMBool Value) {
+ PassManagerBuilder *Builder = unwrap(PMB);
+ Builder->DisableSimplifyLibCalls = Value;
+}
+
+void
+LLVMPassManagerBuilderUseInlinerWithThreshold(LLVMPassManagerBuilderRef PMB,
+ unsigned Threshold) {
+ PassManagerBuilder *Builder = unwrap(PMB);
+ Builder->Inliner = createFunctionInliningPass(Threshold);
+}
+
+void
+LLVMPassManagerBuilderPopulateFunctionPassManager(LLVMPassManagerBuilderRef PMB,
+ LLVMPassManagerRef PM) {
+ PassManagerBuilder *Builder = unwrap(PMB);
+ FunctionPassManager *FPM = unwrap<FunctionPassManager>(PM);
+ Builder->populateFunctionPassManager(*FPM);
+}
+
+void
+LLVMPassManagerBuilderPopulateModulePassManager(LLVMPassManagerBuilderRef PMB,
+ LLVMPassManagerRef PM) {
+ PassManagerBuilder *Builder = unwrap(PMB);
+ PassManagerBase *MPM = unwrap(PM);
+ Builder->populateModulePassManager(*MPM);
+}
+
+void LLVMPassManagerBuilderPopulateLTOPassManager(LLVMPassManagerBuilderRef PMB,
+ LLVMPassManagerRef PM,
+ bool Internalize,
+ bool RunInliner) {
+ PassManagerBuilder *Builder = unwrap(PMB);
+ PassManagerBase *LPM = unwrap(PM);
+ Builder->populateLTOPassManager(*LPM, Internalize, RunInliner);
+}
+
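
Taken together, the file added above packages the pass lists that tools such
as opt previously hard-coded. A hedged usage sketch against the 3.0 C++ API;
the module M, the inliner threshold of 225, and the split into two managers
are illustrative choices, not requirements of the patch:

#include "llvm/Module.h"
#include "llvm/PassManager.h"
#include "llvm/Transforms/IPO.h"
#include "llvm/Transforms/IPO/PassManagerBuilder.h"
using namespace llvm;

void optimizeModule(Module &M) {
  PassManagerBuilder Builder;
  Builder.OptLevel = 2;                              // roughly -O2
  Builder.Inliner = createFunctionInliningPass(225); // threshold illustrative

  FunctionPassManager FPM(&M);  // early per-function cleanups
  Builder.populateFunctionPassManager(FPM);
  PassManager MPM;              // the main module-level pipeline
  Builder.populateModulePassManager(MPM);

  FPM.doInitialization();
  for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F)
    FPM.run(*F);
  FPM.doFinalization();
  MPM.run(M);
}

The C functions at the end of the file expose the same flow one call at a
time: create the builder, set the knobs, populate a pass manager, dispose.
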
diff --git a/lib/Transforms/IPO/PruneEH.cpp b/lib/Transforms/IPO/PruneEH.cpp
index b7e63dc..cbb80f0 100644
--- a/lib/Transforms/IPO/PruneEH.cpp
+++ b/lib/Transforms/IPO/PruneEH.cpp
@@ -101,8 +101,9 @@ bool PruneEH::runOnSCC(CallGraphSCC &SCC) {
// Check to see if this function performs an unwind or calls an
// unwinding function.
for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) {
- if (CheckUnwind && isa<UnwindInst>(BB->getTerminator())) {
- // Uses unwind!
+ if (CheckUnwind && (isa<UnwindInst>(BB->getTerminator()) ||
+ isa<ResumeInst>(BB->getTerminator()))) {
+ // Uses unwind / resume!
SCCMightUnwind = true;
} else if (CheckReturn && isa<ReturnInst>(BB->getTerminator())) {
SCCMightReturn = true;
diff --git a/lib/Transforms/IPO/StripSymbols.cpp b/lib/Transforms/IPO/StripSymbols.cpp
index 0fbaff1..b5caa9a 100644
--- a/lib/Transforms/IPO/StripSymbols.cpp
+++ b/lib/Transforms/IPO/StripSymbols.cpp
@@ -180,7 +180,7 @@ static void StripTypeNames(Module &M, bool PreserveDbgInfo) {
for (unsigned i = 0, e = StructTypes.size(); i != e; ++i) {
StructType *STy = StructTypes[i];
- if (STy->isAnonymous() || STy->getName().empty()) continue;
+ if (STy->isLiteral() || STy->getName().empty()) continue;
if (PreserveDbgInfo && STy->getName().startswith("llvm.dbg"))
continue;
diff --git a/lib/Transforms/InstCombine/CMakeLists.txt b/lib/Transforms/InstCombine/CMakeLists.txt
index d070ccc..a46d5ad 100644
--- a/lib/Transforms/InstCombine/CMakeLists.txt
+++ b/lib/Transforms/InstCombine/CMakeLists.txt
@@ -13,3 +13,11 @@ add_llvm_library(LLVMInstCombine
InstCombineSimplifyDemanded.cpp
InstCombineVectorOps.cpp
)
+
+add_llvm_library_dependencies(LLVMInstCombine
+ LLVMAnalysis
+ LLVMCore
+ LLVMSupport
+ LLVMTarget
+ LLVMTransformUtils
+ )
diff --git a/lib/Transforms/InstCombine/InstCombine.h b/lib/Transforms/InstCombine/InstCombine.h
index 8257d6b..3808278 100644
--- a/lib/Transforms/InstCombine/InstCombine.h
+++ b/lib/Transforms/InstCombine/InstCombine.h
@@ -11,6 +11,7 @@
#define INSTCOMBINE_INSTCOMBINE_H
#include "InstCombineWorklist.h"
+#include "llvm/IntrinsicInst.h"
#include "llvm/Operator.h"
#include "llvm/Pass.h"
#include "llvm/Analysis/ValueTracking.h"
@@ -103,7 +104,7 @@ public:
//
Instruction *visitAdd(BinaryOperator &I);
Instruction *visitFAdd(BinaryOperator &I);
- Value *OptimizePointerDifference(Value *LHS, Value *RHS, const Type *Ty);
+ Value *OptimizePointerDifference(Value *LHS, Value *RHS, Type *Ty);
Instruction *visitSub(BinaryOperator &I);
Instruction *visitFSub(BinaryOperator &I);
Instruction *visitMul(BinaryOperator &I);
@@ -192,15 +193,16 @@ public:
Instruction *visitExtractElementInst(ExtractElementInst &EI);
Instruction *visitShuffleVectorInst(ShuffleVectorInst &SVI);
Instruction *visitExtractValueInst(ExtractValueInst &EV);
+ Instruction *visitLandingPadInst(LandingPadInst &LI);
// visitInstruction - Specify what to return for unhandled instructions...
Instruction *visitInstruction(Instruction &I) { return 0; }
private:
- bool ShouldChangeType(const Type *From, const Type *To) const;
+ bool ShouldChangeType(Type *From, Type *To) const;
Value *dyn_castNegVal(Value *V) const;
Value *dyn_castFNegVal(Value *V) const;
- const Type *FindElementAtOffset(const Type *Ty, int64_t Offset,
+ Type *FindElementAtOffset(Type *Ty, int64_t Offset,
SmallVectorImpl<Value*> &NewIndices);
Instruction *FoldOpIntoSelect(Instruction &Op, SelectInst *SI);
@@ -209,12 +211,13 @@ private:
/// the cast can be eliminated by some other simple transformation, we prefer
/// to do the simplification first.
bool ShouldOptimizeCast(Instruction::CastOps opcode,const Value *V,
- const Type *Ty);
+ Type *Ty);
Instruction *visitCallSite(CallSite CS);
Instruction *tryOptimizeCall(CallInst *CI, const TargetData *TD);
bool transformConstExprCastCall(CallSite CS);
- Instruction *transformCallThroughTrampoline(CallSite CS);
+ Instruction *transformCallThroughTrampoline(CallSite CS,
+ IntrinsicInst *Tramp);
Instruction *transformZExtICmp(ICmpInst *ICI, Instruction &CI,
bool DoXform = true);
Instruction *transformSExtICmp(ICmpInst *ICI, Instruction &CI);
@@ -357,7 +360,7 @@ private:
Instruction *SimplifyMemSet(MemSetInst *MI);
- Value *EvaluateInDifferentType(Value *V, const Type *Ty, bool isSigned);
+ Value *EvaluateInDifferentType(Value *V, Type *Ty, bool isSigned);
};
diff --git a/lib/Transforms/InstCombine/InstCombineAddSub.cpp b/lib/Transforms/InstCombine/InstCombineAddSub.cpp
index c36a955..d10046c 100644
--- a/lib/Transforms/InstCombine/InstCombineAddSub.cpp
+++ b/lib/Transforms/InstCombine/InstCombineAddSub.cpp
@@ -188,7 +188,7 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) {
return BinaryOperator::CreateMul(LHS, AddOne(C2));
// A+B --> A|B iff A and B have no bits set in common.
- if (const IntegerType *IT = dyn_cast<IntegerType>(I.getType())) {
+ if (IntegerType *IT = dyn_cast<IntegerType>(I.getType())) {
APInt Mask = APInt::getAllOnesValue(IT->getBitWidth());
APInt LHSKnownOne(IT->getBitWidth(), 0);
APInt LHSKnownZero(IT->getBitWidth(), 0);
@@ -401,7 +401,7 @@ Instruction *InstCombiner::visitFAdd(BinaryOperator &I) {
Value *InstCombiner::EmitGEPOffset(User *GEP) {
TargetData &TD = *getTargetData();
gep_type_iterator GTI = gep_type_begin(GEP);
- const Type *IntPtrTy = TD.getIntPtrType(GEP->getContext());
+ Type *IntPtrTy = TD.getIntPtrType(GEP->getContext());
Value *Result = Constant::getNullValue(IntPtrTy);
// If the GEP is inbounds, we know that none of the addressing operations will
@@ -420,7 +420,7 @@ Value *InstCombiner::EmitGEPOffset(User *GEP) {
if (OpC->isZero()) continue;
// Handle a struct index, which adds its field offset to the pointer.
- if (const StructType *STy = dyn_cast<StructType>(*GTI)) {
+ if (StructType *STy = dyn_cast<StructType>(*GTI)) {
Size = TD.getStructLayout(STy)->getElementOffset(OpC->getZExtValue());
if (Size)
@@ -460,7 +460,7 @@ Value *InstCombiner::EmitGEPOffset(User *GEP) {
/// operands to the ptrtoint instructions for the LHS/RHS of the subtract.
///
Value *InstCombiner::OptimizePointerDifference(Value *LHS, Value *RHS,
- const Type *Ty) {
+ Type *Ty) {
assert(TD && "Must have target data info for this");
// If LHS is a gep based on RHS or RHS is a gep based on LHS, we can optimize
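
A worked instance of the A+B --> A|B rewrite in the first hunk of this file's
diff: once the known-bits computation proves A & B == 0, no bit column can
generate a carry, so addition and bitwise or coincide. For example,
0b0101 + 0b1010 = 0b1111 = 0b0101 | 0b1010, whereas 0b0011 + 0b0001 produces
a carry and the rewrite does not fire.
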
diff --git a/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
index 64ea36f..5e0bfe8 100644
--- a/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
+++ b/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
@@ -1174,30 +1174,31 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) {
((A == C && B == D) || (A == D && B == C)))
return BinaryOperator::CreateXor(A, B);
- if (Op0->hasOneUse() &&
- match(Op0, m_Xor(m_Value(A), m_Value(B)))) {
- if (A == Op1) { // (A^B)&A -> A&(A^B)
- I.swapOperands(); // Simplify below
- std::swap(Op0, Op1);
- } else if (B == Op1) { // (A^B)&B -> B&(B^A)
- cast<BinaryOperator>(Op0)->swapOperands();
- I.swapOperands(); // Simplify below
- std::swap(Op0, Op1);
+ // A&(A^B) => A & ~B
+ {
+ Value *tmpOp0 = Op0;
+ Value *tmpOp1 = Op1;
+ if (Op0->hasOneUse() &&
+ match(Op0, m_Xor(m_Value(A), m_Value(B)))) {
+ if (A == Op1 || B == Op1 ) {
+ tmpOp1 = Op0;
+ tmpOp0 = Op1;
+ // Simplify below
+ }
}
- }
- if (Op1->hasOneUse() &&
- match(Op1, m_Xor(m_Value(A), m_Value(B)))) {
- if (B == Op0) { // B&(A^B) -> B&(B^A)
- cast<BinaryOperator>(Op1)->swapOperands();
- std::swap(A, B);
+ if (tmpOp1->hasOneUse() &&
+ match(tmpOp1, m_Xor(m_Value(A), m_Value(B)))) {
+ if (B == tmpOp0) {
+ std::swap(A, B);
+ }
+ // Notice that the pattern (A&(~B)) is actually (A&(-1^B)), so if
+ // A is originally -1 (or a vector of -1 and undefs), then we enter
+ // an endless loop. By checking that A is non-constant we ensure that
+ // we will never get to the loop.
+ if (A == tmpOp0 && !isa<Constant>(A)) // A&(A^B) -> A & ~B
+ return BinaryOperator::CreateAnd(A, Builder->CreateNot(B));
}
- // Notice that the pattern (A&(~B)) is actually (A&(-1^B)), so if
- // A is originally -1 (or a vector of -1 and undefs), then we enter
- // an endless loop. By checking that A is non-constant we ensure that
- // we will never get to the loop.
- if (A == Op0 && !isa<Constant>(A)) // A&(A^B) -> A & ~B
- return BinaryOperator::CreateAnd(A, Builder->CreateNot(B, "tmp"));
}
// (A&((~A)|B)) -> A&B
@@ -1224,7 +1225,7 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) {
// fold (and (cast A), (cast B)) -> (cast (and A, B))
if (CastInst *Op0C = dyn_cast<CastInst>(Op0))
if (CastInst *Op1C = dyn_cast<CastInst>(Op1)) {
- const Type *SrcTy = Op0C->getOperand(0)->getType();
+ Type *SrcTy = Op0C->getOperand(0)->getType();
if (Op0C->getOpcode() == Op1C->getOpcode() && // same cast kind ?
SrcTy == Op1C->getOperand(0)->getType() &&
SrcTy->isIntOrIntVectorTy()) {
@@ -2008,7 +2009,7 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) {
if (CastInst *Op0C = dyn_cast<CastInst>(Op0)) {
CastInst *Op1C = dyn_cast<CastInst>(Op1);
if (Op1C && Op0C->getOpcode() == Op1C->getOpcode()) {// same cast kind ?
- const Type *SrcTy = Op0C->getOperand(0)->getType();
+ Type *SrcTy = Op0C->getOperand(0)->getType();
if (SrcTy == Op1C->getOperand(0)->getType() &&
SrcTy->isIntOrIntVectorTy()) {
Value *Op0COp = Op0C->getOperand(0), *Op1COp = Op1C->getOperand(0);
@@ -2227,14 +2228,14 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) {
if (A == Op1) // (B|A)^B == (A|B)^B
std::swap(A, B);
if (B == Op1) // (A|B)^B == A & ~B
- return BinaryOperator::CreateAnd(A, Builder->CreateNot(Op1, "tmp"));
+ return BinaryOperator::CreateAnd(A, Builder->CreateNot(Op1));
} else if (match(Op0I, m_And(m_Value(A), m_Value(B))) &&
Op0I->hasOneUse()){
if (A == Op1) // (A&B)^A -> (B&A)^A
std::swap(A, B);
if (B == Op1 && // (B&A)^A == ~B & A
!isa<ConstantInt>(Op1)) { // Canonical form is (B&C)^C
- return BinaryOperator::CreateAnd(Builder->CreateNot(A, "tmp"), Op1);
+ return BinaryOperator::CreateAnd(Builder->CreateNot(A), Op1);
}
}
}
@@ -2288,7 +2289,7 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) {
if (CastInst *Op0C = dyn_cast<CastInst>(Op0)) {
if (CastInst *Op1C = dyn_cast<CastInst>(Op1))
if (Op0C->getOpcode() == Op1C->getOpcode()) { // same cast kind?
- const Type *SrcTy = Op0C->getOperand(0)->getType();
+ Type *SrcTy = Op0C->getOperand(0)->getType();
if (SrcTy == Op1C->getOperand(0)->getType() && SrcTy->isIntegerTy() &&
// Only do this if the casts both really cause code to be generated.
ShouldOptimizeCast(Op0C->getOpcode(), Op0C->getOperand(0),
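
The restructured combine above rests on the identity A & (A ^ B) = A & ~B:
expanding the xor, A & ((A & ~B) | (~A & B)) = (A & ~B) | (A & ~A & B)
= A & ~B. The non-constant check on A matters because with A = -1 the result
~B is exactly -1 ^ B, which re-matches the m_Xor pattern and would cycle
forever, as the comment notes.
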
diff --git a/lib/Transforms/InstCombine/InstCombineCalls.cpp b/lib/Transforms/InstCombine/InstCombineCalls.cpp
index 537f2b3..c7b3ff8 100644
--- a/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -12,7 +12,6 @@
//===----------------------------------------------------------------------===//
#include "InstCombine.h"
-#include "llvm/IntrinsicInst.h"
#include "llvm/Support/CallSite.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Analysis/MemoryBuiltins.h"
@@ -22,8 +21,8 @@ using namespace llvm;
/// getPromotedType - Return the specified type promoted as it would be to pass
/// through a va_arg area.
-static const Type *getPromotedType(const Type *Ty) {
- if (const IntegerType* ITy = dyn_cast<IntegerType>(Ty)) {
+static Type *getPromotedType(Type *Ty) {
+ if (IntegerType* ITy = dyn_cast<IntegerType>(Ty)) {
if (ITy->getBitWidth() < 32)
return Type::getInt32Ty(Ty->getContext());
}
@@ -64,7 +63,7 @@ Instruction *InstCombiner::SimplifyMemTransfer(MemIntrinsic *MI) {
unsigned DstAddrSp =
cast<PointerType>(MI->getArgOperand(0)->getType())->getAddressSpace();
- const IntegerType* IntType = IntegerType::get(MI->getContext(), Size<<3);
+ IntegerType* IntType = IntegerType::get(MI->getContext(), Size<<3);
Type *NewSrcPtrTy = PointerType::get(IntType, SrcAddrSp);
Type *NewDstPtrTy = PointerType::get(IntType, DstAddrSp);
@@ -76,18 +75,18 @@ Instruction *InstCombiner::SimplifyMemTransfer(MemIntrinsic *MI) {
// integer datatype.
Value *StrippedDest = MI->getArgOperand(0)->stripPointerCasts();
if (StrippedDest != MI->getArgOperand(0)) {
- const Type *SrcETy = cast<PointerType>(StrippedDest->getType())
+ Type *SrcETy = cast<PointerType>(StrippedDest->getType())
->getElementType();
if (TD && SrcETy->isSized() && TD->getTypeStoreSize(SrcETy) == Size) {
// The SrcETy might be something like {{{double}}} or [1 x double]. Rip
// down through these levels if so.
while (!SrcETy->isSingleValueType()) {
- if (const StructType *STy = dyn_cast<StructType>(SrcETy)) {
+ if (StructType *STy = dyn_cast<StructType>(SrcETy)) {
if (STy->getNumElements() == 1)
SrcETy = STy->getElementType(0);
else
break;
- } else if (const ArrayType *ATy = dyn_cast<ArrayType>(SrcETy)) {
+ } else if (ArrayType *ATy = dyn_cast<ArrayType>(SrcETy)) {
if (ATy->getNumElements() == 1)
SrcETy = ATy->getElementType();
else
@@ -142,7 +141,7 @@ Instruction *InstCombiner::SimplifyMemSet(MemSetInst *MI) {
// memset(s,c,n) -> store s, c (for n=1,2,4,8)
if (Len <= 8 && isPowerOf2_32((uint32_t)Len)) {
- const Type *ITy = IntegerType::get(MI->getContext(), Len*8); // n=1 -> i8.
+ Type *ITy = IntegerType::get(MI->getContext(), Len*8); // n=1 -> i8.
Value *Dest = MI->getDest();
unsigned DstAddrSp = cast<PointerType>(Dest->getType())->getAddressSpace();
@@ -250,7 +249,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
// We need target data for just about everything so depend on it.
if (!TD) break;
- const Type *ReturnTy = CI.getType();
+ Type *ReturnTy = CI.getType();
uint64_t DontKnow = II->getArgOperand(1) == Builder->getTrue() ? 0 : -1ULL;
// Get to the real allocated thing and offset as fast as possible.
@@ -266,8 +265,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
// Get the current byte offset into the thing. Use the original
// operand in case we're looking through a bitcast.
SmallVector<Value*, 8> Ops(GEP->idx_begin(), GEP->idx_end());
- Offset = TD->getIndexedOffset(GEP->getPointerOperandType(),
- Ops.data(), Ops.size());
+ Offset = TD->getIndexedOffset(GEP->getPointerOperandType(), Ops);
Op1 = GEP->getPointerOperand()->stripPointerCasts();
@@ -300,7 +298,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
}
} else if (CallInst *MI = extractMallocCall(Op1)) {
// Get allocation size.
- const Type* MallocType = getMallocAllocatedType(MI);
+ Type* MallocType = getMallocAllocatedType(MI);
if (MallocType && MallocType->isSized())
if (Value *NElems = getMallocArraySize(MI, TD, true))
if (ConstantInt *NElements = dyn_cast<ConstantInt>(NElems))
@@ -355,7 +353,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
case Intrinsic::cttz: {
// If all bits below the first known one are known zero,
// this value is constant.
- const IntegerType *IT = dyn_cast<IntegerType>(II->getArgOperand(0)->getType());
+ IntegerType *IT = dyn_cast<IntegerType>(II->getArgOperand(0)->getType());
// FIXME: Try to simplify vectors of integers.
if (!IT) break;
uint32_t BitWidth = IT->getBitWidth();
@@ -374,7 +372,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
case Intrinsic::ctlz: {
// If all bits above the first known one are known zero,
// this value is constant.
- const IntegerType *IT = dyn_cast<IntegerType>(II->getArgOperand(0)->getType());
+ IntegerType *IT = dyn_cast<IntegerType>(II->getArgOperand(0)->getType());
// FIXME: Try to simplify vectors of integers.
if (!IT) break;
uint32_t BitWidth = IT->getBitWidth();
@@ -392,7 +390,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
break;
case Intrinsic::uadd_with_overflow: {
Value *LHS = II->getArgOperand(0), *RHS = II->getArgOperand(1);
- const IntegerType *IT = cast<IntegerType>(II->getArgOperand(0)->getType());
+ IntegerType *IT = cast<IntegerType>(II->getArgOperand(0)->getType());
uint32_t BitWidth = IT->getBitWidth();
APInt Mask = APInt::getSignBit(BitWidth);
APInt LHSKnownZero(BitWidth, 0);
@@ -416,7 +414,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
UndefValue::get(LHS->getType()),
ConstantInt::getTrue(II->getContext())
};
- const StructType *ST = cast<StructType>(II->getType());
+ StructType *ST = cast<StructType>(II->getType());
Constant *Struct = ConstantStruct::get(ST, V);
return InsertValueInst::Create(Struct, Add, 0);
}
@@ -430,7 +428,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
UndefValue::get(LHS->getType()),
ConstantInt::getFalse(II->getContext())
};
- const StructType *ST = cast<StructType>(II->getType());
+ StructType *ST = cast<StructType>(II->getType());
Constant *Struct = ConstantStruct::get(ST, V);
return InsertValueInst::Create(Struct, Add, 0);
}
@@ -559,7 +557,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
case Intrinsic::ppc_altivec_stvxl:
// Turn stvx -> store if the pointer is known aligned.
if (getOrEnforceKnownAlignment(II->getArgOperand(1), 16, TD) >= 16) {
- const Type *OpPtrTy =
+ Type *OpPtrTy =
PointerType::getUnqual(II->getArgOperand(0)->getType());
Value *Ptr = Builder->CreateBitCast(II->getArgOperand(1), OpPtrTy);
return new StoreInst(II->getArgOperand(0), Ptr);
@@ -570,7 +568,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
case Intrinsic::x86_sse2_storeu_dq:
// Turn X86 storeu -> store if the pointer is known aligned.
if (getOrEnforceKnownAlignment(II->getArgOperand(0), 16, TD) >= 16) {
- const Type *OpPtrTy =
+ Type *OpPtrTy =
PointerType::getUnqual(II->getArgOperand(1)->getType());
Value *Ptr = Builder->CreateBitCast(II->getArgOperand(0), OpPtrTy);
return new StoreInst(II->getArgOperand(1), Ptr);
@@ -656,15 +654,13 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
if (ExtractedElts[Idx] == 0) {
ExtractedElts[Idx] =
- Builder->CreateExtractElement(Idx < 16 ? Op0 : Op1,
- ConstantInt::get(Type::getInt32Ty(II->getContext()),
- Idx&15, false), "tmp");
+ Builder->CreateExtractElement(Idx < 16 ? Op0 : Op1,
+ Builder->getInt32(Idx&15));
}
// Insert this value into the result vector.
Result = Builder->CreateInsertElement(Result, ExtractedElts[Idx],
- ConstantInt::get(Type::getInt32Ty(II->getContext()),
- i, false), "tmp");
+ Builder->getInt32(i));
}
return CastInst::Create(Instruction::BitCast, Result, CI.getType());
}
@@ -733,9 +729,11 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
}
}
- // If the stack restore is in a return/unwind block and if there are no
- // allocas or calls between the restore and the return, nuke the restore.
- if (!CannotRemove && (isa<ReturnInst>(TI) || isa<UnwindInst>(TI)))
+ // If the stack restore is in a return, resume, or unwind block and if there
+ // are no allocas or calls between the restore and the return, nuke the
+ // restore.
+ if (!CannotRemove && (isa<ReturnInst>(TI) || isa<ResumeInst>(TI) ||
+ isa<UnwindInst>(TI)))
return EraseInstFromFunction(CI);
break;
}
@@ -765,9 +763,9 @@ static bool isSafeToEliminateVarargsCast(const CallSite CS,
if (!CS.paramHasAttr(ix, Attribute::ByVal))
return true;
- const Type* SrcTy =
+ Type* SrcTy =
cast<PointerType>(CI->getOperand(0)->getType())->getElementType();
- const Type* DstTy = cast<PointerType>(CI->getType())->getElementType();
+ Type* DstTy = cast<PointerType>(CI->getType())->getElementType();
if (!SrcTy->isSized() || !DstTy->isSized())
return false;
if (!TD || TD->getTypeAllocSize(SrcTy) != TD->getTypeAllocSize(DstTy))
@@ -820,6 +818,83 @@ Instruction *InstCombiner::tryOptimizeCall(CallInst *CI, const TargetData *TD) {
return Simplifier.NewInstruction;
}
+static IntrinsicInst *FindInitTrampolineFromAlloca(Value *TrampMem) {
+ // Strip off at most one level of pointer casts, looking for an alloca. This
+ // is good enough in practice and simpler than handling any number of casts.
+ Value *Underlying = TrampMem->stripPointerCasts();
+ if (Underlying != TrampMem &&
+ (!Underlying->hasOneUse() || *Underlying->use_begin() != TrampMem))
+ return 0;
+ if (!isa<AllocaInst>(Underlying))
+ return 0;
+
+ IntrinsicInst *InitTrampoline = 0;
+ for (Value::use_iterator I = TrampMem->use_begin(), E = TrampMem->use_end();
+ I != E; I++) {
+ IntrinsicInst *II = dyn_cast<IntrinsicInst>(*I);
+ if (!II)
+ return 0;
+ if (II->getIntrinsicID() == Intrinsic::init_trampoline) {
+ if (InitTrampoline)
+ // More than one init_trampoline writes to this value. Give up.
+ return 0;
+ InitTrampoline = II;
+ continue;
+ }
+ if (II->getIntrinsicID() == Intrinsic::adjust_trampoline)
+ // Allow any number of calls to adjust.trampoline.
+ continue;
+ return 0;
+ }
+
+ // No call to init.trampoline found.
+ if (!InitTrampoline)
+ return 0;
+
+ // Check that the alloca is being used in the expected way.
+ if (InitTrampoline->getOperand(0) != TrampMem)
+ return 0;
+
+ return InitTrampoline;
+}
+
+static IntrinsicInst *FindInitTrampolineFromBB(IntrinsicInst *AdjustTramp,
+ Value *TrampMem) {
+ // Visit all the previous instructions in the basic block, and try to find an
+ // init.trampoline which has a direct path to the adjust.trampoline.
+ for (BasicBlock::iterator I = AdjustTramp,
+ E = AdjustTramp->getParent()->begin(); I != E; ) {
+ Instruction *Inst = --I;
+ if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I))
+ if (II->getIntrinsicID() == Intrinsic::init_trampoline &&
+ II->getOperand(0) == TrampMem)
+ return II;
+ if (Inst->mayWriteToMemory())
+ return 0;
+ }
+ return 0;
+}
+
+// Given a call to llvm.adjust.trampoline, find and return the corresponding
+// call to llvm.init.trampoline if the call to the trampoline can be optimized
+// to a direct call to a function. Otherwise return NULL.
+//
+static IntrinsicInst *FindInitTrampoline(Value *Callee) {
+ Callee = Callee->stripPointerCasts();
+ IntrinsicInst *AdjustTramp = dyn_cast<IntrinsicInst>(Callee);
+ if (!AdjustTramp ||
+ AdjustTramp->getIntrinsicID() != Intrinsic::adjust_trampoline)
+ return 0;
+
+ Value *TrampMem = AdjustTramp->getOperand(0);
+
+ if (IntrinsicInst *IT = FindInitTrampolineFromAlloca(TrampMem))
+ return IT;
+ if (IntrinsicInst *IT = FindInitTrampolineFromBB(AdjustTramp, TrampMem))
+ return IT;
+ return 0;
+}
+
// visitCallSite - Improvements for call and invoke instructions.
//
Instruction *InstCombiner::visitCallSite(CallSite CS) {
@@ -879,13 +954,11 @@ Instruction *InstCombiner::visitCallSite(CallSite CS) {
return EraseInstFromFunction(*CS.getInstruction());
}
- if (BitCastInst *BC = dyn_cast<BitCastInst>(Callee))
- if (IntrinsicInst *In = dyn_cast<IntrinsicInst>(BC->getOperand(0)))
- if (In->getIntrinsicID() == Intrinsic::init_trampoline)
- return transformCallThroughTrampoline(CS);
+ if (IntrinsicInst *II = FindInitTrampoline(Callee))
+ return transformCallThroughTrampoline(CS, II);
- const PointerType *PTy = cast<PointerType>(Callee->getType());
- const FunctionType *FTy = cast<FunctionType>(PTy->getElementType());
+ PointerType *PTy = cast<PointerType>(Callee->getType());
+ FunctionType *FTy = cast<FunctionType>(PTy->getElementType());
if (FTy->isVarArg()) {
int ix = FTy->getNumParams() + (isa<InvokeInst>(Callee) ? 3 : 1);
// See if we can optimize any arguments passed through the varargs area of
@@ -934,9 +1007,9 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) {
// would cause a type conversion of one of our arguments, change this call to
// be a direct call with arguments cast to the appropriate types.
//
- const FunctionType *FT = Callee->getFunctionType();
- const Type *OldRetTy = Caller->getType();
- const Type *NewRetTy = FT->getReturnType();
+ FunctionType *FT = Callee->getFunctionType();
+ Type *OldRetTy = Caller->getType();
+ Type *NewRetTy = FT->getReturnType();
if (NewRetTy->isStructTy())
return false; // TODO: Handle multiple return values.
@@ -982,8 +1055,8 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) {
CallSite::arg_iterator AI = CS.arg_begin();
for (unsigned i = 0, e = NumCommonArgs; i != e; ++i, ++AI) {
- const Type *ParamTy = FT->getParamType(i);
- const Type *ActTy = (*AI)->getType();
+ Type *ParamTy = FT->getParamType(i);
+ Type *ActTy = (*AI)->getType();
if (!CastInst::isCastable(ActTy, ParamTy))
return false; // Cannot transform this parameter value.
@@ -995,11 +1068,11 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) {
// If the parameter is passed as a byval argument, then we have to have a
// sized type and the sized type has to have the same size as the old type.
if (ParamTy != ActTy && (Attrs & Attribute::ByVal)) {
- const PointerType *ParamPTy = dyn_cast<PointerType>(ParamTy);
+ PointerType *ParamPTy = dyn_cast<PointerType>(ParamTy);
if (ParamPTy == 0 || !ParamPTy->getElementType()->isSized() || TD == 0)
return false;
- const Type *CurElTy = cast<PointerType>(ActTy)->getElementType();
+ Type *CurElTy = cast<PointerType>(ActTy)->getElementType();
if (TD->getTypeAllocSize(CurElTy) !=
TD->getTypeAllocSize(ParamPTy->getElementType()))
return false;
@@ -1023,7 +1096,7 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) {
// If the callee is just a declaration, don't change the varargsness of the
// call. We don't want to introduce a varargs call where one doesn't
// already exist.
- const PointerType *APTy = cast<PointerType>(CS.getCalledValue()->getType());
+ PointerType *APTy = cast<PointerType>(CS.getCalledValue()->getType());
if (FT->isVarArg()!=cast<FunctionType>(APTy->getElementType())->isVarArg())
return false;
}
@@ -1062,13 +1135,13 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) {
AI = CS.arg_begin();
for (unsigned i = 0; i != NumCommonArgs; ++i, ++AI) {
- const Type *ParamTy = FT->getParamType(i);
+ Type *ParamTy = FT->getParamType(i);
if ((*AI)->getType() == ParamTy) {
Args.push_back(*AI);
} else {
Instruction::CastOps opcode = CastInst::getCastOpcode(*AI,
false, ParamTy, false);
- Args.push_back(Builder->CreateCast(opcode, *AI, ParamTy, "tmp"));
+ Args.push_back(Builder->CreateCast(opcode, *AI, ParamTy));
}
// Add any parameter attributes.
@@ -1089,12 +1162,12 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) {
} else {
// Add all of the arguments in their promoted form to the arg list.
for (unsigned i = FT->getNumParams(); i != NumActualArgs; ++i, ++AI) {
- const Type *PTy = getPromotedType((*AI)->getType());
+ Type *PTy = getPromotedType((*AI)->getType());
if (PTy != (*AI)->getType()) {
// Must promote to pass through va_arg area!
Instruction::CastOps opcode =
CastInst::getCastOpcode(*AI, false, PTy, false);
- Args.push_back(Builder->CreateCast(opcode, *AI, PTy, "tmp"));
+ Args.push_back(Builder->CreateCast(opcode, *AI, PTy));
} else {
Args.push_back(*AI);
}
@@ -1138,13 +1211,13 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) {
if (!NV->getType()->isVoidTy()) {
Instruction::CastOps opcode =
CastInst::getCastOpcode(NC, false, OldRetTy, false);
- NV = NC = CastInst::Create(opcode, NC, OldRetTy, "tmp");
+ NV = NC = CastInst::Create(opcode, NC, OldRetTy);
NC->setDebugLoc(Caller->getDebugLoc());
// If this is an invoke instruction, we should insert it after the first
 // non-PHI instruction in the normal successor block.
if (InvokeInst *II = dyn_cast<InvokeInst>(Caller)) {
- BasicBlock::iterator I = II->getNormalDest()->getFirstNonPHI();
+ BasicBlock::iterator I = II->getNormalDest()->getFirstInsertionPt();
InsertNewInstBefore(NC, *I);
} else {
// Otherwise, it's a call, just insert cast right after the call.
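// Editor's note: illustrative sketch, not part of the imported diff. The
// getFirstNonPHI -> getFirstInsertionPt switch above matters under the new
// LLVM 3.0 exception handling: a landingpad instruction is pinned to the
// head of its block, so a safe insertion point must skip it along with the
// PHIs. A minimal stand-in for that scan, over opcode names:
#include <cassert>
#include <string>
#include <vector>

static size_t firstInsertionPt(const std::vector<std::string> &BB) {
  size_t i = 0;
  while (i < BB.size() && (BB[i] == "phi" || BB[i] == "landingpad"))
    ++i; // PHIs and a landingpad stay grouped at the block head.
  return i;
}

int main() {
  std::vector<std::string> BB = {"phi", "landingpad", "add", "ret"};
  assert(firstInsertionPt(BB) == 2); // first slot where the cast may go
  return 0;
}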
@@ -1163,13 +1236,16 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) {
return true;
}
-// transformCallThroughTrampoline - Turn a call to a function created by the
-// init_trampoline intrinsic into a direct call to the underlying function.
+// transformCallThroughTrampoline - Turn a call to a function created by
+// init_trampoline / adjust_trampoline intrinsic pair into a direct call to the
+// underlying function.
//
-Instruction *InstCombiner::transformCallThroughTrampoline(CallSite CS) {
+Instruction *
+InstCombiner::transformCallThroughTrampoline(CallSite CS,
+ IntrinsicInst *Tramp) {
Value *Callee = CS.getCalledValue();
- const PointerType *PTy = cast<PointerType>(Callee->getType());
- const FunctionType *FTy = cast<FunctionType>(PTy->getElementType());
+ PointerType *PTy = cast<PointerType>(Callee->getType());
+ FunctionType *FTy = cast<FunctionType>(PTy->getElementType());
const AttrListPtr &Attrs = CS.getAttributes();
// If the call already has the 'nest' attribute somewhere then give up -
@@ -1177,12 +1253,12 @@ Instruction *InstCombiner::transformCallThroughTrampoline(CallSite CS) {
if (Attrs.hasAttrSomewhere(Attribute::Nest))
return 0;
- IntrinsicInst *Tramp =
- cast<IntrinsicInst>(cast<BitCastInst>(Callee)->getOperand(0));
+ assert(Tramp &&
+ "transformCallThroughTrampoline called with incorrect CallSite.");
Function *NestF =cast<Function>(Tramp->getArgOperand(1)->stripPointerCasts());
- const PointerType *NestFPTy = cast<PointerType>(NestF->getType());
- const FunctionType *NestFTy = cast<FunctionType>(NestFPTy->getElementType());
+ PointerType *NestFPTy = cast<PointerType>(NestF->getType());
+ FunctionType *NestFTy = cast<FunctionType>(NestFPTy->getElementType());
const AttrListPtr &NestAttrs = NestF->getAttributes();
if (!NestAttrs.isEmpty()) {
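// Editor's note: illustrative analogy in plain C++, not the LLVM API. A
// trampoline built by init_trampoline wraps a function taking a hidden
// 'nest' pointer; transformCallThroughTrampoline replaces the indirect call
// through the trampoline with a direct call that passes the nest argument
// explicitly. The same rewrite, modeled with a closure:
#include <cassert>
#include <functional>

static int underlying(int *nest, int x) { return *nest + x; }

int main() {
  int chain = 40;
  // Before: an opaque callable hiding the static chain (the "trampoline").
  std::function<int(int)> tramp =
      [&chain](int x) { return underlying(&chain, x); };
  assert(tramp(2) == 42);
  // After: a direct call with the nest argument made explicit.
  assert(underlying(&chain, 2) == 42);
  return 0;
}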
diff --git a/lib/Transforms/InstCombine/InstCombineCasts.cpp b/lib/Transforms/InstCombine/InstCombineCasts.cpp
index 82c734e..f10e48a 100644
--- a/lib/Transforms/InstCombine/InstCombineCasts.cpp
+++ b/lib/Transforms/InstCombine/InstCombineCasts.cpp
@@ -12,6 +12,7 @@
//===----------------------------------------------------------------------===//
#include "InstCombine.h"
+#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Support/PatternMatch.h"
using namespace llvm;
@@ -79,14 +80,14 @@ Instruction *InstCombiner::PromoteCastOfAllocation(BitCastInst &CI,
// This requires TargetData to get the alloca alignment and size information.
if (!TD) return 0;
- const PointerType *PTy = cast<PointerType>(CI.getType());
+ PointerType *PTy = cast<PointerType>(CI.getType());
BuilderTy AllocaBuilder(*Builder);
AllocaBuilder.SetInsertPoint(AI.getParent(), &AI);
// Get the type really allocated and the type casted to.
- const Type *AllocElTy = AI.getAllocatedType();
- const Type *CastElTy = PTy->getElementType();
+ Type *AllocElTy = AI.getAllocatedType();
+ Type *CastElTy = PTy->getElementType();
if (!AllocElTy->isSized() || !CastElTy->isSized()) return 0;
unsigned AllocElTyAlign = TD->getABITypeAlignment(AllocElTy);
@@ -121,13 +122,13 @@ Instruction *InstCombiner::PromoteCastOfAllocation(BitCastInst &CI,
} else {
Amt = ConstantInt::get(AI.getArraySize()->getType(), Scale);
// Insert before the alloca, not before the cast.
- Amt = AllocaBuilder.CreateMul(Amt, NumElements, "tmp");
+ Amt = AllocaBuilder.CreateMul(Amt, NumElements);
}
if (uint64_t Offset = (AllocElTySize*ArrayOffset)/CastElTySize) {
Value *Off = ConstantInt::get(AI.getArraySize()->getType(),
Offset, true);
- Amt = AllocaBuilder.CreateAdd(Amt, Off, "tmp");
+ Amt = AllocaBuilder.CreateAdd(Amt, Off);
}
AllocaInst *New = AllocaBuilder.CreateAlloca(CastElTy, Amt);
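// Editor's note: the arithmetic behind the rewrite above, with assumed
// sizes. Turning "bitcast (alloca AllocElTy, N) to CastElTy*" into
// "alloca CastElTy, M" must preserve the byte count exactly; e.g. N i32
// elements viewed as i8 become M = N*4 elements:
#include <cassert>
int main() {
  unsigned AllocElTySize = 4, CastElTySize = 1, N = 10;
  assert(AllocElTySize % CastElTySize == 0); // only when sizes divide evenly
  unsigned Scale = AllocElTySize / CastElTySize;
  unsigned M = N * Scale;                        // Amt = NumElements * Scale
  assert(M * CastElTySize == N * AllocElTySize); // same allocation in bytes
  return 0;
}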
@@ -151,7 +152,7 @@ Instruction *InstCombiner::PromoteCastOfAllocation(BitCastInst &CI,
/// EvaluateInDifferentType - Given an expression that
/// CanEvaluateTruncated or CanEvaluateSExtd returns true for, actually
/// insert the code to evaluate the expression.
-Value *InstCombiner::EvaluateInDifferentType(Value *V, const Type *Ty,
+Value *InstCombiner::EvaluateInDifferentType(Value *V, Type *Ty,
bool isSigned) {
if (Constant *C = dyn_cast<Constant>(V)) {
C = ConstantExpr::getIntegerCast(C, Ty, isSigned /*Sext or ZExt*/);
@@ -229,12 +230,12 @@ static Instruction::CastOps
isEliminableCastPair(
const CastInst *CI, ///< The first cast instruction
unsigned opcode, ///< The opcode of the second cast instruction
- const Type *DstTy, ///< The target type for the second cast instruction
+ Type *DstTy, ///< The target type for the second cast instruction
TargetData *TD ///< The target data for pointer size
) {
- const Type *SrcTy = CI->getOperand(0)->getType(); // A from above
- const Type *MidTy = CI->getType(); // B from above
+ Type *SrcTy = CI->getOperand(0)->getType(); // A from above
+ Type *MidTy = CI->getType(); // B from above
// Get the opcodes of the two Cast instructions
Instruction::CastOps firstOp = Instruction::CastOps(CI->getOpcode());
@@ -260,7 +261,7 @@ isEliminableCastPair(
/// the cast can be eliminated by some other simple transformation, we prefer
/// to do the simplification first.
bool InstCombiner::ShouldOptimizeCast(Instruction::CastOps opc, const Value *V,
- const Type *Ty) {
+ Type *Ty) {
// Noop casts and casts of constants should be eliminated trivially.
if (V->getType() == Ty || isa<Constant>(V)) return false;
@@ -324,7 +325,7 @@ Instruction *InstCombiner::commonCastTransforms(CastInst &CI) {
///
/// This function works on both vectors and scalars.
///
-static bool CanEvaluateTruncated(Value *V, const Type *Ty) {
+static bool CanEvaluateTruncated(Value *V, Type *Ty) {
// We can always evaluate constants in another type.
if (isa<Constant>(V))
return true;
@@ -332,7 +333,7 @@ static bool CanEvaluateTruncated(Value *V, const Type *Ty) {
Instruction *I = dyn_cast<Instruction>(V);
if (!I) return false;
- const Type *OrigTy = V->getType();
+ Type *OrigTy = V->getType();
// If this is an extension from the dest type, we can eliminate it, even if it
// has multiple uses.
@@ -435,7 +436,7 @@ Instruction *InstCombiner::visitTrunc(TruncInst &CI) {
return &CI;
Value *Src = CI.getOperand(0);
- const Type *DestTy = CI.getType(), *SrcTy = Src->getType();
+ Type *DestTy = CI.getType(), *SrcTy = Src->getType();
// Attempt to truncate the entire input expression tree to the destination
// type. Only do this if the dest type is a simple type, don't convert the
@@ -456,7 +457,7 @@ Instruction *InstCombiner::visitTrunc(TruncInst &CI) {
// Canonicalize trunc x to i1 -> (icmp ne (and x, 1), 0), likewise for vector.
if (DestTy->getScalarSizeInBits() == 1) {
Constant *One = ConstantInt::get(Src->getType(), 1);
- Src = Builder->CreateAnd(Src, One, "tmp");
+ Src = Builder->CreateAnd(Src, One);
Value *Zero = Constant::getNullValue(Src->getType());
return new ICmpInst(ICmpInst::ICMP_NE, Src, Zero);
}
@@ -518,7 +519,7 @@ Instruction *InstCombiner::transformZExtICmp(ICmpInst *ICI, Instruction &CI,
In->getType()->getScalarSizeInBits()-1);
In = Builder->CreateLShr(In, Sh, In->getName()+".lobit");
if (In->getType() != CI.getType())
- In = Builder->CreateIntCast(In, CI.getType(), false/*ZExt*/, "tmp");
+ In = Builder->CreateIntCast(In, CI.getType(), false/*ZExt*/);
if (ICI->getPredicate() == ICmpInst::ICMP_SGT) {
Constant *One = ConstantInt::get(In->getType(), 1);
@@ -572,7 +573,7 @@ Instruction *InstCombiner::transformZExtICmp(ICmpInst *ICI, Instruction &CI,
if ((Op1CV != 0) == isNE) { // Toggle the low bit.
Constant *One = ConstantInt::get(In->getType(), 1);
- In = Builder->CreateXor(In, One, "tmp");
+ In = Builder->CreateXor(In, One);
}
if (CI.getType() == In->getType())
@@ -586,7 +587,7 @@ Instruction *InstCombiner::transformZExtICmp(ICmpInst *ICI, Instruction &CI,
// It is also profitable to transform icmp eq into not(xor(A, B)) because that
// may lead to additional simplifications.
if (ICI->isEquality() && CI.getType() == ICI->getOperand(0)->getType()) {
- if (const IntegerType *ITy = dyn_cast<IntegerType>(CI.getType())) {
+ if (IntegerType *ITy = dyn_cast<IntegerType>(CI.getType())) {
uint32_t BitWidth = ITy->getBitWidth();
Value *LHS = ICI->getOperand(0);
Value *RHS = ICI->getOperand(1);
@@ -644,7 +645,7 @@ Instruction *InstCombiner::transformZExtICmp(ICmpInst *ICI, Instruction &CI,
/// clear the top bits anyway, doing this has no extra cost.
///
/// This function works on both vectors and scalars.
-static bool CanEvaluateZExtd(Value *V, const Type *Ty, unsigned &BitsToClear) {
+static bool CanEvaluateZExtd(Value *V, Type *Ty, unsigned &BitsToClear) {
BitsToClear = 0;
if (isa<Constant>(V))
return true;
@@ -758,7 +759,7 @@ Instruction *InstCombiner::visitZExt(ZExtInst &CI) {
return &CI;
Value *Src = CI.getOperand(0);
- const Type *SrcTy = Src->getType(), *DestTy = CI.getType();
+ Type *SrcTy = Src->getType(), *DestTy = CI.getType();
// Attempt to extend the entire input expression tree to the destination
// type. Only do this if the dest type is a simple type, don't convert the
@@ -820,7 +821,7 @@ Instruction *InstCombiner::visitZExt(ZExtInst &CI) {
AndValue));
}
if (SrcSize > DstSize) {
- Value *Trunc = Builder->CreateTrunc(A, CI.getType(), "tmp");
+ Value *Trunc = Builder->CreateTrunc(A, CI.getType());
APInt AndValue(APInt::getLowBitsSet(DstSize, MidSize));
return BinaryOperator::CreateAnd(Trunc,
ConstantInt::get(Trunc->getType(),
@@ -867,7 +868,7 @@ Instruction *InstCombiner::visitZExt(ZExtInst &CI) {
Value *TI0 = TI->getOperand(0);
if (TI0->getType() == CI.getType()) {
Constant *ZC = ConstantExpr::getZExt(C, CI.getType());
- Value *NewAnd = Builder->CreateAnd(TI0, ZC, "tmp");
+ Value *NewAnd = Builder->CreateAnd(TI0, ZC);
return BinaryOperator::CreateXor(NewAnd, ZC);
}
}
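// Editor's note: standalone check of the identity these zext hunks lean on:
// zext(trunc(x)) only clears high bits, so it equals a mask; e.g.
// trunc i32->i8 followed by zext i8->i32 is "and x, 255".
#include <cassert>
#include <cstdint>
int main() {
  for (uint32_t x = 0; x < 100000; x += 37)
    assert((uint32_t)(uint8_t)x == (x & 0xffu));
  return 0;
}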
@@ -900,7 +901,7 @@ Instruction *InstCombiner::transformSExtICmp(ICmpInst *ICI, Instruction &CI) {
Op0->getType()->getScalarSizeInBits()-1);
Value *In = Builder->CreateAShr(Op0, Sh, Op0->getName()+".lobit");
if (In->getType() != CI.getType())
- In = Builder->CreateIntCast(In, CI.getType(), true/*SExt*/, "tmp");
+ In = Builder->CreateIntCast(In, CI.getType(), true/*SExt*/);
if (Pred == ICmpInst::ICMP_SGT)
In = Builder->CreateNot(In, In->getName()+".not");
@@ -965,10 +966,10 @@ Instruction *InstCombiner::transformSExtICmp(ICmpInst *ICI, Instruction &CI) {
}
// vector (x <s 0) ? -1 : 0 -> ashr x, 31 -> all ones if signed.
- if (const VectorType *VTy = dyn_cast<VectorType>(CI.getType())) {
+ if (VectorType *VTy = dyn_cast<VectorType>(CI.getType())) {
if (Pred == ICmpInst::ICMP_SLT && match(Op1, m_Zero()) &&
Op0->getType() == CI.getType()) {
- const Type *EltTy = VTy->getElementType();
+ Type *EltTy = VTy->getElementType();
// splat the shift constant to a constant vector.
Constant *VSh = ConstantInt::get(VTy, EltTy->getScalarSizeInBits()-1);
@@ -988,7 +989,7 @@ Instruction *InstCombiner::transformSExtICmp(ICmpInst *ICI, Instruction &CI) {
///
/// This function works on both vectors and scalars.
///
-static bool CanEvaluateSExtd(Value *V, const Type *Ty) {
+static bool CanEvaluateSExtd(Value *V, Type *Ty) {
assert(V->getType()->getScalarSizeInBits() < Ty->getScalarSizeInBits() &&
"Can't sign extend type to a smaller type");
// If this is a constant, it can be trivially promoted.
@@ -1063,7 +1064,7 @@ Instruction *InstCombiner::visitSExt(SExtInst &CI) {
return &CI;
Value *Src = CI.getOperand(0);
- const Type *SrcTy = Src->getType(), *DestTy = CI.getType();
+ Type *SrcTy = Src->getType(), *DestTy = CI.getType();
// Attempt to extend the entire input expression tree to the destination
// type. Only do this if the dest type is a simple type, don't convert the
@@ -1192,7 +1193,7 @@ Instruction *InstCombiner::visitFPTrunc(FPTruncInst &CI) {
case Instruction::FMul:
case Instruction::FDiv:
case Instruction::FRem:
- const Type *SrcTy = OpI->getType();
+ Type *SrcTy = OpI->getType();
Value *LHSTrunc = LookThroughFPExtensions(OpI->getOperand(0));
Value *RHSTrunc = LookThroughFPExtensions(OpI->getOperand(1));
if (LHSTrunc->getType() != SrcTy &&
@@ -1306,13 +1307,13 @@ Instruction *InstCombiner::visitIntToPtr(IntToPtrInst &CI) {
if (CI.getOperand(0)->getType()->getScalarSizeInBits() >
TD->getPointerSizeInBits()) {
Value *P = Builder->CreateTrunc(CI.getOperand(0),
- TD->getIntPtrType(CI.getContext()), "tmp");
+ TD->getIntPtrType(CI.getContext()));
return new IntToPtrInst(P, CI.getType());
}
if (CI.getOperand(0)->getType()->getScalarSizeInBits() <
TD->getPointerSizeInBits()) {
Value *P = Builder->CreateZExt(CI.getOperand(0),
- TD->getIntPtrType(CI.getContext()), "tmp");
+ TD->getIntPtrType(CI.getContext()));
return new IntToPtrInst(P, CI.getType());
}
}
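// Editor's note: illustrative sketch of the normalization above, assuming a
// 32-bit pointer. An integer fed to inttoptr is first brought to pointer
// width: truncated if wider, zero-extended if narrower.
#include <cassert>
#include <cstdint>
int main() {
  uint64_t wide = 0x1122334455667788ULL;
  assert((uint32_t)wide == 0x55667788u);   // trunc to intptr
  uint16_t narrow = 0xBEEF;
  assert((uint32_t)narrow == 0x0000BEEFu); // zext to intptr
  return 0;
}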
@@ -1351,7 +1352,7 @@ Instruction *InstCombiner::commonPointerCastTransforms(CastInst &CI) {
// Get the base pointer input of the bitcast, and the type it points to.
Value *OrigBase = cast<BitCastInst>(GEP->getOperand(0))->getOperand(0);
- const Type *GEPIdxTy =
+ Type *GEPIdxTy =
cast<PointerType>(OrigBase->getType())->getElementType();
SmallVector<Value*, 8> NewIndices;
if (FindElementAtOffset(GEPIdxTy, Offset, NewIndices)) {
@@ -1359,9 +1360,8 @@ Instruction *InstCombiner::commonPointerCastTransforms(CastInst &CI) {
// and bitcast the result. This eliminates one bitcast, potentially
// two.
Value *NGEP = cast<GEPOperator>(GEP)->isInBounds() ?
- Builder->CreateInBoundsGEP(OrigBase,
- NewIndices.begin(), NewIndices.end()) :
- Builder->CreateGEP(OrigBase, NewIndices.begin(), NewIndices.end());
+ Builder->CreateInBoundsGEP(OrigBase, NewIndices) :
+ Builder->CreateGEP(OrigBase, NewIndices);
NGEP->takeName(GEP);
if (isa<BitCastInst>(CI))
@@ -1382,14 +1382,12 @@ Instruction *InstCombiner::visitPtrToInt(PtrToIntInst &CI) {
if (TD) {
if (CI.getType()->getScalarSizeInBits() < TD->getPointerSizeInBits()) {
Value *P = Builder->CreatePtrToInt(CI.getOperand(0),
- TD->getIntPtrType(CI.getContext()),
- "tmp");
+ TD->getIntPtrType(CI.getContext()));
return new TruncInst(P, CI.getType());
}
if (CI.getType()->getScalarSizeInBits() > TD->getPointerSizeInBits()) {
Value *P = Builder->CreatePtrToInt(CI.getOperand(0),
- TD->getIntPtrType(CI.getContext()),
- "tmp");
+ TD->getIntPtrType(CI.getContext()));
return new ZExtInst(P, CI.getType());
}
}
@@ -1402,12 +1400,12 @@ Instruction *InstCombiner::visitPtrToInt(PtrToIntInst &CI) {
/// replace it with a shuffle (and vector/vector bitcast) if possible.
///
/// The source and destination vector types may have different element types.
-static Instruction *OptimizeVectorResize(Value *InVal, const VectorType *DestTy,
+static Instruction *OptimizeVectorResize(Value *InVal, VectorType *DestTy,
InstCombiner &IC) {
// We can only do this optimization if the output is a multiple of the input
// element size, or the input is a multiple of the output element size.
// Convert the input type to have the same element type as the output.
- const VectorType *SrcTy = cast<VectorType>(InVal->getType());
+ VectorType *SrcTy = cast<VectorType>(InVal->getType());
if (SrcTy->getElementType() != DestTy->getElementType()) {
// The input types don't need to be identical, but for now they must be the
@@ -1427,7 +1425,7 @@ static Instruction *OptimizeVectorResize(Value *InVal, const VectorType *DestTy,
// size of the input.
SmallVector<Constant*, 16> ShuffleMask;
Value *V2;
- const IntegerType *Int32Ty = Type::getInt32Ty(SrcTy->getContext());
+ IntegerType *Int32Ty = Type::getInt32Ty(SrcTy->getContext());
if (SrcTy->getNumElements() > DestTy->getNumElements()) {
// If we're shrinking the number of elements, just shuffle in the low
@@ -1453,11 +1451,11 @@ static Instruction *OptimizeVectorResize(Value *InVal, const VectorType *DestTy,
return new ShuffleVectorInst(InVal, V2, ConstantVector::get(ShuffleMask));
}
-static bool isMultipleOfTypeSize(unsigned Value, const Type *Ty) {
+static bool isMultipleOfTypeSize(unsigned Value, Type *Ty) {
return Value % Ty->getPrimitiveSizeInBits() == 0;
}
-static unsigned getTypeSizeIndex(unsigned Value, const Type *Ty) {
+static unsigned getTypeSizeIndex(unsigned Value, Type *Ty) {
return Value / Ty->getPrimitiveSizeInBits();
}
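// Editor's note: standalone illustration of the two helpers above. A bit
// offset inside a vector maps to an element index by dividing by the
// element width, and is a clean element boundary only when it is an exact
// multiple of that width.
#include <cassert>
static bool isMultipleOfSize(unsigned Bits, unsigned EltBits) {
  return Bits % EltBits == 0;
}
static unsigned sizeIndex(unsigned Bits, unsigned EltBits) {
  return Bits / EltBits;
}
int main() {
  assert(isMultipleOfSize(64, 32));  // bit 64 starts an i32 element...
  assert(sizeIndex(64, 32) == 2);    // ...namely element #2
  assert(!isMultipleOfSize(40, 32)); // bit 40 falls mid-element
  return 0;
}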
@@ -1471,7 +1469,7 @@ static unsigned getTypeSizeIndex(unsigned Value, const Type *Ty) {
/// filling in Elements with the elements found here.
static bool CollectInsertionElements(Value *V, unsigned ElementIndex,
SmallVectorImpl<Value*> &Elements,
- const Type *VecEltTy) {
+ Type *VecEltTy) {
// Undef values never contribute useful bits to the result.
if (isa<UndefValue>(V)) return true;
@@ -1508,7 +1506,7 @@ static bool CollectInsertionElements(Value *V, unsigned ElementIndex,
C = ConstantExpr::getBitCast(C, IntegerType::get(V->getContext(),
C->getType()->getPrimitiveSizeInBits()));
unsigned ElementSize = VecEltTy->getPrimitiveSizeInBits();
- const Type *ElementIntTy = IntegerType::get(C->getContext(), ElementSize);
+ Type *ElementIntTy = IntegerType::get(C->getContext(), ElementSize);
for (unsigned i = 0; i != NumElts; ++i) {
Constant *Piece = ConstantExpr::getLShr(C, ConstantInt::get(C->getType(),
@@ -1572,7 +1570,7 @@ static bool CollectInsertionElements(Value *V, unsigned ElementIndex,
/// Into two insertelements that do "buildvector{%inc, %inc5}".
static Value *OptimizeIntegerToVectorInsertions(BitCastInst &CI,
InstCombiner &IC) {
- const VectorType *DestVecTy = cast<VectorType>(CI.getType());
+ VectorType *DestVecTy = cast<VectorType>(CI.getType());
Value *IntInput = CI.getOperand(0);
SmallVector<Value*, 8> Elements(DestVecTy->getNumElements());
@@ -1599,7 +1597,7 @@ static Value *OptimizeIntegerToVectorInsertions(BitCastInst &CI,
/// bitcast. The various long double bitcasts can't get in here.
static Instruction *OptimizeIntToFloatBitCast(BitCastInst &CI,InstCombiner &IC){
Value *Src = CI.getOperand(0);
- const Type *DestTy = CI.getType();
+ Type *DestTy = CI.getType();
// If this is a bitcast from int to float, check to see if the int is an
// extraction from a vector.
@@ -1607,7 +1605,7 @@ static Instruction *OptimizeIntToFloatBitCast(BitCastInst &CI,InstCombiner &IC){
// bitcast(trunc(bitcast(somevector)))
if (match(Src, m_Trunc(m_BitCast(m_Value(VecInput)))) &&
isa<VectorType>(VecInput->getType())) {
- const VectorType *VecTy = cast<VectorType>(VecInput->getType());
+ VectorType *VecTy = cast<VectorType>(VecInput->getType());
unsigned DestWidth = DestTy->getPrimitiveSizeInBits();
if (VecTy->getPrimitiveSizeInBits() % DestWidth == 0) {
@@ -1628,7 +1626,7 @@ static Instruction *OptimizeIntToFloatBitCast(BitCastInst &CI,InstCombiner &IC){
if (match(Src, m_Trunc(m_LShr(m_BitCast(m_Value(VecInput)),
m_ConstantInt(ShAmt)))) &&
isa<VectorType>(VecInput->getType())) {
- const VectorType *VecTy = cast<VectorType>(VecInput->getType());
+ VectorType *VecTy = cast<VectorType>(VecInput->getType());
unsigned DestWidth = DestTy->getPrimitiveSizeInBits();
if (VecTy->getPrimitiveSizeInBits() % DestWidth == 0 &&
ShAmt->getZExtValue() % DestWidth == 0) {
@@ -1651,18 +1649,18 @@ Instruction *InstCombiner::visitBitCast(BitCastInst &CI) {
// If the operands are integer typed then apply the integer transforms,
// otherwise just apply the common ones.
Value *Src = CI.getOperand(0);
- const Type *SrcTy = Src->getType();
- const Type *DestTy = CI.getType();
+ Type *SrcTy = Src->getType();
+ Type *DestTy = CI.getType();
// Get rid of casts from one type to the same type. These are useless and can
// be replaced by the operand.
if (DestTy == Src->getType())
return ReplaceInstUsesWith(CI, Src);
- if (const PointerType *DstPTy = dyn_cast<PointerType>(DestTy)) {
- const PointerType *SrcPTy = cast<PointerType>(SrcTy);
- const Type *DstElTy = DstPTy->getElementType();
- const Type *SrcElTy = SrcPTy->getElementType();
+ if (PointerType *DstPTy = dyn_cast<PointerType>(DestTy)) {
+ PointerType *SrcPTy = cast<PointerType>(SrcTy);
+ Type *DstElTy = DstPTy->getElementType();
+ Type *SrcElTy = SrcPTy->getElementType();
// If the address spaces don't match, don't eliminate the bitcast, which is
// required for changing types.
@@ -1693,7 +1691,7 @@ Instruction *InstCombiner::visitBitCast(BitCastInst &CI) {
// If we found a path from the src to dest, create the getelementptr now.
if (SrcElTy == DstElTy) {
SmallVector<Value*, 8> Idxs(NumZeros+1, ZeroUInt);
- return GetElementPtrInst::CreateInBounds(Src, Idxs.begin(), Idxs.end());
+ return GetElementPtrInst::CreateInBounds(Src, Idxs);
}
}
@@ -1702,7 +1700,7 @@ Instruction *InstCombiner::visitBitCast(BitCastInst &CI) {
if (Instruction *I = OptimizeIntToFloatBitCast(CI, *this))
return I;
- if (const VectorType *DestVTy = dyn_cast<VectorType>(DestTy)) {
+ if (VectorType *DestVTy = dyn_cast<VectorType>(DestTy)) {
if (DestVTy->getNumElements() == 1 && !SrcTy->isVectorTy()) {
Value *Elem = Builder->CreateBitCast(Src, DestVTy->getElementType());
return InsertElementInst::Create(UndefValue::get(DestTy), Elem,
@@ -1731,7 +1729,7 @@ Instruction *InstCombiner::visitBitCast(BitCastInst &CI) {
}
}
- if (const VectorType *SrcVTy = dyn_cast<VectorType>(SrcTy)) {
+ if (VectorType *SrcVTy = dyn_cast<VectorType>(SrcTy)) {
if (SrcVTy->getNumElements() == 1 && !DestTy->isVectorTy()) {
Value *Elem =
Builder->CreateExtractElement(Src,
diff --git a/lib/Transforms/InstCombine/InstCombineCompares.cpp b/lib/Transforms/InstCombine/InstCombineCompares.cpp
index c78760b..bb1cbfa 100644
--- a/lib/Transforms/InstCombine/InstCombineCompares.cpp
+++ b/lib/Transforms/InstCombine/InstCombineCompares.cpp
@@ -13,6 +13,7 @@
#include "InstCombine.h"
#include "llvm/IntrinsicInst.h"
+#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/MemoryBuiltins.h"
#include "llvm/Target/TargetData.h"
@@ -56,7 +57,7 @@ static bool AddWithOverflow(Constant *&Result, Constant *In1,
Constant *In2, bool IsSigned = false) {
Result = ConstantExpr::getAdd(In1, In2);
- if (const VectorType *VTy = dyn_cast<VectorType>(In1->getType())) {
+ if (VectorType *VTy = dyn_cast<VectorType>(In1->getType())) {
for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i) {
Constant *Idx = ConstantInt::get(Type::getInt32Ty(In1->getContext()), i);
if (HasAddOverflow(ExtractElement(Result, Idx),
@@ -78,7 +79,7 @@ static bool HasSubOverflow(ConstantInt *Result,
bool IsSigned) {
if (!IsSigned)
return Result->getValue().ugt(In1->getValue());
-
+
if (In2->isNegative())
return Result->getValue().slt(In1->getValue());
@@ -91,7 +92,7 @@ static bool SubWithOverflow(Constant *&Result, Constant *In1,
Constant *In2, bool IsSigned = false) {
Result = ConstantExpr::getSub(In1, In2);
- if (const VectorType *VTy = dyn_cast<VectorType>(In1->getType())) {
+ if (VectorType *VTy = dyn_cast<VectorType>(In1->getType())) {
for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i) {
Constant *Idx = ConstantInt::get(Type::getInt32Ty(In1->getContext()), i);
if (HasSubOverflow(ExtractElement(Result, Idx),
@@ -128,7 +129,7 @@ static bool isSignBitCheck(ICmpInst::Predicate pred, ConstantInt *RHS,
// True if LHS u> RHS and RHS == high-bit-mask - 1
TrueIfSigned = true;
return RHS->isMaxValue(true);
- case ICmpInst::ICMP_UGE:
+ case ICmpInst::ICMP_UGE:
// True if LHS u>= RHS and RHS == high-bit-mask (2^7, 2^15, 2^31, etc)
TrueIfSigned = true;
return RHS->getValue().isSignBit();
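// Editor's note: exhaustive 8-bit check of the rewrites isSignBitCheck
// encodes (two's complement assumed): an unsigned compare against the
// high-bit mask is a sign test.
#include <cassert>
#include <cstdint>
int main() {
  for (unsigned v = 0; v < 256; ++v) {
    bool negative = (int8_t)v < 0;
    assert((v >= 0x80u) == negative); // X u>= 0x80  <=>  X s< 0
    assert((v > 0x7Fu) == negative);  // X u>  0x7F  <=>  X s< 0
  }
  return 0;
}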
@@ -143,7 +144,7 @@ static bool isHighOnes(const ConstantInt *CI) {
return (~CI->getValue() + 1).isPowerOf2();
}
-/// ComputeSignedMinMaxValuesFromKnownBits - Given a signed integer type and a
+/// ComputeSignedMinMaxValuesFromKnownBits - Given a signed integer type and a
/// set of known zero and one bits, compute the maximum and minimum values that
/// could have the specified known zero and known one bits, returning them in
/// min/max.
@@ -160,7 +161,7 @@ static void ComputeSignedMinMaxValuesFromKnownBits(const APInt& KnownZero,
// bit if it is unknown.
Min = KnownOne;
Max = KnownOne|UnknownBits;
-
+
if (UnknownBits.isNegative()) { // Sign bit is unknown
Min.setBit(Min.getBitWidth()-1);
Max.clearBit(Max.getBitWidth()-1);
@@ -179,7 +180,7 @@ static void ComputeUnsignedMinMaxValuesFromKnownBits(const APInt &KnownZero,
KnownZero.getBitWidth() == Max.getBitWidth() &&
"Ty, KnownZero, KnownOne and Min, Max must have equal bitwidth.");
APInt UnknownBits = ~(KnownZero|KnownOne);
-
+
// The minimum value is when the unknown bits are all zeros.
Min = KnownOne;
// The maximum value is when the unknown bits are all ones.
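// Editor's note: small standalone check of the unsigned rule above. With
// known-one bits K1 and known-zero bits K0, the unknown bits are
// U = ~(K0|K1); the minimum sets them all to 0, the maximum to 1.
#include <cassert>
#include <cstdint>
int main() {
  uint8_t K1 = 0x21, K0 = 0x84;
  uint8_t U = (uint8_t)~(K0 | K1);
  uint8_t Min = K1, Max = (uint8_t)(K1 | U);
  for (unsigned v = 0; v < 256; ++v)
    if ((v & K1) == K1 && (v & K0) == 0) // v consistent with the known bits
      assert(Min <= v && v <= Max);
  return 0;
}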
@@ -201,10 +202,10 @@ FoldCmpLoadFromIndexedGlobal(GetElementPtrInst *GEP, GlobalVariable *GV,
CmpInst &ICI, ConstantInt *AndCst) {
// We need TD information to know the pointer size unless this is inbounds.
if (!GEP->isInBounds() && TD == 0) return 0;
-
+
ConstantArray *Init = dyn_cast<ConstantArray>(GV->getInitializer());
if (Init == 0 || Init->getNumOperands() > 1024) return 0;
-
+
 // There are many forms of this optimization we can handle; for now, just do
// the simple index into a single-dimensional array.
//
@@ -219,31 +220,31 @@ FoldCmpLoadFromIndexedGlobal(GetElementPtrInst *GEP, GlobalVariable *GV,
// type they index. Collect the indices. This is typically for arrays of
// structs.
SmallVector<unsigned, 4> LaterIndices;
-
- const Type *EltTy = cast<ArrayType>(Init->getType())->getElementType();
+
+ Type *EltTy = cast<ArrayType>(Init->getType())->getElementType();
for (unsigned i = 3, e = GEP->getNumOperands(); i != e; ++i) {
ConstantInt *Idx = dyn_cast<ConstantInt>(GEP->getOperand(i));
if (Idx == 0) return 0; // Variable index.
-
+
uint64_t IdxVal = Idx->getZExtValue();
if ((unsigned)IdxVal != IdxVal) return 0; // Too large array index.
-
- if (const StructType *STy = dyn_cast<StructType>(EltTy))
+
+ if (StructType *STy = dyn_cast<StructType>(EltTy))
EltTy = STy->getElementType(IdxVal);
- else if (const ArrayType *ATy = dyn_cast<ArrayType>(EltTy)) {
+ else if (ArrayType *ATy = dyn_cast<ArrayType>(EltTy)) {
if (IdxVal >= ATy->getNumElements()) return 0;
EltTy = ATy->getElementType();
} else {
return 0; // Unknown type.
}
-
+
LaterIndices.push_back(IdxVal);
}
-
+
enum { Overdefined = -3, Undefined = -2 };
// Variables for our state machines.
-
+
// FirstTrueElement/SecondTrueElement - Used to emit a comparison of the form
// "i == 47 | i == 87", where 47 is the first index the condition is true for,
// and 87 is the second (and last) index. FirstTrueElement is -2 when
@@ -254,7 +255,7 @@ FoldCmpLoadFromIndexedGlobal(GetElementPtrInst *GEP, GlobalVariable *GV,
// FirstFalseElement/SecondFalseElement - Used to emit a comparison of the
// form "i != 47 & i != 87". Same state transitions as for true elements.
int FirstFalseElement = Undefined, SecondFalseElement = Undefined;
-
+
/// TrueRangeEnd/FalseRangeEnd - In conjunction with First*Element, these
/// define a state machine that triggers for ranges of values that the index
/// is true or false for. This triggers on things like "abbbbc"[i] == 'b'.
@@ -262,25 +263,25 @@ FoldCmpLoadFromIndexedGlobal(GetElementPtrInst *GEP, GlobalVariable *GV,
/// index in the range (inclusive). We use -2 for undefined here because we
/// use relative comparisons and don't want 0-1 to match -1.
int TrueRangeEnd = Undefined, FalseRangeEnd = Undefined;
-
+
// MagicBitvector - This is a magic bitvector where we set a bit if the
// comparison is true for element 'i'. If there are 64 elements or less in
// the array, this will fully represent all the comparison results.
uint64_t MagicBitvector = 0;
-
-
+
+
// Scan the array and see if one of our patterns matches.
Constant *CompareRHS = cast<Constant>(ICI.getOperand(1));
for (unsigned i = 0, e = Init->getNumOperands(); i != e; ++i) {
Constant *Elt = Init->getOperand(i);
-
+
// If this is indexing an array of structures, get the structure element.
if (!LaterIndices.empty())
Elt = ConstantExpr::getExtractValue(Elt, LaterIndices);
-
+
// If the element is masked, handle it.
if (AndCst) Elt = ConstantExpr::getAnd(Elt, AndCst);
-
+
// Find out if the comparison would be true or false for the i'th element.
Constant *C = ConstantFoldCompareInstOperands(ICI.getPredicate(), Elt,
CompareRHS, TD);
@@ -294,15 +295,15 @@ FoldCmpLoadFromIndexedGlobal(GetElementPtrInst *GEP, GlobalVariable *GV,
FalseRangeEnd = i;
continue;
}
-
+
// If we can't compute the result for any of the elements, we have to give
// up evaluating the entire conditional.
if (!isa<ConstantInt>(C)) return 0;
-
+
// Otherwise, we know if the comparison is true or false for this element,
// update our state machines.
bool IsTrueForElt = !cast<ConstantInt>(C)->isZero();
-
+
// State machine for single/double/range index comparison.
if (IsTrueForElt) {
// Update the TrueElement state machine.
@@ -314,7 +315,7 @@ FoldCmpLoadFromIndexedGlobal(GetElementPtrInst *GEP, GlobalVariable *GV,
SecondTrueElement = i;
else
SecondTrueElement = Overdefined;
-
+
// Update range state machine.
if (TrueRangeEnd == (int)i-1)
TrueRangeEnd = i;
@@ -331,7 +332,7 @@ FoldCmpLoadFromIndexedGlobal(GetElementPtrInst *GEP, GlobalVariable *GV,
SecondFalseElement = i;
else
SecondFalseElement = Overdefined;
-
+
// Update range state machine.
if (FalseRangeEnd == (int)i-1)
FalseRangeEnd = i;
@@ -339,12 +340,12 @@ FoldCmpLoadFromIndexedGlobal(GetElementPtrInst *GEP, GlobalVariable *GV,
FalseRangeEnd = Overdefined;
}
}
-
-
+
+
// If this element is in range, update our magic bitvector.
if (i < 64 && IsTrueForElt)
MagicBitvector |= 1ULL << i;
-
+
// If all of our states become overdefined, bail out early. Since the
// predicate is expensive, only check it every 8 elements. This is only
 // really useful for very large arrays.
@@ -364,20 +365,20 @@ FoldCmpLoadFromIndexedGlobal(GetElementPtrInst *GEP, GlobalVariable *GV,
if (!GEP->isInBounds() &&
Idx->getType()->getPrimitiveSizeInBits() > TD->getPointerSizeInBits())
Idx = Builder->CreateTrunc(Idx, TD->getIntPtrType(Idx->getContext()));
-
+
// If the comparison is only true for one or two elements, emit direct
// comparisons.
if (SecondTrueElement != Overdefined) {
// None true -> false.
if (FirstTrueElement == Undefined)
return ReplaceInstUsesWith(ICI, ConstantInt::getFalse(GEP->getContext()));
-
+
Value *FirstTrueIdx = ConstantInt::get(Idx->getType(), FirstTrueElement);
-
+
// True for one element -> 'i == 47'.
if (SecondTrueElement == Undefined)
return new ICmpInst(ICmpInst::ICMP_EQ, Idx, FirstTrueIdx);
-
+
// True for two elements -> 'i == 47 | i == 72'.
Value *C1 = Builder->CreateICmpEQ(Idx, FirstTrueIdx);
Value *SecondTrueIdx = ConstantInt::get(Idx->getType(), SecondTrueElement);
@@ -391,36 +392,36 @@ FoldCmpLoadFromIndexedGlobal(GetElementPtrInst *GEP, GlobalVariable *GV,
// None false -> true.
if (FirstFalseElement == Undefined)
return ReplaceInstUsesWith(ICI, ConstantInt::getTrue(GEP->getContext()));
-
+
Value *FirstFalseIdx = ConstantInt::get(Idx->getType(), FirstFalseElement);
// False for one element -> 'i != 47'.
if (SecondFalseElement == Undefined)
return new ICmpInst(ICmpInst::ICMP_NE, Idx, FirstFalseIdx);
-
+
// False for two elements -> 'i != 47 & i != 72'.
Value *C1 = Builder->CreateICmpNE(Idx, FirstFalseIdx);
Value *SecondFalseIdx = ConstantInt::get(Idx->getType(),SecondFalseElement);
Value *C2 = Builder->CreateICmpNE(Idx, SecondFalseIdx);
return BinaryOperator::CreateAnd(C1, C2);
}
-
+
// If the comparison can be replaced with a range comparison for the elements
// where it is true, emit the range check.
if (TrueRangeEnd != Overdefined) {
assert(TrueRangeEnd != FirstTrueElement && "Should emit single compare");
-
+
// Generate (i-FirstTrue) <u (TrueRangeEnd-FirstTrue+1).
if (FirstTrueElement) {
Value *Offs = ConstantInt::get(Idx->getType(), -FirstTrueElement);
Idx = Builder->CreateAdd(Idx, Offs);
}
-
+
Value *End = ConstantInt::get(Idx->getType(),
TrueRangeEnd-FirstTrueElement+1);
return new ICmpInst(ICmpInst::ICMP_ULT, Idx, End);
}
-
+
// False range check.
if (FalseRangeEnd != Overdefined) {
assert(FalseRangeEnd != FirstFalseElement && "Should emit single compare");
@@ -429,19 +430,19 @@ FoldCmpLoadFromIndexedGlobal(GetElementPtrInst *GEP, GlobalVariable *GV,
Value *Offs = ConstantInt::get(Idx->getType(), -FirstFalseElement);
Idx = Builder->CreateAdd(Idx, Offs);
}
-
+
Value *End = ConstantInt::get(Idx->getType(),
FalseRangeEnd-FirstFalseElement);
return new ICmpInst(ICmpInst::ICMP_UGT, Idx, End);
}
-
-
+
+
// If a 32-bit or 64-bit magic bitvector captures the entire comparison state
// of this load, replace it with computation that does:
// ((magic_cst >> i) & 1) != 0
if (Init->getNumOperands() <= 32 ||
(TD && Init->getNumOperands() <= 64 && TD->isLegalInteger(64))) {
- const Type *Ty;
+ Type *Ty;
if (Init->getNumOperands() <= 32)
Ty = Type::getInt32Ty(Init->getContext());
else
@@ -451,7 +452,7 @@ FoldCmpLoadFromIndexedGlobal(GetElementPtrInst *GEP, GlobalVariable *GV,
V = Builder->CreateAnd(ConstantInt::get(Ty, 1), V);
return new ICmpInst(ICmpInst::ICMP_NE, V, ConstantInt::get(Ty, 0));
}
-
+
return 0;
}
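// Editor's note: standalone sketch of the "magic bitvector" fallback above,
// under assumed data. For a small constant array, precompute one bit per
// element recording whether Init[i] matches; the load+compare then collapses
// to ((magic >> i) & 1) != 0.
#include <cassert>
#include <cstdint>
int main() {
  const int Init[8] = {3, 7, 7, 2, 9, 7, 0, 7};
  uint64_t magic = 0;
  for (unsigned i = 0; i != 8; ++i)
    if (Init[i] == 7)                 // the comparison being folded
      magic |= 1ULL << i;
  for (unsigned i = 0; i != 8; ++i)
    assert(((magic >> i) & 1) == (unsigned)(Init[i] == 7));
  return 0;
}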
@@ -465,11 +466,11 @@ FoldCmpLoadFromIndexedGlobal(GetElementPtrInst *GEP, GlobalVariable *GV,
/// to generate the first by knowing that pointer arithmetic doesn't overflow.
///
/// If we can't emit an optimized form for this expression, this returns null.
-///
+///
static Value *EvaluateGEPOffsetExpression(User *GEP, InstCombiner &IC) {
TargetData &TD = *IC.getTargetData();
gep_type_iterator GTI = gep_type_begin(GEP);
-
+
// Check to see if this gep only has a single variable index. If so, and if
// any constant indices are a multiple of its scale, then we can compute this
// in terms of the scale of the variable index. For example, if the GEP
@@ -481,9 +482,9 @@ static Value *EvaluateGEPOffsetExpression(User *GEP, InstCombiner &IC) {
if (ConstantInt *CI = dyn_cast<ConstantInt>(GEP->getOperand(i))) {
// Compute the aggregate offset of constant indices.
if (CI->isZero()) continue;
-
+
// Handle a struct index, which adds its field offset to the pointer.
- if (const StructType *STy = dyn_cast<StructType>(*GTI)) {
+ if (StructType *STy = dyn_cast<StructType>(*GTI)) {
Offset += TD.getStructLayout(STy)->getElementOffset(CI->getZExtValue());
} else {
uint64_t Size = TD.getTypeAllocSize(GTI.getIndexedType());
@@ -494,33 +495,33 @@ static Value *EvaluateGEPOffsetExpression(User *GEP, InstCombiner &IC) {
break;
}
}
-
+
// If there are no variable indices, we must have a constant offset, just
// evaluate it the general way.
if (i == e) return 0;
-
+
Value *VariableIdx = GEP->getOperand(i);
// Determine the scale factor of the variable element. For example, this is
// 4 if the variable index is into an array of i32.
uint64_t VariableScale = TD.getTypeAllocSize(GTI.getIndexedType());
-
+
// Verify that there are no other variable indices. If so, emit the hard way.
for (++i, ++GTI; i != e; ++i, ++GTI) {
ConstantInt *CI = dyn_cast<ConstantInt>(GEP->getOperand(i));
if (!CI) return 0;
-
+
// Compute the aggregate offset of constant indices.
if (CI->isZero()) continue;
-
+
// Handle a struct index, which adds its field offset to the pointer.
- if (const StructType *STy = dyn_cast<StructType>(*GTI)) {
+ if (StructType *STy = dyn_cast<StructType>(*GTI)) {
Offset += TD.getStructLayout(STy)->getElementOffset(CI->getZExtValue());
} else {
uint64_t Size = TD.getTypeAllocSize(GTI.getIndexedType());
Offset += Size*CI->getSExtValue();
}
}
-
+
// Okay, we know we have a single variable index, which must be a
// pointer/array/vector index. If there is no offset, life is simple, return
// the index.
@@ -530,19 +531,19 @@ static Value *EvaluateGEPOffsetExpression(User *GEP, InstCombiner &IC) {
// we don't need to bother extending: the extension won't affect where the
// computation crosses zero.
if (VariableIdx->getType()->getPrimitiveSizeInBits() > IntPtrWidth) {
- const Type *IntPtrTy = TD.getIntPtrType(VariableIdx->getContext());
+ Type *IntPtrTy = TD.getIntPtrType(VariableIdx->getContext());
VariableIdx = IC.Builder->CreateTrunc(VariableIdx, IntPtrTy);
}
return VariableIdx;
}
-
+
// Otherwise, there is an index. The computation we will do will be modulo
// the pointer size, so get it.
uint64_t PtrSizeMask = ~0ULL >> (64-IntPtrWidth);
-
+
Offset &= PtrSizeMask;
VariableScale &= PtrSizeMask;
-
+
// To do this transformation, any constant index must be a multiple of the
// variable scale factor. For example, we can evaluate "12 + 4*i" as "3 + i",
// but we can't evaluate "10 + 3*i" in terms of i. Check that the offset is a
@@ -550,9 +551,9 @@ static Value *EvaluateGEPOffsetExpression(User *GEP, InstCombiner &IC) {
int64_t NewOffs = Offset / (int64_t)VariableScale;
if (Offset != NewOffs*(int64_t)VariableScale)
return 0;
-
+
// Okay, we can do this evaluation. Start by converting the index to intptr.
- const Type *IntPtrTy = TD.getIntPtrType(VariableIdx->getContext());
+ Type *IntPtrTy = TD.getIntPtrType(VariableIdx->getContext());
if (VariableIdx->getType() != IntPtrTy)
VariableIdx = IC.Builder->CreateIntCast(VariableIdx, IntPtrTy,
true /*Signed*/);
@@ -576,7 +577,7 @@ Instruction *InstCombiner::FoldGEPICmp(GEPOperator *GEPLHS, Value *RHS,
// know pointers can't overflow since the gep is inbounds. See if we can
// output an optimized form.
Value *Offset = EvaluateGEPOffsetExpression(GEPLHS, *this);
-
+
// If not, synthesize the offset the hard way.
if (Offset == 0)
Offset = EmitGEPOffset(GEPLHS);
@@ -686,7 +687,7 @@ Instruction *InstCombiner::FoldICmpAddOpCst(ICmpInst &ICI,
bool isTrue = ICmpInst::isTrueWhenEqual(Pred);
return ReplaceInstUsesWith(ICI, ConstantInt::get(ICI.getType(), isTrue));
}
-
+
// (X+4) == X -> false.
if (Pred == ICmpInst::ICMP_EQ)
return ReplaceInstUsesWith(ICI, ConstantInt::getFalse(X->getContext()));
@@ -698,22 +699,22 @@ Instruction *InstCombiner::FoldICmpAddOpCst(ICmpInst &ICI,
// From this point on, we know that (X+C <= X) --> (X+C < X) because C != 0,
// so the values can never be equal. Similarly for all other "or equals"
// operators.
-
+
// (X+1) <u X --> X >u (MAXUINT-1) --> X == 255
// (X+2) <u X --> X >u (MAXUINT-2) --> X > 253
// (X+MAXUINT) <u X --> X >u (MAXUINT-MAXUINT) --> X != 0
if (Pred == ICmpInst::ICMP_ULT || Pred == ICmpInst::ICMP_ULE) {
- Value *R =
+ Value *R =
ConstantExpr::getSub(ConstantInt::getAllOnesValue(CI->getType()), CI);
return new ICmpInst(ICmpInst::ICMP_UGT, X, R);
}
-
+
// (X+1) >u X --> X <u (0-1) --> X != 255
// (X+2) >u X --> X <u (0-2) --> X <u 254
// (X+MAXUINT) >u X --> X <u (0-MAXUINT) --> X <u 1 --> X == 0
if (Pred == ICmpInst::ICMP_UGT || Pred == ICmpInst::ICMP_UGE)
return new ICmpInst(ICmpInst::ICMP_ULT, X, ConstantExpr::getNeg(CI));
-
+
unsigned BitWidth = CI->getType()->getPrimitiveSizeInBits();
ConstantInt *SMax = ConstantInt::get(X->getContext(),
APInt::getSignedMaxValue(BitWidth));
@@ -726,14 +727,14 @@ Instruction *InstCombiner::FoldICmpAddOpCst(ICmpInst &ICI,
// (X+ -1) <s X --> X >s (MAXSINT- -1) --> X != 127
if (Pred == ICmpInst::ICMP_SLT || Pred == ICmpInst::ICMP_SLE)
return new ICmpInst(ICmpInst::ICMP_SGT, X, ConstantExpr::getSub(SMax, CI));
-
+
// (X+ 1) >s X --> X <s (MAXSINT-(1-1)) --> X != 127
// (X+ 2) >s X --> X <s (MAXSINT-(2-1)) --> X <s 126
// (X+MAXSINT) >s X --> X <s (MAXSINT-(MAXSINT-1)) --> X <s 1
// (X+MINSINT) >s X --> X <s (MAXSINT-(MINSINT-1)) --> X <s -2
// (X+ -2) >s X --> X <s (MAXSINT-(-2-1)) --> X <s -126
// (X+ -1) >s X --> X <s (MAXSINT-(-1-1)) --> X == -128
-
+
assert(Pred == ICmpInst::ICMP_SGT || Pred == ICmpInst::ICMP_SGE);
Constant *C = ConstantInt::get(X->getContext(), CI->getValue()-1);
return new ICmpInst(ICmpInst::ICMP_SLT, X, ConstantExpr::getSub(SMax, C));
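// Editor's note: exhaustive 8-bit check of two of the (X+C) pred X folds
// tabulated in the comments above.
#include <cassert>
#include <cstdint>
int main() {
  for (unsigned x = 0; x < 256; ++x) {
    uint8_t X = (uint8_t)x;
    // (X+1) <u X  <=>  X >u MAXUINT-1  <=>  X == 255
    assert(((uint8_t)(X + 1) < X) == (X == 255));
    // (X+1) >u X  <=>  X <u 0-1       <=>  X != 255
    assert(((uint8_t)(X + 1) > X) == (X != 255));
  }
  return 0;
}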
@@ -745,14 +746,14 @@ Instruction *InstCombiner::FoldICmpDivCst(ICmpInst &ICI, BinaryOperator *DivI,
ConstantInt *DivRHS) {
ConstantInt *CmpRHS = cast<ConstantInt>(ICI.getOperand(1));
const APInt &CmpRHSV = CmpRHS->getValue();
-
- // FIXME: If the operand types don't match the type of the divide
+
+ // FIXME: If the operand types don't match the type of the divide
// then don't attempt this transform. The code below doesn't have the
// logic to deal with a signed divide and an unsigned compare (and
- // vice versa). This is because (x /s C1) <s C2 produces different
+ // vice versa). This is because (x /s C1) <s C2 produces different
// results than (x /s C1) <u C2 or (x /u C1) <s C2 or even
- // (x /u C1) <u C2. Simply casting the operands and result won't
- // work. :( The if statement below tests that condition and bails
+ // (x /u C1) <u C2. Simply casting the operands and result won't
+ // work. :( The if statement below tests that condition and bails
// if it finds it.
bool DivIsSigned = DivI->getOpcode() == Instruction::SDiv;
if (!ICI.isEquality() && DivIsSigned != ICI.isSigned())
@@ -768,14 +769,14 @@ Instruction *InstCombiner::FoldICmpDivCst(ICmpInst &ICI, BinaryOperator *DivI,
}
// Compute Prod = CI * DivRHS. We are essentially solving an equation
- // of form X/C1=C2. We solve for X by multiplying C1 (DivRHS) and
- // C2 (CI). By solving for X we can turn this into a range check
- // instead of computing a divide.
+ // of form X/C1=C2. We solve for X by multiplying C1 (DivRHS) and
+ // C2 (CI). By solving for X we can turn this into a range check
+ // instead of computing a divide.
Constant *Prod = ConstantExpr::getMul(CmpRHS, DivRHS);
// Determine if the product overflows by seeing if the product is
// not equal to the divide. Make sure we do the same kind of divide
- // as in the LHS instruction that we're folding.
+ // as in the LHS instruction that we're folding.
bool ProdOV = (DivIsSigned ? ConstantExpr::getSDiv(Prod, DivRHS) :
ConstantExpr::getUDiv(Prod, DivRHS)) != CmpRHS;
@@ -785,9 +786,9 @@ Instruction *InstCombiner::FoldICmpDivCst(ICmpInst &ICI, BinaryOperator *DivI,
/// If the division is known to be exact, then there is no remainder from the
 /// divide, so the covered range size is one; otherwise it is the divisor.
ConstantInt *RangeSize = DivI->isExact() ? getOne(Prod) : DivRHS;
-
+
// Figure out the interval that is being checked. For example, a comparison
- // like "X /u 5 == 0" is really checking that X is in the interval [0, 5).
+ // like "X /u 5 == 0" is really checking that X is in the interval [0, 5).
// Compute this interval based on the constants involved and the signedness of
// the compare/divide. This computes a half-open interval, keeping track of
// whether either value in the interval overflows. After analysis each
@@ -805,7 +806,7 @@ Instruction *InstCombiner::FoldICmpDivCst(ICmpInst &ICI, BinaryOperator *DivI,
// to the same result value.
HiOverflow = AddWithOverflow(HiBound, LoBound, RangeSize, false);
}
-
+
} else if (DivRHS->getValue().isStrictlyPositive()) { // Divisor is > 0.
if (CmpRHSV == 0) { // (X / pos) op 0
// Can't overflow. e.g. X/2 op 0 --> [-1, 2)
@@ -848,7 +849,7 @@ Instruction *InstCombiner::FoldICmpDivCst(ICmpInst &ICI, BinaryOperator *DivI,
if (!HiOverflow)
HiOverflow = SubWithOverflow(HiBound, Prod, RangeSize, true);
}
-
+
// Dividing by a negative swaps the condition. LT <-> GT
Pred = ICmpInst::getSwappedPredicate(Pred);
}
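// Editor's note: exhaustive 8-bit check of the range-check idea above for an
// unsigned divide: "X /u 5 == 2" holds exactly when X lies in [10, 15), and
// the half-open interval test is one subtract plus one unsigned compare.
#include <cassert>
int main() {
  for (unsigned x = 0; x < 256; ++x)
    assert((x / 5 == 2) == (x - 10u < 5u)); // (X - LoBound) <u RangeSize
  return 0;
}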
@@ -901,7 +902,7 @@ Instruction *InstCombiner::FoldICmpDivCst(ICmpInst &ICI, BinaryOperator *DivI,
Instruction *InstCombiner::FoldICmpShrCst(ICmpInst &ICI, BinaryOperator *Shr,
ConstantInt *ShAmt) {
const APInt &CmpRHSV = cast<ConstantInt>(ICI.getOperand(1))->getValue();
-
+
// Check that the shift amount is in range. If not, don't perform
// undefined shifts. When the shift is visited it will be
// simplified.
@@ -909,48 +910,48 @@ Instruction *InstCombiner::FoldICmpShrCst(ICmpInst &ICI, BinaryOperator *Shr,
uint32_t ShAmtVal = (uint32_t)ShAmt->getLimitedValue(TypeBits);
if (ShAmtVal >= TypeBits || ShAmtVal == 0)
return 0;
-
+
if (!ICI.isEquality()) {
// If we have an unsigned comparison and an ashr, we can't simplify this.
// Similarly for signed comparisons with lshr.
if (ICI.isSigned() != (Shr->getOpcode() == Instruction::AShr))
return 0;
-
+
// Otherwise, all lshr and most exact ashr's are equivalent to a udiv/sdiv
// by a power of 2. Since we already have logic to simplify these,
// transform to div and then simplify the resultant comparison.
if (Shr->getOpcode() == Instruction::AShr &&
(!Shr->isExact() || ShAmtVal == TypeBits - 1))
return 0;
-
+
// Revisit the shift (to delete it).
Worklist.Add(Shr);
-
+
Constant *DivCst =
ConstantInt::get(Shr->getType(), APInt::getOneBitSet(TypeBits, ShAmtVal));
-
+
Value *Tmp =
Shr->getOpcode() == Instruction::AShr ?
Builder->CreateSDiv(Shr->getOperand(0), DivCst, "", Shr->isExact()) :
Builder->CreateUDiv(Shr->getOperand(0), DivCst, "", Shr->isExact());
-
+
ICI.setOperand(0, Tmp);
-
+
// If the builder folded the binop, just return it.
BinaryOperator *TheDiv = dyn_cast<BinaryOperator>(Tmp);
if (TheDiv == 0)
return &ICI;
-
+
// Otherwise, fold this div/compare.
assert(TheDiv->getOpcode() == Instruction::SDiv ||
TheDiv->getOpcode() == Instruction::UDiv);
-
+
Instruction *Res = FoldICmpDivCst(ICI, TheDiv, cast<ConstantInt>(DivCst));
assert(Res && "This div/cst should have folded!");
return Res;
}
-
-
+
+
// If we are comparing against bits always shifted out, the
// comparison cannot succeed.
APInt Comp = CmpRHSV << ShAmtVal;
@@ -959,25 +960,25 @@ Instruction *InstCombiner::FoldICmpShrCst(ICmpInst &ICI, BinaryOperator *Shr,
Comp = Comp.lshr(ShAmtVal);
else
Comp = Comp.ashr(ShAmtVal);
-
+
if (Comp != CmpRHSV) { // Comparing against a bit that we know is zero.
bool IsICMP_NE = ICI.getPredicate() == ICmpInst::ICMP_NE;
Constant *Cst = ConstantInt::get(Type::getInt1Ty(ICI.getContext()),
IsICMP_NE);
return ReplaceInstUsesWith(ICI, Cst);
}
-
+
// Otherwise, check to see if the bits shifted out are known to be zero.
// If so, we can compare against the unshifted value:
// (X & 4) >> 1 == 2 --> (X & 4) == 4.
if (Shr->hasOneUse() && Shr->isExact())
return new ICmpInst(ICI.getPredicate(), Shr->getOperand(0), ShiftedCmpRHS);
-
+
if (Shr->hasOneUse()) {
// Otherwise strength reduce the shift into an and.
APInt Val(APInt::getHighBitsSet(TypeBits, TypeBits - ShAmtVal));
Constant *Mask = ConstantInt::get(ICI.getContext(), Val);
-
+
Value *And = Builder->CreateAnd(Shr->getOperand(0),
Mask, Shr->getName()+".mask");
return new ICmpInst(ICI.getPredicate(), And, ShiftedCmpRHS);
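// Editor's note: exhaustive 8-bit check of the shift-into-and strength
// reduction above: (X >>u 2) == C matches (X & ~3) == (C << 2) whenever
// C << 2 still fits in the type.
#include <cassert>
#include <cstdint>
int main() {
  for (unsigned x = 0; x < 256; ++x)
    for (unsigned c = 0; c < 64; ++c) {
      bool viaShift = ((uint8_t)x >> 2) == c;
      bool viaMask = (x & 0xFCu) == (c << 2);
      assert(viaShift == viaMask);
    }
  return 0;
}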
@@ -992,7 +993,7 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI,
Instruction *LHSI,
ConstantInt *RHS) {
const APInt &RHSV = RHS->getValue();
-
+
switch (LHSI->getOpcode()) {
case Instruction::Trunc:
if (ICI.isEquality() && LHSI->hasOneUse()) {
@@ -1003,7 +1004,7 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI,
APInt Mask(APInt::getHighBitsSet(SrcBits, SrcBits-DstBits));
APInt KnownZero(SrcBits, 0), KnownOne(SrcBits, 0);
ComputeMaskedBits(LHSI->getOperand(0), Mask, KnownZero, KnownOne);
-
+
// If all the high bits are known, we can do this xform.
if ((KnownZero|KnownOne).countLeadingOnes() >= SrcBits-DstBits) {
// Pull in the high bits from known-ones set.
@@ -1014,7 +1015,7 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI,
}
}
break;
-
+
case Instruction::Xor: // (icmp pred (xor X, XorCST), CI)
if (ConstantInt *XorCST = dyn_cast<ConstantInt>(LHSI->getOperand(1))) {
// If this is a comparison that tests the signbit (X < 0) or (x > -1),
@@ -1022,7 +1023,7 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI,
if ((ICI.getPredicate() == ICmpInst::ICMP_SLT && RHSV == 0) ||
(ICI.getPredicate() == ICmpInst::ICMP_SGT && RHSV.isAllOnesValue())) {
Value *CompareVal = LHSI->getOperand(0);
-
+
// If the sign bit of the XorCST is not set, there is no change to
// the operation, just stop using the Xor.
if (!XorCST->isNegative()) {
@@ -1030,13 +1031,13 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI,
Worklist.Add(LHSI);
return &ICI;
}
-
+
// Was the old condition true if the operand is positive?
bool isTrueIfPositive = ICI.getPredicate() == ICmpInst::ICMP_SGT;
-
+
// If so, the new one isn't.
isTrueIfPositive ^= true;
-
+
if (isTrueIfPositive)
return new ICmpInst(ICmpInst::ICMP_SGT, CompareVal,
SubOne(RHS));
@@ -1075,13 +1076,13 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI,
if (LHSI->hasOneUse() && isa<ConstantInt>(LHSI->getOperand(1)) &&
LHSI->getOperand(0)->hasOneUse()) {
ConstantInt *AndCST = cast<ConstantInt>(LHSI->getOperand(1));
-
+
// If the LHS is an AND of a truncating cast, we can widen the
// and/compare to be the input width without changing the value
// produced, eliminating a cast.
if (TruncInst *Cast = dyn_cast<TruncInst>(LHSI->getOperand(0))) {
// We can do this transformation if either the AND constant does not
- // have its sign bit set or if it is an equality comparison.
+ // have its sign bit set or if it is an equality comparison.
// Extending a relational comparison when we're checking the sign
// bit would not work.
if (ICI.isEquality() ||
@@ -1098,7 +1099,7 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI,
// If the LHS is an AND of a zext, and we have an equality compare, we can
// shrink the and/compare to the smaller type, eliminating the cast.
if (ZExtInst *Cast = dyn_cast<ZExtInst>(LHSI->getOperand(0))) {
- const IntegerType *Ty = cast<IntegerType>(Cast->getSrcTy());
+ IntegerType *Ty = cast<IntegerType>(Cast->getSrcTy());
// Make sure we don't compare the upper bits, SimplifyDemandedBits
// should fold the icmp to true/false in that case.
if (ICI.isEquality() && RHSV.getActiveBits() <= Ty->getBitWidth()) {
@@ -1118,12 +1119,12 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI,
BinaryOperator *Shift = dyn_cast<BinaryOperator>(LHSI->getOperand(0));
if (Shift && !Shift->isShift())
Shift = 0;
-
+
ConstantInt *ShAmt;
ShAmt = Shift ? dyn_cast<ConstantInt>(Shift->getOperand(1)) : 0;
- const Type *Ty = Shift ? Shift->getType() : 0; // Type of the shift.
- const Type *AndTy = AndCST->getType(); // Type of the and.
-
+ Type *Ty = Shift ? Shift->getType() : 0; // Type of the shift.
+ Type *AndTy = AndCST->getType(); // Type of the and.
+
// We can fold this as long as we can't shift unknown bits
// into the mask. This can only happen with signed shift
// rights, as they sign-extend.
@@ -1134,20 +1135,20 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI,
// of the bits shifted in could be tested after the mask.
uint32_t TyBits = Ty->getPrimitiveSizeInBits();
int ShAmtVal = TyBits - ShAmt->getLimitedValue(TyBits);
-
+
uint32_t BitWidth = AndTy->getPrimitiveSizeInBits();
- if ((APInt::getHighBitsSet(BitWidth, BitWidth-ShAmtVal) &
+ if ((APInt::getHighBitsSet(BitWidth, BitWidth-ShAmtVal) &
AndCST->getValue()) == 0)
CanFold = true;
}
-
+
if (CanFold) {
Constant *NewCst;
if (Shift->getOpcode() == Instruction::Shl)
NewCst = ConstantExpr::getLShr(RHS, ShAmt);
else
NewCst = ConstantExpr::getShl(RHS, ShAmt);
-
+
// Check to see if we are shifting out any of the bits being
// compared.
if (ConstantExpr::get(Shift->getOpcode(),
@@ -1175,7 +1176,7 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI,
}
}
}
-
+
 // Turn ((X >> Y) & C) == 0 into (X & (C << Y)) == 0. The latter is
// preferable because it allows the C<<Y expression to be hoisted out
// of a loop if Y is invariant and X is not.
@@ -1185,21 +1186,21 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI,
// Compute C << Y.
Value *NS;
if (Shift->getOpcode() == Instruction::LShr) {
- NS = Builder->CreateShl(AndCST, Shift->getOperand(1), "tmp");
+ NS = Builder->CreateShl(AndCST, Shift->getOperand(1));
} else {
// Insert a logical shift.
- NS = Builder->CreateLShr(AndCST, Shift->getOperand(1), "tmp");
+ NS = Builder->CreateLShr(AndCST, Shift->getOperand(1));
}
-
+
// Compute X & (C << Y).
- Value *NewAnd =
+ Value *NewAnd =
Builder->CreateAnd(Shift->getOperand(0), NS, LHSI->getName());
-
+
ICI.setOperand(0, NewAnd);
return &ICI;
}
}
-
+
// Try to optimize things like "A[i]&42 == 0" to index computations.
if (LoadInst *LI = dyn_cast<LoadInst>(LHSI->getOperand(0))) {
if (GetElementPtrInst *GEP =
@@ -1234,19 +1235,19 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI,
}
break;
}
-
+
case Instruction::Shl: { // (icmp pred (shl X, ShAmt), CI)
ConstantInt *ShAmt = dyn_cast<ConstantInt>(LHSI->getOperand(1));
if (!ShAmt) break;
-
+
uint32_t TypeBits = RHSV.getBitWidth();
-
+
// Check that the shift amount is in range. If not, don't perform
// undefined shifts. When the shift is visited it will be
// simplified.
if (ShAmt->uge(TypeBits))
break;
-
+
if (ICI.isEquality()) {
// If we are comparing against bits always shifted out, the
// comparison cannot succeed.
@@ -1259,34 +1260,34 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI,
ConstantInt::get(Type::getInt1Ty(ICI.getContext()), IsICMP_NE);
return ReplaceInstUsesWith(ICI, Cst);
}
-
+
// If the shift is NUW, then it is just shifting out zeros, no need for an
// AND.
if (cast<BinaryOperator>(LHSI)->hasNoUnsignedWrap())
return new ICmpInst(ICI.getPredicate(), LHSI->getOperand(0),
ConstantExpr::getLShr(RHS, ShAmt));
-
+
if (LHSI->hasOneUse()) {
// Otherwise strength reduce the shift into an and.
uint32_t ShAmtVal = (uint32_t)ShAmt->getLimitedValue(TypeBits);
Constant *Mask =
- ConstantInt::get(ICI.getContext(), APInt::getLowBitsSet(TypeBits,
+ ConstantInt::get(ICI.getContext(), APInt::getLowBitsSet(TypeBits,
TypeBits-ShAmtVal));
-
+
Value *And =
Builder->CreateAnd(LHSI->getOperand(0),Mask, LHSI->getName()+".mask");
return new ICmpInst(ICI.getPredicate(), And,
ConstantExpr::getLShr(RHS, ShAmt));
}
}
-
+
// Otherwise, if this is a comparison of the sign bit, simplify to and/test.
bool TrueIfSigned = false;
if (LHSI->hasOneUse() &&
isSignBitCheck(ICI.getPredicate(), RHS, TrueIfSigned)) {
// (X << 31) <s 0 --> (X&1) != 0
Constant *Mask = ConstantInt::get(LHSI->getOperand(0)->getType(),
- APInt::getOneBitSet(TypeBits,
+ APInt::getOneBitSet(TypeBits,
TypeBits-ShAmt->getZExtValue()-1));
Value *And =
Builder->CreateAnd(LHSI->getOperand(0), Mask, LHSI->getName()+".mask");
@@ -1295,7 +1296,7 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI,
}
break;
}
-
+
case Instruction::LShr: // (icmp pred (shr X, ShAmt), CI)
case Instruction::AShr: {
// Handle equality comparisons of shift-by-constant.
@@ -1312,13 +1313,13 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI,
}
break;
}
-
+
case Instruction::SDiv:
case Instruction::UDiv:
// Fold: icmp pred ([us]div X, C1), C2 -> range test
- // Fold this div into the comparison, producing a range check.
- // Determine, based on the divide type, what the range is being
- // checked. If there is an overflow on the low or high side, remember
+ // Fold this div into the comparison, producing a range check.
+ // Determine, based on the divide type, what the range is being
+ // checked. If there is an overflow on the low or high side, remember
// it, otherwise compute the range [low, hi) bounding the new value.
// See: InsertRangeTest above for the kinds of replacements possible.
if (ConstantInt *DivRHS = dyn_cast<ConstantInt>(LHSI->getOperand(1)))
@@ -1357,12 +1358,12 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI,
}
break;
}
-
+
// Simplify icmp_eq and icmp_ne instructions with integer constant RHS.
if (ICI.isEquality()) {
bool isICMP_NE = ICI.getPredicate() == ICmpInst::ICMP_NE;
-
- // If the first operand is (add|sub|and|or|xor|rem) with a constant, and
+
+ // If the first operand is (add|sub|and|or|xor|rem) with a constant, and
// the second operand is a constant, simplify a bit.
if (BinaryOperator *BO = dyn_cast<BinaryOperator>(LHSI)) {
switch (BO->getOpcode()) {
@@ -1389,7 +1390,7 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI,
// Replace ((add A, B) != 0) with (A != -B) if A or B is
// efficiently invertible, or if the add has just this one use.
Value *BOp0 = BO->getOperand(0), *BOp1 = BO->getOperand(1);
-
+
if (Value *NegVal = dyn_castNegVal(BOp1))
return new ICmpInst(ICI.getPredicate(), BOp0, NegVal);
if (Value *NegVal = dyn_castNegVal(BOp0))
@@ -1432,11 +1433,11 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI,
Constant *NotCI = ConstantExpr::getNot(RHS);
if (!ConstantExpr::getAnd(BOC, NotCI)->isNullValue())
return ReplaceInstUsesWith(ICI,
- ConstantInt::get(Type::getInt1Ty(ICI.getContext()),
+ ConstantInt::get(Type::getInt1Ty(ICI.getContext()),
isICMP_NE));
}
break;
-
+
case Instruction::And:
if (ConstantInt *BOC = dyn_cast<ConstantInt>(BO->getOperand(1))) {
// If bits are being compared against that are and'd out, then the
@@ -1445,7 +1446,7 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI,
return ReplaceInstUsesWith(ICI,
ConstantInt::get(Type::getInt1Ty(ICI.getContext()),
isICMP_NE));
-
+
// If we have ((X & C) == C), turn it into ((X & C) != 0).
if (RHS == BOC && RHSV.isPowerOf2())
return new ICmpInst(isICMP_NE ? ICmpInst::ICMP_EQ :
@@ -1460,16 +1461,16 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI,
if (BOC->getValue().isSignBit()) {
Value *X = BO->getOperand(0);
Constant *Zero = Constant::getNullValue(X->getType());
- ICmpInst::Predicate pred = isICMP_NE ?
+ ICmpInst::Predicate pred = isICMP_NE ?
ICmpInst::ICMP_SLT : ICmpInst::ICMP_SGE;
return new ICmpInst(pred, X, Zero);
}
-
+
// ((X & ~7) == 0) --> X < 8
if (RHSV == 0 && isHighOnes(BOC)) {
Value *X = BO->getOperand(0);
Constant *NegX = ConstantExpr::getNeg(BOC);
- ICmpInst::Predicate pred = isICMP_NE ?
+ ICmpInst::Predicate pred = isICMP_NE ?
ICmpInst::ICMP_UGE : ICmpInst::ICMP_ULT;
return new ICmpInst(pred, X, NegX);
}
@@ -1517,11 +1518,11 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI,
Instruction *InstCombiner::visitICmpInstWithCastAndCast(ICmpInst &ICI) {
const CastInst *LHSCI = cast<CastInst>(ICI.getOperand(0));
Value *LHSCIOp = LHSCI->getOperand(0);
- const Type *SrcTy = LHSCIOp->getType();
- const Type *DestTy = LHSCI->getType();
+ Type *SrcTy = LHSCIOp->getType();
+ Type *DestTy = LHSCI->getType();
Value *RHSCIOp;
- // Turn icmp (ptrtoint x), (ptrtoint/c) into a compare of the input if the
+ // Turn icmp (ptrtoint x), (ptrtoint/c) into a compare of the input if the
// integer type is the same size as the pointer type.
if (TD && LHSCI->getOpcode() == Instruction::PtrToInt &&
TD->getPointerSizeInBits() ==
@@ -1539,7 +1540,7 @@ Instruction *InstCombiner::visitICmpInstWithCastAndCast(ICmpInst &ICI) {
if (RHSOp)
return new ICmpInst(ICI.getPredicate(), LHSCIOp, RHSOp);
}
-
+
// The code below only handles extension cast instructions, so far.
// Enforce this.
if (LHSCI->getOpcode() != Instruction::ZExt &&
@@ -1552,9 +1553,9 @@ Instruction *InstCombiner::visitICmpInstWithCastAndCast(ICmpInst &ICI) {
if (CastInst *CI = dyn_cast<CastInst>(ICI.getOperand(1))) {
// Not an extension from the same type?
RHSCIOp = CI->getOperand(0);
- if (RHSCIOp->getType() != LHSCIOp->getType())
+ if (RHSCIOp->getType() != LHSCIOp->getType())
return 0;
-
+
// If the signedness of the two casts doesn't agree (i.e. one is a sext
// and the other is a zext), then we can't handle this.
if (CI->getOpcode() != LHSCI->getOpcode())
@@ -1599,7 +1600,7 @@ Instruction *InstCombiner::visitICmpInstWithCastAndCast(ICmpInst &ICI) {
return new ICmpInst(ICI.getUnsignedPredicate(), LHSCIOp, Res1);
}
- // The re-extended constant changed so the constant cannot be represented
+ // The re-extended constant changed so the constant cannot be represented
// in the shorter type. Consequently, we cannot emit a simple comparison.
// All the cases that fold to true or false will have already been handled
// by SimplifyICmpInst, so only deal with the tricky case.
@@ -1637,26 +1638,26 @@ static Instruction *ProcessUGT_ADDCST_ADD(ICmpInst &I, Value *A, Value *B,
// llvm.sadd.with.overflow. To do this, we have to replace the original add
// with a narrower add, and discard the add-with-constant that is part of the
// range check (if we can't eliminate it, this isn't profitable).
-
+
// In order to eliminate the add-with-constant, the compare can be its only
// use.
Instruction *AddWithCst = cast<Instruction>(I.getOperand(0));
if (!AddWithCst->hasOneUse()) return 0;
-
+
// If CI2 is 2^7, 2^15, 2^31, then it might be an sadd.with.overflow.
if (!CI2->getValue().isPowerOf2()) return 0;
unsigned NewWidth = CI2->getValue().countTrailingZeros();
if (NewWidth != 7 && NewWidth != 15 && NewWidth != 31) return 0;
-
+
// The width of the new add formed is 1 more than the bias.
++NewWidth;
-
+
// Check to see that CI1 is an all-ones value with NewWidth bits.
if (CI1->getBitWidth() == NewWidth ||
CI1->getValue() != APInt::getLowBitsSet(CI1->getBitWidth(), NewWidth))
return 0;
-
- // In order to replace the original add with a narrower
+
+ // In order to replace the original add with a narrower
// llvm.sadd.with.overflow, the only uses allowed are the add-with-constant
// and truncates that discard the high bits of the add. Verify that this is
// the case.
@@ -1664,7 +1665,7 @@ static Instruction *ProcessUGT_ADDCST_ADD(ICmpInst &I, Value *A, Value *B,
for (Value::use_iterator UI = OrigAdd->use_begin(), E = OrigAdd->use_end();
UI != E; ++UI) {
if (*UI == AddWithCst) continue;
-
+
// Only accept truncates for now. We would really like a nice recursive
// predicate like SimplifyDemandedBits, but which goes downwards the use-def
// chain to see which bits of a value are actually demanded. If the
@@ -1674,32 +1675,32 @@ static Instruction *ProcessUGT_ADDCST_ADD(ICmpInst &I, Value *A, Value *B,
if (TI == 0 ||
TI->getType()->getPrimitiveSizeInBits() > NewWidth) return 0;
}
-
+
// If the pattern matches, truncate the inputs to the narrower type and
// use the sadd_with_overflow intrinsic to efficiently compute both the
// result and the overflow bit.
Module *M = I.getParent()->getParent()->getParent();
-
+
Type *NewType = IntegerType::get(OrigAdd->getContext(), NewWidth);
Value *F = Intrinsic::getDeclaration(M, Intrinsic::sadd_with_overflow,
NewType);
InstCombiner::BuilderTy *Builder = IC.Builder;
-
+
// Put the new code above the original add, in case there are any uses of the
// add between the add and the compare.
Builder->SetInsertPoint(OrigAdd);
-
+
Value *TruncA = Builder->CreateTrunc(A, NewType, A->getName()+".trunc");
Value *TruncB = Builder->CreateTrunc(B, NewType, B->getName()+".trunc");
CallInst *Call = Builder->CreateCall2(F, TruncA, TruncB, "sadd");
Value *Add = Builder->CreateExtractValue(Call, 0, "sadd.result");
Value *ZExt = Builder->CreateZExt(Add, OrigAdd->getType());
-
+
// The inner add was the result of the narrow add, zero extended to the
// wider type. Replace it with the result computed by the intrinsic.
IC.ReplaceInstUsesWith(*OrigAdd, ZExt);
-
+
// The original icmp gets replaced with the overflow value.
return ExtractValueInst::Create(Call, 1, "sadd.overflow");
}
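
The recognizer above keys off exact magic constants: for a narrow width w, CI2 must be the bias 2^(w-1) and CI1 the range bound 2^w - 1. A standalone check that the "sum+128 >u 255" form really is 8-bit signed-overflow detection; plain C++, illustrative only:

    #include <cassert>
    #include <cstdint>

    int main() {
      for (int a = -128; a <= 127; ++a)
        for (int b = -128; b <= 127; ++b) {
          int32_t sum = a + b;                      // add done in the wider type
          bool idiom = uint32_t(sum + 128) > 255u;  // the compared-with-constant form
          bool overflow = sum < -128 || sum > 127;  // what sadd.with.overflow reports
          assert(idiom == overflow);
        }
    }
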
@@ -1709,13 +1710,13 @@ static Instruction *ProcessUAddIdiom(Instruction &I, Value *OrigAddV,
// Don't bother doing this transformation for pointers, don't do it for
// vectors.
if (!isa<IntegerType>(OrigAddV->getType())) return 0;
-
+
// If the add is a constant expr, then we don't bother transforming it.
Instruction *OrigAdd = dyn_cast<Instruction>(OrigAddV);
if (OrigAdd == 0) return 0;
-
+
Value *LHS = OrigAdd->getOperand(0), *RHS = OrigAdd->getOperand(1);
-
+
// Put the new code above the original add, in case there are any uses of the
// add between the add and the compare.
InstCombiner::BuilderTy *Builder = IC.Builder;
@@ -1740,13 +1741,13 @@ static APInt DemandedBitsLHSMask(ICmpInst &I,
unsigned BitWidth, bool isSignCheck) {
if (isSignCheck)
return APInt::getSignBit(BitWidth);
-
+
ConstantInt *CI = dyn_cast<ConstantInt>(I.getOperand(1));
if (!CI) return APInt::getAllOnesValue(BitWidth);
const APInt &RHS = CI->getValue();
-
+
switch (I.getPredicate()) {
- // For a UGT comparison, we don't care about any bits that
+ // For a UGT comparison, we don't care about any bits that
// correspond to the trailing ones of the comparand. The value of these
// bits doesn't impact the outcome of the comparison, because any value
// greater than the RHS must differ in a bit higher than these due to carry.
@@ -1755,7 +1756,7 @@ static APInt DemandedBitsLHSMask(ICmpInst &I,
APInt lowBitsSet = APInt::getLowBitsSet(BitWidth, trailingOnes);
return ~lowBitsSet;
}
-
+
// Similarly, for a ULT comparison, we don't care about the trailing zeros.
// Any value less than the RHS must differ in a higher bit because of carries.
case ICmpInst::ICMP_ULT: {
@@ -1763,17 +1764,17 @@ static APInt DemandedBitsLHSMask(ICmpInst &I,
APInt lowBitsSet = APInt::getLowBitsSet(BitWidth, trailingZeros);
return ~lowBitsSet;
}
-
+
default:
return APInt::getAllOnesValue(BitWidth);
}
-
+
}
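
DemandedBitsLHSMask's ICMP_UGT case above relies on x >u r being decided entirely above the trailing-ones region of r (the ULT case is the mirror image with trailing zeros). A quick exhaustive check of that claim on 8-bit values; standalone sketch, not LLVM code:

    #include <cassert>

    int main() {
      for (unsigned r = 0; r < 256; ++r) {
        unsigned t = 0;                      // number of trailing one bits in r
        while (t < 8 && ((r >> t) & 1)) ++t;
        for (unsigned x = 0; x < 256; ++x) {
          bool full = x > r;
          bool high = (x >> t) > (r >> t);   // only the demanded high bits
          assert(full == high);
        }
      }
    }
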

Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
bool Changed = false;
Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
-
+
/// Orders the operands of the compare so that they are listed from most
/// complex to least complex. This puts constants before unary operators,
/// before binary operators.
@@ -1782,11 +1783,11 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
std::swap(Op0, Op1);
Changed = true;
}
-
+
if (Value *V = SimplifyICmpInst(I.getPredicate(), Op0, Op1, TD))
return ReplaceInstUsesWith(I, V);
-
- const Type *Ty = Op0->getType();
+
+ Type *Ty = Op0->getType();
// icmp's with boolean values can always be turned into bitwise operations
if (Ty->isIntegerTy(1)) {
@@ -1835,13 +1836,13 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
BitWidth = Ty->getScalarSizeInBits();
else if (TD) // Pointers require TD info to get their size.
BitWidth = TD->getTypeSizeInBits(Ty->getScalarType());
-
+
bool isSignBit = false;
// See if we are doing a comparison with a constant.
if (ConstantInt *CI = dyn_cast<ConstantInt>(Op1)) {
Value *A = 0, *B = 0;
-
+
// Match the following pattern, which is a common idiom when writing
// overflow-safe integer arithmetic function. The source performs an
// addition in wider type, and explicitly checks for overflow using
@@ -1849,9 +1850,9 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
// sadd_with_overflow intrinsic.
//
// TODO: This could probably be generalized to handle other overflow-safe
- // operations if we worked out the formulas to compute the appropriate
+ // operations if we worked out the formulas to compute the appropriate
// magic constants.
- //
+ //
// sum = a + b
// if (sum+128 >u 255) ... -> llvm.sadd.with.overflow.i8
{
@@ -1861,14 +1862,14 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
if (Instruction *Res = ProcessUGT_ADDCST_ADD(I, A, B, CI2, CI, *this))
return Res;
}
-
+
// (icmp ne/eq (sub A B) 0) -> (icmp ne/eq A, B)
if (I.isEquality() && CI->isZero() &&
match(Op0, m_Sub(m_Value(A), m_Value(B)))) {
// (icmp cond A B) if cond is equality
return new ICmpInst(I.getPredicate(), A, B);
}
-
+
// If we have an icmp le or icmp ge instruction, turn it into the
// appropriate icmp lt or icmp gt instruction. This allows us to rely on
// them being folded in the code below. The SimplifyICmpInst code has
@@ -1892,7 +1893,7 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
return new ICmpInst(ICmpInst::ICMP_SGT, Op0,
ConstantInt::get(CI->getContext(), CI->getValue()-1));
}
-
+
// If this comparison is a normal comparison, it demands all
// bits, if it is a sign bit comparison, it only demands the sign bit.
bool UnusedBit;
@@ -1948,7 +1949,7 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
case ICmpInst::ICMP_EQ: {
if (Op0Max.ult(Op1Min) || Op0Min.ugt(Op1Max))
return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getType()));
-
+
// If all bits are known zero except for one, then we know at most one
// bit is set. If the comparison is against zero, then this is a check
// to see if *that* bit is set.
@@ -1960,7 +1961,7 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
if (!match(Op0, m_And(m_Value(LHS), m_ConstantInt(LHSC))) ||
LHSC->getValue() != Op0KnownZeroInverted)
LHS = Op0;
-
+
// If the LHS is 1 << x, and we know the result is a power of 2 like 8,
// then turn "((1 << x)&8) == 0" into "x != 3".
Value *X = 0;
@@ -1969,7 +1970,7 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
return new ICmpInst(ICmpInst::ICMP_NE, X,
ConstantInt::get(X->getType(), CmpVal));
}
-
+
// If the LHS is 8 >>u x, and we know the result is a power of 2 like 1,
// then turn "((8 >>u x)&1) == 0" into "x != 3".
const APInt *CI;
@@ -1979,13 +1980,13 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
ConstantInt::get(X->getType(),
CI->countTrailingZeros()));
}
-
+
break;
}
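
The "(1 << x) & 8" fold in the ICMP_EQ case above reduces a power-of-two mask test to an index compare. A tiny standalone check (illustrative):

    #include <cassert>

    int main() {
      for (unsigned x = 0; x < 8; ++x) {
        bool orig = ((1u << x) & 8u) == 0;   // ((1 << x) & 8) == 0
        bool folded = x != 3;                // 8 == 1 << 3
        assert(orig == folded);
      }
    }
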
case ICmpInst::ICMP_NE: {
if (Op0Max.ult(Op1Min) || Op0Min.ugt(Op1Max))
return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getType()));
-
+
// If all bits are known zero except for one, then we know at most one
// bit is set. If the comparison is against zero, then this is a check
// to see if *that* bit is set.
@@ -1997,7 +1998,7 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
if (!match(Op0, m_And(m_Value(LHS), m_ConstantInt(LHSC))) ||
LHSC->getValue() != Op0KnownZeroInverted)
LHS = Op0;
-
+
// If the LHS is 1 << x, and we know the result is a power of 2 like 8,
// then turn "((1 << x)&8) != 0" into "x == 3".
Value *X = 0;
@@ -2006,7 +2007,7 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
return new ICmpInst(ICmpInst::ICMP_EQ, X,
ConstantInt::get(X->getType(), CmpVal));
}
-
+
// If the LHS is 8 >>u x, and we know the result is a power of 2 like 1,
// then turn "((8 >>u x)&1) != 0" into "x == 3".
const APInt *CI;
@@ -2016,7 +2017,7 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
ConstantInt::get(X->getType(),
CI->countTrailingZeros()));
}
-
+
break;
}
case ICmpInst::ICMP_ULT:
@@ -2137,9 +2138,9 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
// See if we are doing a comparison between a constant and an instruction that
// can be folded into the comparison.
if (ConstantInt *CI = dyn_cast<ConstantInt>(Op1)) {
- // Since the RHS is a ConstantInt (CI), if the left hand side is an
- // instruction, see if that instruction also has constants so that the
- // instruction can be folded into the icmp
+ // Since the RHS is a ConstantInt (CI), if the left hand side is an
+ // instruction, see if that instruction also has constants so that the
+ // instruction can be folded into the icmp
if (Instruction *LHSI = dyn_cast<Instruction>(Op0))
if (Instruction *Res = visitICmpInstWithInstAndIntCst(I, LHSI, CI))
return Res;
@@ -2194,7 +2195,7 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
case Instruction::IntToPtr:
// icmp pred inttoptr(X), null -> icmp pred X, 0
if (RHSC->isNullValue() && TD &&
- TD->getIntPtrType(RHSC->getContext()) ==
+ TD->getIntPtrType(RHSC->getContext()) ==
LHSI->getOperand(0)->getType())
return new ICmpInst(I.getPredicate(), LHSI->getOperand(0),
Constant::getNullValue(LHSI->getOperand(0)->getType()));
@@ -2227,8 +2228,8 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
// values. If the ptr->ptr cast can be stripped off both arguments, we do so
// now.
if (BitCastInst *CI = dyn_cast<BitCastInst>(Op0)) {
- if (Op0->getType()->isPointerTy() &&
- (isa<Constant>(Op1) || isa<BitCastInst>(Op1))) {
+ if (Op0->getType()->isPointerTy() &&
+ (isa<Constant>(Op1) || isa<BitCastInst>(Op1))) {
// We keep moving the cast from the left operand over to the right
// operand, where it can often be eliminated completely.
Op0 = CI->getOperand(0);
@@ -2250,7 +2251,7 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
return new ICmpInst(I.getPredicate(), Op0, Op1);
}
}
-
+
if (isa<CastInst>(Op0)) {
// Handle the special case of: icmp (cast bool to X), <cst>
// This comes up when you have code like
@@ -2384,7 +2385,7 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
return new ICmpInst(Pred, BO0->getOperand(0),
BO1->getOperand(0));
}
-
+
if (CI->isMaxValue(true)) {
ICmpInst::Predicate Pred = I.isSigned()
? I.getUnsignedPredicate()
@@ -2404,7 +2405,7 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
// Mask = -1 >> count-trailing-zeros(Cst).
if (!CI->isZero() && !CI->isOne()) {
const APInt &AP = CI->getValue();
- ConstantInt *Mask = ConstantInt::get(I.getContext(),
+ ConstantInt *Mask = ConstantInt::get(I.getContext(),
APInt::getLowBitsSet(AP.getBitWidth(),
AP.getBitWidth() -
AP.countTrailingZeros()));
@@ -2438,7 +2439,7 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
}
}
}
-
+
{ Value *A, *B;
// ~x < ~y --> y < x
// ~x < cst --> ~cst < x
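
The not-compare rewrites above use that bitwise-not reverses order; on 8-bit unsigned values ~v is 255 - v. A standalone spot check (illustrative):

    #include <cassert>
    #include <cstdint>

    int main() {
      for (unsigned x = 0; x < 256; ++x)
        for (unsigned y = 0; y < 256; ++y) {
          bool lhs = uint8_t(~x) < uint8_t(~y);  // ~x < ~y
          assert(lhs == (y < x));                // --> y < x
        }
    }
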
@@ -2452,11 +2453,11 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
// (a+b) <u a --> llvm.uadd.with.overflow.
// (a+b) <u b --> llvm.uadd.with.overflow.
if (I.getPredicate() == ICmpInst::ICMP_ULT &&
- match(Op0, m_Add(m_Value(A), m_Value(B))) &&
+ match(Op0, m_Add(m_Value(A), m_Value(B))) &&
(Op1 == A || Op1 == B))
if (Instruction *R = ProcessUAddIdiom(I, Op0, *this))
return R;
-
+
// a >u (a+b) --> llvm.uadd.with.overflow.
// b >u (a+b) --> llvm.uadd.with.overflow.
if (I.getPredicate() == ICmpInst::ICMP_UGT &&
@@ -2465,7 +2466,7 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
if (Instruction *R = ProcessUAddIdiom(I, Op1, *this))
return R;
}
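
Both idioms funnel into ProcessUAddIdiom because an unsigned add wraps exactly when the truncated sum compares below either operand. Checked exhaustively on 8-bit values; standalone sketch, not part of the patch:

    #include <cassert>
    #include <cstdint>

    int main() {
      for (unsigned a = 0; a < 256; ++a)
        for (unsigned b = 0; b < 256; ++b) {
          uint8_t sum = uint8_t(a + b);
          bool idiom = sum < a;          // (a+b) <u a
          bool overflow = a + b > 255;   // true carry-out of the 8-bit add
          assert(idiom == overflow);
        }
    }
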
-
+
if (I.isEquality()) {
Value *A, *B, *C, *D;
@@ -2483,10 +2484,10 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
match(D, m_ConstantInt(C2)) && Op1->hasOneUse()) {
Constant *NC = ConstantInt::get(I.getContext(),
C1->getValue() ^ C2->getValue());
- Value *Xor = Builder->CreateXor(C, NC, "tmp");
+ Value *Xor = Builder->CreateXor(C, NC);
return new ICmpInst(I.getPredicate(), A, Xor);
}
-
+
// A^B == A^D -> B == D
if (A == C) return new ICmpInst(I.getPredicate(), B, D);
if (A == D) return new ICmpInst(I.getPredicate(), B, C);
@@ -2494,7 +2495,7 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
if (B == D) return new ICmpInst(I.getPredicate(), A, C);
}
}
-
+
if (match(Op1, m_Xor(m_Value(A), m_Value(B))) &&
(A == Op0 || B == Op0)) {
// A == (A^B) -> B == 0
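
The xor equality folds here ("A^B == A^D -> B == D" above, "A == (A^B) -> B == 0" below) are cancellation laws of xor. The second one, spot-checked in plain C++ (illustrative):

    #include <cassert>

    int main() {
      for (unsigned a = 0; a < 256; ++a)
        for (unsigned b = 0; b < 256; ++b)
          assert(((a ^ b) == a) == (b == 0));
    }
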
@@ -2504,10 +2505,10 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
}

  // (X&Z) == (Y&Z) -> (X^Y) & Z == 0
- if (match(Op0, m_OneUse(m_And(m_Value(A), m_Value(B)))) &&
+ if (match(Op0, m_OneUse(m_And(m_Value(A), m_Value(B)))) &&
match(Op1, m_OneUse(m_And(m_Value(C), m_Value(D))))) {
Value *X = 0, *Y = 0, *Z = 0;
-
+
if (A == C) {
X = B; Y = D; Z = A;
} else if (A == D) {
@@ -2517,16 +2518,16 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
} else if (B == D) {
X = A; Y = C; Z = B;
}
-
+
if (X) { // Build (X^Y) & Z
- Op1 = Builder->CreateXor(X, Y, "tmp");
- Op1 = Builder->CreateAnd(Op1, Z, "tmp");
+ Op1 = Builder->CreateXor(X, Y);
+ Op1 = Builder->CreateAnd(Op1, Z);
I.setOperand(0, Op1);
I.setOperand(1, Constant::getNullValue(Op1->getType()));
return &I;
}
}
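
The rewrite just built, comparing (X^Y) & Z against zero instead of comparing the two masked values, holds because the masked values agree exactly when X and Y do not differ inside Z. A standalone check (illustrative; the mask stride just samples a few Z values):

    #include <cassert>

    int main() {
      for (unsigned x = 0; x < 256; ++x)
        for (unsigned y = 0; y < 256; ++y)
          for (unsigned z = 0; z < 256; z += 37)
            assert(((x & z) == (y & z)) == (((x ^ y) & z) == 0));
    }
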
-
+
// Transform "icmp eq (trunc (lshr(X, cst1)), cst" to
// "icmp (and X, mask), cst"
uint64_t ShAmt = 0;
@@ -2539,21 +2540,21 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
// when it exposes other optimizations.
!A->hasOneUse()) {
unsigned ASize =cast<IntegerType>(A->getType())->getPrimitiveSizeInBits();
-
+
if (ShAmt < ASize) {
APInt MaskV =
APInt::getLowBitsSet(ASize, Op0->getType()->getPrimitiveSizeInBits());
MaskV <<= ShAmt;
-
+
APInt CmpV = Cst1->getValue().zext(ASize);
CmpV <<= ShAmt;
-
+
Value *Mask = Builder->CreateAnd(A, Builder->getInt(MaskV));
return new ICmpInst(I.getPredicate(), Mask, Builder->getInt(CmpV));
}
}
}
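
The trunc-of-lshr fold above widens the compare instead of narrowing the value: MaskV isolates the truncated window of X in place, and the constant is lifted by the same shift. Checked on a 16-bit source truncated to 8 bits; standalone sketch, widths chosen only for the test:

    #include <cassert>
    #include <cstdint>

    int main() {
      for (unsigned s = 0; s + 8 <= 16; ++s) {
        uint32_t mask = 0xFFu << s;              // MaskV: low 8 bits shifted up
        for (uint32_t x = 0; x < 0x10000; ++x)
          for (uint32_t c = 0; c < 256; c += 17) {   // sample constants
            bool orig = uint8_t(x >> s) == c;        // trunc(lshr(X, s)) == c
            bool folded = (x & mask) == (c << s);    // (X & MaskV) == CmpV
            assert(orig == folded);
          }
      }
    }
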
-
+
{
Value *X; ConstantInt *Cst;
// icmp X+Cst, X
@@ -2579,31 +2580,31 @@ Instruction *InstCombiner::FoldFCmp_IntToFP_Cst(FCmpInst &I,
Constant *RHSC) {
if (!isa<ConstantFP>(RHSC)) return 0;
const APFloat &RHS = cast<ConstantFP>(RHSC)->getValueAPF();
-
+
// Get the width of the mantissa. We don't want to hack on conversions that
// might lose information from the integer, e.g. "i64 -> float"
int MantissaWidth = LHSI->getType()->getFPMantissaWidth();
if (MantissaWidth == -1) return 0; // Unknown.
-
+
// Check to see that the input is converted from an integer type that is small
// enough that preserves all bits. TODO: check here for "known" sign bits.
// This would allow us to handle (fptosi (x >>s 62) to float) if x is i64 f.e.
unsigned InputSize = LHSI->getOperand(0)->getType()->getScalarSizeInBits();
-
+
// If this is a uitofp instruction, we need an extra bit to hold the sign.
bool LHSUnsigned = isa<UIToFPInst>(LHSI);
if (LHSUnsigned)
++InputSize;
-
+
// If the conversion would lose info, don't hack on this.
if ((int)InputSize > MantissaWidth)
return 0;
-
+
// Otherwise, we can potentially simplify the comparison. We know that it
// will always come through as an integer value and we know the constant is
// not a NAN (it would have been previously simplified).
assert(!RHS.isNaN() && "NaN comparison not already folded!");
-
+
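
The InputSize/MantissaWidth guard above is the usual exactness condition for int-to-FP conversion. A standalone illustration with float's 24-bit significand; not part of the patch:

    #include <cassert>
    #include <cstdint>

    int main() {
      for (uint32_t v = 0; v < 0x10000; ++v)   // every i16 fits 24 bits: exact
        assert(uint32_t(float(v)) == v);
      uint32_t big = 0x01000001;               // 2^24 + 1 needs 25 bits
      assert(uint32_t(float(big)) != big);     // rounds to 2^24: info lost
    }
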
ICmpInst::Predicate Pred;
switch (I.getPredicate()) {
default: llvm_unreachable("Unexpected predicate!");
@@ -2636,15 +2637,15 @@ Instruction *InstCombiner::FoldFCmp_IntToFP_Cst(FCmpInst &I,
case FCmpInst::FCMP_UNO:
return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getContext()));
}
-
- const IntegerType *IntTy = cast<IntegerType>(LHSI->getOperand(0)->getType());
-
+
+ IntegerType *IntTy = cast<IntegerType>(LHSI->getOperand(0)->getType());
+
// Now we know that the APFloat is a normal number, zero or inf.
-
+
// See if the FP constant is too large for the integer. For example,
// comparing an i8 to 300.0.
unsigned IntWidth = IntTy->getScalarSizeInBits();
-
+
if (!LHSUnsigned) {
// If the RHS value is > SignedMax, fold the comparison. This handles +INF
// and large values.
@@ -2670,7 +2671,7 @@ Instruction *InstCombiner::FoldFCmp_IntToFP_Cst(FCmpInst &I,
return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getContext()));
}
}
-
+
if (!LHSUnsigned) {
// See if the RHS value is < SignedMin.
APFloat SMin(RHS.getSemantics(), APFloat::fcZero, false);
@@ -2766,7 +2767,7 @@ Instruction *InstCombiner::FoldFCmp_IntToFP_Cst(FCmpInst &I,
Instruction *InstCombiner::visitFCmpInst(FCmpInst &I) {
bool Changed = false;
-
+
/// Orders the operands of the compare so that they are listed from most
/// complex to least complex. This puts constants before unary operators,
/// before binary operators.
@@ -2776,7 +2777,7 @@ Instruction *InstCombiner::visitFCmpInst(FCmpInst &I) {
}

  Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
-
+
if (Value *V = SimplifyFCmpInst(I.getPredicate(), Op0, Op1, TD))
return ReplaceInstUsesWith(I, V);
@@ -2792,7 +2793,7 @@ Instruction *InstCombiner::visitFCmpInst(FCmpInst &I) {
I.setPredicate(FCmpInst::FCMP_UNO);
I.setOperand(1, Constant::getNullValue(Op0->getType()));
return &I;
-
+
case FCmpInst::FCMP_ORD: // True if ordered (no nans)
case FCmpInst::FCMP_OEQ: // True if ordered and equal
case FCmpInst::FCMP_OGE: // True if ordered and greater than or equal
@@ -2803,7 +2804,7 @@ Instruction *InstCombiner::visitFCmpInst(FCmpInst &I) {
return &I;
}
}
-
+
// Handle fcmp with constant RHS
if (Constant *RHSC = dyn_cast<Constant>(Op1)) {
if (Instruction *LHSI = dyn_cast<Instruction>(Op0))
@@ -2836,10 +2837,14 @@ Instruction *InstCombiner::visitFCmpInst(FCmpInst &I) {
APFloat F = RHSF->getValueAPF();
F.convert(*Sem, APFloat::rmNearestTiesToEven, &Lossy);
- // Avoid lossy conversions and denormals.
+ // Avoid lossy conversions and denormals. Zero is a special case
+ // that's OK to convert.
+ APFloat Fabs = F;
+ Fabs.clearSign();
if (!Lossy &&
- F.compare(APFloat::getSmallestNormalized(*Sem)) !=
- APFloat::cmpLessThan)
+ ((Fabs.compare(APFloat::getSmallestNormalized(*Sem)) !=
+ APFloat::cmpLessThan) || Fabs.isZero()))
+
return new FCmpInst(I.getPredicate(), LHSExt->getOperand(0),
ConstantFP::get(RHSC->getContext(), F));
break;
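
The change above additionally lets an exact zero through the denormal filter. The legality of the whole fpext fold, comparing in the narrow type once the constant converts losslessly, can be sketched in plain C++ (illustrative values only):

    #include <cassert>

    int main() {
      float xs[] = {-2.5f, -0.0f, 0.0f, 1.0f, 3.5f};
      double cs[] = {0.0, 1.0, -2.5, 3.5};   // all exactly representable as float
      for (float x : xs)
        for (double c : cs)
          // fcmp (fpext x), c  ==  fcmp x, (float)c when the cast is lossless
          assert((double(x) < c) == (x < float(c)));
    }
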
diff --git a/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp b/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
index f499290..7446a51 100644
--- a/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
+++ b/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
@@ -26,7 +26,7 @@ Instruction *InstCombiner::visitAllocaInst(AllocaInst &AI) {
// Ensure that the alloca array size argument has type intptr_t, so that
// any casting is exposed early.
if (TD) {
- const Type *IntPtrTy = TD->getIntPtrType(AI.getContext());
+ Type *IntPtrTy = TD->getIntPtrType(AI.getContext());
if (AI.getArraySize()->getType() != IntPtrTy) {
Value *V = Builder->CreateIntCast(AI.getArraySize(),
IntPtrTy, false);
@@ -38,7 +38,7 @@ Instruction *InstCombiner::visitAllocaInst(AllocaInst &AI) {
// Convert: alloca Ty, C - where C is a constant != 1 into: alloca [C x Ty], 1
if (AI.isArrayAllocation()) { // Check C != 1
if (const ConstantInt *C = dyn_cast<ConstantInt>(AI.getArraySize())) {
- const Type *NewTy =
+ Type *NewTy =
ArrayType::get(AI.getAllocatedType(), C->getZExtValue());
assert(isa<AllocaInst>(AI) && "Unknown type of allocation inst!");
AllocaInst *New = Builder->CreateAlloca(NewTy, 0, AI.getName());
@@ -58,8 +58,7 @@ Instruction *InstCombiner::visitAllocaInst(AllocaInst &AI) {
Idx[0] = NullIdx;
Idx[1] = NullIdx;
Instruction *GEP =
- GetElementPtrInst::CreateInBounds(New, Idx, Idx + 2,
- New->getName()+".sub");
+ GetElementPtrInst::CreateInBounds(New, Idx, New->getName()+".sub");
InsertNewInstBefore(GEP, *It);
// Now make everything use the getelementptr instead of the original
@@ -92,28 +91,28 @@ static Instruction *InstCombineLoadCast(InstCombiner &IC, LoadInst &LI,
User *CI = cast<User>(LI.getOperand(0));
Value *CastOp = CI->getOperand(0);
- const PointerType *DestTy = cast<PointerType>(CI->getType());
- const Type *DestPTy = DestTy->getElementType();
- if (const PointerType *SrcTy = dyn_cast<PointerType>(CastOp->getType())) {
+ PointerType *DestTy = cast<PointerType>(CI->getType());
+ Type *DestPTy = DestTy->getElementType();
+ if (PointerType *SrcTy = dyn_cast<PointerType>(CastOp->getType())) {
// If the address spaces don't match, don't eliminate the cast.
if (DestTy->getAddressSpace() != SrcTy->getAddressSpace())
return 0;
- const Type *SrcPTy = SrcTy->getElementType();
+ Type *SrcPTy = SrcTy->getElementType();
if (DestPTy->isIntegerTy() || DestPTy->isPointerTy() ||
DestPTy->isVectorTy()) {
// If the source is an array, the code below will not succeed. Check to
// see if a trivial 'gep P, 0, 0' will help matters. Only do this for
// constants.
- if (const ArrayType *ASrcTy = dyn_cast<ArrayType>(SrcPTy))
+ if (ArrayType *ASrcTy = dyn_cast<ArrayType>(SrcPTy))
if (Constant *CSrc = dyn_cast<Constant>(CastOp))
if (ASrcTy->getNumElements() != 0) {
Value *Idxs[2];
Idxs[0] = Constant::getNullValue(Type::getInt32Ty(LI.getContext()));
Idxs[1] = Idxs[0];
- CastOp = ConstantExpr::getGetElementPtr(CSrc, Idxs, 2);
+ CastOp = ConstantExpr::getGetElementPtr(CSrc, Idxs);
SrcTy = cast<PointerType>(CastOp->getType());
SrcPTy = SrcTy->getElementType();
}
@@ -133,6 +132,7 @@ static Instruction *InstCombineLoadCast(InstCombiner &IC, LoadInst &LI,
LoadInst *NewLoad =
IC.Builder->CreateLoad(CastOp, LI.isVolatile(), CI->getName());
NewLoad->setAlignment(LI.getAlignment());
+ NewLoad->setAtomic(LI.getOrdering(), LI.getSynchScope());
// Now cast the result of the load.
return new BitCastInst(NewLoad, LI.getType());
}
@@ -163,8 +163,9 @@ Instruction *InstCombiner::visitLoadInst(LoadInst &LI) {
if (Instruction *Res = InstCombineLoadCast(*this, LI, TD))
return Res;
- // None of the following transforms are legal for volatile loads.
- if (LI.isVolatile()) return 0;
+ // None of the following transforms are legal for volatile/atomic loads.
+ // FIXME: Some of it is okay for atomic loads; needs refactoring.
+ if (!LI.isSimple()) return 0;
// Do really simple store-to-load forwarding and load CSE, to catch cases
// where there are several consecutive memory accesses to the same location,
@@ -256,11 +257,11 @@ static Instruction *InstCombineStoreToCast(InstCombiner &IC, StoreInst &SI) {
User *CI = cast<User>(SI.getOperand(1));
Value *CastOp = CI->getOperand(0);
- const Type *DestPTy = cast<PointerType>(CI->getType())->getElementType();
- const PointerType *SrcTy = dyn_cast<PointerType>(CastOp->getType());
+ Type *DestPTy = cast<PointerType>(CI->getType())->getElementType();
+ PointerType *SrcTy = dyn_cast<PointerType>(CastOp->getType());
if (SrcTy == 0) return 0;
- const Type *SrcPTy = SrcTy->getElementType();
+ Type *SrcPTy = SrcTy->getElementType();
if (!DestPTy->isIntegerTy() && !DestPTy->isPointerTy())
return 0;
@@ -280,12 +281,12 @@ static Instruction *InstCombineStoreToCast(InstCombiner &IC, StoreInst &SI) {
NewGEPIndices.push_back(Zero);
while (1) {
- if (const StructType *STy = dyn_cast<StructType>(SrcPTy)) {
+ if (StructType *STy = dyn_cast<StructType>(SrcPTy)) {
if (!STy->getNumElements()) /* Struct can be empty {} */
break;
NewGEPIndices.push_back(Zero);
SrcPTy = STy->getElementType(0);
- } else if (const ArrayType *ATy = dyn_cast<ArrayType>(SrcPTy)) {
+ } else if (ArrayType *ATy = dyn_cast<ArrayType>(SrcPTy)) {
NewGEPIndices.push_back(Zero);
SrcPTy = ATy->getElementType();
} else {
@@ -314,8 +315,8 @@ static Instruction *InstCombineStoreToCast(InstCombiner &IC, StoreInst &SI) {
Value *NewCast;
Value *SIOp0 = SI.getOperand(0);
Instruction::CastOps opcode = Instruction::BitCast;
- const Type* CastSrcTy = SIOp0->getType();
- const Type* CastDstTy = SrcPTy;
+ Type* CastSrcTy = SIOp0->getType();
+ Type* CastDstTy = SrcPTy;
if (CastDstTy->isPointerTy()) {
if (CastSrcTy->isIntegerTy())
opcode = Instruction::IntToPtr;
@@ -327,8 +328,7 @@ static Instruction *InstCombineStoreToCast(InstCombiner &IC, StoreInst &SI) {
// SIOp0 is a pointer to aggregate and this is a store to the first field,
// emit a GEP to index into its first field.
if (!NewGEPIndices.empty())
- CastOp = IC.Builder->CreateInBoundsGEP(CastOp, NewGEPIndices.begin(),
- NewGEPIndices.end());
+ CastOp = IC.Builder->CreateInBoundsGEP(CastOp, NewGEPIndices);
NewCast = IC.Builder->CreateCast(opcode, SIOp0, CastDstTy,
SIOp0->getName()+".c");
@@ -370,21 +370,6 @@ Instruction *InstCombiner::visitStoreInst(StoreInst &SI) {
Value *Val = SI.getOperand(0);
Value *Ptr = SI.getOperand(1);
- // If the RHS is an alloca with a single use, zapify the store, making the
- // alloca dead.
- if (!SI.isVolatile()) {
- if (Ptr->hasOneUse()) {
- if (isa<AllocaInst>(Ptr))
- return EraseInstFromFunction(SI);
- if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Ptr)) {
- if (isa<AllocaInst>(GEP->getOperand(0))) {
- if (GEP->getOperand(0)->hasOneUse())
- return EraseInstFromFunction(SI);
- }
- }
- }
- }
-
// Attempt to improve the alignment.
if (TD) {
unsigned KnownAlign =
@@ -400,6 +385,23 @@ Instruction *InstCombiner::visitStoreInst(StoreInst &SI) {
SI.setAlignment(EffectiveStoreAlign);
}
+ // Don't hack volatile/atomic stores.
+ // FIXME: Some bits are legal for atomic stores; needs refactoring.
+ if (!SI.isSimple()) return 0;
+
+ // If the RHS is an alloca with a single use, zapify the store, making the
+ // alloca dead.
+ if (Ptr->hasOneUse()) {
+ if (isa<AllocaInst>(Ptr))
+ return EraseInstFromFunction(SI);
+ if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Ptr)) {
+ if (isa<AllocaInst>(GEP->getOperand(0))) {
+ if (GEP->getOperand(0)->hasOneUse())
+ return EraseInstFromFunction(SI);
+ }
+ }
+ }
+
// Do really simple DSE, to catch cases where there are several consecutive
// stores to the same location, separated by a few arithmetic operations. This
// situation often occurs with bitfield accesses.
@@ -417,8 +419,8 @@ Instruction *InstCombiner::visitStoreInst(StoreInst &SI) {
if (StoreInst *PrevSI = dyn_cast<StoreInst>(BBI)) {
// Prev store isn't volatile, and stores to the same location?
- if (!PrevSI->isVolatile() &&equivalentAddressValues(PrevSI->getOperand(1),
- SI.getOperand(1))) {
+ if (PrevSI->isSimple() && equivalentAddressValues(PrevSI->getOperand(1),
+ SI.getOperand(1))) {
++NumDeadStore;
++BBI;
EraseInstFromFunction(*PrevSI);
@@ -432,7 +434,7 @@ Instruction *InstCombiner::visitStoreInst(StoreInst &SI) {
// then *this* store is dead (X = load P; store X -> P).
if (LoadInst *LI = dyn_cast<LoadInst>(BBI)) {
if (LI == Val && equivalentAddressValues(LI->getOperand(0), Ptr) &&
- !SI.isVolatile())
+ LI->isSimple())
return EraseInstFromFunction(SI);
// Otherwise, this is a load from some other location. Stores before it
@@ -444,9 +446,6 @@ Instruction *InstCombiner::visitStoreInst(StoreInst &SI) {
if (BBI->mayWriteToMemory() || BBI->mayReadFromMemory())
break;
}
-
-
- if (SI.isVolatile()) return 0; // Don't hack volatile stores.
// store X, null -> turns into 'unreachable' in SimplifyCFG
if (isa<ConstantPointerNull>(Ptr) && SI.getPointerAddressSpace() == 0) {
@@ -549,11 +548,11 @@ bool InstCombiner::SimplifyStoreAtEndOfBlock(StoreInst &SI) {
return false;
--BBI;
}
- // If this isn't a store, isn't a store to the same location, or if the
- // alignments differ, bail out.
+ // If this isn't a store, isn't a store to the same location, or is not the
+ // right kind of store, bail out.
OtherStore = dyn_cast<StoreInst>(BBI);
if (!OtherStore || OtherStore->getOperand(1) != SI.getOperand(1) ||
- OtherStore->getAlignment() != SI.getAlignment())
+ !SI.isSameOperationAs(OtherStore))
return false;
} else {
// Otherwise, the other block ended with a conditional branch. If one of the
@@ -569,7 +568,7 @@ bool InstCombiner::SimplifyStoreAtEndOfBlock(StoreInst &SI) {
// Check to see if we find the matching store.
if ((OtherStore = dyn_cast<StoreInst>(BBI))) {
if (OtherStore->getOperand(1) != SI.getOperand(1) ||
- OtherStore->getAlignment() != SI.getAlignment())
+ !SI.isSameOperationAs(OtherStore))
return false;
break;
}
@@ -601,10 +600,12 @@ bool InstCombiner::SimplifyStoreAtEndOfBlock(StoreInst &SI) {
// Advance to a place where it is safe to insert the new store and
// insert it.
- BBI = DestBB->getFirstNonPHI();
+ BBI = DestBB->getFirstInsertionPt();
StoreInst *NewSI = new StoreInst(MergedVal, SI.getOperand(1),
- OtherStore->isVolatile(),
- SI.getAlignment());
+ SI.isVolatile(),
+ SI.getAlignment(),
+ SI.getOrdering(),
+ SI.getSynchScope());
InsertNewInstBefore(NewSI, *BBI);
NewSI->setDebugLoc(OtherStore->getDebugLoc());
diff --git a/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp b/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
index 630a6fe..7f48125 100644
--- a/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
+++ b/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
@@ -38,7 +38,7 @@ static Value *simplifyValueKnownNonZero(Value *V, InstCombiner &IC) {
m_Value(B))) &&
// The "1" can be any value known to be a power of 2.
isPowerOfTwo(PowerOf2, IC.getTargetData())) {
- A = IC.Builder->CreateSub(A, B, "tmp");
+ A = IC.Builder->CreateSub(A, B);
return IC.Builder->CreateShl(PowerOf2, A);
}
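
The rewrite above replaces a shift-down of a power of two with a shift-up by the difference; it is only sound because the caller knows the value is non-zero, i.e. nothing was shifted out. A standalone check (illustrative):

    #include <cassert>
    #include <cstdint>

    int main() {
      for (unsigned k = 0; k < 4; ++k)
        for (unsigned a = 0; a < 8; ++a)
          for (unsigned b = 0; b <= a; ++b) {
            uint32_t pow2 = 1u << k;
            if (((pow2 << a) >> b) == 0)
              continue; // mirror the known-non-zero precondition
            assert(((pow2 << a) >> b) == (pow2 << (a - b)));
          }
    }
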
@@ -131,7 +131,7 @@ Instruction *InstCombiner::visitMul(BinaryOperator &I) {
{ Value *X; ConstantInt *C1;
if (Op0->hasOneUse() &&
match(Op0, m_Add(m_Value(X), m_ConstantInt(C1)))) {
- Value *Add = Builder->CreateMul(X, CI, "tmp");
+ Value *Add = Builder->CreateMul(X, CI);
return BinaryOperator::CreateAdd(Add, Builder->CreateMul(C1, CI));
}
}
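
The (X + C1) * CI split above is distributivity in wrapping arithmetic; both constant products then fold away. Spot-checked mod 256 with illustrative constants:

    #include <cassert>
    #include <cstdint>

    int main() {
      for (uint32_t x = 0; x < 256; ++x)
        assert(uint8_t((x + 37) * 5) == uint8_t(x * 5 + 37 * 5));
    }
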
@@ -244,7 +244,7 @@ Instruction *InstCombiner::visitMul(BinaryOperator &I) {
if (BoolCast) {
Value *V = Builder->CreateSub(Constant::getNullValue(I.getType()),
- BoolCast, "tmp");
+ BoolCast);
return BinaryOperator::CreateAnd(V, OtherOp);
}
}
@@ -421,7 +421,7 @@ Instruction *InstCombiner::commonIDivTransforms(BinaryOperator &I) {
/// dyn_castZExtVal - Checks if V is a zext or constant that can
/// be truncated to Ty without losing bits.
-static Value *dyn_castZExtVal(Value *V, const Type *Ty) {
+static Value *dyn_castZExtVal(Value *V, Type *Ty) {
if (ZExtInst *Z = dyn_cast<ZExtInst>(V)) {
if (Z->getSrcTy() == Ty)
return Z->getOperand(0);
@@ -466,8 +466,7 @@ Instruction *InstCombiner::visitUDiv(BinaryOperator &I) {
{ const APInt *CI; Value *N;
if (match(Op1, m_Shl(m_Power2(CI), m_Value(N)))) {
if (*CI != 1)
- N = Builder->CreateAdd(N, ConstantInt::get(I.getType(), CI->logBase2()),
- "tmp");
+ N = Builder->CreateAdd(N, ConstantInt::get(I.getType(),CI->logBase2()));
if (I.isExact())
return BinaryOperator::CreateExactLShr(Op0, N);
return BinaryOperator::CreateLShr(Op0, N);
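
The udiv fold above turns division by a shifted power of two into a right shift by log2(C) + N. A standalone check with C = 8 (illustrative):

    #include <cassert>
    #include <cstdint>

    int main() {
      for (uint32_t x = 0; x < 4096; ++x)
        for (unsigned n = 0; n < 4; ++n) {
          uint32_t c = 8;                      // power of two, log2(c) == 3
          assert(x / (c << n) == x >> (3 + n));
        }
    }
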
@@ -630,7 +629,7 @@ Instruction *InstCombiner::visitURem(BinaryOperator &I) {
// Turn A % (C << N), where C is 2^k, into A & ((C << N)-1)
if (match(Op1, m_Shl(m_Power2(), m_Value()))) {
Constant *N1 = Constant::getAllOnesValue(I.getType());
- Value *Add = Builder->CreateAdd(Op1, N1, "tmp");
+ Value *Add = Builder->CreateAdd(Op1, N1);
return BinaryOperator::CreateAnd(Op0, Add);
}
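
Likewise the urem fold uses that remainder by a power of two is a low-bit mask, and (C << N) - 1 is exactly that mask. Checked standalone (illustrative):

    #include <cassert>
    #include <cstdint>

    int main() {
      for (uint32_t a = 0; a < 4096; ++a)
        for (unsigned n = 0; n < 5; ++n) {
          uint32_t m = 4u << n;                // power-of-two modulus
          assert(a % m == (a & (m - 1)));
        }
    }
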
diff --git a/lib/Transforms/InstCombine/InstCombinePHI.cpp b/lib/Transforms/InstCombine/InstCombinePHI.cpp
index 3777340..664546c 100644
--- a/lib/Transforms/InstCombine/InstCombinePHI.cpp
+++ b/lib/Transforms/InstCombine/InstCombinePHI.cpp
@@ -28,8 +28,8 @@ Instruction *InstCombiner::FoldPHIArgBinOpIntoPHI(PHINode &PN) {
Value *LHSVal = FirstInst->getOperand(0);
Value *RHSVal = FirstInst->getOperand(1);
- const Type *LHSType = LHSVal->getType();
- const Type *RHSType = RHSVal->getType();
+ Type *LHSType = LHSVal->getType();
+ Type *RHSType = RHSVal->getType();
bool isNUW = false, isNSW = false, isExact = false;
if (OverflowingBinaryOperator *BO =
@@ -229,8 +229,7 @@ Instruction *InstCombiner::FoldPHIArgGEPIntoPHI(PHINode &PN) {
Value *Base = FixedOperands[0];
GetElementPtrInst *NewGEP =
- GetElementPtrInst::Create(Base, FixedOperands.begin()+1,
- FixedOperands.end());
+ GetElementPtrInst::Create(Base, makeArrayRef(FixedOperands).slice(1));
if (AllInBounds) NewGEP->setIsInBounds();
NewGEP->setDebugLoc(FirstInst->getDebugLoc());
return NewGEP;
@@ -287,7 +286,12 @@ static bool isSafeAndProfitableToSinkLoad(LoadInst *L) {
Instruction *InstCombiner::FoldPHIArgLoadIntoPHI(PHINode &PN) {
LoadInst *FirstLI = cast<LoadInst>(PN.getIncomingValue(0));
-
+
+ // FIXME: This is overconservative; this transform is allowed in some cases
+ // for atomic operations.
+ if (FirstLI->isAtomic())
+ return 0;
+
// When processing loads, we need to propagate two bits of information to the
// sunk load: whether it is volatile, and what its alignment is. We currently
// don't sink loads when some have their alignment specified and some don't.
@@ -397,7 +401,7 @@ Instruction *InstCombiner::FoldPHIArgOpIntoPHI(PHINode &PN) {
// the same type or "+42") we can pull the operation through the PHI, reducing
// code size and simplifying code.
Constant *ConstantOp = 0;
- const Type *CastSrcTy = 0;
+ Type *CastSrcTy = 0;
bool isNUW = false, isNSW = false, isExact = false;
if (isa<CastInst>(FirstInst)) {
@@ -572,7 +576,7 @@ struct LoweredPHIRecord {
unsigned Shift; // The amount shifted.
unsigned Width; // The width extracted.
- LoweredPHIRecord(PHINode *pn, unsigned Sh, const Type *Ty)
+ LoweredPHIRecord(PHINode *pn, unsigned Sh, Type *Ty)
: PN(pn), Shift(Sh), Width(Ty->getPrimitiveSizeInBits()) {}
// Ctor form used by DenseMap.
@@ -701,7 +705,7 @@ Instruction *InstCombiner::SliceUpIllegalIntegerPHI(PHINode &FirstPhi) {
unsigned PHIId = PHIUsers[UserI].PHIId;
PHINode *PN = PHIsToSlice[PHIId];
unsigned Offset = PHIUsers[UserI].Shift;
- const Type *Ty = PHIUsers[UserI].Inst->getType();
+ Type *Ty = PHIUsers[UserI].Inst->getType();
PHINode *EltPHI;
diff --git a/lib/Transforms/InstCombine/InstCombineSelect.cpp b/lib/Transforms/InstCombine/InstCombineSelect.cpp
index 5733c20..91e60a4 100644
--- a/lib/Transforms/InstCombine/InstCombineSelect.cpp
+++ b/lib/Transforms/InstCombine/InstCombineSelect.cpp
@@ -13,6 +13,7 @@
#include "InstCombine.h"
#include "llvm/Support/PatternMatch.h"
+#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/InstructionSimplify.h"
using namespace llvm;
using namespace PatternMatch;
@@ -323,9 +324,14 @@ static Value *SimplifyWithOpReplaced(Value *V, Value *Op, Value *RepOp,
}
// All operands were constants, fold it.
- if (ConstOps.size() == I->getNumOperands())
+ if (ConstOps.size() == I->getNumOperands()) {
+ if (LoadInst *LI = dyn_cast<LoadInst>(I))
+ if (!LI->isVolatile())
+ return ConstantFoldLoadFromConstPtr(ConstOps[0], TD);
+
return ConstantFoldInstOperands(I->getOpcode(), I->getType(),
- ConstOps.data(), ConstOps.size(), TD);
+ ConstOps, TD);
+ }
}
return 0;
@@ -363,7 +369,7 @@ Instruction *InstCombiner::visitSelectInstWithICmp(SelectInst &SI,
case ICmpInst::ICMP_UGT:
case ICmpInst::ICMP_SGT: {
// These transformations only work for selects over integers.
- const IntegerType *SelectTy = dyn_cast<IntegerType>(SI.getType());
+ IntegerType *SelectTy = dyn_cast<IntegerType>(SI.getType());
if (!SelectTy)
break;
@@ -443,7 +449,7 @@ Instruction *InstCombiner::visitSelectInstWithICmp(SelectInst &SI,
// FIXME: Type and constness constraints could be lifted, but we have to
// watch code size carefully. We should consider xor instead of
// sub/add when we decide to do that.
- if (const IntegerType *Ty = dyn_cast<IntegerType>(CmpLHS->getType())) {
+ if (IntegerType *Ty = dyn_cast<IntegerType>(CmpLHS->getType())) {
if (TrueVal->getType() == Ty) {
if (ConstantInt *Cmp = dyn_cast<ConstantInt>(CmpRHS)) {
ConstantInt *C1 = NULL, *C2 = NULL;
@@ -476,10 +482,16 @@ Instruction *InstCombiner::visitSelectInstWithICmp(SelectInst &SI,
if (SimplifyWithOpReplaced(FalseVal, CmpLHS, CmpRHS, TD) == TrueVal ||
SimplifyWithOpReplaced(FalseVal, CmpRHS, CmpLHS, TD) == TrueVal)
return ReplaceInstUsesWith(SI, FalseVal);
+ if (SimplifyWithOpReplaced(TrueVal, CmpLHS, CmpRHS, TD) == FalseVal ||
+ SimplifyWithOpReplaced(TrueVal, CmpRHS, CmpLHS, TD) == FalseVal)
+ return ReplaceInstUsesWith(SI, FalseVal);
} else if (Pred == ICmpInst::ICMP_NE) {
if (SimplifyWithOpReplaced(TrueVal, CmpLHS, CmpRHS, TD) == FalseVal ||
SimplifyWithOpReplaced(TrueVal, CmpRHS, CmpLHS, TD) == FalseVal)
return ReplaceInstUsesWith(SI, TrueVal);
+ if (SimplifyWithOpReplaced(FalseVal, CmpLHS, CmpRHS, TD) == TrueVal ||
+ SimplifyWithOpReplaced(FalseVal, CmpRHS, CmpLHS, TD) == TrueVal)
+ return ReplaceInstUsesWith(SI, TrueVal);
}
// NOTE: if we wanted to, this is where to detect integer MIN/MAX
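
The four new SimplifyWithOpReplaced calls all exploit the same observation: in the arm guarded by x == y (or reached only when x != y), x may be replaced by y, and if that makes one arm equal the other, the select collapses. A plain C++ rendering of the eq case, where f is an arbitrary illustrative function:

    #include <cassert>

    int f(int v) { return v * 2 + 1; }

    int main() {
      for (int x = -4; x <= 4; ++x)
        for (int y = -4; y <= 4; ++y) {
          // substituting x := y in the true arm yields the false arm,
          // so the whole select is f(y) regardless of the compare
          int sel = (x == y) ? f(x) : f(y);
          assert(sel == f(y));
        }
    }
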
diff --git a/lib/Transforms/InstCombine/InstCombineShifts.cpp b/lib/Transforms/InstCombine/InstCombineShifts.cpp
index 811f949..6d85add 100644
--- a/lib/Transforms/InstCombine/InstCombineShifts.cpp
+++ b/lib/Transforms/InstCombine/InstCombineShifts.cpp
@@ -13,6 +13,7 @@
#include "InstCombine.h"
#include "llvm/IntrinsicInst.h"
+#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Support/PatternMatch.h"
using namespace llvm;
@@ -207,11 +208,12 @@ static Value *GetShiftedValue(Value *V, unsigned NumBits, bool isLeftShift,
return I;
case Instruction::Shl: {
- unsigned TypeWidth = I->getType()->getScalarSizeInBits();
+ BinaryOperator *BO = cast<BinaryOperator>(I);
+ unsigned TypeWidth = BO->getType()->getScalarSizeInBits();
// We only accept shifts-by-a-constant in CanEvaluateShifted.
- ConstantInt *CI = cast<ConstantInt>(I->getOperand(1));
-
+ ConstantInt *CI = cast<ConstantInt>(BO->getOperand(1));
+
// We can always fold shl(c1)+shl(c2) -> shl(c1+c2).
if (isLeftShift) {
// If this is oversized composite shift, then unsigned shifts get 0.
@@ -219,7 +221,9 @@ static Value *GetShiftedValue(Value *V, unsigned NumBits, bool isLeftShift,
if (NewShAmt >= TypeWidth)
return Constant::getNullValue(I->getType());
- I->setOperand(1, ConstantInt::get(I->getType(), NewShAmt));
+ BO->setOperand(1, ConstantInt::get(BO->getType(), NewShAmt));
+ BO->setHasNoUnsignedWrap(false);
+ BO->setHasNoSignedWrap(false);
return I;
}
@@ -227,11 +231,11 @@ static Value *GetShiftedValue(Value *V, unsigned NumBits, bool isLeftShift,
// zeros.
if (CI->getValue() == NumBits) {
APInt Mask(APInt::getLowBitsSet(TypeWidth, TypeWidth - NumBits));
- V = IC.Builder->CreateAnd(I->getOperand(0),
- ConstantInt::get(I->getContext(), Mask));
+ V = IC.Builder->CreateAnd(BO->getOperand(0),
+ ConstantInt::get(BO->getContext(), Mask));
if (Instruction *VI = dyn_cast<Instruction>(V)) {
- VI->moveBefore(I);
- VI->takeName(I);
+ VI->moveBefore(BO);
+ VI->takeName(BO);
}
return V;
}
@@ -239,23 +243,27 @@ static Value *GetShiftedValue(Value *V, unsigned NumBits, bool isLeftShift,
// We turn shl(c1)+shr(c2) -> shl(c3)+and(c4), but only when we know that
// the and won't be needed.
assert(CI->getZExtValue() > NumBits);
- I->setOperand(1, ConstantInt::get(I->getType(),
- CI->getZExtValue() - NumBits));
- return I;
+ BO->setOperand(1, ConstantInt::get(BO->getType(),
+ CI->getZExtValue() - NumBits));
+ BO->setHasNoUnsignedWrap(false);
+ BO->setHasNoSignedWrap(false);
+ return BO;
}
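
The Shl/LShr cases above compose same-direction shifts, clamp oversized composites to zero, and now also drop the nuw/nsw/exact flags, since those were stated for the old shift amounts. The arithmetic part, checked standalone on 8-bit values (illustrative):

    #include <cassert>
    #include <cstdint>

    int main() {
      for (unsigned x = 0; x < 256; ++x)
        for (unsigned c1 = 0; c1 < 8; ++c1)
          for (unsigned c2 = 0; c2 < 8; ++c2) {
            unsigned sum = c1 + c2;
            // shl(c1)+shl(c2) -> shl(c1+c2), zero once the width is exceeded
            uint8_t shl2 = uint8_t(uint8_t(x << c1) << c2);
            assert(shl2 == uint8_t(sum >= 8 ? 0 : x << sum));
            // lshr(c1)+lshr(c2) -> lshr(c1+c2), likewise
            uint8_t shr2 = uint8_t(uint8_t(x >> c1) >> c2);
            assert(shr2 == uint8_t(sum >= 8 ? 0 : uint8_t(x) >> sum));
          }
    }
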
case Instruction::LShr: {
- unsigned TypeWidth = I->getType()->getScalarSizeInBits();
+ BinaryOperator *BO = cast<BinaryOperator>(I);
+ unsigned TypeWidth = BO->getType()->getScalarSizeInBits();
// We only accept shifts-by-a-constant in CanEvaluateShifted.
- ConstantInt *CI = cast<ConstantInt>(I->getOperand(1));
+ ConstantInt *CI = cast<ConstantInt>(BO->getOperand(1));
// We can always fold lshr(c1)+lshr(c2) -> lshr(c1+c2).
if (!isLeftShift) {
// If this is oversized composite shift, then unsigned shifts get 0.
unsigned NewShAmt = NumBits+CI->getZExtValue();
if (NewShAmt >= TypeWidth)
- return Constant::getNullValue(I->getType());
+ return Constant::getNullValue(BO->getType());
- I->setOperand(1, ConstantInt::get(I->getType(), NewShAmt));
+ BO->setOperand(1, ConstantInt::get(BO->getType(), NewShAmt));
+ BO->setIsExact(false);
return I;
}
@@ -264,7 +272,7 @@ static Value *GetShiftedValue(Value *V, unsigned NumBits, bool isLeftShift,
if (CI->getValue() == NumBits) {
APInt Mask(APInt::getHighBitsSet(TypeWidth, TypeWidth - NumBits));
V = IC.Builder->CreateAnd(I->getOperand(0),
- ConstantInt::get(I->getContext(), Mask));
+ ConstantInt::get(BO->getContext(), Mask));
if (Instruction *VI = dyn_cast<Instruction>(V)) {
VI->moveBefore(I);
VI->takeName(I);
@@ -275,9 +283,10 @@ static Value *GetShiftedValue(Value *V, unsigned NumBits, bool isLeftShift,
// We turn lshr(c1)+shl(c2) -> lshr(c3)+and(c4), but only when we know that
// the and won't be needed.
assert(CI->getZExtValue() > NumBits);
- I->setOperand(1, ConstantInt::get(I->getType(),
- CI->getZExtValue() - NumBits));
- return I;
+ BO->setOperand(1, ConstantInt::get(BO->getType(),
+ CI->getZExtValue() - NumBits));
+ BO->setIsExact(false);
+ return BO;
}
case Instruction::Select:
@@ -528,7 +537,7 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, ConstantInt *Op1,
uint32_t AmtSum = ShiftAmt1+ShiftAmt2; // Fold into one big shift.
- const IntegerType *Ty = cast<IntegerType>(I.getType());
+ IntegerType *Ty = cast<IntegerType>(I.getType());
// Check for (X << c1) << c2 and (X >> c1) >> c2
if (I.getOpcode() == ShiftOp->getOpcode()) {
diff --git a/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp b/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
index 8fea8eb..5cd9a4b 100644
--- a/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
+++ b/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
@@ -103,7 +103,7 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
assert(V != 0 && "Null pointer of Value???");
assert(Depth <= 6 && "Limit Search Depth");
uint32_t BitWidth = DemandedMask.getBitWidth();
- const Type *VTy = V->getType();
+ Type *VTy = V->getType();
assert((TD || !VTy->isPointerTy()) &&
"SimplifyDemandedBits needs to know bit widths!");
assert((!TD || TD->getTypeSizeInBits(VTy->getScalarType()) == BitWidth) &&
@@ -325,8 +325,7 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
if ((RHSKnownOne & LHSKnownOne) == RHSKnownOne) {
Constant *AndC = Constant::getIntegerValue(VTy,
~RHSKnownOne & DemandedMask);
- Instruction *And =
- BinaryOperator::CreateAnd(I->getOperand(0), AndC, "tmp");
+ Instruction *And = BinaryOperator::CreateAnd(I->getOperand(0), AndC);
return InsertNewInstWith(And, *I);
}
}
@@ -351,14 +350,12 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
Constant *AndC =
ConstantInt::get(I->getType(), NewMask & AndRHS->getValue());
- Instruction *NewAnd =
- BinaryOperator::CreateAnd(I->getOperand(0), AndC, "tmp");
+ Instruction *NewAnd = BinaryOperator::CreateAnd(I->getOperand(0), AndC);
InsertNewInstWith(NewAnd, *I);
Constant *XorC =
ConstantInt::get(I->getType(), NewMask & XorRHS->getValue());
- Instruction *NewXor =
- BinaryOperator::CreateXor(NewAnd, XorC, "tmp");
+ Instruction *NewXor = BinaryOperator::CreateXor(NewAnd, XorC);
return InsertNewInstWith(NewXor, *I);
}
@@ -404,8 +401,8 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
if (!I->getOperand(0)->getType()->isIntOrIntVectorTy())
return 0; // vector->int or fp->int?
- if (const VectorType *DstVTy = dyn_cast<VectorType>(I->getType())) {
- if (const VectorType *SrcVTy =
+ if (VectorType *DstVTy = dyn_cast<VectorType>(I->getType())) {
+ if (VectorType *SrcVTy =
dyn_cast<VectorType>(I->getOperand(0)->getType())) {
if (DstVTy->getNumElements() != SrcVTy->getNumElements())
// Don't touch a bitcast between vectors of different element counts.
@@ -826,7 +823,7 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
UndefElts = 0;
if (ConstantVector *CV = dyn_cast<ConstantVector>(V)) {
- const Type *EltTy = cast<VectorType>(V->getType())->getElementType();
+ Type *EltTy = cast<VectorType>(V->getType())->getElementType();
Constant *Undef = UndefValue::get(EltTy);
std::vector<Constant*> Elts;
@@ -855,7 +852,7 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
if (DemandedElts.isAllOnesValue())
return 0;
- const Type *EltTy = cast<VectorType>(V->getType())->getElementType();
+ Type *EltTy = cast<VectorType>(V->getType())->getElementType();
Constant *Zero = Constant::getNullValue(EltTy);
Constant *Undef = UndefValue::get(EltTy);
std::vector<Constant*> Elts;
@@ -962,6 +959,9 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
unsigned MaskVal = Shuffle->getMaskValue(i);
if (MaskVal == -1u) {
UndefElts.setBit(i);
+ } else if (!DemandedElts[i]) {
+ NewUndefElts = true;
+ UndefElts.setBit(i);
} else if (MaskVal < LHSVWidth) {
if (UndefElts4[MaskVal]) {
NewUndefElts = true;
@@ -992,7 +992,7 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
}
case Instruction::BitCast: {
// Vector->vector casts only.
- const VectorType *VTy = dyn_cast<VectorType>(I->getOperand(0)->getType());
+ VectorType *VTy = dyn_cast<VectorType>(I->getOperand(0)->getType());
if (!VTy) break;
unsigned InVWidth = VTy->getNumElements();
APInt InputDemandedElts(InVWidth, 0);
diff --git a/lib/Transforms/InstCombine/InstCombineVectorOps.cpp b/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
index ad6a8d0..154267c 100644
--- a/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
+++ b/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
@@ -77,7 +77,7 @@ static std::vector<int> getShuffleMask(const ShuffleVectorInst *SVI) {
/// extracted from the vector.
static Value *FindScalarElement(Value *V, unsigned EltNo) {
assert(V->getType()->isVectorTy() && "Not looking at a vector?");
- const VectorType *PTy = cast<VectorType>(V->getType());
+ VectorType *PTy = cast<VectorType>(V->getType());
unsigned Width = PTy->getNumElements();
if (EltNo >= Width) // Out of range access.
return UndefValue::get(PTy->getElementType());
@@ -175,7 +175,7 @@ Instruction *InstCombiner::visitExtractElementInst(ExtractElementInst &EI) {
// the same number of elements, see if we can find the source element from
// it. In this case, we will end up needing to bitcast the scalars.
if (BitCastInst *BCI = dyn_cast<BitCastInst>(EI.getOperand(0))) {
- if (const VectorType *VT =
+ if (VectorType *VT =
dyn_cast<VectorType>(BCI->getOperand(0)->getType()))
if (VT->getNumElements() == VectorWidth)
if (Value *Elt = FindScalarElement(BCI->getOperand(0), IndexVal))
@@ -225,7 +225,7 @@ Instruction *InstCombiner::visitExtractElementInst(ExtractElementInst &EI) {
SrcIdx -= LHSWidth;
Src = SVI->getOperand(1);
}
- const Type *Int32Ty = Type::getInt32Ty(EI.getContext());
+ Type *Int32Ty = Type::getInt32Ty(EI.getContext());
return ExtractElementInst::Create(Src,
ConstantInt::get(Int32Ty,
SrcIdx, false));
@@ -555,7 +555,7 @@ Instruction *InstCombiner::visitShuffleVectorInst(ShuffleVectorInst &SVI) {
// shuffle mask, do the replacement.
if (isSplat || NewMask == LHSMask || NewMask == Mask) {
std::vector<Constant*> Elts;
- const Type *Int32Ty = Type::getInt32Ty(SVI.getContext());
+ Type *Int32Ty = Type::getInt32Ty(SVI.getContext());
for (unsigned i = 0, e = NewMask.size(); i != e; ++i) {
if (NewMask[i] < 0) {
Elts.push_back(UndefValue::get(Int32Ty));
diff --git a/lib/Transforms/InstCombine/InstructionCombining.cpp b/lib/Transforms/InstCombine/InstructionCombining.cpp
index ab98ef9..92874b9 100644
--- a/lib/Transforms/InstCombine/InstructionCombining.cpp
+++ b/lib/Transforms/InstCombine/InstructionCombining.cpp
@@ -46,8 +46,10 @@
#include "llvm/Support/Debug.h"
#include "llvm/Support/GetElementPtrTypeIterator.h"
#include "llvm/Support/PatternMatch.h"
+#include "llvm/Support/ValueHandle.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/StringSwitch.h"
#include "llvm-c/Initialization.h"
#include <algorithm>
#include <climits>
@@ -83,7 +85,7 @@ void InstCombiner::getAnalysisUsage(AnalysisUsage &AU) const {
/// ShouldChangeType - Return true if it is desirable to convert a computation
/// from 'From' to 'To'. We don't want to convert from a legal to an illegal
/// type for example, or from a smaller to a larger illegal type.
-bool InstCombiner::ShouldChangeType(const Type *From, const Type *To) const {
+bool InstCombiner::ShouldChangeType(Type *From, Type *To) const {
assert(From->isIntegerTy() && To->isIntegerTy());
// If we don't have TD, we don't know if the source/dest are legal.
@@ -107,6 +109,43 @@ bool InstCombiner::ShouldChangeType(const Type *From, const Type *To) const {
return true;
}
+// Return true, if No Signed Wrap should be maintained for I.
+// The No Signed Wrap flag can be kept if the operation "B (I.getOpcode) C",
+// where both B and C should be ConstantInts, results in a constant that does
+// not overflow. This function only handles the Add and Sub opcodes. For
+// all other opcodes, the function conservatively returns false.
+static bool MaintainNoSignedWrap(BinaryOperator &I, Value *B, Value *C) {
+ OverflowingBinaryOperator *OBO = dyn_cast<OverflowingBinaryOperator>(&I);
+ if (!OBO || !OBO->hasNoSignedWrap()) {
+ return false;
+ }
+
+ // We reason about Add and Sub Only.
+ Instruction::BinaryOps Opcode = I.getOpcode();
+ if (Opcode != Instruction::Add &&
+ Opcode != Instruction::Sub) {
+ return false;
+ }
+
+ ConstantInt *CB = dyn_cast<ConstantInt>(B);
+ ConstantInt *CC = dyn_cast<ConstantInt>(C);
+
+ if (!CB || !CC) {
+ return false;
+ }
+
+ const APInt &BVal = CB->getValue();
+ const APInt &CVal = CC->getValue();
+ bool Overflow = false;
+
+ if (Opcode == Instruction::Add) {
+ BVal.sadd_ov(CVal, Overflow);
+ } else {
+ BVal.ssub_ov(CVal, Overflow);
+ }
+
+ return !Overflow;
+}
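
MaintainNoSignedWrap's core test is "does folding the two constants itself overflow signed arithmetic". The same check written against plain int32_t; illustrative, mirroring APInt::sadd_ov only in spirit:

    #include <cassert>
    #include <cstdint>
    #include <limits>

    // Reassociation may keep nsw only if folding constants b and c
    // cannot itself overflow in the signed type.
    bool constantFoldKeepsNSW(int32_t b, int32_t c) {
      int64_t wide = int64_t(b) + int64_t(c);   // add in a wider type
      return wide >= std::numeric_limits<int32_t>::min() &&
             wide <= std::numeric_limits<int32_t>::max();
    }

    int main() {
      assert(constantFoldKeepsNSW(1, 2));
      assert(!constantFoldKeepsNSW(INT32_MAX, 1));  // folded constant overflows
    }
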
/// SimplifyAssociativeOrCommutative - This performs a few simplifications for
/// operators which are associative or commutative:
@@ -158,7 +197,16 @@ bool InstCombiner::SimplifyAssociativeOrCommutative(BinaryOperator &I) {
I.setOperand(1, V);
// Conservatively clear the optional flags, since they may not be
// preserved by the reassociation.
- I.clearSubclassOptionalData();
+ if (MaintainNoSignedWrap(I, B, C) &&
+ (!Op0 || (isa<BinaryOperator>(Op0) && Op0->hasNoSignedWrap()))) {
+ // Note: this is only valid because SimplifyBinOp doesn't look at
+ // the operands to Op0.
+ I.clearSubclassOptionalData();
+ I.setHasNoSignedWrap(true);
+ } else {
+ I.clearSubclassOptionalData();
+ }
+
Changed = true;
++NumReassoc;
continue;
@@ -240,7 +288,7 @@ bool InstCombiner::SimplifyAssociativeOrCommutative(BinaryOperator &I) {
Constant *C2 = cast<Constant>(Op1->getOperand(1));
Constant *Folded = ConstantExpr::get(Opcode, C1, C2);
- Instruction *New = BinaryOperator::Create(Opcode, A, B);
+ BinaryOperator *New = BinaryOperator::Create(Opcode, A, B);
InsertNewInstWith(New, I);
New->takeName(Op1);
I.setOperand(0, New);
@@ -248,6 +296,7 @@ bool InstCombiner::SimplifyAssociativeOrCommutative(BinaryOperator &I) {
// Conservatively clear the optional flags, since they may not be
// preserved by the reassociation.
I.clearSubclassOptionalData();
+
Changed = true;
continue;
}
@@ -516,8 +565,8 @@ Instruction *InstCombiner::FoldOpIntoSelect(Instruction &Op, SelectInst *SI) {
// If it's a bitcast involving vectors, make sure it has the same number of
// elements on both sides.
if (BitCastInst *BC = dyn_cast<BitCastInst>(&Op)) {
- const VectorType *DestTy = dyn_cast<VectorType>(BC->getDestTy());
- const VectorType *SrcTy = dyn_cast<VectorType>(BC->getSrcTy());
+ VectorType *DestTy = dyn_cast<VectorType>(BC->getDestTy());
+ VectorType *SrcTy = dyn_cast<VectorType>(BC->getSrcTy());
// Verify that either both or neither are vectors.
if ((SrcTy == NULL) != (DestTy == NULL)) return 0;
@@ -654,7 +703,7 @@ Instruction *InstCombiner::FoldOpIntoPhi(Instruction &I) {
}
} else {
CastInst *CI = cast<CastInst>(&I);
- const Type *RetTy = CI->getType();
+ Type *RetTy = CI->getType();
for (unsigned i = 0; i != NumPHIValues; ++i) {
Value *InV;
if (Constant *InC = dyn_cast<Constant>(PN->getIncomingValue(i)))
@@ -680,7 +729,7 @@ Instruction *InstCombiner::FoldOpIntoPhi(Instruction &I) {
/// or not there is a sequence of GEP indices into the type that will land us at
/// the specified offset. If so, fill them into NewIndices and return the
/// resultant element type, otherwise return null.
-const Type *InstCombiner::FindElementAtOffset(const Type *Ty, int64_t Offset,
+Type *InstCombiner::FindElementAtOffset(Type *Ty, int64_t Offset,
SmallVectorImpl<Value*> &NewIndices) {
if (!TD) return 0;
if (!Ty->isSized()) return 0;
@@ -688,7 +737,7 @@ const Type *InstCombiner::FindElementAtOffset(const Type *Ty, int64_t Offset,
// Start with the index over the outer type. Note that the type size
// might be zero (even if the offset isn't zero) if the indexed type
// is something like [0 x {int, int}]
- const Type *IntPtrTy = TD->getIntPtrType(Ty->getContext());
+ Type *IntPtrTy = TD->getIntPtrType(Ty->getContext());
int64_t FirstIdx = 0;
if (int64_t TySize = TD->getTypeAllocSize(Ty)) {
FirstIdx = Offset/TySize;
@@ -711,7 +760,7 @@ const Type *InstCombiner::FindElementAtOffset(const Type *Ty, int64_t Offset,
if (uint64_t(Offset*8) >= TD->getTypeSizeInBits(Ty))
return 0;
- if (const StructType *STy = dyn_cast<StructType>(Ty)) {
+ if (StructType *STy = dyn_cast<StructType>(Ty)) {
const StructLayout *SL = TD->getStructLayout(STy);
assert(Offset < (int64_t)SL->getSizeInBytes() &&
"Offset must stay within the indexed type");
@@ -722,7 +771,7 @@ const Type *InstCombiner::FindElementAtOffset(const Type *Ty, int64_t Offset,
Offset -= SL->getElementOffset(Elt);
Ty = STy->getElementType(Elt);
- } else if (const ArrayType *AT = dyn_cast<ArrayType>(Ty)) {
+ } else if (ArrayType *AT = dyn_cast<ArrayType>(Ty)) {
uint64_t EltSize = TD->getTypeAllocSize(AT->getElementType());
assert(EltSize && "Cannot index into a zero-sized array");
NewIndices.push_back(ConstantInt::get(IntPtrTy,Offset/EltSize));
@@ -737,12 +786,20 @@ const Type *InstCombiner::FindElementAtOffset(const Type *Ty, int64_t Offset,
return Ty;
}
-
+static bool shouldMergeGEPs(GEPOperator &GEP, GEPOperator &Src) {
+ // If this GEP has only 0 indices, it is the same pointer as
+ // Src. If Src is not a trivial GEP too, don't combine
+ // the indices.
+ if (GEP.hasAllZeroIndices() && !Src.hasAllZeroIndices() &&
+ !Src.hasOneUse())
+ return false;
+ return true;
+}
Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
SmallVector<Value*, 8> Ops(GEP.op_begin(), GEP.op_end());
- if (Value *V = SimplifyGEPInst(&Ops[0], Ops.size(), TD))
+ if (Value *V = SimplifyGEPInst(Ops, TD))
return ReplaceInstUsesWith(GEP, V);
Value *PtrOp = GEP.getOperand(0);
@@ -751,13 +808,13 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
// by multiples of a zero size type with zero.
if (TD) {
bool MadeChange = false;
- const Type *IntPtrTy = TD->getIntPtrType(GEP.getContext());
+ Type *IntPtrTy = TD->getIntPtrType(GEP.getContext());
gep_type_iterator GTI = gep_type_begin(GEP);
for (User::op_iterator I = GEP.op_begin() + 1, E = GEP.op_end();
I != E; ++I, ++GTI) {
// Skip indices into struct types.
- const SequentialType *SeqTy = dyn_cast<SequentialType>(*GTI);
+ SequentialType *SeqTy = dyn_cast<SequentialType>(*GTI);
if (!SeqTy) continue;
// If the element type has zero size then any index over it is equivalent
@@ -785,21 +842,15 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
// getelementptr instructions into a single instruction.
//
if (GEPOperator *Src = dyn_cast<GEPOperator>(PtrOp)) {
-
- // If this GEP has only 0 indices, it is the same pointer as
- // Src. If Src is not a trivial GEP too, don't combine
- // the indices.
- if (GEP.hasAllZeroIndices() && !Src->hasAllZeroIndices() &&
- !Src->hasOneUse())
+ if (!shouldMergeGEPs(*cast<GEPOperator>(&GEP), *Src))
return 0;
// Note that if our source is a gep chain itself then we wait for that
// chain to be resolved before we perform this transformation. This
// avoids us creating a TON of code in some cases.
- //
- if (GetElementPtrInst *SrcGEP =
- dyn_cast<GetElementPtrInst>(Src->getOperand(0)))
- if (SrcGEP->getNumOperands() == 2)
+ if (GEPOperator *SrcGEP =
+ dyn_cast<GEPOperator>(Src->getOperand(0)))
+ if (SrcGEP->getNumOperands() == 2 && shouldMergeGEPs(*Src, *SrcGEP))
return 0; // Wait until our source is folded to completion.
SmallVector<Value*, 8> Indices;
@@ -851,15 +902,14 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
if (!Indices.empty())
return (GEP.isInBounds() && Src->isInBounds()) ?
- GetElementPtrInst::CreateInBounds(Src->getOperand(0), Indices.begin(),
- Indices.end(), GEP.getName()) :
- GetElementPtrInst::Create(Src->getOperand(0), Indices.begin(),
- Indices.end(), GEP.getName());
+ GetElementPtrInst::CreateInBounds(Src->getOperand(0), Indices,
+ GEP.getName()) :
+ GetElementPtrInst::Create(Src->getOperand(0), Indices, GEP.getName());
}
// Handle gep(bitcast x) and gep(gep x, 0, 0, 0).
Value *StrippedPtr = PtrOp->stripPointerCasts();
- const PointerType *StrippedPtrTy =cast<PointerType>(StrippedPtr->getType());
+ PointerType *StrippedPtrTy =cast<PointerType>(StrippedPtr->getType());
if (StrippedPtr != PtrOp &&
StrippedPtrTy->getAddressSpace() == GEP.getPointerAddressSpace()) {
@@ -875,21 +925,20 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
//
// This occurs when the program declares an array extern like "int X[];"
if (HasZeroPointerIndex) {
- const PointerType *CPTy = cast<PointerType>(PtrOp->getType());
- if (const ArrayType *CATy =
+ PointerType *CPTy = cast<PointerType>(PtrOp->getType());
+ if (ArrayType *CATy =
dyn_cast<ArrayType>(CPTy->getElementType())) {
// GEP (bitcast i8* X to [0 x i8]*), i32 0, ... ?
if (CATy->getElementType() == StrippedPtrTy->getElementType()) {
// -> GEP i8* X, ...
SmallVector<Value*, 8> Idx(GEP.idx_begin()+1, GEP.idx_end());
GetElementPtrInst *Res =
- GetElementPtrInst::Create(StrippedPtr, Idx.begin(),
- Idx.end(), GEP.getName());
+ GetElementPtrInst::Create(StrippedPtr, Idx, GEP.getName());
Res->setIsInBounds(GEP.isInBounds());
return Res;
}
- if (const ArrayType *XATy =
+ if (ArrayType *XATy =
dyn_cast<ArrayType>(StrippedPtrTy->getElementType())){
// GEP (bitcast [10 x i8]* X to [0 x i8]*), i32 0, ... ?
if (CATy->getElementType() == XATy->getElementType()) {
@@ -907,8 +956,8 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
// Transform things like:
// %t = getelementptr i32* bitcast ([2 x i32]* %str to i32*), i32 %V
// into: %t1 = getelementptr [2 x i32]* %str, i32 0, i32 %V; bitcast
- const Type *SrcElTy = StrippedPtrTy->getElementType();
- const Type *ResElTy=cast<PointerType>(PtrOp->getType())->getElementType();
+ Type *SrcElTy = StrippedPtrTy->getElementType();
+ Type *ResElTy=cast<PointerType>(PtrOp->getType())->getElementType();
if (TD && SrcElTy->isArrayTy() &&
TD->getTypeAllocSize(cast<ArrayType>(SrcElTy)->getElementType()) ==
TD->getTypeAllocSize(ResElTy)) {
@@ -916,8 +965,8 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
Idx[0] = Constant::getNullValue(Type::getInt32Ty(GEP.getContext()));
Idx[1] = GEP.getOperand(1);
Value *NewGEP = GEP.isInBounds() ?
- Builder->CreateInBoundsGEP(StrippedPtr, Idx, Idx + 2, GEP.getName()) :
- Builder->CreateGEP(StrippedPtr, Idx, Idx + 2, GEP.getName());
+ Builder->CreateInBoundsGEP(StrippedPtr, Idx, GEP.getName()) :
+ Builder->CreateGEP(StrippedPtr, Idx, GEP.getName());
// V and GEP are both pointer types --> BitCast
return new BitCastInst(NewGEP, GEP.getType());
}
@@ -975,8 +1024,8 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
Idx[0] = Constant::getNullValue(Type::getInt32Ty(GEP.getContext()));
Idx[1] = NewIdx;
Value *NewGEP = GEP.isInBounds() ?
- Builder->CreateInBoundsGEP(StrippedPtr, Idx, Idx + 2,GEP.getName()):
- Builder->CreateGEP(StrippedPtr, Idx, Idx + 2, GEP.getName());
+ Builder->CreateInBoundsGEP(StrippedPtr, Idx, GEP.getName()):
+ Builder->CreateGEP(StrippedPtr, Idx, GEP.getName());
// The NewGEP must be pointer typed, so must the old one -> BitCast
return new BitCastInst(NewGEP, GEP.getType());
}
@@ -1023,14 +1072,12 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
// field at Offset in 'A's type. If so, we can pull the cast through the
// GEP.
SmallVector<Value*, 8> NewIndices;
- const Type *InTy =
+ Type *InTy =
cast<PointerType>(BCI->getOperand(0)->getType())->getElementType();
if (FindElementAtOffset(InTy, Offset, NewIndices)) {
Value *NGEP = GEP.isInBounds() ?
- Builder->CreateInBoundsGEP(BCI->getOperand(0), NewIndices.begin(),
- NewIndices.end()) :
- Builder->CreateGEP(BCI->getOperand(0), NewIndices.begin(),
- NewIndices.end());
+ Builder->CreateInBoundsGEP(BCI->getOperand(0), NewIndices) :
+ Builder->CreateGEP(BCI->getOperand(0), NewIndices);
if (NGEP->getType() == GEP.getType())
return ReplaceInstUsesWith(GEP, NGEP);
@@ -1045,15 +1092,43 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
-static bool IsOnlyNullComparedAndFreed(const Value &V) {
- for (Value::const_use_iterator UI = V.use_begin(), UE = V.use_end();
+static bool IsOnlyNullComparedAndFreed(Value *V, SmallVectorImpl<WeakVH> &Users,
+ int Depth = 0) {
+ if (Depth == 8)
+ return false;
+
+ for (Value::use_iterator UI = V->use_begin(), UE = V->use_end();
UI != UE; ++UI) {
- const User *U = *UI;
- if (isFreeCall(U))
+ User *U = *UI;
+ if (isFreeCall(U)) {
+ Users.push_back(U);
continue;
- if (const ICmpInst *ICI = dyn_cast<ICmpInst>(U))
- if (ICI->isEquality() && isa<ConstantPointerNull>(ICI->getOperand(1)))
+ }
+ if (ICmpInst *ICI = dyn_cast<ICmpInst>(U)) {
+ if (ICI->isEquality() && isa<ConstantPointerNull>(ICI->getOperand(1))) {
+ Users.push_back(ICI);
+ continue;
+ }
+ }
+ if (BitCastInst *BCI = dyn_cast<BitCastInst>(U)) {
+ if (IsOnlyNullComparedAndFreed(BCI, Users, Depth+1)) {
+ Users.push_back(BCI);
+ continue;
+ }
+ }
+ if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(U)) {
+ if (IsOnlyNullComparedAndFreed(GEPI, Users, Depth+1)) {
+ Users.push_back(GEPI);
+ continue;
+ }
+ }
+ if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(U)) {
+ if (II->getIntrinsicID() == Intrinsic::lifetime_start ||
+ II->getIntrinsicID() == Intrinsic::lifetime_end) {
+ Users.push_back(II);
continue;
+ }
+ }
return false;
}
return true;
@@ -1063,25 +1138,20 @@ Instruction *InstCombiner::visitMalloc(Instruction &MI) {
// If we have a malloc call which is only used in any amount of comparisons
// to null and free calls, delete the calls and replace the comparisons with
// true or false as appropriate.
- if (IsOnlyNullComparedAndFreed(MI)) {
- for (Value::use_iterator UI = MI.use_begin(), UE = MI.use_end();
- UI != UE;) {
- // We can assume that every remaining use is a free call or an icmp eq/ne
- // to null, so the cast is safe.
- Instruction *I = cast<Instruction>(*UI);
-
- // Early increment here, as we're about to get rid of the user.
- ++UI;
-
- if (isFreeCall(I)) {
- EraseInstFromFunction(*cast<CallInst>(I));
- continue;
+ SmallVector<WeakVH, 64> Users;
+ if (IsOnlyNullComparedAndFreed(&MI, Users)) {
+ for (unsigned i = 0, e = Users.size(); i != e; ++i) {
+ Instruction *I = cast_or_null<Instruction>(&*Users[i]);
+ if (!I) continue;
+
+ if (ICmpInst *C = dyn_cast<ICmpInst>(I)) {
+ ReplaceInstUsesWith(*C,
+ ConstantInt::get(Type::getInt1Ty(C->getContext()),
+ C->isFalseWhenEqual()));
+ } else if (isa<BitCastInst>(I) || isa<GetElementPtrInst>(I)) {
+ ReplaceInstUsesWith(*I, UndefValue::get(I->getType()));
}
- // Again, the cast is safe.
- ICmpInst *C = cast<ICmpInst>(I);
- ReplaceInstUsesWith(*C, ConstantInt::get(Type::getInt1Ty(C->getContext()),
- C->isFalseWhenEqual()));
- EraseInstFromFunction(*C);
+ EraseInstFromFunction(*I);
}
return EraseInstFromFunction(MI);
}
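At the source level, the pattern removed here looks roughly like the following (a hedged C++ sketch, not drawn from the LLVM test suite):

    #include <cstdlib>

    // The allocation's only uses are a null compare and free(), possibly
    // through bitcasts/GEPs or lifetime markers: instcombine may delete the
    // malloc, fold the compare to its constant answer, and erase the free.
    void demo() {
      void *P = std::malloc(64);
      if (P != 0)     // folds to 'true' once the allocation is gone
        std::free(P); // deleted together with the malloc
    }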
@@ -1120,8 +1190,7 @@ Instruction *InstCombiner::visitBranchInst(BranchInst &BI) {
!isa<Constant>(X)) {
// Swap Destinations and condition...
BI.setCondition(X);
- BI.setSuccessor(0, FalseDest);
- BI.setSuccessor(1, TrueDest);
+ BI.swapSuccessors();
return &BI;
}
@@ -1136,8 +1205,7 @@ Instruction *InstCombiner::visitBranchInst(BranchInst &BI) {
Cond->setPredicate(FCmpInst::getInversePredicate(FPred));
// Swap Destinations and condition.
- BI.setSuccessor(0, FalseDest);
- BI.setSuccessor(1, TrueDest);
+ BI.swapSuccessors();
Worklist.Add(Cond);
return &BI;
}
@@ -1153,8 +1221,7 @@ Instruction *InstCombiner::visitBranchInst(BranchInst &BI) {
ICmpInst *Cond = cast<ICmpInst>(BI.getCondition());
Cond->setPredicate(ICmpInst::getInversePredicate(IPred));
// Swap Destinations and condition.
- BI.setSuccessor(0, FalseDest);
- BI.setSuccessor(1, TrueDest);
+ BI.swapSuccessors();
Worklist.Add(Cond);
return &BI;
}
@@ -1168,11 +1235,17 @@ Instruction *InstCombiner::visitSwitchInst(SwitchInst &SI) {
if (I->getOpcode() == Instruction::Add)
if (ConstantInt *AddRHS = dyn_cast<ConstantInt>(I->getOperand(1))) {
// change 'switch (X+4) case 1:' into 'switch (X) case -3'
- for (unsigned i = 2, e = SI.getNumOperands(); i != e; i += 2)
- SI.setOperand(i,
- ConstantExpr::getSub(cast<Constant>(SI.getOperand(i)),
- AddRHS));
- SI.setOperand(0, I->getOperand(0));
+ unsigned NumCases = SI.getNumCases();
+ // Skip the first item since that's the default case.
+ for (unsigned i = 1; i < NumCases; ++i) {
+ ConstantInt* CaseVal = SI.getCaseValue(i);
+ Constant* NewCaseVal = ConstantExpr::getSub(cast<Constant>(CaseVal),
+ AddRHS);
+ assert(isa<ConstantInt>(NewCaseVal) &&
+ "Result of expression should be constant");
+ SI.setSuccessorValue(i, cast<ConstantInt>(NewCaseVal));
+ }
+ SI.setCondition(I->getOperand(0));
Worklist.Add(I);
return &SI;
}
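The case rewrite is sound because switch comparisons happen modulo 2^N; a quick check of the "switch (X+4) case 1 -> switch (X) case -3" example from the comment (hedged sketch; 32-bit unsigned wraparound assumed):

    #include <cassert>
    #include <cstdint>

    static int before(uint32_t X) {
      switch (X + 4) { case 1: return 1; default: return 0; }
    }
    static int after(uint32_t X) {
      // 1u - 4u wraps to 0xFFFFFFFD, i.e. -3 as a 32-bit case value.
      switch (X) { case 1u - 4u: return 1; default: return 0; }
    }

    int main() {
      const uint32_t Tests[] = { 0u, 7u, 0xFFFFFFFDu };
      for (unsigned i = 0; i != 3; ++i)
        assert(before(Tests[i]) == after(Tests[i]));
      return 0;
    }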
@@ -1242,7 +1315,7 @@ Instruction *InstCombiner::visitExtractValueInst(ExtractValueInst &EV) {
Value *NewEV = Builder->CreateExtractValue(IV->getAggregateOperand(),
EV.getIndices());
return InsertValueInst::Create(NewEV, IV->getInsertedValueOperand(),
- ArrayRef<unsigned>(insi, inse));
+ makeArrayRef(insi, inse));
}
if (insi == inse)
// The insert list is a prefix of the extract list
@@ -1254,7 +1327,7 @@ Instruction *InstCombiner::visitExtractValueInst(ExtractValueInst &EV) {
// with
// %E extractvalue { i32 } { i32 42 }, 0
return ExtractValueInst::Create(IV->getInsertedValueOperand(),
- ArrayRef<unsigned>(exti, exte));
+ makeArrayRef(exti, exte));
}
if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Agg)) {
// We're extracting from an intrinsic, see if we're the only user, which
@@ -1310,7 +1383,7 @@ Instruction *InstCombiner::visitExtractValueInst(ExtractValueInst &EV) {
// load from a GEP. This reduces the size of the load.
// FIXME: If a load is used only by extractvalue instructions then this
// could be done regardless of having multiple uses.
- if (!L->isVolatile() && L->hasOneUse()) {
+ if (L->isSimple() && L->hasOneUse()) {
// extractvalue has integer indices, getelementptr has Value*s. Convert.
SmallVector<Value*, 4> Indices;
// Prefix an i32 0 since we need the first element.
@@ -1322,8 +1395,7 @@ Instruction *InstCombiner::visitExtractValueInst(ExtractValueInst &EV) {
// We need to insert these at the location of the old load, not at that of
// the extractvalue.
Builder->SetInsertPoint(L->getParent(), L);
- Value *GEP = Builder->CreateInBoundsGEP(L->getPointerOperand(),
- Indices.begin(), Indices.end());
+ Value *GEP = Builder->CreateInBoundsGEP(L->getPointerOperand(), Indices);
// Returning the load directly will cause the main loop to insert it in
// the wrong spot, so use ReplaceInstUsesWith().
return ReplaceInstUsesWith(EV, Builder->CreateLoad(GEP));
@@ -1339,6 +1411,342 @@ Instruction *InstCombiner::visitExtractValueInst(ExtractValueInst &EV) {
return 0;
}
+enum Personality_Type {
+ Unknown_Personality,
+ GNU_Ada_Personality,
+ GNU_CXX_Personality
+};
+
+/// RecognizePersonality - See if the given exception handling personality
+/// function is one that we understand. If so, return a description of it;
+/// otherwise return Unknown_Personality.
+static Personality_Type RecognizePersonality(Value *Pers) {
+ Function *F = dyn_cast<Function>(Pers->stripPointerCasts());
+ if (!F)
+ return Unknown_Personality;
+ return StringSwitch<Personality_Type>(F->getName())
+ .Case("__gnat_eh_personality", GNU_Ada_Personality)
+ .Case("__gxx_personality_v0", GNU_CXX_Personality)
+ .Default(Unknown_Personality);
+}
+
+/// isCatchAll - Return 'true' if the given typeinfo will match anything.
+static bool isCatchAll(Personality_Type Personality, Constant *TypeInfo) {
+ switch (Personality) {
+ case Unknown_Personality:
+ return false;
+ case GNU_Ada_Personality:
+ // While __gnat_all_others_value will match any Ada exception, it doesn't
+ // match foreign exceptions (or didn't, before gcc-4.7).
+ return false;
+ case GNU_CXX_Personality:
+ return TypeInfo->isNullValue();
+ }
+ llvm_unreachable("Unknown personality!");
+}
+
+static bool shorter_filter(const Value *LHS, const Value *RHS) {
+ return
+ cast<ArrayType>(LHS->getType())->getNumElements()
+ <
+ cast<ArrayType>(RHS->getType())->getNumElements();
+}
+
+Instruction *InstCombiner::visitLandingPadInst(LandingPadInst &LI) {
+ // The logic here should be correct for any real-world personality function.
+ // However if that turns out not to be true, the offending logic can always
+ // be conditioned on the personality function, like the catch-all logic is.
+ Personality_Type Personality = RecognizePersonality(LI.getPersonalityFn());
+
+ // Simplify the list of clauses, eg by removing repeated catch clauses
+ // (these are often created by inlining).
+ bool MakeNewInstruction = false; // If true, recreate using the following:
+ SmallVector<Value *, 16> NewClauses; // - Clauses for the new instruction;
+ bool CleanupFlag = LI.isCleanup(); // - The new instruction is a cleanup.
+
+ SmallPtrSet<Value *, 16> AlreadyCaught; // Typeinfos known caught already.
+ for (unsigned i = 0, e = LI.getNumClauses(); i != e; ++i) {
+ bool isLastClause = i + 1 == e;
+ if (LI.isCatch(i)) {
+ // A catch clause.
+ Value *CatchClause = LI.getClause(i);
+ Constant *TypeInfo = cast<Constant>(CatchClause->stripPointerCasts());
+
+ // If we already saw this clause, there is no point in having a second
+ // copy of it.
+ if (AlreadyCaught.insert(TypeInfo)) {
+ // This catch clause was not already seen.
+ NewClauses.push_back(CatchClause);
+ } else {
+ // Repeated catch clause - drop the redundant copy.
+ MakeNewInstruction = true;
+ }
+
+ // If this is a catch-all then there is no point in keeping any following
+ // clauses or marking the landingpad as having a cleanup.
+ if (isCatchAll(Personality, TypeInfo)) {
+ if (!isLastClause)
+ MakeNewInstruction = true;
+ CleanupFlag = false;
+ break;
+ }
+ } else {
+ // A filter clause. If any of the filter elements were already caught
+ // then they can be dropped from the filter. It is tempting to try to
+ // exploit the filter further by saying that any typeinfo that does not
+ // occur in the filter can't be caught later (and thus can be dropped).
+ // However this would be wrong, since typeinfos can match without being
+ // equal (for example if one represents a C++ class, and the other some
+ // class derived from it).
+ assert(LI.isFilter(i) && "Unsupported landingpad clause!");
+ Value *FilterClause = LI.getClause(i);
+ ArrayType *FilterType = cast<ArrayType>(FilterClause->getType());
+ unsigned NumTypeInfos = FilterType->getNumElements();
+
+ // An empty filter catches everything, so there is no point in keeping any
+ // following clauses or marking the landingpad as having a cleanup. By
+ // dealing with this case here the following code is made a bit simpler.
+ if (!NumTypeInfos) {
+ NewClauses.push_back(FilterClause);
+ if (!isLastClause)
+ MakeNewInstruction = true;
+ CleanupFlag = false;
+ break;
+ }
+
+ bool MakeNewFilter = false; // If true, make a new filter.
+ SmallVector<Constant *, 16> NewFilterElts; // New elements.
+ if (isa<ConstantAggregateZero>(FilterClause)) {
+ // Not an empty filter - it contains at least one null typeinfo.
+ assert(NumTypeInfos > 0 && "Should have handled empty filter already!");
+ Constant *TypeInfo =
+ Constant::getNullValue(FilterType->getElementType());
+ // If this typeinfo is a catch-all then the filter can never match.
+ if (isCatchAll(Personality, TypeInfo)) {
+ // Throw the filter away.
+ MakeNewInstruction = true;
+ continue;
+ }
+
+ // There is no point in having multiple copies of this typeinfo, so
+ // discard all but the first copy if there is more than one.
+ NewFilterElts.push_back(TypeInfo);
+ if (NumTypeInfos > 1)
+ MakeNewFilter = true;
+ } else {
+ ConstantArray *Filter = cast<ConstantArray>(FilterClause);
+ SmallPtrSet<Value *, 16> SeenInFilter; // For uniquing the elements.
+ NewFilterElts.reserve(NumTypeInfos);
+
+ // Remove any filter elements that were already caught or that already
+ // occurred in the filter. While there, see if any of the elements are
+ // catch-alls. If so, the filter can be discarded.
+ bool SawCatchAll = false;
+ for (unsigned j = 0; j != NumTypeInfos; ++j) {
+ Value *Elt = Filter->getOperand(j);
+ Constant *TypeInfo = cast<Constant>(Elt->stripPointerCasts());
+ if (isCatchAll(Personality, TypeInfo)) {
+ // This element is a catch-all. Bail out, noting this fact.
+ SawCatchAll = true;
+ break;
+ }
+ if (AlreadyCaught.count(TypeInfo))
+ // Already caught by an earlier clause, so having it in the filter
+ // is pointless.
+ continue;
+ // There is no point in having multiple copies of the same typeinfo in
+ // a filter, so only add it if we didn't already.
+ if (SeenInFilter.insert(TypeInfo))
+ NewFilterElts.push_back(cast<Constant>(Elt));
+ }
+ // A filter containing a catch-all cannot match anything by definition.
+ if (SawCatchAll) {
+ // Throw the filter away.
+ MakeNewInstruction = true;
+ continue;
+ }
+
+ // If we dropped something from the filter, make a new one.
+ if (NewFilterElts.size() < NumTypeInfos)
+ MakeNewFilter = true;
+ }
+ if (MakeNewFilter) {
+ FilterType = ArrayType::get(FilterType->getElementType(),
+ NewFilterElts.size());
+ FilterClause = ConstantArray::get(FilterType, NewFilterElts);
+ MakeNewInstruction = true;
+ }
+
+ NewClauses.push_back(FilterClause);
+
+ // If the new filter is empty then it will catch everything so there is
+ // no point in keeping any following clauses or marking the landingpad
+ // as having a cleanup. The case of the original filter being empty was
+ // already handled above.
+ if (MakeNewFilter && !NewFilterElts.size()) {
+ assert(MakeNewInstruction && "New filter but not a new instruction!");
+ CleanupFlag = false;
+ break;
+ }
+ }
+ }
+
+ // If several filters occur in a row then reorder them so that the shortest
+ // filters come first (those with the smallest number of elements). This is
+ // advantageous because shorter filters are more likely to match, speeding up
+ // unwinding, but mostly because it increases the effectiveness of the other
+ // filter optimizations below.
+ for (unsigned i = 0, e = NewClauses.size(); i + 1 < e; ) {
+ unsigned j;
+ // Find the maximal 'j' s.t. the range [i, j) consists entirely of filters.
+ for (j = i; j != e; ++j)
+ if (!isa<ArrayType>(NewClauses[j]->getType()))
+ break;
+
+ // Check whether the filters are already sorted by length. We need to know
+ // if sorting them is actually going to do anything so that we only make a
+ // new landingpad instruction if it does.
+ for (unsigned k = i; k + 1 < j; ++k)
+ if (shorter_filter(NewClauses[k+1], NewClauses[k])) {
+ // Not sorted, so sort the filters now. Doing an unstable sort would be
+ // correct too but reordering filters pointlessly might confuse users.
+ std::stable_sort(NewClauses.begin() + i, NewClauses.begin() + j,
+ shorter_filter);
+ MakeNewInstruction = true;
+ break;
+ }
+
+ // Look for the next batch of filters.
+ i = j + 1;
+ }
+
+ // If typeinfos matched if and only if equal, then the elements of a filter L
+ // that occurs later than a filter F could be replaced by the intersection of
+ // the elements of F and L. In reality two typeinfos can match without being
+ // equal (for example if one represents a C++ class, and the other some class
+ // derived from it) so it would be wrong to perform this transform in general.
+ // However the transform is correct and useful if F is a subset of L. In that
+ // case L can be replaced by F, and thus removed altogether since repeating a
+ // filter is pointless. So here we look at all pairs of filters F and L where
+ // L follows F in the list of clauses, and remove L if every element of F is
+ // an element of L. This can occur when inlining C++ functions with exception
+ // specifications.
+ for (unsigned i = 0; i + 1 < NewClauses.size(); ++i) {
+ // Examine each filter in turn.
+ Value *Filter = NewClauses[i];
+ ArrayType *FTy = dyn_cast<ArrayType>(Filter->getType());
+ if (!FTy)
+ // Not a filter - skip it.
+ continue;
+ unsigned FElts = FTy->getNumElements();
+ // Examine each filter following this one. Doing this backwards means that
+ // we don't have to worry about filters disappearing under us when removed.
+ for (unsigned j = NewClauses.size() - 1; j != i; --j) {
+ Value *LFilter = NewClauses[j];
+ ArrayType *LTy = dyn_cast<ArrayType>(LFilter->getType());
+ if (!LTy)
+ // Not a filter - skip it.
+ continue;
+ // If Filter is a subset of LFilter, i.e. every element of Filter is also
+ // an element of LFilter, then discard LFilter.
+ SmallVector<Value *, 16>::iterator J = NewClauses.begin() + j;
+ // If Filter is empty then it is a subset of LFilter.
+ if (!FElts) {
+ // Discard LFilter.
+ NewClauses.erase(J);
+ MakeNewInstruction = true;
+ // Move on to the next filter.
+ continue;
+ }
+ unsigned LElts = LTy->getNumElements();
+ // If Filter is longer than LFilter then it cannot be a subset of it.
+ if (FElts > LElts)
+ // Move on to the next filter.
+ continue;
+ // At this point we know that LFilter has at least one element.
+ if (isa<ConstantAggregateZero>(LFilter)) { // LFilter only contains zeros.
+ // Filter is a subset of LFilter iff Filter contains only zeros (as we
+ // already know that Filter is not longer than LFilter).
+ if (isa<ConstantAggregateZero>(Filter)) {
+ assert(FElts <= LElts && "Should have handled this case earlier!");
+ // Discard LFilter.
+ NewClauses.erase(J);
+ MakeNewInstruction = true;
+ }
+ // Move on to the next filter.
+ continue;
+ }
+ ConstantArray *LArray = cast<ConstantArray>(LFilter);
+ if (isa<ConstantAggregateZero>(Filter)) { // Filter only contains zeros.
+ // Since Filter is non-empty and contains only zeros, it is a subset of
+ // LFilter iff LFilter contains a zero.
+ assert(FElts > 0 && "Should have eliminated the empty filter earlier!");
+ for (unsigned l = 0; l != LElts; ++l)
+ if (LArray->getOperand(l)->isNullValue()) {
+ // LFilter contains a zero - discard it.
+ NewClauses.erase(J);
+ MakeNewInstruction = true;
+ break;
+ }
+ // Move on to the next filter.
+ continue;
+ }
+ // At this point we know that both filters are ConstantArrays. Loop over
+ // operands to see whether every element of Filter is also an element of
+ // LFilter. Since filters tend to be short this is probably faster than
+ // using a method that scales nicely.
+ ConstantArray *FArray = cast<ConstantArray>(Filter);
+ bool AllFound = true;
+ for (unsigned f = 0; f != FElts; ++f) {
+ Value *FTypeInfo = FArray->getOperand(f)->stripPointerCasts();
+ AllFound = false;
+ for (unsigned l = 0; l != LElts; ++l) {
+ Value *LTypeInfo = LArray->getOperand(l)->stripPointerCasts();
+ if (LTypeInfo == FTypeInfo) {
+ AllFound = true;
+ break;
+ }
+ }
+ if (!AllFound)
+ break;
+ }
+ if (AllFound) {
+ // Discard LFilter.
+ NewClauses.erase(J);
+ MakeNewInstruction = true;
+ }
+ // Move on to the next filter.
+ }
+ }
+
+ // If we changed any of the clauses, replace the old landingpad instruction
+ // with a new one.
+ if (MakeNewInstruction) {
+ LandingPadInst *NLI = LandingPadInst::Create(LI.getType(),
+ LI.getPersonalityFn(),
+ NewClauses.size());
+ for (unsigned i = 0, e = NewClauses.size(); i != e; ++i)
+ NLI->addClause(NewClauses[i]);
+ // A landing pad with no clauses must have the cleanup flag set. It is
+ // theoretically possible, though highly unlikely, that we eliminated all
+ // clauses. If so, force the cleanup flag to true.
+ if (NewClauses.empty())
+ CleanupFlag = true;
+ NLI->setCleanup(CleanupFlag);
+ return NLI;
+ }
+
+ // Even if none of the clauses changed, we may nonetheless have understood
+ // that the cleanup flag is pointless. Clear it if so.
+ if (LI.isCleanup() != CleanupFlag) {
+ assert(!CleanupFlag && "Adding a cleanup, not removing one?!");
+ LI.setCleanup(CleanupFlag);
+ return &LI;
+ }
+
+ return 0;
+}
+
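The subset rule behind that last pass over the clauses can be stated without LLVM types; a minimal C++ sketch (names are illustrative, and the pointers stand in for the stripped typeinfo operands):

    #include <cstddef>
    #include <vector>

    // An earlier filter F makes a later filter L redundant when every
    // element of F also occurs in L: at that point L behaves as F would,
    // and repeating a filter is pointless, so L can be erased.
    static bool filterSubsumes(const std::vector<const void *> &F,
                               const std::vector<const void *> &L) {
      for (size_t i = 0, e = F.size(); i != e; ++i) {
        bool Found = false;
        for (size_t j = 0, f = L.size(); j != f; ++j)
          if (L[j] == F[i]) { Found = true; break; }
        if (!Found)
          return false;
      }
      return true;
    }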
@@ -1350,7 +1758,8 @@ static bool TryToSinkInstruction(Instruction *I, BasicBlock *DestBlock) {
assert(I->hasOneUse() && "Invariants didn't hold!");
// Cannot move control-flow-involving, volatile loads, vaarg, etc.
- if (isa<PHINode>(I) || I->mayHaveSideEffects() || isa<TerminatorInst>(I))
+ if (isa<PHINode>(I) || isa<LandingPadInst>(I) || I->mayHaveSideEffects() ||
+ isa<TerminatorInst>(I))
return false;
// Do not sink alloca instructions out of the entry block.
@@ -1367,8 +1776,7 @@ static bool TryToSinkInstruction(Instruction *I, BasicBlock *DestBlock) {
return false;
}
- BasicBlock::iterator InsertPos = DestBlock->getFirstNonPHI();
-
+ BasicBlock::iterator InsertPos = DestBlock->getFirstInsertionPt();
I->moveBefore(InsertPos);
++NumSunkInst;
return true;
@@ -1503,27 +1911,29 @@ bool InstCombiner::DoOneIteration(Function &F, unsigned Iteration) {
// Do a quick scan over the function. If we find any blocks that are
// unreachable, remove any instructions inside of them. This prevents
// the instcombine code from having to deal with some bad special cases.
- for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB)
- if (!Visited.count(BB)) {
- Instruction *Term = BB->getTerminator();
- while (Term != BB->begin()) { // Remove instrs bottom-up
- BasicBlock::iterator I = Term; --I;
-
- DEBUG(errs() << "IC: DCE: " << *I << '\n');
- // A debug intrinsic shouldn't force another iteration if we weren't
- // going to do one without it.
- if (!isa<DbgInfoIntrinsic>(I)) {
- ++NumDeadInst;
- MadeIRChange = true;
- }
-
- // If I is not void type then replaceAllUsesWith undef.
- // This allows ValueHandlers and custom metadata to adjust itself.
- if (!I->getType()->isVoidTy())
- I->replaceAllUsesWith(UndefValue::get(I->getType()));
- I->eraseFromParent();
+ for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) {
+ if (Visited.count(BB)) continue;
+
+ // Delete the instructions backwards; this reduces how many def-use and
+ // use-def chains have to be updated along the way.
+ Instruction *EndInst = BB->getTerminator(); // Last not to be deleted.
+ while (EndInst != BB->begin()) {
+ // Delete the next to last instruction.
+ BasicBlock::iterator I = EndInst;
+ Instruction *Inst = --I;
+ if (!Inst->use_empty())
+ Inst->replaceAllUsesWith(UndefValue::get(Inst->getType()));
+ if (isa<LandingPadInst>(Inst)) {
+ EndInst = Inst;
+ continue;
}
+ if (!isa<DbgInfoIntrinsic>(Inst)) {
+ ++NumDeadInst;
+ MadeIRChange = true;
+ }
+ Inst->eraseFromParent();
}
+ }
}
while (!Worklist.isEmpty()) {
@@ -1604,13 +2014,13 @@ bool InstCombiner::DoOneIteration(Function &F, unsigned Iteration) {
// Everything uses the new instruction now.
I->replaceAllUsesWith(Result);
+ // Move the name to the new instruction first.
+ Result->takeName(I);
+
// Push the new instruction and any users onto the worklist.
Worklist.Add(Result);
Worklist.AddUsersToWorkList(*Result);
- // Move the name to the new instruction first.
- Result->takeName(I);
-
// Insert the new instruction into the basic block...
BasicBlock *InstParent = I->getParent();
BasicBlock::iterator InsertPos = I;
diff --git a/lib/Transforms/Instrumentation/CMakeLists.txt b/lib/Transforms/Instrumentation/CMakeLists.txt
index 5700ac8..7b3a927a 100644
--- a/lib/Transforms/Instrumentation/CMakeLists.txt
+++ b/lib/Transforms/Instrumentation/CMakeLists.txt
@@ -6,3 +6,10 @@ add_llvm_library(LLVMInstrumentation
PathProfiling.cpp
ProfilingUtils.cpp
)
+
+add_llvm_library_dependencies(LLVMInstrumentation
+ LLVMAnalysis
+ LLVMCore
+ LLVMSupport
+ LLVMTransformUtils
+ )
diff --git a/lib/Transforms/Instrumentation/EdgeProfiling.cpp b/lib/Transforms/Instrumentation/EdgeProfiling.cpp
index 1d31fcc..e8ef265 100644
--- a/lib/Transforms/Instrumentation/EdgeProfiling.cpp
+++ b/lib/Transforms/Instrumentation/EdgeProfiling.cpp
@@ -74,7 +74,7 @@ bool EdgeProfiler::runOnModule(Module &M) {
}
}
- const Type *ATy = ArrayType::get(Type::getInt32Ty(M.getContext()), NumEdges);
+ Type *ATy = ArrayType::get(Type::getInt32Ty(M.getContext()), NumEdges);
GlobalVariable *Counters =
new GlobalVariable(M, ATy, false, GlobalValue::InternalLinkage,
Constant::getNullValue(ATy), "EdgeProfCounters");
diff --git a/lib/Transforms/Instrumentation/GCOVProfiling.cpp b/lib/Transforms/Instrumentation/GCOVProfiling.cpp
index 3f2c412..ccf7e11 100644
--- a/lib/Transforms/Instrumentation/GCOVProfiling.cpp
+++ b/lib/Transforms/Instrumentation/GCOVProfiling.cpp
@@ -60,11 +60,11 @@ namespace {
bool runOnModule(Module &M);
// Create the GCNO files for the Module based on DebugInfo.
- void emitGCNO(DebugInfoFinder &DIF);
+ void emitGCNO();
// Modify the program to track transitions along edges and call into the
// profiling runtime to emit .gcda files when run.
- bool emitProfileArcs(DebugInfoFinder &DIF);
+ bool emitProfileArcs();
// Get pointers to the functions in the runtime library.
Constant *getStartFileFunc();
@@ -86,8 +86,7 @@ namespace {
// Add the function to write out all our counters to the global destructor
// list.
- void insertCounterWriteout(DebugInfoFinder &,
- SmallVector<std::pair<GlobalVariable *,
+ void insertCounterWriteout(SmallVector<std::pair<GlobalVariable *,
MDNode *>, 8> &);
std::string mangleName(DICompileUnit CU, std::string NewStem);
@@ -110,15 +109,6 @@ ModulePass *llvm::createGCOVProfilerPass(bool EmitNotes, bool EmitData,
return new GCOVProfiler(EmitNotes, EmitData, Use402Format);
}
-static DISubprogram findSubprogram(DIScope Scope) {
- while (!Scope.isSubprogram()) {
- assert(Scope.isLexicalBlock() &&
- "Debug location not lexical block or subprogram");
- Scope = DILexicalBlock(Scope).getContext();
- }
- return DISubprogram(Scope);
-}
-
namespace {
class GCOVRecord {
protected:
@@ -177,18 +167,24 @@ namespace {
}
uint32_t length() {
+ // Here 2 = 1 for string length + 1 for '0' id#.
return lengthOfGCOVString(Filename) + 2 + Lines.size();
}
- private:
- friend class GCOVBlock;
+ void writeOut() {
+ write(0);
+ writeGCOVString(Filename);
+ for (int i = 0, e = Lines.size(); i != e; ++i)
+ write(Lines[i]);
+ }
- GCOVLines(std::string Filename, raw_ostream *os)
- : Filename(Filename) {
+ GCOVLines(StringRef F, raw_ostream *os)
+ : Filename(F) {
this->os = os;
}
- std::string Filename;
+ private:
+ StringRef Filename;
SmallVector<uint32_t, 32> Lines;
};
@@ -197,7 +193,7 @@ namespace {
// other blocks.
class GCOVBlock : public GCOVRecord {
public:
- GCOVLines &getFile(std::string Filename) {
+ GCOVLines &getFile(StringRef Filename) {
GCOVLines *&Lines = LinesByFile[Filename];
if (!Lines) {
Lines = new GCOVLines(Filename, os);
@@ -220,13 +216,8 @@ namespace {
write(Len);
write(Number);
for (StringMap<GCOVLines *>::iterator I = LinesByFile.begin(),
- E = LinesByFile.end(); I != E; ++I) {
- write(0);
- writeGCOVString(I->second->Filename);
- for (int i = 0, e = I->second->Lines.size(); i != e; ++i) {
- write(I->second->Lines[i]);
- }
- }
+ E = LinesByFile.end(); I != E; ++I)
+ I->second->writeOut();
write(0);
write(0);
}
@@ -353,66 +344,66 @@ bool GCOVProfiler::runOnModule(Module &M) {
this->M = &M;
Ctx = &M.getContext();
- DebugInfoFinder DIF;
- DIF.processModule(M);
-
- if (EmitNotes) emitGCNO(DIF);
- if (EmitData) return emitProfileArcs(DIF);
+ if (EmitNotes) emitGCNO();
+ if (EmitData) return emitProfileArcs();
return false;
}
-void GCOVProfiler::emitGCNO(DebugInfoFinder &DIF) {
+void GCOVProfiler::emitGCNO() {
DenseMap<const MDNode *, raw_fd_ostream *> GcnoFiles;
- for (DebugInfoFinder::iterator I = DIF.compile_unit_begin(),
- E = DIF.compile_unit_end(); I != E; ++I) {
- // Each compile unit gets its own .gcno file. This means that whether we run
- // this pass over the original .o's as they're produced, or run it after
- // LTO, we'll generate the same .gcno files.
-
- DICompileUnit CU(*I);
- raw_fd_ostream *&out = GcnoFiles[CU];
- std::string ErrorInfo;
- out = new raw_fd_ostream(mangleName(CU, "gcno").c_str(), ErrorInfo,
- raw_fd_ostream::F_Binary);
- if (!Use402Format)
- out->write("oncg*404MVLL", 12);
- else
- out->write("oncg*402MVLL", 12);
- }
-
- for (DebugInfoFinder::iterator SPI = DIF.subprogram_begin(),
- SPE = DIF.subprogram_end(); SPI != SPE; ++SPI) {
- DISubprogram SP(*SPI);
- raw_fd_ostream *&os = GcnoFiles[SP.getCompileUnit()];
-
- Function *F = SP.getFunction();
- if (!F) continue;
- GCOVFunction Func(SP, os, Use402Format);
-
- for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) {
- GCOVBlock &Block = Func.getBlock(BB);
- TerminatorInst *TI = BB->getTerminator();
- if (int successors = TI->getNumSuccessors()) {
- for (int i = 0; i != successors; ++i) {
- Block.addEdge(Func.getBlock(TI->getSuccessor(i)));
+ NamedMDNode *CU_Nodes = M->getNamedMetadata("llvm.dbg.cu");
+ if (CU_Nodes) {
+ for (unsigned i = 0, e = CU_Nodes->getNumOperands(); i != e; ++i) {
+ // Each compile unit gets its own .gcno file. This means that whether we run
+ // this pass over the original .o's as they're produced, or run it after
+ // LTO, we'll generate the same .gcno files.
+
+ DICompileUnit CU(CU_Nodes->getOperand(i));
+ raw_fd_ostream *&out = GcnoFiles[CU];
+ std::string ErrorInfo;
+ out = new raw_fd_ostream(mangleName(CU, "gcno").c_str(), ErrorInfo,
+ raw_fd_ostream::F_Binary);
+ if (!Use402Format)
+ out->write("oncg*404MVLL", 12);
+ else
+ out->write("oncg*204MVLL", 12);
+
+ DIArray SPs = CU.getSubprograms();
+ for (unsigned i = 0, e = SPs.getNumElements(); i != e; ++i) {
+ DISubprogram SP(SPs.getElement(i));
+ if (!SP.Verify()) continue;
+ raw_fd_ostream *&os = GcnoFiles[CU];
+
+ Function *F = SP.getFunction();
+ if (!F) continue;
+ GCOVFunction Func(SP, os, Use402Format);
+
+ for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) {
+ GCOVBlock &Block = Func.getBlock(BB);
+ TerminatorInst *TI = BB->getTerminator();
+ if (int successors = TI->getNumSuccessors()) {
+ for (int i = 0; i != successors; ++i) {
+ Block.addEdge(Func.getBlock(TI->getSuccessor(i)));
+ }
+ } else if (isa<ReturnInst>(TI)) {
+ Block.addEdge(Func.getReturnBlock());
+ }
+
+ uint32_t Line = 0;
+ for (BasicBlock::iterator I = BB->begin(), IE = BB->end(); I != IE; ++I) {
+ const DebugLoc &Loc = I->getDebugLoc();
+ if (Loc.isUnknown()) continue;
+ if (Line == Loc.getLine()) continue;
+ Line = Loc.getLine();
+ if (SP != getDISubprogram(Loc.getScope(*Ctx))) continue;
+
+ GCOVLines &Lines = Block.getFile(SP.getFilename());
+ Lines.addLine(Loc.getLine());
+ }
}
- } else if (isa<ReturnInst>(TI)) {
- Block.addEdge(Func.getReturnBlock());
- }
-
- uint32_t Line = 0;
- for (BasicBlock::iterator I = BB->begin(), IE = BB->end(); I != IE; ++I) {
- const DebugLoc &Loc = I->getDebugLoc();
- if (Loc.isUnknown()) continue;
- if (Line == Loc.getLine()) continue;
- Line = Loc.getLine();
- if (SP != findSubprogram(DIScope(Loc.getScope(*Ctx)))) continue;
-
- GCOVLines &Lines = Block.getFile(SP.getFilename());
- Lines.addLine(Loc.getLine());
+ Func.writeOut();
}
}
- Func.writeOut();
}
for (DenseMap<const MDNode *, raw_fd_ostream *>::iterator
@@ -424,103 +415,107 @@ void GCOVProfiler::emitGCNO(DebugInfoFinder &DIF) {
}
}
-bool GCOVProfiler::emitProfileArcs(DebugInfoFinder &DIF) {
- if (DIF.subprogram_begin() == DIF.subprogram_end())
- return false;
-
- SmallVector<std::pair<GlobalVariable *, MDNode *>, 8> CountersBySP;
- for (DebugInfoFinder::iterator SPI = DIF.subprogram_begin(),
- SPE = DIF.subprogram_end(); SPI != SPE; ++SPI) {
- DISubprogram SP(*SPI);
- Function *F = SP.getFunction();
- if (!F) continue;
-
- unsigned Edges = 0;
- for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) {
- TerminatorInst *TI = BB->getTerminator();
- if (isa<ReturnInst>(TI))
- ++Edges;
- else
- Edges += TI->getNumSuccessors();
- }
-
- const ArrayType *CounterTy =
+bool GCOVProfiler::emitProfileArcs() {
+ NamedMDNode *CU_Nodes = M->getNamedMetadata("llvm.dbg.cu");
+ if (!CU_Nodes) return false;
+
+ bool Result = false;
+ for (unsigned i = 0, e = CU_Nodes->getNumOperands(); i != e; ++i) {
+ DICompileUnit CU(CU_Nodes->getOperand(i));
+ DIArray SPs = CU.getSubprograms();
+ SmallVector<std::pair<GlobalVariable *, MDNode *>, 8> CountersBySP;
+ for (unsigned i = 0, e = SPs.getNumElements(); i != e; ++i) {
+ DISubprogram SP(SPs.getElement(i));
+ if (!SP.Verify()) continue;
+ Function *F = SP.getFunction();
+ if (!F) continue;
+ if (!Result) Result = true;
+ unsigned Edges = 0;
+ for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) {
+ TerminatorInst *TI = BB->getTerminator();
+ if (isa<ReturnInst>(TI))
+ ++Edges;
+ else
+ Edges += TI->getNumSuccessors();
+ }
+
+ ArrayType *CounterTy =
ArrayType::get(Type::getInt64Ty(*Ctx), Edges);
- GlobalVariable *Counters =
+ GlobalVariable *Counters =
new GlobalVariable(*M, CounterTy, false,
GlobalValue::InternalLinkage,
Constant::getNullValue(CounterTy),
"__llvm_gcov_ctr", 0, false, 0);
- CountersBySP.push_back(std::make_pair(Counters, (MDNode*)SP));
-
- UniqueVector<BasicBlock *> ComplexEdgePreds;
- UniqueVector<BasicBlock *> ComplexEdgeSuccs;
-
- unsigned Edge = 0;
- for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) {
- TerminatorInst *TI = BB->getTerminator();
- int Successors = isa<ReturnInst>(TI) ? 1 : TI->getNumSuccessors();
- if (Successors) {
- IRBuilder<> Builder(TI);
-
- if (Successors == 1) {
- Value *Counter = Builder.CreateConstInBoundsGEP2_64(Counters, 0,
- Edge);
- Value *Count = Builder.CreateLoad(Counter);
- Count = Builder.CreateAdd(Count,
- ConstantInt::get(Type::getInt64Ty(*Ctx),1));
- Builder.CreateStore(Count, Counter);
- } else if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
- Value *Sel = Builder.CreateSelect(
+ CountersBySP.push_back(std::make_pair(Counters, (MDNode*)SP));
+
+ UniqueVector<BasicBlock *> ComplexEdgePreds;
+ UniqueVector<BasicBlock *> ComplexEdgeSuccs;
+
+ unsigned Edge = 0;
+ for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) {
+ TerminatorInst *TI = BB->getTerminator();
+ int Successors = isa<ReturnInst>(TI) ? 1 : TI->getNumSuccessors();
+ if (Successors) {
+ IRBuilder<> Builder(TI);
+
+ if (Successors == 1) {
+ Value *Counter = Builder.CreateConstInBoundsGEP2_64(Counters, 0,
+ Edge);
+ Value *Count = Builder.CreateLoad(Counter);
+ Count = Builder.CreateAdd(Count,
+ ConstantInt::get(Type::getInt64Ty(*Ctx),1));
+ Builder.CreateStore(Count, Counter);
+ } else if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
+ Value *Sel = Builder.CreateSelect(
BI->getCondition(),
ConstantInt::get(Type::getInt64Ty(*Ctx), Edge),
ConstantInt::get(Type::getInt64Ty(*Ctx), Edge + 1));
- SmallVector<Value *, 2> Idx;
- Idx.push_back(Constant::getNullValue(Type::getInt64Ty(*Ctx)));
- Idx.push_back(Sel);
- Value *Counter = Builder.CreateInBoundsGEP(Counters,
- Idx.begin(), Idx.end());
- Value *Count = Builder.CreateLoad(Counter);
- Count = Builder.CreateAdd(Count,
- ConstantInt::get(Type::getInt64Ty(*Ctx),1));
- Builder.CreateStore(Count, Counter);
- } else {
- ComplexEdgePreds.insert(BB);
- for (int i = 0; i != Successors; ++i)
- ComplexEdgeSuccs.insert(TI->getSuccessor(i));
+ SmallVector<Value *, 2> Idx;
+ Idx.push_back(Constant::getNullValue(Type::getInt64Ty(*Ctx)));
+ Idx.push_back(Sel);
+ Value *Counter = Builder.CreateInBoundsGEP(Counters, Idx);
+ Value *Count = Builder.CreateLoad(Counter);
+ Count = Builder.CreateAdd(Count,
+ ConstantInt::get(Type::getInt64Ty(*Ctx),1));
+ Builder.CreateStore(Count, Counter);
+ } else {
+ ComplexEdgePreds.insert(BB);
+ for (int i = 0; i != Successors; ++i)
+ ComplexEdgeSuccs.insert(TI->getSuccessor(i));
+ }
+ Edge += Successors;
}
- Edge += Successors;
}
- }
-
- if (!ComplexEdgePreds.empty()) {
- GlobalVariable *EdgeTable =
+
+ if (!ComplexEdgePreds.empty()) {
+ GlobalVariable *EdgeTable =
buildEdgeLookupTable(F, Counters,
ComplexEdgePreds, ComplexEdgeSuccs);
- GlobalVariable *EdgeState = getEdgeStateValue();
-
- const Type *Int32Ty = Type::getInt32Ty(*Ctx);
- for (int i = 0, e = ComplexEdgePreds.size(); i != e; ++i) {
- IRBuilder<> Builder(ComplexEdgePreds[i+1]->getTerminator());
- Builder.CreateStore(ConstantInt::get(Int32Ty, i), EdgeState);
- }
- for (int i = 0, e = ComplexEdgeSuccs.size(); i != e; ++i) {
- // call runtime to perform increment
- IRBuilder<> Builder(ComplexEdgeSuccs[i+1]->getFirstNonPHI());
- Value *CounterPtrArray =
+ GlobalVariable *EdgeState = getEdgeStateValue();
+
+ Type *Int32Ty = Type::getInt32Ty(*Ctx);
+ for (int i = 0, e = ComplexEdgePreds.size(); i != e; ++i) {
+ IRBuilder<> Builder(ComplexEdgePreds[i+1]->getTerminator());
+ Builder.CreateStore(ConstantInt::get(Int32Ty, i), EdgeState);
+ }
+ for (int i = 0, e = ComplexEdgeSuccs.size(); i != e; ++i) {
+ // call runtime to perform increment
+ BasicBlock::iterator InsertPt =
+ ComplexEdgeSuccs[i+1]->getFirstInsertionPt();
+ IRBuilder<> Builder(InsertPt);
+ Value *CounterPtrArray =
Builder.CreateConstInBoundsGEP2_64(EdgeTable, 0,
i * ComplexEdgePreds.size());
- Builder.CreateCall2(getIncrementIndirectCounterFunc(),
- EdgeState, CounterPtrArray);
- // clear the predecessor number
- Builder.CreateStore(ConstantInt::get(Int32Ty, 0xffffffff), EdgeState);
+ Builder.CreateCall2(getIncrementIndirectCounterFunc(),
+ EdgeState, CounterPtrArray);
+ // clear the predecessor number
+ Builder.CreateStore(ConstantInt::get(Int32Ty, 0xffffffff), EdgeState);
+ }
}
}
+ insertCounterWriteout(CountersBySP);
}
-
- insertCounterWriteout(DIF, CountersBySP);
-
- return true;
+ return Result;
}
// All edges with successors that aren't branches are "complex", because it
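In C terms, the simple-edge and two-way-branch instrumentation built above amounts to this (a hedged sketch; kNumEdges and the helper names are illustrative, and the array stands in for __llvm_gcov_ctr):

    #include <cstdint>

    static const unsigned kNumEdges = 16; // illustrative counter count
    static uint64_t Counters[kNumEdges];  // plays the role of __llvm_gcov_ctr

    // Exactly one successor: bump the edge's counter directly.
    static void countFallThrough(unsigned Edge) {
      Counters[Edge] += 1; // the load / add 1 / store sequence above
    }

    // Two-way branch: the CreateSelect picks Edge or Edge + 1 first.
    static void countBranch(bool Cond, unsigned Edge) {
      unsigned Idx = Cond ? Edge : Edge + 1;
      Counters[Idx] += 1;
    }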
@@ -535,8 +530,8 @@ GlobalVariable *GCOVProfiler::buildEdgeLookupTable(
// read it. Threads and invoke make this untrue.
// emit [(succs * preds) x i64*], logically [succ x [pred x i64*]].
- const Type *Int64PtrTy = Type::getInt64PtrTy(*Ctx);
- const ArrayType *EdgeTableTy = ArrayType::get(
+ Type *Int64PtrTy = Type::getInt64PtrTy(*Ctx);
+ ArrayType *EdgeTableTy = ArrayType::get(
Int64PtrTy, Succs.size() * Preds.size());
Constant **EdgeTable = new Constant*[Succs.size() * Preds.size()];
@@ -572,7 +567,7 @@ GlobalVariable *GCOVProfiler::buildEdgeLookupTable(
}
Constant *GCOVProfiler::getStartFileFunc() {
- const FunctionType *FTy = FunctionType::get(Type::getVoidTy(*Ctx),
+ FunctionType *FTy = FunctionType::get(Type::getVoidTy(*Ctx),
Type::getInt8PtrTy(*Ctx), false);
return M->getOrInsertFunction("llvm_gcda_start_file", FTy);
}
@@ -582,7 +577,7 @@ Constant *GCOVProfiler::getIncrementIndirectCounterFunc() {
Type::getInt32PtrTy(*Ctx), // uint32_t *predecessor
Type::getInt64PtrTy(*Ctx)->getPointerTo(), // uint64_t **state_table_row
};
- const FunctionType *FTy = FunctionType::get(Type::getVoidTy(*Ctx),
+ FunctionType *FTy = FunctionType::get(Type::getVoidTy(*Ctx),
Args, false);
return M->getOrInsertFunction("llvm_gcda_increment_indirect_counter", FTy);
}
@@ -592,7 +587,7 @@ Constant *GCOVProfiler::getEmitFunctionFunc() {
Type::getInt32Ty(*Ctx), // uint32_t ident
Type::getInt8PtrTy(*Ctx), // const char *function_name
};
- const FunctionType *FTy = FunctionType::get(Type::getVoidTy(*Ctx),
+ FunctionType *FTy = FunctionType::get(Type::getVoidTy(*Ctx),
Args, false);
return M->getOrInsertFunction("llvm_gcda_emit_function", FTy);
}
@@ -602,13 +597,13 @@ Constant *GCOVProfiler::getEmitArcsFunc() {
Type::getInt32Ty(*Ctx), // uint32_t num_counters
Type::getInt64PtrTy(*Ctx), // uint64_t *counters
};
- const FunctionType *FTy = FunctionType::get(Type::getVoidTy(*Ctx),
+ FunctionType *FTy = FunctionType::get(Type::getVoidTy(*Ctx),
Args, false);
return M->getOrInsertFunction("llvm_gcda_emit_arcs", FTy);
}
Constant *GCOVProfiler::getEndFileFunc() {
- const FunctionType *FTy = FunctionType::get(Type::getVoidTy(*Ctx), false);
+ FunctionType *FTy = FunctionType::get(Type::getVoidTy(*Ctx), false);
return M->getOrInsertFunction("llvm_gcda_end_file", FTy);
}
@@ -626,9 +621,8 @@ GlobalVariable *GCOVProfiler::getEdgeStateValue() {
}
void GCOVProfiler::insertCounterWriteout(
- DebugInfoFinder &DIF,
SmallVector<std::pair<GlobalVariable *, MDNode *>, 8> &CountersBySP) {
- const FunctionType *WriteoutFTy =
+ FunctionType *WriteoutFTy =
FunctionType::get(Type::getVoidTy(*Ctx), false);
Function *WriteoutF = Function::Create(WriteoutFTy,
GlobalValue::InternalLinkage,
@@ -642,29 +636,31 @@ void GCOVProfiler::insertCounterWriteout(
Constant *EmitArcs = getEmitArcsFunc();
Constant *EndFile = getEndFileFunc();
- for (DebugInfoFinder::iterator CUI = DIF.compile_unit_begin(),
- CUE = DIF.compile_unit_end(); CUI != CUE; ++CUI) {
- DICompileUnit compile_unit(*CUI);
- std::string FilenameGcda = mangleName(compile_unit, "gcda");
- Builder.CreateCall(StartFile,
- Builder.CreateGlobalStringPtr(FilenameGcda));
- for (SmallVector<std::pair<GlobalVariable *, MDNode *>, 8>::iterator
+ NamedMDNode *CU_Nodes = M->getNamedMetadata("llvm.dbg.cu");
+ if (CU_Nodes) {
+ for (unsigned i = 0, e = CU_Nodes->getNumOperands(); i != e; ++i) {
+ DICompileUnit compile_unit(CU_Nodes->getOperand(i));
+ std::string FilenameGcda = mangleName(compile_unit, "gcda");
+ Builder.CreateCall(StartFile,
+ Builder.CreateGlobalStringPtr(FilenameGcda));
+ for (SmallVector<std::pair<GlobalVariable *, MDNode *>, 8>::iterator
I = CountersBySP.begin(), E = CountersBySP.end();
- I != E; ++I) {
- DISubprogram SP(I->second);
- intptr_t ident = reinterpret_cast<intptr_t>(I->second);
- Builder.CreateCall2(EmitFunction,
- ConstantInt::get(Type::getInt32Ty(*Ctx), ident),
- Builder.CreateGlobalStringPtr(SP.getName()));
-
- GlobalVariable *GV = I->first;
- unsigned Arcs =
+ I != E; ++I) {
+ DISubprogram SP(I->second);
+ intptr_t ident = reinterpret_cast<intptr_t>(I->second);
+ Builder.CreateCall2(EmitFunction,
+ ConstantInt::get(Type::getInt32Ty(*Ctx), ident),
+ Builder.CreateGlobalStringPtr(SP.getName()));
+
+ GlobalVariable *GV = I->first;
+ unsigned Arcs =
cast<ArrayType>(GV->getType()->getElementType())->getNumElements();
- Builder.CreateCall2(EmitArcs,
- ConstantInt::get(Type::getInt32Ty(*Ctx), Arcs),
- Builder.CreateConstGEP2_64(GV, 0, 0));
+ Builder.CreateCall2(EmitArcs,
+ ConstantInt::get(Type::getInt32Ty(*Ctx), Arcs),
+ Builder.CreateConstGEP2_64(GV, 0, 0));
+ }
+ Builder.CreateCall(EndFile);
}
- Builder.CreateCall(EndFile);
}
Builder.CreateRetVoid();
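Per compile unit, the writeout function assembled here reduces to the call sequence below (a hedged sketch: the extern signatures mirror the FunctionTypes resolved earlier in this file, while the literals are made up):

    #include <cstdint>

    extern "C" {
    void llvm_gcda_start_file(const char *Filename);
    void llvm_gcda_emit_function(uint32_t Ident, const char *Name);
    void llvm_gcda_emit_arcs(uint32_t NumCounters, uint64_t *Counters);
    void llvm_gcda_end_file(void);
    }

    static uint64_t Ctrs[4]; // illustrative counter array for one function

    static void writeoutSketch() {
      llvm_gcda_start_file("example.gcda"); // one .gcda per compile unit
      llvm_gcda_emit_function(1, "demo");   // ident plus function name
      llvm_gcda_emit_arcs(4, Ctrs);         // counter count plus pointer
      llvm_gcda_end_file();
    }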
diff --git a/lib/Transforms/Instrumentation/OptimalEdgeProfiling.cpp b/lib/Transforms/Instrumentation/OptimalEdgeProfiling.cpp
index e09f882..62c21b8 100644
--- a/lib/Transforms/Instrumentation/OptimalEdgeProfiling.cpp
+++ b/lib/Transforms/Instrumentation/OptimalEdgeProfiling.cpp
@@ -112,8 +112,8 @@ bool OptimalEdgeProfiler::runOnModule(Module &M) {
// be calculated from other edge counters on reading the profile info back
// in.
- const Type *Int32 = Type::getInt32Ty(M.getContext());
- const ArrayType *ATy = ArrayType::get(Int32, NumEdges);
+ Type *Int32 = Type::getInt32Ty(M.getContext());
+ ArrayType *ATy = ArrayType::get(Int32, NumEdges);
GlobalVariable *Counters =
new GlobalVariable(M, ATy, false, GlobalValue::InternalLinkage,
Constant::getNullValue(ATy), "OptEdgeProfCounters");
diff --git a/lib/Transforms/Instrumentation/PathProfiling.cpp b/lib/Transforms/Instrumentation/PathProfiling.cpp
index 7541663..23915d3 100644
--- a/lib/Transforms/Instrumentation/PathProfiling.cpp
+++ b/lib/Transforms/Instrumentation/PathProfiling.cpp
@@ -374,7 +374,7 @@ namespace llvm {
template<bool xcompile> class TypeBuilder<PathProfilingFunctionTable,
xcompile> {
public:
- static const StructType *get(LLVMContext& C) {
+ static StructType *get(LLVMContext& C) {
return( StructType::get(
TypeBuilder<types::i<32>, xcompile>::get(C), // type
TypeBuilder<types::i<32>, xcompile>::get(C), // array size
@@ -909,7 +909,7 @@ BasicBlock::iterator PathProfiler::getInsertionPoint(BasicBlock* block, Value*
pathNumber) {
if(pathNumber == NULL || isa<ConstantInt>(pathNumber)
|| (((Instruction*)(pathNumber))->getParent()) != block) {
- return(block->getFirstNonPHI());
+ return(block->getFirstInsertionPt());
} else {
Instruction* pathNumberInst = (Instruction*) (pathNumber);
BasicBlock::iterator insertPoint;
@@ -930,7 +930,7 @@ BasicBlock::iterator PathProfiler::getInsertionPoint(BasicBlock* block, Value*
// A PHINode is created in the node, and its values initialized to -1U.
void PathProfiler::preparePHI(BLInstrumentationNode* node) {
BasicBlock* block = node->getBlock();
- BasicBlock::iterator insertPoint = block->getFirstNonPHI();
+ BasicBlock::iterator insertPoint = block->getFirstInsertionPt();
pred_iterator PB = pred_begin(node->getBlock()),
PE = pred_end(node->getBlock());
PHINode* phi = PHINode::Create(Type::getInt32Ty(*Context),
@@ -999,7 +999,7 @@ void PathProfiler::insertNumberIncrement(BLInstrumentationNode* node,
BasicBlock::iterator insertPoint;
if( atBeginning )
- insertPoint = block->getFirstNonPHI();
+ insertPoint = block->getFirstInsertionPt();
else
insertPoint = block->getTerminator();
@@ -1029,8 +1029,7 @@ void PathProfiler::insertCounterIncrement(Value* incValue,
gepIndices[1] = incValue;
GetElementPtrInst* pcPointer =
- GetElementPtrInst::Create(dag->getCounterArray(),
- gepIndices.begin(), gepIndices.end(),
+ GetElementPtrInst::Create(dag->getCounterArray(), gepIndices,
"counterInc", insertPoint);
// Load from the array - call it oldPC
@@ -1140,7 +1139,7 @@ void PathProfiler::insertInstrumentationStartingAt(BLInstrumentationEdge* edge,
}
BasicBlock::iterator insertPoint = atBeginning ?
- instrumentNode->getBlock()->getFirstNonPHI() :
+ instrumentNode->getBlock()->getFirstInsertionPt() :
instrumentNode->getBlock()->getTerminator();
// add information from the bottom edge, if it exists
@@ -1172,7 +1171,7 @@ void PathProfiler::insertInstrumentationStartingAt(BLInstrumentationEdge* edge,
// Insert instrumentation if this is a normal edge
else {
BasicBlock::iterator insertPoint = atBeginning ?
- instrumentNode->getBlock()->getFirstNonPHI() :
+ instrumentNode->getBlock()->getFirstInsertionPt() :
instrumentNode->getBlock()->getTerminator();
if( edge->isInitialization() ) { // initialize path number
@@ -1233,7 +1232,7 @@ void PathProfiler::insertInstrumentation(
end = callEdges.end(); edge != end; edge++ ) {
BLInstrumentationNode* node =
(BLInstrumentationNode*)(*edge)->getSource();
- BasicBlock::iterator insertPoint = node->getBlock()->getFirstNonPHI();
+ BasicBlock::iterator insertPoint = node->getBlock()->getFirstInsertionPt();
// Find the first function call
while( ((Instruction&)(*insertPoint)).getOpcode() != Instruction::Call )
@@ -1289,7 +1288,7 @@ void PathProfiler::runOnFunction(std::vector<Constant*> &ftInit,
// Should we store the information in an array or hash
if( dag.getNumberOfPaths() <= HASH_THRESHHOLD ) {
- const Type* t = ArrayType::get(Type::getInt32Ty(*Context),
+ Type* t = ArrayType::get(Type::getInt32Ty(*Context),
dag.getNumberOfPaths());
dag.setCounterArray(new GlobalVariable(M, t, false,
@@ -1301,7 +1300,7 @@ void PathProfiler::runOnFunction(std::vector<Constant*> &ftInit,
// Add to global function reference table
unsigned type;
- const Type* voidPtr = TypeBuilder<types::i<8>*, true>::get(*Context);
+ Type* voidPtr = TypeBuilder<types::i<8>*, true>::get(*Context);
if( dag.getNumberOfPaths() <= HASH_THRESHHOLD )
type = ProfilingArray;
@@ -1315,7 +1314,7 @@ void PathProfiler::runOnFunction(std::vector<Constant*> &ftInit,
ConstantExpr::getBitCast(dag.getCounterArray(), voidPtr) :
Constant::getNullValue(voidPtr);
- const StructType* at = ftEntryTypeBuilder::get(*Context);
+ StructType* at = ftEntryTypeBuilder::get(*Context);
ConstantStruct* functionEntry =
(ConstantStruct*)ConstantStruct::get(at, entryArray);
ftInit.push_back(functionEntry);
@@ -1379,8 +1378,8 @@ bool PathProfiler::runOnModule(Module &M) {
runOnFunction(ftInit, *F, M);
}
- const Type *t = ftEntryTypeBuilder::get(*Context);
- const ArrayType* ftArrayType = ArrayType::get(t, ftInit.size());
+ Type *t = ftEntryTypeBuilder::get(*Context);
+ ArrayType* ftArrayType = ArrayType::get(t, ftInit.size());
Constant* ftInitConstant = ConstantArray::get(ftArrayType, ftInit);
DEBUG(dbgs() << " ftArrayType:" << *ftArrayType << "\n");
@@ -1388,7 +1387,7 @@ bool PathProfiler::runOnModule(Module &M) {
GlobalVariable* functionTable =
new GlobalVariable(M, ftArrayType, false, GlobalValue::InternalLinkage,
ftInitConstant, "functionPathTable");
- const Type *eltType = ftArrayType->getTypeAtIndex((unsigned)0);
+ Type *eltType = ftArrayType->getTypeAtIndex((unsigned)0);
InsertProfilingInitCall(Main, "llvm_start_path_profiling", functionTable,
PointerType::getUnqual(eltType));
diff --git a/lib/Transforms/Instrumentation/ProfilingUtils.cpp b/lib/Transforms/Instrumentation/ProfilingUtils.cpp
index 445a5b6..de57cd1 100644
--- a/lib/Transforms/Instrumentation/ProfilingUtils.cpp
+++ b/lib/Transforms/Instrumentation/ProfilingUtils.cpp
@@ -25,9 +25,9 @@ void llvm::InsertProfilingInitCall(Function *MainFn, const char *FnName,
GlobalValue *Array,
PointerType *arrayType) {
LLVMContext &Context = MainFn->getContext();
- const Type *ArgVTy =
+ Type *ArgVTy =
PointerType::getUnqual(Type::getInt8PtrTy(Context));
- const PointerType *UIntPtr = arrayType ? arrayType :
+ PointerType *UIntPtr = arrayType ? arrayType :
Type::getInt32PtrTy(Context);
Module &M = *MainFn->getParent();
Constant *InitFn = M.getOrInsertFunction(FnName, Type::getInt32Ty(Context),
@@ -51,8 +51,7 @@ void llvm::InsertProfilingInitCall(Function *MainFn, const char *FnName,
Constant::getNullValue(Type::getInt32Ty(Context)));
unsigned NumElements = 0;
if (Array) {
- Args[2] = ConstantExpr::getGetElementPtr(Array, &GEPIndices[0],
- GEPIndices.size());
+ Args[2] = ConstantExpr::getGetElementPtr(Array, GEPIndices);
NumElements =
cast<ArrayType>(Array->getType()->getElementType())->getNumElements();
} else {
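// A minimal standalone sketch (not part of the patch) of the API shape
// change visible above: getGetElementPtr now takes one ArrayRef-style view
// of the indices instead of a raw pointer plus a count.  Span below is a
// hypothetical stand-in for llvm::ArrayRef, only to show the call shape.
#include <cstddef>
#include <vector>

template <typename T> struct Span {
  const T *Data;
  std::size_t Len;
  // Implicit conversion lets callers hand over a std::vector directly.
  Span(const std::vector<T> &V) : Data(V.data()), Len(V.size()) {}
};

// Old shape: every call site spells out &Indices[0] and Indices.size().
long sumOld(const long *Idx, std::size_t N) {
  long S = 0;
  for (std::size_t I = 0; I != N; ++I)
    S += Idx[I];
  return S;
}

// New shape: one argument carrying the same data.
long sumNew(Span<long> Idx) {
  long S = 0;
  for (std::size_t I = 0; I != Idx.Len; ++I)
    S += Idx.Data[I];
  return S;
}

int main() {
  std::vector<long> GEPIndices;
  GEPIndices.push_back(0);
  GEPIndices.push_back(7);
  return sumOld(&GEPIndices[0], GEPIndices.size()) == sumNew(GEPIndices) ? 0 : 1;
}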
@@ -108,7 +107,7 @@ void llvm::InsertProfilingInitCall(Function *MainFn, const char *FnName,
void llvm::IncrementCounterInBlock(BasicBlock *BB, unsigned CounterNum,
GlobalValue *CounterArray, bool beginning) {
// Insert the increment after any alloca or PHI instructions...
- BasicBlock::iterator InsertPos = beginning ? BB->getFirstNonPHI() :
+ BasicBlock::iterator InsertPos = beginning ? BB->getFirstInsertionPt() :
BB->getTerminator();
while (isa<AllocaInst>(InsertPos))
++InsertPos;
@@ -120,7 +119,7 @@ void llvm::IncrementCounterInBlock(BasicBlock *BB, unsigned CounterNum,
Indices[0] = Constant::getNullValue(Type::getInt32Ty(Context));
Indices[1] = ConstantInt::get(Type::getInt32Ty(Context), CounterNum);
Constant *ElementPtr =
- ConstantExpr::getGetElementPtr(CounterArray, &Indices[0], Indices.size());
+ ConstantExpr::getGetElementPtr(CounterArray, Indices);
// Load, increment and store the value back.
Value *OldVal = new LoadInst(ElementPtr, "OldFuncCounter", InsertPos);
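// A minimal standalone sketch of what the emitted instrumentation does at
// run time, assuming a plain global counter array: index into the array,
// load the old value, add one, store it back.
#include <cstdint>

void incrementCounter(uint32_t *CounterArray, unsigned CounterNum) {
  uint32_t OldVal = CounterArray[CounterNum]; // load ("OldFuncCounter")
  CounterArray[CounterNum] = OldVal + 1;      // increment and store back
}

int main() {
  uint32_t Counters[4] = {0, 0, 0, 0};
  incrementCounter(Counters, 2);
  return Counters[2] == 1 ? 0 : 1;
}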
@@ -137,7 +136,7 @@ void llvm::InsertProfilingShutdownCall(Function *Callee, Module *Mod) {
Type::getInt32Ty(Mod->getContext()),
FunctionType::get(Type::getVoidTy(Mod->getContext()), false)->getPointerTo()
};
- const StructType *GlobalDtorElemTy =
+ StructType *GlobalDtorElemTy =
StructType::get(Mod->getContext(), GlobalDtorElems, false);
// Construct the new element we'll be adding.
diff --git a/lib/Transforms/Scalar/ADCE.cpp b/lib/Transforms/Scalar/ADCE.cpp
index a5adb5e..ba214d1 100644
--- a/lib/Transforms/Scalar/ADCE.cpp
+++ b/lib/Transforms/Scalar/ADCE.cpp
@@ -57,6 +57,7 @@ bool ADCE::runOnFunction(Function& F) {
for (inst_iterator I = inst_begin(F), E = inst_end(F); I != E; ++I)
if (isa<TerminatorInst>(I.getInstructionIterator()) ||
isa<DbgInfoIntrinsic>(I.getInstructionIterator()) ||
+ isa<LandingPadInst>(I.getInstructionIterator()) ||
I->mayHaveSideEffects()) {
alive.insert(I.getInstructionIterator());
worklist.push_back(I.getInstructionIterator());
@@ -65,7 +66,6 @@ bool ADCE::runOnFunction(Function& F) {
// Propagate liveness backwards to operands.
while (!worklist.empty()) {
Instruction* curr = worklist.pop_back_val();
-
for (Instruction::op_iterator OI = curr->op_begin(), OE = curr->op_end();
OI != OE; ++OI)
if (Instruction* Inst = dyn_cast<Instruction>(OI))
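// A minimal standalone sketch of the liveness computation above, with an
// instruction modelled as just a node with operands: unconditionally-live
// instructions (terminators, debug intrinsics, now landing pads, anything
// with side effects) seed a worklist, and liveness then flows backwards
// through operands.
#include <set>
#include <vector>

struct Inst {
  bool AlwaysLive;               // terminator / landingpad / side effects
  std::vector<Inst *> Operands;
};

std::set<Inst *> computeLive(const std::vector<Inst *> &Func) {
  std::set<Inst *> Alive;
  std::vector<Inst *> Worklist;
  for (std::size_t I = 0; I != Func.size(); ++I)
    if (Func[I]->AlwaysLive) {
      Alive.insert(Func[I]);
      Worklist.push_back(Func[I]);
    }
  while (!Worklist.empty()) {
    Inst *Curr = Worklist.back();
    Worklist.pop_back();
    for (std::size_t I = 0; I != Curr->Operands.size(); ++I)
      if (Alive.insert(Curr->Operands[I]).second) // newly discovered live op
        Worklist.push_back(Curr->Operands[I]);
  }
  return Alive;
}

int main() {
  Inst Def = {false, std::vector<Inst *>()};
  Inst Ret = {true, std::vector<Inst *>(1, &Def)}; // terminator uses Def
  std::vector<Inst *> Func;
  Func.push_back(&Def);
  Func.push_back(&Ret);
  return computeLive(Func).count(&Def) == 1 ? 0 : 1;
}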
diff --git a/lib/Transforms/Scalar/CMakeLists.txt b/lib/Transforms/Scalar/CMakeLists.txt
index c223da6..79bcae5 100644
--- a/lib/Transforms/Scalar/CMakeLists.txt
+++ b/lib/Transforms/Scalar/CMakeLists.txt
@@ -29,6 +29,14 @@ add_llvm_library(LLVMScalarOpts
SimplifyCFGPass.cpp
SimplifyLibCalls.cpp
Sink.cpp
- TailDuplication.cpp
TailRecursionElimination.cpp
)
+
+add_llvm_library_dependencies(LLVMScalarOpts
+ LLVMAnalysis
+ LLVMCore
+ LLVMInstCombine
+ LLVMSupport
+ LLVMTarget
+ LLVMTransformUtils
+ )
diff --git a/lib/Transforms/Scalar/CodeGenPrepare.cpp b/lib/Transforms/Scalar/CodeGenPrepare.cpp
index 0af14ed..f8f18b2 100644
--- a/lib/Transforms/Scalar/CodeGenPrepare.cpp
+++ b/lib/Transforms/Scalar/CodeGenPrepare.cpp
@@ -58,6 +58,7 @@ STATISTIC(NumMemoryInsts, "Number of memory instructions whose address "
STATISTIC(NumExtsMoved, "Number of [s|z]ext instructions combined with loads");
STATISTIC(NumExtUses, "Number of uses of [s|z]ext instructions optimized");
STATISTIC(NumRetsDup, "Number of return instructions duplicated");
+STATISTIC(NumDbgValueMoved, "Number of debug value instructions moved");
static cl::opt<bool> DisableBranchOpts(
"disable-cgp-branch-opts", cl::Hidden, cl::init(false),
@@ -104,12 +105,13 @@ namespace {
void EliminateMostlyEmptyBlock(BasicBlock *BB);
bool OptimizeBlock(BasicBlock &BB);
bool OptimizeInst(Instruction *I);
- bool OptimizeMemoryInst(Instruction *I, Value *Addr, const Type *AccessTy);
+ bool OptimizeMemoryInst(Instruction *I, Value *Addr, Type *AccessTy);
bool OptimizeInlineAsmInst(CallInst *CS);
bool OptimizeCallInst(CallInst *CI);
bool MoveExtToFormExtLoad(Instruction *I);
bool OptimizeExtUses(Instruction *I);
bool DupRetToEnableTailCallOpts(ReturnInst *RI);
+ bool PlaceDbgValues(Function &F);
};
}
@@ -132,6 +134,11 @@ bool CodeGenPrepare::runOnFunction(Function &F) {
// unconditional branch.
EverMadeChange |= EliminateMostlyEmptyBlocks(F);
+ // If llvm.dbg.value is far away from the value then iSel may not be able
+ // to handle it properly. iSel will drop llvm.dbg.value if it cannot
+ // find a node corresponding to the value.
+ EverMadeChange |= PlaceDbgValues(F);
+
bool MadeChange = true;
while (MadeChange) {
MadeChange = false;
@@ -410,8 +417,7 @@ static bool OptimizeNoopCopyExpression(CastInst *CI, const TargetLowering &TLI){
CastInst *&InsertedCast = InsertedCasts[UserBB];
if (!InsertedCast) {
- BasicBlock::iterator InsertPt = UserBB->getFirstNonPHI();
-
+ BasicBlock::iterator InsertPt = UserBB->getFirstInsertionPt();
InsertedCast =
CastInst::Create(CI->getOpcode(), CI->getOperand(0), CI->getType(), "",
InsertPt);
@@ -467,8 +473,7 @@ static bool OptimizeCmpExpression(CmpInst *CI) {
CmpInst *&InsertedCmp = InsertedCmps[UserBB];
if (!InsertedCmp) {
- BasicBlock::iterator InsertPt = UserBB->getFirstNonPHI();
-
+ BasicBlock::iterator InsertPt = UserBB->getFirstInsertionPt();
InsertedCmp =
CmpInst::Create(CI->getOpcode(),
CI->getPredicate(), CI->getOperand(0),
@@ -528,7 +533,7 @@ bool CodeGenPrepare::OptimizeCallInst(CallInst *CI) {
IntrinsicInst *II = dyn_cast<IntrinsicInst>(CI);
if (II && II->getIntrinsicID() == Intrinsic::objectsize) {
bool Min = (cast<ConstantInt>(II->getArgOperand(1))->getZExtValue() == 1);
- const Type *ReturnTy = CI->getType();
+ Type *ReturnTy = CI->getType();
Constant *RetVal = ConstantInt::get(ReturnTy, Min ? 0 : -1ULL);
// Substituting this can cause recursive simplifications, which can
@@ -551,22 +556,6 @@ bool CodeGenPrepare::OptimizeCallInst(CallInst *CI) {
// From here on out we're working with named functions.
if (CI->getCalledFunction() == 0) return false;
- // llvm.dbg.value is far away from the value then iSel may not be able
- // handle it properly. iSel will drop llvm.dbg.value if it can not
- // find a node corresponding to the value.
- if (DbgValueInst *DVI = dyn_cast<DbgValueInst>(CI))
- if (Instruction *VI = dyn_cast_or_null<Instruction>(DVI->getValue()))
- if (!VI->isTerminator() &&
- (DVI->getParent() != VI->getParent() || DT->dominates(DVI, VI))) {
- DEBUG(dbgs() << "Moving Debug Value before :\n" << *DVI << ' ' << *VI);
- DVI->removeFromParent();
- if (isa<PHINode>(VI))
- DVI->insertBefore(VI->getParent()->getFirstNonPHI());
- else
- DVI->insertAfter(VI);
- return true;
- }
-
// We'll need TargetData from here on out.
const TargetData *TD = TLI ? TLI->getTargetData() : 0;
if (!TD) return false;
@@ -724,7 +713,7 @@ static bool IsNonLocalValue(Value *V, BasicBlock *BB) {
/// This method is used to optimize both load/store and inline asms with memory
/// operands.
bool CodeGenPrepare::OptimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
- const Type *AccessTy) {
+ Type *AccessTy) {
Value *Repl = Addr;
// Try to collapse single-value PHI nodes. This is necessary to undo
@@ -746,13 +735,11 @@ bool CodeGenPrepare::OptimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
worklist.pop_back();
// Break use-def graph loops.
- if (Visited.count(V)) {
+ if (!Visited.insert(V)) {
Consensus = 0;
break;
}
- Visited.insert(V);
-
// For a PHI node, push all of its incoming values.
if (PHINode *P = dyn_cast<PHINode>(V)) {
for (unsigned i = 0, e = P->getNumIncomingValues(); i != e; ++i)
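// A minimal standalone sketch of the insert-and-test idiom adopted above,
// using std::set in place of SmallPtrSet: insert() reports whether the
// element was new, so the separate count()-then-insert() pair collapses
// into a single probe.
#include <set>

bool breakCycle(std::set<const void *> &Visited, const void *V) {
  // True when V was seen before -- the use-def graph loop case.
  return !Visited.insert(V).second;
}

int main() {
  std::set<const void *> Visited;
  int X = 0;
  if (breakCycle(Visited, &X)) return 1;  // first visit: no cycle
  if (!breakCycle(Visited, &X)) return 1; // second visit: cycle detected
  return 0;
}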
@@ -763,7 +750,7 @@ bool CodeGenPrepare::OptimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
// For non-PHIs, determine the addressing mode being computed.
SmallVector<Instruction*, 16> NewAddrModeInsts;
ExtAddrMode NewAddrMode =
- AddressingModeMatcher::Match(V, AccessTy,MemoryInst,
+ AddressingModeMatcher::Match(V, AccessTy, MemoryInst,
NewAddrModeInsts, *TLI);
// This check is broken into two cases with very similar code to avoid using
@@ -822,7 +809,7 @@ bool CodeGenPrepare::OptimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
// Insert this computation right after this user. Since our caller is
// scanning from the top of the BB to the bottom, reuse of the expr are
// guaranteed to happen later.
- BasicBlock::iterator InsertPt = MemoryInst;
+ IRBuilder<> Builder(MemoryInst);
// Now that we determined the addressing expression we want to use and know
// that we have to sink it into this block. Check to see if we have already
@@ -833,11 +820,11 @@ bool CodeGenPrepare::OptimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
DEBUG(dbgs() << "CGP: Reusing nonlocal addrmode: " << AddrMode << " for "
<< *MemoryInst);
if (SunkAddr->getType() != Addr->getType())
- SunkAddr = new BitCastInst(SunkAddr, Addr->getType(), "tmp", InsertPt);
+ SunkAddr = Builder.CreateBitCast(SunkAddr, Addr->getType());
} else {
DEBUG(dbgs() << "CGP: SINKING nonlocal addrmode: " << AddrMode << " for "
<< *MemoryInst);
- const Type *IntPtrTy =
+ Type *IntPtrTy =
TLI->getTargetData()->getIntPtrType(AccessTy->getContext());
Value *Result = 0;
@@ -850,10 +837,9 @@ bool CodeGenPrepare::OptimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
if (AddrMode.BaseReg) {
Value *V = AddrMode.BaseReg;
if (V->getType()->isPointerTy())
- V = new PtrToIntInst(V, IntPtrTy, "sunkaddr", InsertPt);
+ V = Builder.CreatePtrToInt(V, IntPtrTy, "sunkaddr");
if (V->getType() != IntPtrTy)
- V = CastInst::CreateIntegerCast(V, IntPtrTy, /*isSigned=*/true,
- "sunkaddr", InsertPt);
+ V = Builder.CreateIntCast(V, IntPtrTy, /*isSigned=*/true, "sunkaddr");
Result = V;
}
@@ -863,29 +849,27 @@ bool CodeGenPrepare::OptimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
if (V->getType() == IntPtrTy) {
// done.
} else if (V->getType()->isPointerTy()) {
- V = new PtrToIntInst(V, IntPtrTy, "sunkaddr", InsertPt);
+ V = Builder.CreatePtrToInt(V, IntPtrTy, "sunkaddr");
} else if (cast<IntegerType>(IntPtrTy)->getBitWidth() <
cast<IntegerType>(V->getType())->getBitWidth()) {
- V = new TruncInst(V, IntPtrTy, "sunkaddr", InsertPt);
+ V = Builder.CreateTrunc(V, IntPtrTy, "sunkaddr");
} else {
- V = new SExtInst(V, IntPtrTy, "sunkaddr", InsertPt);
+ V = Builder.CreateSExt(V, IntPtrTy, "sunkaddr");
}
if (AddrMode.Scale != 1)
- V = BinaryOperator::CreateMul(V, ConstantInt::get(IntPtrTy,
- AddrMode.Scale),
- "sunkaddr", InsertPt);
+ V = Builder.CreateMul(V, ConstantInt::get(IntPtrTy, AddrMode.Scale),
+ "sunkaddr");
if (Result)
- Result = BinaryOperator::CreateAdd(Result, V, "sunkaddr", InsertPt);
+ Result = Builder.CreateAdd(Result, V, "sunkaddr");
else
Result = V;
}
// Add in the BaseGV if present.
if (AddrMode.BaseGV) {
- Value *V = new PtrToIntInst(AddrMode.BaseGV, IntPtrTy, "sunkaddr",
- InsertPt);
+ Value *V = Builder.CreatePtrToInt(AddrMode.BaseGV, IntPtrTy, "sunkaddr");
if (Result)
- Result = BinaryOperator::CreateAdd(Result, V, "sunkaddr", InsertPt);
+ Result = Builder.CreateAdd(Result, V, "sunkaddr");
else
Result = V;
}
@@ -894,7 +878,7 @@ bool CodeGenPrepare::OptimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
if (AddrMode.BaseOffs) {
Value *V = ConstantInt::get(IntPtrTy, AddrMode.BaseOffs);
if (Result)
- Result = BinaryOperator::CreateAdd(Result, V, "sunkaddr", InsertPt);
+ Result = Builder.CreateAdd(Result, V, "sunkaddr");
else
Result = V;
}
@@ -902,7 +886,7 @@ bool CodeGenPrepare::OptimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
if (Result == 0)
SunkAddr = Constant::getNullValue(Addr->getType());
else
- SunkAddr = new IntToPtrInst(Result, Addr->getType(), "sunkaddr",InsertPt);
+ SunkAddr = Builder.CreateIntToPtr(Result, Addr->getType(), "sunkaddr");
}
MemoryInst->replaceUsesOfWith(Repl, SunkAddr);
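// A minimal standalone sketch of the refactor above: rather than passing an
// explicit insertion point to every instruction constructor, a builder
// object remembers the point once and every create call lands there.  The
// Builder type here is a toy stand-in for IRBuilder<>.
#include <memory>
#include <string>
#include <vector>

struct Node { std::string Text; };

class Builder {
  std::vector<std::unique_ptr<Node> > &InsertList;
public:
  explicit Builder(std::vector<std::unique_ptr<Node> > &L) : InsertList(L) {}
  Node *create(std::string Text) {
    InsertList.push_back(std::unique_ptr<Node>(new Node{std::move(Text)}));
    return InsertList.back().get();
  }
};

int main() {
  std::vector<std::unique_ptr<Node> > Block;
  Builder B(Block);     // set the insertion point once
  B.create("ptrtoint"); // no per-call InsertPt argument needed
  B.create("mul");
  B.create("add");
  return Block.size() == 3 ? 0 : 1;
}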
@@ -1059,8 +1043,7 @@ bool CodeGenPrepare::OptimizeExtUses(Instruction *I) {
Instruction *&InsertedTrunc = InsertedTruncs[UserBB];
if (!InsertedTrunc) {
- BasicBlock::iterator InsertPt = UserBB->getFirstNonPHI();
-
+ BasicBlock::iterator InsertPt = UserBB->getFirstInsertionPt();
InsertedTrunc = new TruncInst(I, Src->getType(), "", InsertPt);
}
@@ -1159,3 +1142,34 @@ bool CodeGenPrepare::OptimizeBlock(BasicBlock &BB) {
return MadeChange;
}
+
+// If llvm.dbg.value is far away from the value then iSel may not be able
+// to handle it properly. iSel will drop llvm.dbg.value if it cannot
+// find a node corresponding to the value.
+bool CodeGenPrepare::PlaceDbgValues(Function &F) {
+ bool MadeChange = false;
+ for (Function::iterator I = F.begin(), E = F.end(); I != E; ++I) {
+ Instruction *PrevNonDbgInst = NULL;
+ for (BasicBlock::iterator BI = I->begin(), BE = I->end(); BI != BE;) {
+ Instruction *Insn = BI; ++BI;
+ DbgValueInst *DVI = dyn_cast<DbgValueInst>(Insn);
+ if (!DVI) {
+ PrevNonDbgInst = Insn;
+ continue;
+ }
+
+ Instruction *VI = dyn_cast_or_null<Instruction>(DVI->getValue());
+ if (VI && VI != PrevNonDbgInst && !VI->isTerminator()) {
+ DEBUG(dbgs() << "Moving Debug Value before :\n" << *DVI << ' ' << *VI);
+ DVI->removeFromParent();
+ if (isa<PHINode>(VI))
+ DVI->insertBefore(VI->getParent()->getFirstInsertionPt());
+ else
+ DVI->insertAfter(VI);
+ MadeChange = true;
+ ++NumDbgValueMoved;
+ }
+ }
+ }
+ return MadeChange;
+}
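// A minimal standalone sketch of the reordering PlaceDbgValues performs,
// modelling instructions as list nodes: a debug marker that drifted away
// from the value it describes is spliced to sit directly after that value.
#include <iterator>
#include <list>
#include <string>

struct Inst {
  std::string Name;
  const Inst *DescribedValue; // non-null marks a debug marker
  explicit Inst(std::string N, const Inst *DV = 0)
      : Name(N), DescribedValue(DV) {}
};

void placeDbgValues(std::list<const Inst *> &Block) {
  for (std::list<const Inst *>::iterator It = Block.begin();
       It != Block.end();) {
    const Inst *I = *It;
    std::list<const Inst *>::iterator Next = std::next(It);
    if (I->DescribedValue)
      for (std::list<const Inst *>::iterator J = Block.begin();
           J != Block.end(); ++J)
        if (*J == I->DescribedValue) {
          Block.erase(It);               // move the marker...
          Block.insert(std::next(J), I); // ...to just after its value
          break;
        }
    It = Next;
  }
}

int main() {
  Inst A("a"), B("b"), Dbg("dbg.value", &A);
  std::list<const Inst *> Block;
  Block.push_back(&A);
  Block.push_back(&B);
  Block.push_back(&Dbg);
  placeDbgValues(Block); // order becomes a, dbg.value, b
  return (*std::next(Block.begin()))->Name == "dbg.value" ? 0 : 1;
}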
diff --git a/lib/Transforms/Scalar/DeadStoreElimination.cpp b/lib/Transforms/Scalar/DeadStoreElimination.cpp
index cb9b5be..a593d0f 100644
--- a/lib/Transforms/Scalar/DeadStoreElimination.cpp
+++ b/lib/Transforms/Scalar/DeadStoreElimination.cpp
@@ -52,18 +52,18 @@ namespace {
AA = &getAnalysis<AliasAnalysis>();
MD = &getAnalysis<MemoryDependenceAnalysis>();
DominatorTree &DT = getAnalysis<DominatorTree>();
-
+
bool Changed = false;
for (Function::iterator I = F.begin(), E = F.end(); I != E; ++I)
// Only check non-dead blocks. Dead blocks may have strange pointer
// cycles that will confuse alias analysis.
if (DT.isReachableFromEntry(I))
Changed |= runOnBasicBlock(*I);
-
+
AA = 0; MD = 0;
return Changed;
}
-
+
bool runOnBasicBlock(BasicBlock &BB);
bool HandleFree(CallInst *F);
bool handleEndBlock(BasicBlock &BB);
@@ -105,34 +105,34 @@ static void DeleteDeadInstruction(Instruction *I,
MemoryDependenceAnalysis &MD,
SmallPtrSet<Value*, 16> *ValueSet = 0) {
SmallVector<Instruction*, 32> NowDeadInsts;
-
+
NowDeadInsts.push_back(I);
--NumFastOther;
-
+
// Before we touch this instruction, remove it from memdep!
do {
Instruction *DeadInst = NowDeadInsts.pop_back_val();
++NumFastOther;
-
+
// This instruction is dead, zap it, in stages. Start by removing it from
// MemDep, which needs to know the operands and needs it to be in the
// function.
MD.removeInstruction(DeadInst);
-
+
for (unsigned op = 0, e = DeadInst->getNumOperands(); op != e; ++op) {
Value *Op = DeadInst->getOperand(op);
DeadInst->setOperand(op, 0);
-
+
// If this operand just became dead, add it to the NowDeadInsts list.
if (!Op->use_empty()) continue;
-
+
if (Instruction *OpI = dyn_cast<Instruction>(Op))
if (isInstructionTriviallyDead(OpI))
NowDeadInsts.push_back(OpI);
}
-
+
DeadInst->eraseFromParent();
-
+
if (ValueSet) ValueSet->erase(DeadInst);
} while (!NowDeadInsts.empty());
}
@@ -159,11 +159,13 @@ static bool hasMemoryWrite(Instruction *I) {
}
/// getLocForWrite - Return a Location stored to by the specified instruction.
+/// If isRemovable returns true, this function and getLocForRead completely
+/// describe the memory operations for this instruction.
static AliasAnalysis::Location
getLocForWrite(Instruction *Inst, AliasAnalysis &AA) {
if (StoreInst *SI = dyn_cast<StoreInst>(Inst))
return AA.getLocation(SI);
-
+
if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(Inst)) {
// memcpy/memmove/memset.
AliasAnalysis::Location Loc = AA.getLocationForDest(MI);
@@ -174,10 +176,10 @@ getLocForWrite(Instruction *Inst, AliasAnalysis &AA) {
return AliasAnalysis::Location();
return Loc;
}
-
+
IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst);
if (II == 0) return AliasAnalysis::Location();
-
+
switch (II->getIntrinsicID()) {
default: return AliasAnalysis::Location(); // Unhandled intrinsic.
case Intrinsic::init_trampoline:
@@ -185,7 +187,7 @@ getLocForWrite(Instruction *Inst, AliasAnalysis &AA) {
// that we should use the size of the pointee type. This isn't valid for
// init.trampoline, which writes more than an i8.
if (AA.getTargetData() == 0) return AliasAnalysis::Location();
-
+
// FIXME: We don't know the size of the trampoline, so we can't really
// handle it here.
return AliasAnalysis::Location(II->getArgOperand(0));
@@ -198,10 +200,10 @@ getLocForWrite(Instruction *Inst, AliasAnalysis &AA) {
/// getLocForRead - Return the location read by the specified "hasMemoryWrite"
/// instruction if any.
-static AliasAnalysis::Location
+static AliasAnalysis::Location
getLocForRead(Instruction *Inst, AliasAnalysis &AA) {
assert(hasMemoryWrite(Inst) && "Unknown instruction case");
-
+
// The only instructions that both read and write are the mem transfer
// instructions (memcpy/memmove).
if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(Inst))
@@ -213,10 +215,10 @@ getLocForRead(Instruction *Inst, AliasAnalysis &AA) {
/// isRemovable - If the value of this instruction and the memory it writes to
/// is unused, may we delete this instruction?
static bool isRemovable(Instruction *I) {
- // Don't remove volatile stores.
+ // Don't remove volatile/atomic stores.
if (StoreInst *SI = dyn_cast<StoreInst>(I))
- return !SI->isVolatile();
-
+ return SI->isUnordered();
+
IntrinsicInst *II = cast<IntrinsicInst>(I);
switch (II->getIntrinsicID()) {
default: assert(0 && "doesn't pass 'hasMemoryWrite' predicate");
@@ -227,7 +229,7 @@ static bool isRemovable(Instruction *I) {
case Intrinsic::init_trampoline:
// Always safe to remove init_trampoline.
return true;
-
+
case Intrinsic::memset:
case Intrinsic::memmove:
case Intrinsic::memcpy:
@@ -255,16 +257,16 @@ static uint64_t getPointerSize(Value *V, AliasAnalysis &AA) {
const TargetData *TD = AA.getTargetData();
if (TD == 0)
return AliasAnalysis::UnknownSize;
-
+
if (AllocaInst *A = dyn_cast<AllocaInst>(V)) {
// Get size information for the alloca
if (ConstantInt *C = dyn_cast<ConstantInt>(A->getArraySize()))
return C->getZExtValue() * TD->getTypeAllocSize(A->getAllocatedType());
return AliasAnalysis::UnknownSize;
}
-
+
assert(isa<Argument>(V) && "Expected AllocaInst or Argument!");
- const PointerType *PT = cast<PointerType>(V->getType());
+ PointerType *PT = cast<PointerType>(V->getType());
return TD->getTypeAllocSize(PT->getElementType());
}
@@ -287,7 +289,7 @@ static bool isCompleteOverwrite(const AliasAnalysis::Location &Later,
AliasAnalysis &AA) {
const Value *P1 = Earlier.Ptr->stripPointerCasts();
const Value *P2 = Later.Ptr->stripPointerCasts();
-
+
// If the start pointers are the same, we just have to compare sizes to see if
// the later store was larger than the earlier store.
if (P1 == P2) {
@@ -302,33 +304,33 @@ static bool isCompleteOverwrite(const AliasAnalysis::Location &Later,
return Later.Ptr->getType() == Earlier.Ptr->getType();
return false;
}
-
+
// Make sure that the Later size is >= the Earlier size.
if (Later.Size < Earlier.Size)
return false;
return true;
}
-
+
// Otherwise, we have to have size information, and the later store has to be
// larger than the earlier one.
if (Later.Size == AliasAnalysis::UnknownSize ||
Earlier.Size == AliasAnalysis::UnknownSize ||
Later.Size <= Earlier.Size || AA.getTargetData() == 0)
return false;
-
+
// Check to see if the later store is to the entire object (either a global,
// an alloca, or a byval argument). If so, then it clearly overwrites any
// other store to the same object.
const TargetData &TD = *AA.getTargetData();
-
+
const Value *UO1 = GetUnderlyingObject(P1, &TD),
*UO2 = GetUnderlyingObject(P2, &TD);
-
+
// If we can't resolve the same pointers to the same object, then we can't
// analyze them at all.
if (UO1 != UO2)
return false;
-
+
// If the "Later" store is to a recognizable object, get its size.
if (isObjectPointerWithTrustworthySize(UO2)) {
uint64_t ObjectSize =
@@ -336,26 +338,26 @@ static bool isCompleteOverwrite(const AliasAnalysis::Location &Later,
if (ObjectSize == Later.Size)
return true;
}
-
+
// Okay, we have stores to two completely different pointers. Try to
// decompose the pointer into a "base + constant_offset" form. If the base
// pointers are equal, then we can reason about the two stores.
int64_t EarlierOff = 0, LaterOff = 0;
const Value *BP1 = GetPointerBaseWithConstantOffset(P1, EarlierOff, TD);
const Value *BP2 = GetPointerBaseWithConstantOffset(P2, LaterOff, TD);
-
+
// If the base pointers still differ, we have two completely different stores.
if (BP1 != BP2)
return false;
// The later store completely overlaps the earlier store if:
- //
+ //
// 1. Both start at the same offset and the later one's size is greater than
// or equal to the earlier one's, or
//
// |--earlier--|
// |-- later --|
- //
+ //
// 2. The earlier store has an offset greater than the later offset, but which
// still lies completely within the later store.
//
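// A minimal standalone check of the two overlap cases pictured above, using
// plain byte offsets and sizes: the later store completely covers the
// earlier one exactly when it starts no later and ends no earlier.
#include <cstdint>

bool laterCompletelyOverwrites(int64_t EarlierOff, int64_t EarlierSize,
                               int64_t LaterOff, int64_t LaterSize) {
  return LaterOff <= EarlierOff &&
         LaterOff + LaterSize >= EarlierOff + EarlierSize;
}

int main() {
  // Case 1: same start, later store at least as large.
  if (!laterCompletelyOverwrites(0, 8, 0, 8)) return 1;
  // Case 2: earlier store starts inside and ends inside the later one.
  if (!laterCompletelyOverwrites(4, 4, 0, 12)) return 1;
  // Partial overlap only: not a complete overwrite.
  if (laterCompletelyOverwrites(0, 8, 4, 8)) return 1;
  return 0;
}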
@@ -373,7 +375,7 @@ static bool isCompleteOverwrite(const AliasAnalysis::Location &Later,
/// isPossibleSelfRead - If 'Inst' might be a self read (i.e. a noop copy of a
/// memory region into an identical pointer) then it doesn't actually make its
-/// input dead in the traditional sense. Consider this case:
+/// input dead in the traditional sense. Consider this case:
///
/// memcpy(A <- B)
/// memcpy(A <- A)
@@ -391,10 +393,10 @@ static bool isPossibleSelfRead(Instruction *Inst,
// location read.
AliasAnalysis::Location InstReadLoc = getLocForRead(Inst, AA);
if (InstReadLoc.Ptr == 0) return false; // Not a reading instruction.
-
+
// If the read and written loc obviously don't alias, it isn't a read.
if (AA.isNoAlias(InstReadLoc, InstStoreLoc)) return false;
-
+
// Okay, 'Inst' may copy over itself. However, we can still remove the
// DepWrite instruction if we can prove that it reads from the same location
// as Inst. This handles useful cases like:
@@ -404,10 +406,10 @@ static bool isPossibleSelfRead(Instruction *Inst,
// aliases, so removing the first memcpy is safe (assuming it writes <= #
// bytes as the second one).
AliasAnalysis::Location DepReadLoc = getLocForRead(DepWrite, AA);
-
+
if (DepReadLoc.Ptr && AA.isMustAlias(InstReadLoc.Ptr, DepReadLoc.Ptr))
return false;
-
+
// If DepWrite doesn't read memory or if we can't prove it is a must alias,
// then it can't be considered dead.
return true;
@@ -420,43 +422,43 @@ static bool isPossibleSelfRead(Instruction *Inst,
bool DSE::runOnBasicBlock(BasicBlock &BB) {
bool MadeChange = false;
-
+
// Do a top-down walk on the BB.
for (BasicBlock::iterator BBI = BB.begin(), BBE = BB.end(); BBI != BBE; ) {
Instruction *Inst = BBI++;
-
+
// Handle 'free' calls specially.
if (CallInst *F = isFreeCall(Inst)) {
MadeChange |= HandleFree(F);
continue;
}
-
+
// If we find something that writes memory, get its memory dependence.
if (!hasMemoryWrite(Inst))
continue;
MemDepResult InstDep = MD->getDependency(Inst);
-
+
// Ignore any store where we can't find a local dependence.
// FIXME: cross-block DSE would be fun. :)
- if (InstDep.isNonLocal() || InstDep.isUnknown())
+ if (!InstDep.isDef() && !InstDep.isClobber())
continue;
-
+
// If we're storing the same value back to a pointer that we just
// loaded from, then the store can be removed.
if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
if (LoadInst *DepLoad = dyn_cast<LoadInst>(InstDep.getInst())) {
if (SI->getPointerOperand() == DepLoad->getPointerOperand() &&
- SI->getOperand(0) == DepLoad && !SI->isVolatile()) {
+ SI->getOperand(0) == DepLoad && isRemovable(SI)) {
DEBUG(dbgs() << "DSE: Remove Store Of Load from same pointer:\n "
<< "LOAD: " << *DepLoad << "\n STORE: " << *SI << '\n');
-
+
// DeleteDeadInstruction can delete the current instruction. Save BBI
// in case we need it.
WeakVH NextInst(BBI);
-
+
DeleteDeadInstruction(SI, *MD);
-
+
if (NextInst == 0) // Next instruction deleted.
BBI = BB.begin();
else if (BBI != BB.begin()) // Revisit this instruction if possible.
@@ -467,15 +469,15 @@ bool DSE::runOnBasicBlock(BasicBlock &BB) {
}
}
}
-
+
// Figure out what location is being stored to.
AliasAnalysis::Location Loc = getLocForWrite(Inst, *AA);
// If we didn't get a useful location, fail.
if (Loc.Ptr == 0)
continue;
-
- while (!InstDep.isNonLocal() && !InstDep.isUnknown()) {
+
+ while (InstDep.isDef() || InstDep.isClobber()) {
// Get the memory clobbered by the instruction we depend on. MemDep will
// skip any instructions that 'Loc' clearly doesn't interact with. If we
// end up depending on a may- or must-aliased load, then we can't optimize
@@ -496,12 +498,12 @@ bool DSE::runOnBasicBlock(BasicBlock &BB) {
!isPossibleSelfRead(Inst, Loc, DepWrite, *AA)) {
DEBUG(dbgs() << "DSE: Remove Dead Store:\n DEAD: "
<< *DepWrite << "\n KILLER: " << *Inst << '\n');
-
+
// Delete the store and now-dead instructions that feed it.
DeleteDeadInstruction(DepWrite, *MD);
++NumFastStores;
MadeChange = true;
-
+
// DeleteDeadInstruction can delete the current instruction in loop
// cases, reset BBI.
BBI = Inst;
@@ -509,7 +511,7 @@ bool DSE::runOnBasicBlock(BasicBlock &BB) {
--BBI;
break;
}
-
+
// If this is a may-aliased store that is clobbering the store value, we
// can keep searching past it for another must-aliased pointer that stores
// to the same location. For example, in:
@@ -519,20 +521,20 @@ bool DSE::runOnBasicBlock(BasicBlock &BB) {
// we can remove the first store to P even though we don't know if P and Q
// alias.
if (DepWrite == &BB.front()) break;
-
+
// Can't look past this instruction if it might read 'Loc'.
if (AA->getModRefInfo(DepWrite, Loc) & AliasAnalysis::Ref)
break;
-
+
InstDep = MD->getPointerDependencyFrom(Loc, false, DepWrite, &BB);
}
}
-
+
// If this block ends in a return, unwind, or unreachable, all allocas are
// dead at its end, which means stores to them are also dead.
if (BB.getTerminator()->getNumSuccessors() == 0)
MadeChange |= handleEndBlock(BB);
-
+
return MadeChange;
}
@@ -543,18 +545,18 @@ bool DSE::HandleFree(CallInst *F) {
MemDepResult Dep = MD->getDependency(F);
- while (!Dep.isNonLocal() && !Dep.isUnknown()) {
+ while (Dep.isDef() || Dep.isClobber()) {
Instruction *Dependency = Dep.getInst();
if (!hasMemoryWrite(Dependency) || !isRemovable(Dependency))
return MadeChange;
-
+
Value *DepPointer =
GetUnderlyingObject(getStoredPointerOperand(Dependency));
// Check for aliasing.
if (!AA->isMustAlias(F->getArgOperand(0), DepPointer))
return MadeChange;
-
+
// DCE instructions only used to calculate that store
DeleteDeadInstruction(Dependency, *MD);
++NumFastStores;
@@ -567,7 +569,7 @@ bool DSE::HandleFree(CallInst *F) {
// free(s);
Dep = MD->getDependency(F);
};
-
+
return MadeChange;
}
@@ -579,28 +581,28 @@ bool DSE::HandleFree(CallInst *F) {
/// ret void
bool DSE::handleEndBlock(BasicBlock &BB) {
bool MadeChange = false;
-
+
// Keep track of all of the stack objects that are dead at the end of the
// function.
SmallPtrSet<Value*, 16> DeadStackObjects;
-
+
// Find all of the alloca'd pointers in the entry block.
BasicBlock *Entry = BB.getParent()->begin();
for (BasicBlock::iterator I = Entry->begin(), E = Entry->end(); I != E; ++I)
if (AllocaInst *AI = dyn_cast<AllocaInst>(I))
DeadStackObjects.insert(AI);
-
+
// Treat byval arguments the same, stores to them are dead at the end of the
// function.
for (Function::arg_iterator AI = BB.getParent()->arg_begin(),
AE = BB.getParent()->arg_end(); AI != AE; ++AI)
if (AI->hasByValAttr())
DeadStackObjects.insert(AI);
-
+
// Scan the basic block backwards
for (BasicBlock::iterator BBI = BB.end(); BBI != BB.begin(); ){
--BBI;
-
+
// If we find a store, check to see if it points into a dead stack value.
if (hasMemoryWrite(BBI) && isRemovable(BBI)) {
// See through pointer-to-pointer bitcasts
@@ -609,10 +611,10 @@ bool DSE::handleEndBlock(BasicBlock &BB) {
// Stores to stack values are valid candidates for removal.
if (DeadStackObjects.count(Pointer)) {
Instruction *Dead = BBI++;
-
+
DEBUG(dbgs() << "DSE: Dead Store at End of Block:\n DEAD: "
<< *Dead << "\n Object: " << *Pointer << '\n');
-
+
// DCE instructions only used to calculate that store.
DeleteDeadInstruction(Dead, *MD, &DeadStackObjects);
++NumFastStores;
@@ -620,7 +622,7 @@ bool DSE::handleEndBlock(BasicBlock &BB) {
continue;
}
}
-
+
// Remove any dead non-memory-mutating instructions.
if (isInstructionTriviallyDead(BBI)) {
Instruction *Inst = BBI++;
@@ -629,55 +631,61 @@ bool DSE::handleEndBlock(BasicBlock &BB) {
MadeChange = true;
continue;
}
-
+
if (AllocaInst *A = dyn_cast<AllocaInst>(BBI)) {
DeadStackObjects.erase(A);
continue;
}
-
+
if (CallSite CS = cast<Value>(BBI)) {
// If this call does not access memory, it can't be loading any of our
// pointers.
if (AA->doesNotAccessMemory(CS))
continue;
-
+
// If the call might load from any of our allocas, then any store above
// the call is live.
SmallVector<Value*, 8> LiveAllocas;
for (SmallPtrSet<Value*, 16>::iterator I = DeadStackObjects.begin(),
E = DeadStackObjects.end(); I != E; ++I) {
// See if the call site touches it.
- AliasAnalysis::ModRefResult A =
+ AliasAnalysis::ModRefResult A =
AA->getModRefInfo(CS, *I, getPointerSize(*I, *AA));
-
+
if (A == AliasAnalysis::ModRef || A == AliasAnalysis::Ref)
LiveAllocas.push_back(*I);
}
-
+
for (SmallVector<Value*, 8>::iterator I = LiveAllocas.begin(),
E = LiveAllocas.end(); I != E; ++I)
DeadStackObjects.erase(*I);
-
+
// If all of the allocas were clobbered by the call then we're not going
// to find anything else to process.
if (DeadStackObjects.empty())
return MadeChange;
-
+
continue;
}
-
+
AliasAnalysis::Location LoadedLoc;
-
+
// If we encounter a use of the pointer, it is no longer considered dead
if (LoadInst *L = dyn_cast<LoadInst>(BBI)) {
+ if (!L->isUnordered()) // Be conservative with atomic/volatile load
+ break;
LoadedLoc = AA->getLocation(L);
} else if (VAArgInst *V = dyn_cast<VAArgInst>(BBI)) {
LoadedLoc = AA->getLocation(V);
} else if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(BBI)) {
LoadedLoc = AA->getLocationForSource(MTI);
- } else {
- // Not a loading instruction.
+ } else if (!BBI->mayReadFromMemory()) {
+ // Instruction doesn't read memory. Note that stores that weren't removed
+ // above will hit this case.
continue;
+ } else {
+ // Unknown inst; assume it clobbers everything.
+ break;
}
// Remove any allocas from the DeadPointer set that are loaded, as this
@@ -689,7 +697,7 @@ bool DSE::handleEndBlock(BasicBlock &BB) {
if (DeadStackObjects.empty())
break;
}
-
+
return MadeChange;
}
@@ -703,14 +711,14 @@ void DSE::RemoveAccessedObjects(const AliasAnalysis::Location &LoadedLoc,
// A constant can't be in the dead pointer set.
if (isa<Constant>(UnderlyingPointer))
return;
-
+
// If the kill pointer can be easily reduced to an alloca, don't bother doing
// extraneous AA queries.
if (isa<AllocaInst>(UnderlyingPointer) || isa<Argument>(UnderlyingPointer)) {
DeadStackObjects.erase(const_cast<Value*>(UnderlyingPointer));
return;
}
-
+
SmallVector<Value*, 16> NowLive;
for (SmallPtrSet<Value*, 16>::iterator I = DeadStackObjects.begin(),
E = DeadStackObjects.end(); I != E; ++I) {
diff --git a/lib/Transforms/Scalar/EarlyCSE.cpp b/lib/Transforms/Scalar/EarlyCSE.cpp
index 3d3f17b..c0223d2 100644
--- a/lib/Transforms/Scalar/EarlyCSE.cpp
+++ b/lib/Transforms/Scalar/EarlyCSE.cpp
@@ -92,7 +92,7 @@ unsigned DenseMapInfo<SimpleValue>::getHashValue(SimpleValue Val) {
// Hash in all of the operands as pointers.
unsigned Res = 0;
for (unsigned i = 0, e = Inst->getNumOperands(); i != e; ++i)
- Res ^= getHash(Inst->getOperand(i)) << i;
+ Res ^= getHash(Inst->getOperand(i)) << (i & 0xF);
if (CastInst *CI = dyn_cast<CastInst>(Inst))
Res ^= getHash(CI->getType());
@@ -185,7 +185,7 @@ unsigned DenseMapInfo<CallValue>::getHashValue(CallValue Val) {
for (unsigned i = 0, e = Inst->getNumOperands(); i != e; ++i) {
assert(!Inst->getOperand(i)->getType()->isMetadataTy() &&
"Cannot value number calls with metadata operands");
- Res ^= getHash(Inst->getOperand(i)) << i;
+ Res ^= getHash(Inst->getOperand(i)) << (i & 0xF);
}
// Mix in the opcode.
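// A minimal standalone sketch of why the shift above is masked: shifting a
// 32-bit value by an amount >= 32 is undefined behaviour in C++, and calls
// or PHIs can easily have more than 32 operands.  Masking with 0xF keeps
// every shift in the range 0..15 while still spreading operand hashes
// across different bits.
#include <cstddef>
#include <cstdint>
#include <vector>

uint32_t mixOperandHashes(const std::vector<uint32_t> &OperandHashes) {
  uint32_t Res = 0;
  for (std::size_t I = 0; I != OperandHashes.size(); ++I)
    Res ^= OperandHashes[I] << (I & 0xF); // shift amount always in range
  return Res;
}

int main() {
  std::vector<uint32_t> Hashes(40, 1u); // 40 operands: unmasked would shift by 39
  return mixOperandHashes(Hashes) != 0 ? 0 : 1;
}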
@@ -357,7 +357,7 @@ bool EarlyCSE::processNode(DomTreeNode *Node) {
// If this is a non-volatile load, process it.
if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) {
// Ignore volatile loads.
- if (LI->isVolatile()) {
+ if (!LI->isSimple()) {
LastStore = 0;
continue;
}
@@ -437,7 +437,7 @@ bool EarlyCSE::processNode(DomTreeNode *Node) {
std::pair<Value*, unsigned>(SI->getValueOperand(), CurrentGeneration));
// Remember that this was the last store we saw for DSE.
- if (!SI->isVolatile())
+ if (SI->isSimple())
LastStore = SI;
}
}
diff --git a/lib/Transforms/Scalar/GVN.cpp b/lib/Transforms/Scalar/GVN.cpp
index 87b7317..cbfdbcd 100644
--- a/lib/Transforms/Scalar/GVN.cpp
+++ b/lib/Transforms/Scalar/GVN.cpp
@@ -41,12 +41,16 @@
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/IRBuilder.h"
+#include "llvm/Support/PatternMatch.h"
using namespace llvm;
+using namespace PatternMatch;
STATISTIC(NumGVNInstr, "Number of instructions deleted");
STATISTIC(NumGVNLoad, "Number of loads deleted");
STATISTIC(NumGVNPRE, "Number of instructions PRE'd");
STATISTIC(NumGVNBlocks, "Number of blocks merged");
+STATISTIC(NumGVNSimpl, "Number of instructions simplified");
+STATISTIC(NumGVNEqProp, "Number of equalities propagated");
STATISTIC(NumPRELoad, "Number of loads PRE'd");
static cl::opt<bool> EnablePRE("enable-pre",
@@ -63,7 +67,7 @@ static cl::opt<bool> EnableLoadPRE("enable-load-pre", cl::init(true));
namespace {
struct Expression {
uint32_t opcode;
- const Type *type;
+ Type *type;
SmallVector<uint32_t, 4> varargs;
Expression(uint32_t o = ~2U) : opcode(o) { }
@@ -548,6 +552,9 @@ namespace {
void cleanupGlobalSets();
void verifyRemoved(const Instruction *I) const;
bool splitCriticalEdges();
+ unsigned replaceAllDominatedUsesWith(Value *From, Value *To,
+ BasicBlock *Root);
+ bool propagateEquality(Value *LHS, Value *RHS, BasicBlock *Root);
};
char GVN::ID = 0;
@@ -655,7 +662,7 @@ SpeculationFailure:
/// CanCoerceMustAliasedValueToLoad - Return true if
/// CoerceAvailableValueToLoadType will succeed.
static bool CanCoerceMustAliasedValueToLoad(Value *StoredVal,
- const Type *LoadTy,
+ Type *LoadTy,
const TargetData &TD) {
// If the loaded or stored value is a first-class array or struct, don't try
// to transform them. We need to be able to bitcast to integer.
@@ -680,17 +687,17 @@ static bool CanCoerceMustAliasedValueToLoad(Value *StoredVal,
///
/// If we can't do it, return null.
static Value *CoerceAvailableValueToLoadType(Value *StoredVal,
- const Type *LoadedTy,
+ Type *LoadedTy,
Instruction *InsertPt,
const TargetData &TD) {
if (!CanCoerceMustAliasedValueToLoad(StoredVal, LoadedTy, TD))
return 0;
// If this is already the right type, just return it.
- const Type *StoredValTy = StoredVal->getType();
+ Type *StoredValTy = StoredVal->getType();
- uint64_t StoreSize = TD.getTypeStoreSizeInBits(StoredValTy);
- uint64_t LoadSize = TD.getTypeStoreSizeInBits(LoadedTy);
+ uint64_t StoreSize = TD.getTypeSizeInBits(StoredValTy);
+ uint64_t LoadSize = TD.getTypeSizeInBits(LoadedTy);
// If the store and reload are the same size, we can always reuse it.
if (StoreSize == LoadSize) {
@@ -704,7 +711,7 @@ static Value *CoerceAvailableValueToLoadType(Value *StoredVal,
StoredVal = new PtrToIntInst(StoredVal, StoredValTy, "", InsertPt);
}
- const Type *TypeToCastTo = LoadedTy;
+ Type *TypeToCastTo = LoadedTy;
if (TypeToCastTo->isPointerTy())
TypeToCastTo = TD.getIntPtrType(StoredValTy->getContext());
@@ -743,7 +750,7 @@ static Value *CoerceAvailableValueToLoadType(Value *StoredVal,
}
// Truncate the integer to the right size now.
- const Type *NewIntTy = IntegerType::get(StoredValTy->getContext(), LoadSize);
+ Type *NewIntTy = IntegerType::get(StoredValTy->getContext(), LoadSize);
StoredVal = new TruncInst(StoredVal, NewIntTy, "trunc", InsertPt);
if (LoadedTy == NewIntTy)
@@ -765,7 +772,7 @@ static Value *CoerceAvailableValueToLoadType(Value *StoredVal,
/// Check this case to see if there is anything more we can do before we give
/// up. This returns -1 if we have to give up, or a byte number in the stored
/// value of the piece that feeds the load.
-static int AnalyzeLoadFromClobberingWrite(const Type *LoadTy, Value *LoadPtr,
+static int AnalyzeLoadFromClobberingWrite(Type *LoadTy, Value *LoadPtr,
Value *WritePtr,
uint64_t WriteSizeInBits,
const TargetData &TD) {
@@ -839,7 +846,7 @@ static int AnalyzeLoadFromClobberingWrite(const Type *LoadTy, Value *LoadPtr,
/// AnalyzeLoadFromClobberingStore - This function is called when we have a
/// memdep query of a load that ends up being a clobbering store.
-static int AnalyzeLoadFromClobberingStore(const Type *LoadTy, Value *LoadPtr,
+static int AnalyzeLoadFromClobberingStore(Type *LoadTy, Value *LoadPtr,
StoreInst *DepSI,
const TargetData &TD) {
// Cannot handle reading from store of first-class aggregate yet.
@@ -856,7 +863,7 @@ static int AnalyzeLoadFromClobberingStore(const Type *LoadTy, Value *LoadPtr,
/// AnalyzeLoadFromClobberingLoad - This function is called when we have a
/// memdep query of a load that ends up being clobbered by another load. See if
/// the other load can feed into the second load.
-static int AnalyzeLoadFromClobberingLoad(const Type *LoadTy, Value *LoadPtr,
+static int AnalyzeLoadFromClobberingLoad(Type *LoadTy, Value *LoadPtr,
LoadInst *DepLI, const TargetData &TD){
// Cannot handle reading from store of first-class aggregate yet.
if (DepLI->getType()->isStructTy() || DepLI->getType()->isArrayTy())
@@ -883,7 +890,7 @@ static int AnalyzeLoadFromClobberingLoad(const Type *LoadTy, Value *LoadPtr,
-static int AnalyzeLoadFromClobberingMemInst(const Type *LoadTy, Value *LoadPtr,
+static int AnalyzeLoadFromClobberingMemInst(Type *LoadTy, Value *LoadPtr,
MemIntrinsic *MI,
const TargetData &TD) {
// If the mem operation is a non-constant size, we can't handle it.
@@ -920,7 +927,7 @@ static int AnalyzeLoadFromClobberingMemInst(const Type *LoadTy, Value *LoadPtr,
llvm::Type::getInt8PtrTy(Src->getContext()));
Constant *OffsetCst =
ConstantInt::get(Type::getInt64Ty(Src->getContext()), (unsigned)Offset);
- Src = ConstantExpr::getGetElementPtr(Src, &OffsetCst, 1);
+ Src = ConstantExpr::getGetElementPtr(Src, OffsetCst);
Src = ConstantExpr::getBitCast(Src, PointerType::getUnqual(LoadTy));
if (ConstantFoldLoadFromConstPtr(Src, &TD))
return Offset;
@@ -934,7 +941,7 @@ static int AnalyzeLoadFromClobberingMemInst(const Type *LoadTy, Value *LoadPtr,
/// mustalias. Check this case to see if there is anything more we can do
/// before we give up.
static Value *GetStoreValueForLoad(Value *SrcVal, unsigned Offset,
- const Type *LoadTy,
+ Type *LoadTy,
Instruction *InsertPt, const TargetData &TD){
LLVMContext &Ctx = SrcVal->getType()->getContext();
@@ -946,10 +953,9 @@ static Value *GetStoreValueForLoad(Value *SrcVal, unsigned Offset,
// Compute which bits of the stored value are being used by the load. Convert
// to an integer type to start with.
if (SrcVal->getType()->isPointerTy())
- SrcVal = Builder.CreatePtrToInt(SrcVal, TD.getIntPtrType(Ctx), "tmp");
+ SrcVal = Builder.CreatePtrToInt(SrcVal, TD.getIntPtrType(Ctx));
if (!SrcVal->getType()->isIntegerTy())
- SrcVal = Builder.CreateBitCast(SrcVal, IntegerType::get(Ctx, StoreSize*8),
- "tmp");
+ SrcVal = Builder.CreateBitCast(SrcVal, IntegerType::get(Ctx, StoreSize*8));
// Shift the bits to the least significant depending on endianness.
unsigned ShiftAmt;
@@ -959,11 +965,10 @@ static Value *GetStoreValueForLoad(Value *SrcVal, unsigned Offset,
ShiftAmt = (StoreSize-LoadSize-Offset)*8;
if (ShiftAmt)
- SrcVal = Builder.CreateLShr(SrcVal, ShiftAmt, "tmp");
+ SrcVal = Builder.CreateLShr(SrcVal, ShiftAmt);
if (LoadSize != StoreSize)
- SrcVal = Builder.CreateTrunc(SrcVal, IntegerType::get(Ctx, LoadSize*8),
- "tmp");
+ SrcVal = Builder.CreateTrunc(SrcVal, IntegerType::get(Ctx, LoadSize*8));
return CoerceAvailableValueToLoadType(SrcVal, LoadTy, InsertPt, TD);
}
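// A minimal standalone check of the shift computed above, assuming the
// usual convention: on little-endian targets the byte at offset N sits N*8
// bits up from the least significant end, while on big-endian targets it
// sits (StoreSize - LoadSize - Offset) * 8 bits up.
#include <cstdint>

uint64_t shiftAmtBits(uint64_t StoreSize, uint64_t LoadSize, uint64_t Offset,
                      bool LittleEndian) {
  return (LittleEndian ? Offset : StoreSize - LoadSize - Offset) * 8;
}

int main() {
  // 4-byte store feeding a 1-byte load at byte offset 1.
  if (shiftAmtBits(4, 1, 1, /*LittleEndian=*/true) != 8) return 1;
  if (shiftAmtBits(4, 1, 1, /*LittleEndian=*/false) != 16) return 1;
  return 0;
}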
@@ -974,7 +979,7 @@ static Value *GetStoreValueForLoad(Value *SrcVal, unsigned Offset,
/// because the pointers don't mustalias. Check this case to see if there is
/// anything more we can do before we give up.
static Value *GetLoadValueForLoad(LoadInst *SrcVal, unsigned Offset,
- const Type *LoadTy, Instruction *InsertPt,
+ Type *LoadTy, Instruction *InsertPt,
GVN &gvn) {
const TargetData &TD = *gvn.getTargetData();
// If Offset+LoadTy exceeds the size of SrcVal, then we must be wanting to
@@ -982,8 +987,8 @@ static Value *GetLoadValueForLoad(LoadInst *SrcVal, unsigned Offset,
unsigned SrcValSize = TD.getTypeStoreSize(SrcVal->getType());
unsigned LoadSize = TD.getTypeStoreSize(LoadTy);
if (Offset+LoadSize > SrcValSize) {
- assert(!SrcVal->isVolatile() && "Cannot widen volatile load!");
- assert(isa<IntegerType>(SrcVal->getType())&&"Can't widen non-integer load");
+ assert(SrcVal->isSimple() && "Cannot widen volatile/atomic load!");
+ assert(SrcVal->getType()->isIntegerTy() && "Can't widen non-integer load");
// If we have a load/load clobber and DepLI can be widened to cover this
// load, then we should widen it to the next power of 2 size big enough!
unsigned NewLoadSize = Offset+LoadSize;
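// A minimal standalone check of the widening arithmetic above: to also
// cover a load of LoadSize bytes at Offset inside an earlier load, the
// earlier load is grown to the next power of two holding Offset + LoadSize.
#include <cstdint>

uint64_t nextPowerOf2(uint64_t N) {
  uint64_t P = 1;
  while (P < N)
    P <<= 1;
  return P;
}

int main() {
  // A 4-byte load at offset 3 needs 7 bytes, so widen to 8.
  if (nextPowerOf2(3 + 4) != 8) return 1;
  // An exact power of two stays put.
  if (nextPowerOf2(8) != 8) return 1;
  return 0;
}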
@@ -996,7 +1001,7 @@ static Value *GetLoadValueForLoad(LoadInst *SrcVal, unsigned Offset,
// memdep queries will find the new load. We can't easily remove the old
// load completely because it is already in the value numbering table.
IRBuilder<> Builder(SrcVal->getParent(), ++BasicBlock::iterator(SrcVal));
- const Type *DestPTy =
+ Type *DestPTy =
IntegerType::get(LoadTy->getContext(), NewLoadSize*8);
DestPTy = PointerType::get(DestPTy,
cast<PointerType>(PtrVal->getType())->getAddressSpace());
@@ -1034,7 +1039,7 @@ static Value *GetLoadValueForLoad(LoadInst *SrcVal, unsigned Offset,
/// GetMemInstValueForLoad - This function is called when we have a
/// memdep query of a load that ends up being a clobbering mem intrinsic.
static Value *GetMemInstValueForLoad(MemIntrinsic *SrcInst, unsigned Offset,
- const Type *LoadTy, Instruction *InsertPt,
+ Type *LoadTy, Instruction *InsertPt,
const TargetData &TD){
LLVMContext &Ctx = LoadTy->getContext();
uint64_t LoadSize = TD.getTypeSizeInBits(LoadTy)/8;
@@ -1081,7 +1086,7 @@ static Value *GetMemInstValueForLoad(MemIntrinsic *SrcInst, unsigned Offset,
llvm::Type::getInt8PtrTy(Src->getContext()));
Constant *OffsetCst =
ConstantInt::get(Type::getInt64Ty(Src->getContext()), (unsigned)Offset);
- Src = ConstantExpr::getGetElementPtr(Src, &OffsetCst, 1);
+ Src = ConstantExpr::getGetElementPtr(Src, OffsetCst);
Src = ConstantExpr::getBitCast(Src, PointerType::getUnqual(LoadTy));
return ConstantFoldLoadFromConstPtr(Src, &TD);
}
@@ -1154,7 +1159,7 @@ struct AvailableValueInBlock {
/// MaterializeAdjustedValue - Emit code into this block to adjust the value
/// defined here to the specified type. This handles various coercion cases.
- Value *MaterializeAdjustedValue(const Type *LoadTy, GVN &gvn) const {
+ Value *MaterializeAdjustedValue(Type *LoadTy, GVN &gvn) const {
Value *Res;
if (isSimpleValue()) {
Res = getSimpleValue();
@@ -1213,7 +1218,7 @@ static Value *ConstructSSAForLoadSet(LoadInst *LI,
SSAUpdater SSAUpdate(&NewPHIs);
SSAUpdate.Initialize(LI->getType(), LI->getName());
- const Type *LoadTy = LI->getType();
+ Type *LoadTy = LI->getType();
for (unsigned i = 0, e = ValuesPerBlock.size(); i != e; ++i) {
const AvailableValueInBlock &AV = ValuesPerBlock[i];
@@ -1274,7 +1279,9 @@ bool GVN::processNonLocalLoad(LoadInst *LI) {
// If we had a phi translation failure, we'll have a single entry which is a
// clobber in the current block. Reject this early.
- if (Deps.size() == 1 && Deps[0].getResult().isUnknown()) {
+ if (Deps.size() == 1
+ && !Deps[0].getResult().isDef() && !Deps[0].getResult().isClobber())
+ {
DEBUG(
dbgs() << "GVN: non-local load ";
WriteAsOperand(dbgs(), LI);
@@ -1294,7 +1301,7 @@ bool GVN::processNonLocalLoad(LoadInst *LI) {
BasicBlock *DepBB = Deps[i].getBB();
MemDepResult DepInfo = Deps[i].getResult();
- if (DepInfo.isUnknown()) {
+ if (!DepInfo.isDef() && !DepInfo.isClobber()) {
UnavailableBlocks.push_back(DepBB);
continue;
}
@@ -1359,7 +1366,7 @@ bool GVN::processNonLocalLoad(LoadInst *LI) {
continue;
}
- assert(DepInfo.isDef() && "Expecting def here");
+ // DepInfo.isDef() is known to hold at this point.
Instruction *DepInst = DepInfo.getInst();
@@ -1446,8 +1453,8 @@ bool GVN::processNonLocalLoad(LoadInst *LI) {
for (unsigned i = 0, e = UnavailableBlocks.size(); i != e; ++i)
Blockers.insert(UnavailableBlocks[i]);
- // Lets find first basic block with more than one predecessor. Walk backwards
- // through predecessors if needed.
+ // Let's find the first basic block with more than one predecessor. Walk
+ // backwards through predecessors if needed.
BasicBlock *LoadBB = LI->getParent();
BasicBlock *TmpBB = LoadBB;
@@ -1519,10 +1526,19 @@ bool GVN::processNonLocalLoad(LoadInst *LI) {
<< Pred->getName() << "': " << *LI << '\n');
return false;
}
+
+ if (LoadBB->isLandingPad()) {
+ DEBUG(dbgs()
+ << "COULD NOT PRE LOAD BECAUSE OF LANDING PAD CRITICAL EDGE '"
+ << Pred->getName() << "': " << *LI << '\n');
+ return false;
+ }
+
unsigned SuccNum = GetSuccessorNumber(Pred, LoadBB);
NeedToSplit.push_back(std::make_pair(Pred->getTerminator(), SuccNum));
}
}
+
if (!NeedToSplit.empty()) {
toSplit.append(NeedToSplit.begin(), NeedToSplit.end());
return false;
@@ -1660,7 +1676,7 @@ bool GVN::processLoad(LoadInst *L) {
if (!MD)
return false;
- if (L->isVolatile())
+ if (!L->isSimple())
return false;
if (L->use_empty()) {
@@ -1747,7 +1763,11 @@ bool GVN::processLoad(LoadInst *L) {
return false;
}
- if (Dep.isUnknown()) {
+ // If it is defined in another block, try harder.
+ if (Dep.isNonLocal())
+ return processNonLocalLoad(L);
+
+ if (!Dep.isDef()) {
DEBUG(
// fast print dep, using operator<< on instruction is too slow.
dbgs() << "GVN: load ";
@@ -1757,12 +1777,6 @@ bool GVN::processLoad(LoadInst *L) {
return false;
}
- // If it is defined in another block, try harder.
- if (Dep.isNonLocal())
- return processNonLocalLoad(L);
-
- assert(Dep.isDef() && "Expecting def here");
-
Instruction *DepInst = Dep.getInst();
if (StoreInst *DepSI = dyn_cast<StoreInst>(DepInst)) {
Value *StoredVal = DepSI->getValueOperand();
@@ -1874,6 +1888,133 @@ Value *GVN::findLeader(BasicBlock *BB, uint32_t num) {
return Val;
}
+/// replaceAllDominatedUsesWith - Replace all uses of 'From' with 'To' if the
+/// use is dominated by the given basic block. Returns the number of uses that
+/// were replaced.
+unsigned GVN::replaceAllDominatedUsesWith(Value *From, Value *To,
+ BasicBlock *Root) {
+ unsigned Count = 0;
+ for (Value::use_iterator UI = From->use_begin(), UE = From->use_end();
+ UI != UE; ) {
+ Instruction *User = cast<Instruction>(*UI);
+ unsigned OpNum = UI.getOperandNo();
+ ++UI;
+
+ if (DT->dominates(Root, User->getParent())) {
+ User->setOperand(OpNum, To);
+ ++Count;
+ }
+ }
+ return Count;
+}
+
+/// propagateEquality - The given values are known to be equal in every block
+/// dominated by 'Root'. Exploit this, for example by replacing 'LHS' with
+/// 'RHS' everywhere in the scope. Returns whether a change was made.
+bool GVN::propagateEquality(Value *LHS, Value *RHS, BasicBlock *Root) {
+ if (LHS == RHS) return false;
+ assert(LHS->getType() == RHS->getType() && "Equal but types differ!");
+
+ // Don't try to propagate equalities between constants.
+ if (isa<Constant>(LHS) && isa<Constant>(RHS))
+ return false;
+
+ // Make sure that any constants are on the right-hand side. In general the
+ // best results are obtained by placing the longest lived value on the RHS.
+ if (isa<Constant>(LHS))
+ std::swap(LHS, RHS);
+
+ // If neither term is constant then bail out. This is not for correctness,
+ // it's just that the non-constant case is much less useful: it occurs just
+ // as often as the constant case but handling it hardly ever results in an
+ // improvement.
+ if (!isa<Constant>(RHS))
+ return false;
+
+ // If value numbering later deduces that an instruction in the scope is equal
+ // to 'LHS' then ensure it will be turned into 'RHS'.
+ addToLeaderTable(VN.lookup_or_add(LHS), RHS, Root);
+
+ // Replace all occurrences of 'LHS' with 'RHS' everywhere in the scope.
+ unsigned NumReplacements = replaceAllDominatedUsesWith(LHS, RHS, Root);
+ bool Changed = NumReplacements > 0;
+ NumGVNEqProp += NumReplacements;
+
+ // Now try to deduce additional equalities from this one. For example, if the
+ // known equality was "(A != B)" == "false" then it follows that A and B are
+ // equal in the scope. Only boolean equalities with an explicit true or false
+ // RHS are currently supported.
+ if (!RHS->getType()->isIntegerTy(1))
+ // Not a boolean equality - bail out.
+ return Changed;
+ ConstantInt *CI = dyn_cast<ConstantInt>(RHS);
+ if (!CI)
+ // RHS neither 'true' nor 'false' - bail out.
+ return Changed;
+ // Whether RHS equals 'true'. Otherwise it equals 'false'.
+ bool isKnownTrue = CI->isAllOnesValue();
+ bool isKnownFalse = !isKnownTrue;
+
+ // If "A && B" is known true then both A and B are known true. If "A || B"
+ // is known false then both A and B are known false.
+ Value *A, *B;
+ if ((isKnownTrue && match(LHS, m_And(m_Value(A), m_Value(B)))) ||
+ (isKnownFalse && match(LHS, m_Or(m_Value(A), m_Value(B))))) {
+ Changed |= propagateEquality(A, RHS, Root);
+ Changed |= propagateEquality(B, RHS, Root);
+ return Changed;
+ }
+
+ // If we are propagating an equality like "(A == B)" == "true" then also
+ // propagate the equality A == B.
+ if (ICmpInst *Cmp = dyn_cast<ICmpInst>(LHS)) {
+ // Only equality comparisons are supported.
+ if ((isKnownTrue && Cmp->getPredicate() == CmpInst::ICMP_EQ) ||
+ (isKnownFalse && Cmp->getPredicate() == CmpInst::ICMP_NE)) {
+ Value *Op0 = Cmp->getOperand(0), *Op1 = Cmp->getOperand(1);
+ Changed |= propagateEquality(Op0, Op1, Root);
+ }
+ return Changed;
+ }
+
+ return Changed;
+}
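// A minimal standalone sketch of the recursive deduction above: once an
// expression is known true or false, the fact is pushed into AND/OR
// operands ("A && B" known true forces both true; "A || B" known false
// forces both false), and an equality known true lets one side stand in
// for the other.
#include <map>
#include <string>

enum Kind { Leaf, And, Or };

struct Expr {
  Kind K;
  Expr *L, *R;
  std::string Name; // used for leaves only
};

void propagate(const Expr *E, bool Known, std::map<std::string, bool> &Facts) {
  if (E->K == Leaf) {
    Facts[E->Name] = Known;
    return;
  }
  if ((E->K == And && Known) || (E->K == Or && !Known)) {
    propagate(E->L, Known, Facts);
    propagate(E->R, Known, Facts);
  }
  // A false AND / true OR pins neither operand, so nothing is recorded.
}

int main() {
  Expr A = {Leaf, 0, 0, "A"};
  Expr B = {Leaf, 0, 0, "B"};
  Expr Cond = {And, &A, &B, ""};
  std::map<std::string, bool> Facts;
  propagate(&Cond, true, Facts); // on the edge where Cond is known true
  return (Facts["A"] && Facts["B"]) ? 0 : 1;
}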
+
+/// isOnlyReachableViaThisEdge - There is an edge from 'Src' to 'Dst'. Return
+/// true if every path from the entry block to 'Dst' passes via this edge. In
+/// particular 'Dst' must not be reachable via another edge from 'Src'.
+static bool isOnlyReachableViaThisEdge(BasicBlock *Src, BasicBlock *Dst,
+ DominatorTree *DT) {
+ // First off, there must not be more than one edge from Src to Dst; there
+ // should be exactly one. So keep track of the number of times Src occurs
+ // as a predecessor of Dst and fail if it's more than once. Secondly, any
+ // other predecessors of Dst should be dominated by Dst (see logic below).
+ bool SawEdgeFromSrc = false;
+ for (pred_iterator PI = pred_begin(Dst), PE = pred_end(Dst); PI != PE; ++PI) {
+ BasicBlock *Pred = *PI;
+ if (Pred == Src) {
+ // An edge from Src to Dst.
+ if (SawEdgeFromSrc)
+ // There are multiple edges from Src to Dst - fail.
+ return false;
+ SawEdgeFromSrc = true;
+ continue;
+ }
+ // If the predecessor is not dominated by Dst, then it must be possible to
+ // reach it either without passing through Src (and thus not via the edge)
+ // or by passing through Src but taking a different edge out of Src. Either
+ // way it is possible to reach Dst without passing via the edge, so fail.
+ if (!DT->dominates(Dst, *PI))
+ return false;
+ }
+ assert(SawEdgeFromSrc && "No edge between these basic blocks!");
+
+ // Every path from the entry block to Dst must at some point pass to Dst from
+ // a predecessor that is not dominated by Dst. This predecessor can only be
+ // Src, since all others are dominated by Dst. As there is only one edge from
+ // Src to Dst, the path passes by this edge.
+ return true;
+}
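// A minimal standalone sketch of the edge test above on a tiny graph, with
// preds(Dst) and dominates(Dst, Pred) flattened into plain data: Dst is
// only reachable via Src->Dst when Src appears exactly once among Dst's
// predecessors and every other predecessor is dominated by Dst (i.e. lies
// on a cycle back through Dst, such as a loop backedge).
#include <cstddef>
#include <vector>

bool onlyReachableViaEdge(int Src, const std::vector<int> &PredsOfDst,
                          const std::vector<bool> &DstDominatesPred) {
  bool SawEdgeFromSrc = false;
  for (std::size_t I = 0; I != PredsOfDst.size(); ++I) {
    if (PredsOfDst[I] == Src) {
      if (SawEdgeFromSrc)
        return false; // two edges from Src to Dst
      SawEdgeFromSrc = true;
      continue;
    }
    if (!DstDominatesPred[I])
      return false; // Dst reachable some other way
  }
  return SawEdgeFromSrc;
}

int main() {
  std::vector<int> Preds(2);
  std::vector<bool> Dom(2);
  // Dst's preds: {Src=1, Latch=2}; Dst dominates the latch (a backedge).
  Preds[0] = 1; Preds[1] = 2; Dom[0] = false; Dom[1] = true;
  if (!onlyReachableViaEdge(1, Preds, Dom)) return 1;
  // Dst's preds: {Src=1, Other=3}; Other is not dominated by Dst.
  Preds[1] = 3; Dom[1] = false;
  if (onlyReachableViaEdge(1, Preds, Dom)) return 1;
  return 0;
}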
/// processInstruction - When calculating availability, handle an instruction
/// by inserting it into the appropriate sets
@@ -1891,6 +2032,7 @@ bool GVN::processInstruction(Instruction *I) {
if (MD && V->getType()->isPointerTy())
MD->invalidateCachedPointerInfo(V);
markInstructionForDeletion(I);
+ ++NumGVNSimpl;
return true;
}
@@ -1903,30 +2045,45 @@ bool GVN::processInstruction(Instruction *I) {
return false;
}
- // For conditions branches, we can perform simple conditional propagation on
+ // For conditional branches, we can perform simple conditional propagation on
// the condition value itself.
if (BranchInst *BI = dyn_cast<BranchInst>(I)) {
if (!BI->isConditional() || isa<Constant>(BI->getCondition()))
return false;
-
+
Value *BranchCond = BI->getCondition();
- uint32_t CondVN = VN.lookup_or_add(BranchCond);
-
+
BasicBlock *TrueSucc = BI->getSuccessor(0);
BasicBlock *FalseSucc = BI->getSuccessor(1);
-
- if (TrueSucc->getSinglePredecessor())
- addToLeaderTable(CondVN,
- ConstantInt::getTrue(TrueSucc->getContext()),
- TrueSucc);
- if (FalseSucc->getSinglePredecessor())
- addToLeaderTable(CondVN,
- ConstantInt::getFalse(TrueSucc->getContext()),
- FalseSucc);
-
- return false;
+ BasicBlock *Parent = BI->getParent();
+ bool Changed = false;
+
+ if (isOnlyReachableViaThisEdge(Parent, TrueSucc, DT))
+ Changed |= propagateEquality(BranchCond,
+ ConstantInt::getTrue(TrueSucc->getContext()),
+ TrueSucc);
+
+ if (isOnlyReachableViaThisEdge(Parent, FalseSucc, DT))
+ Changed |= propagateEquality(BranchCond,
+ ConstantInt::getFalse(FalseSucc->getContext()),
+ FalseSucc);
+
+ return Changed;
}
-
+
+ // For switches, propagate the case values into the case destinations.
+ if (SwitchInst *SI = dyn_cast<SwitchInst>(I)) {
+ Value *SwitchCond = SI->getCondition();
+ BasicBlock *Parent = SI->getParent();
+ bool Changed = false;
+ for (unsigned i = 1, e = SI->getNumCases(); i != e; ++i) {
+ BasicBlock *Dst = SI->getSuccessor(i);
+ if (isOnlyReachableViaThisEdge(Parent, Dst, DT))
+ Changed |= propagateEquality(SwitchCond, SI->getCaseValue(i), Dst);
+ }
+ return Changed;
+ }
+
// Instructions with void type don't return a value, so there's
// no point in trying to find redudancies in them.
if (I->getType()->isVoidTy()) return false;
@@ -2071,6 +2228,9 @@ bool GVN::performPRE(Function &F) {
// Nothing to PRE in the entry block.
if (CurrentBlock == &F.getEntryBlock()) continue;
+ // Don't perform PRE on a landing pad.
+ if (CurrentBlock->isLandingPad()) continue;
+
for (BasicBlock::iterator BI = CurrentBlock->begin(),
BE = CurrentBlock->end(); BI != BE; ) {
Instruction *CurInst = BI++;
diff --git a/lib/Transforms/Scalar/IndVarSimplify.cpp b/lib/Transforms/Scalar/IndVarSimplify.cpp
index dee3d38..75fa011 100644
--- a/lib/Transforms/Scalar/IndVarSimplify.cpp
+++ b/lib/Transforms/Scalar/IndVarSimplify.cpp
@@ -11,17 +11,6 @@
// computations derived from them) into simpler forms suitable for subsequent
// analysis and transformation.
//
-// This transformation makes the following changes to each loop with an
-// identifiable induction variable:
-// 1. All loops are transformed to have a SINGLE canonical induction variable
-// which starts at zero and steps by one.
-// 2. The canonical induction variable is guaranteed to be the first PHI node
-// in the loop header block.
-// 3. The canonical induction variable is guaranteed to be in a wide enough
-// type so that IV expressions need not be (directly) zero-extended or
-// sign-extended.
-// 4. Any pointer arithmetic recurrences are raised to use array subscripts.
-//
// If the trip count of a loop is computable, this pass also makes the following
// changes:
// 1. The exit condition for the loop is canonicalized to compare the
@@ -33,9 +22,6 @@
// purpose of the loop is to compute the exit value of some derived
// expression, this transformation will make the loop dead.
//
-// This transformation should be followed by strength reduction after all of the
-// desired loop transformations have been performed.
-//
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "indvars"
@@ -57,11 +43,11 @@
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/SimplifyIndVar.h"
#include "llvm/Target/TargetData.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
-#include "llvm/ADT/STLExtras.h"
using namespace llvm;
STATISTIC(NumRemoved , "Number of aux indvars removed");
@@ -69,15 +55,21 @@ STATISTIC(NumWidened , "Number of indvars widened");
STATISTIC(NumInserted , "Number of canonical indvars added");
STATISTIC(NumReplaced , "Number of exit values replaced");
STATISTIC(NumLFTR , "Number of loop exit tests replaced");
-STATISTIC(NumElimIdentity, "Number of IV identities eliminated");
STATISTIC(NumElimExt , "Number of IV sign/zero extends eliminated");
-STATISTIC(NumElimRem , "Number of IV remainder operations eliminated");
-STATISTIC(NumElimCmp , "Number of IV comparisons eliminated");
STATISTIC(NumElimIV , "Number of congruent IVs eliminated");
-static cl::opt<bool> DisableIVRewrite(
- "disable-iv-rewrite", cl::Hidden,
- cl::desc("Disable canonical induction variable rewriting"));
+namespace llvm {
+ cl::opt<bool> EnableIVRewrite(
+ "enable-iv-rewrite", cl::Hidden,
+ cl::desc("Enable canonical induction variable rewriting"));
+
+ // Trip count verification can be enabled by default under NDEBUG if we
+ // implement a strong expression equivalence checker in SCEV. Until then, we
+ // use the verify-indvars flag, which may assert in some cases.
+ cl::opt<bool> VerifyIndvars(
+ "verify-indvars", cl::Hidden,
+ cl::desc("Verify the ScalarEvolution result after running indvars"));
+}
namespace {
class IndVarSimplify : public LoopPass {
@@ -105,12 +97,12 @@ namespace {
AU.addRequired<ScalarEvolution>();
AU.addRequiredID(LoopSimplifyID);
AU.addRequiredID(LCSSAID);
- if (!DisableIVRewrite)
+ if (EnableIVRewrite)
AU.addRequired<IVUsers>();
AU.addPreserved<ScalarEvolution>();
AU.addPreservedID(LoopSimplifyID);
AU.addPreservedID(LCSSAID);
- if (!DisableIVRewrite)
+ if (EnableIVRewrite)
AU.addPreserved<IVUsers>();
AU.setPreservesCFG();
}
@@ -125,24 +117,14 @@ namespace {
void HandleFloatingPointIV(Loop *L, PHINode *PH);
void RewriteNonIntegerIVs(Loop *L);
- void RewriteLoopExitValues(Loop *L, SCEVExpander &Rewriter);
-
- void SimplifyIVUsers(SCEVExpander &Rewriter);
- void SimplifyIVUsersNoRewrite(Loop *L, SCEVExpander &Rewriter);
+ void SimplifyAndExtend(Loop *L, SCEVExpander &Rewriter, LPPassManager &LPM);
- bool EliminateIVUser(Instruction *UseInst, Instruction *IVOperand);
- void EliminateIVComparison(ICmpInst *ICmp, Value *IVOperand);
- void EliminateIVRemainder(BinaryOperator *Rem,
- Value *IVOperand,
- bool IsSigned);
-
- void SimplifyCongruentIVs(Loop *L);
+ void RewriteLoopExitValues(Loop *L, SCEVExpander &Rewriter);
void RewriteIVExpressions(Loop *L, SCEVExpander &Rewriter);
- ICmpInst *LinearFunctionTestReplace(Loop *L, const SCEV *BackedgeTakenCount,
- PHINode *IndVar,
- SCEVExpander &Rewriter);
+ Value *LinearFunctionTestReplace(Loop *L, const SCEV *BackedgeTakenCount,
+ PHINode *IndVar, SCEVExpander &Rewriter);
void SinkUnusedInvariants(Loop *L);
};
@@ -211,6 +193,36 @@ bool IndVarSimplify::isValidRewrite(Value *FromVal, Value *ToVal) {
return true;
}
+/// Determine the insertion point for this user. By default, insert immediately
+/// before the user. SCEVExpander or LICM will hoist loop invariants out of the
+/// loop. For PHI nodes, there may be multiple uses, so compute the nearest
+/// common dominator for the incoming blocks.
+static Instruction *getInsertPointForUses(Instruction *User, Value *Def,
+ DominatorTree *DT) {
+ PHINode *PHI = dyn_cast<PHINode>(User);
+ if (!PHI)
+ return User;
+
+ Instruction *InsertPt = 0;
+ for (unsigned i = 0, e = PHI->getNumIncomingValues(); i != e; ++i) {
+ if (PHI->getIncomingValue(i) != Def)
+ continue;
+
+ BasicBlock *InsertBB = PHI->getIncomingBlock(i);
+ if (!InsertPt) {
+ InsertPt = InsertBB->getTerminator();
+ continue;
+ }
+ InsertBB = DT->findNearestCommonDominator(InsertPt->getParent(), InsertBB);
+ InsertPt = InsertBB->getTerminator();
+ }
+ assert(InsertPt && "Missing phi operand");
+ assert((!isa<Instruction>(Def) ||
+ DT->dominates(cast<Instruction>(Def), InsertPt)) &&
+ "def does not dominate all uses");
+ return InsertPt;
+}
+
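// For example (illustrative IR), if %def reaches the same phi along two edges:
//
//   %phi = phi i32 [ %def, %bb1 ], [ %def, %bb2 ]
//
// getInsertPointForUses() returns the terminator of the nearest common
// dominator of %bb1 and %bb2 -- the latest point that still dominates every
// incoming edge through which %def is used.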
//===----------------------------------------------------------------------===//
// RewriteNonIntegerIVs and helpers. Prefer integer IVs.
//===----------------------------------------------------------------------===//
@@ -337,14 +349,14 @@ void IndVarSimplify::HandleFloatingPointIV(Loop *L, PHINode *PN) {
// Positive and negative strides have different safety conditions.
if (IncValue > 0) {
// If we have a positive stride, we require the init to be less than the
- // exit value and an equality or less than comparison.
- if (InitValue >= ExitValue ||
- NewPred == CmpInst::ICMP_SGT || NewPred == CmpInst::ICMP_SGE)
+ // exit value.
+ if (InitValue >= ExitValue)
return;
uint32_t Range = uint32_t(ExitValue-InitValue);
- if (NewPred == CmpInst::ICMP_SLE) {
- // Normalize SLE -> SLT, check for infinite loop.
+ // Check for infinite loop, either:
+ // while (i <= Exit) or until (i > Exit)
+ if (NewPred == CmpInst::ICMP_SLE || NewPred == CmpInst::ICMP_SGT) {
if (++Range == 0) return; // Range overflows.
}
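      // Worked example (hypothetical values): InitValue == INT32_MIN and
      // ExitValue == INT32_MAX with "while (i <= Exit)" yields
      // Range == 0xffffffff; ++Range wraps to 0, so no 32-bit trip count
      // exists and the FP induction variable is left untouched.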
@@ -364,14 +376,14 @@ void IndVarSimplify::HandleFloatingPointIV(Loop *L, PHINode *PN) {
} else {
// If we have a negative stride, we require the init to be greater than the
- // exit value and an equality or greater than comparison.
- if (InitValue >= ExitValue ||
- NewPred == CmpInst::ICMP_SLT || NewPred == CmpInst::ICMP_SLE)
+ // exit value.
+ if (InitValue <= ExitValue)
return;
uint32_t Range = uint32_t(InitValue-ExitValue);
- if (NewPred == CmpInst::ICMP_SGE) {
- // Normalize SGE -> SGT, check for infinite loop.
+ // Check for infinite loop, either:
+ // while (i >= Exit) or until (i < Exit)
+ if (NewPred == CmpInst::ICMP_SGE || NewPred == CmpInst::ICMP_SLT) {
if (++Range == 0) return; // Range overflows.
}
@@ -390,7 +402,7 @@ void IndVarSimplify::HandleFloatingPointIV(Loop *L, PHINode *PN) {
return;
}
- const IntegerType *Int32Ty = Type::getInt32Ty(PN->getContext());
+ IntegerType *Int32Ty = Type::getInt32Ty(PN->getContext());
// Insert new integer induction variable.
PHINode *NewPHI = PHINode::Create(Int32Ty, 2, PN->getName()+".int", PN);
@@ -429,7 +441,7 @@ void IndVarSimplify::HandleFloatingPointIV(Loop *L, PHINode *PN) {
// platforms.
if (WeakPH) {
Value *Conv = new SIToFPInst(NewPHI, PN->getType(), "indvar.conv",
- PN->getParent()->getFirstNonPHI());
+ PN->getParent()->getFirstInsertionPt());
PN->replaceAllUsesWith(Conv);
RecursivelyDeleteTriviallyDeadInstructions(PN);
}
@@ -437,6 +449,8 @@ void IndVarSimplify::HandleFloatingPointIV(Loop *L, PHINode *PN) {
// Add a new IVUsers entry for the newly-created integer PHI.
if (IU)
IU->AddUsersIfInteresting(NewPHI);
+
+ Changed = true;
}
void IndVarSimplify::RewriteNonIntegerIVs(Loop *L) {
@@ -582,45 +596,15 @@ void IndVarSimplify::RewriteLoopExitValues(Loop *L, SCEVExpander &Rewriter) {
//===----------------------------------------------------------------------===//
// Rewrite IV users based on a canonical IV.
-// To be replaced by -disable-iv-rewrite.
+// Only for use with -enable-iv-rewrite.
//===----------------------------------------------------------------------===//
-/// SimplifyIVUsers - Iteratively perform simplification on IVUsers within this
-/// loop. IVUsers is treated as a worklist. Each successive simplification may
-/// push more users which may themselves be candidates for simplification.
-///
-/// This is the old approach to IV simplification to be replaced by
-/// SimplifyIVUsersNoRewrite.
-///
-void IndVarSimplify::SimplifyIVUsers(SCEVExpander &Rewriter) {
- // Each round of simplification involves a round of eliminating operations
- // followed by a round of widening IVs. A single IVUsers worklist is used
- // across all rounds. The inner loop advances the user. If widening exposes
- // more uses, then another pass through the outer loop is triggered.
- for (IVUsers::iterator I = IU->begin(); I != IU->end(); ++I) {
- Instruction *UseInst = I->getUser();
- Value *IVOperand = I->getOperandValToReplace();
-
- if (ICmpInst *ICmp = dyn_cast<ICmpInst>(UseInst)) {
- EliminateIVComparison(ICmp, IVOperand);
- continue;
- }
- if (BinaryOperator *Rem = dyn_cast<BinaryOperator>(UseInst)) {
- bool IsSigned = Rem->getOpcode() == Instruction::SRem;
- if (IsSigned || Rem->getOpcode() == Instruction::URem) {
- EliminateIVRemainder(Rem, IVOperand, IsSigned);
- continue;
- }
- }
- }
-}
-
-// FIXME: It is an extremely bad idea to indvar substitute anything more
-// complex than affine induction variables. Doing so will put expensive
-// polynomial evaluations inside of the loop, and the str reduction pass
-// currently can only reduce affine polynomials. For now just disable
-// indvar subst on anything more complex than an affine addrec, unless
-// it can be expanded to a trivial value.
+/// FIXME: It is an extremely bad idea to indvar substitute anything more
+/// complex than affine induction variables. Doing so will put expensive
+/// polynomial evaluations inside of the loop, and the strength reduction pass
+/// currently can only reduce affine polynomials. For now just disable
+/// indvar subst on anything more complex than an affine addrec, unless
+/// it can be expanded to a trivial value.
static bool isSafe(const SCEV *S, const Loop *L, ScalarEvolution *SE) {
// Loop-invariant values are safe.
if (SE->isLoopInvariant(S, L)) return true;
@@ -631,7 +615,8 @@ static bool isSafe(const SCEV *S, const Loop *L, ScalarEvolution *SE) {
return AR->isAffine();
  // An add is safe if all its operands are safe.
- if (const SCEVCommutativeExpr *Commutative = dyn_cast<SCEVCommutativeExpr>(S)) {
+ if (const SCEVCommutativeExpr *Commutative
+ = dyn_cast<SCEVCommutativeExpr>(S)) {
for (SCEVCommutativeExpr::op_iterator I = Commutative->op_begin(),
E = Commutative->op_end(); I != E; ++I)
if (!isSafe(*I, L, SE)) return false;
@@ -665,7 +650,7 @@ void IndVarSimplify::RewriteIVExpressions(Loop *L, SCEVExpander &Rewriter) {
// of different sizes.
for (IVUsers::iterator UI = IU->begin(), E = IU->end(); UI != E; ++UI) {
Value *Op = UI->getOperandValToReplace();
- const Type *UseTy = Op->getType();
+ Type *UseTy = Op->getType();
Instruction *User = UI->getUser();
// Compute the final addrec to expand into code.
@@ -692,18 +677,7 @@ void IndVarSimplify::RewriteIVExpressions(Loop *L, SCEVExpander &Rewriter) {
// hoist loop invariants out of the loop. For PHI nodes, there may be
// multiple uses, so compute the nearest common dominator for the
// incoming blocks.
- Instruction *InsertPt = User;
- if (PHINode *PHI = dyn_cast<PHINode>(InsertPt))
- for (unsigned i = 0, e = PHI->getNumIncomingValues(); i != e; ++i)
- if (PHI->getIncomingValue(i) == Op) {
- if (InsertPt == User)
- InsertPt = PHI->getIncomingBlock(i)->getTerminator();
- else
- InsertPt =
- DT->findNearestCommonDominator(InsertPt->getParent(),
- PHI->getIncomingBlock(i))
- ->getTerminator();
- }
+ Instruction *InsertPt = getInsertPointForUses(User, Op, DT);
// Now expand it into actual Instructions and patch it into place.
Value *NewVal = Rewriter.expandCodeFor(AR, UseTy, InsertPt);
@@ -747,19 +721,38 @@ namespace {
 // extend operations. This information is recorded by WideIVVisitor and
// provides the input to WidenIV.
struct WideIVInfo {
- const Type *WidestNativeType; // Widest integer type created [sz]ext
- bool IsSigned; // Was an sext user seen before a zext?
+ PHINode *NarrowIV;
+ Type *WidestNativeType; // Widest integer type created [sz]ext
+ bool IsSigned; // Was an sext user seen before a zext?
- WideIVInfo() : WidestNativeType(0), IsSigned(false) {}
+ WideIVInfo() : NarrowIV(0), WidestNativeType(0), IsSigned(false) {}
+ };
+
+ class WideIVVisitor : public IVVisitor {
+ ScalarEvolution *SE;
+ const TargetData *TD;
+
+ public:
+ WideIVInfo WI;
+
+ WideIVVisitor(PHINode *NarrowIV, ScalarEvolution *SCEV,
+ const TargetData *TData) :
+ SE(SCEV), TD(TData) { WI.NarrowIV = NarrowIV; }
+
+ // Implement the interface used by simplifyUsersOfIV.
+ virtual void visitCast(CastInst *Cast);
};
}
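// Illustrative wiring (this is how SimplifyAndExtend below uses the visitor):
//
//   WideIVVisitor WIV(CurrIV, SE, TD);
//   simplifyUsersOfIV(CurrIV, SE, &LPM, DeadInsts, &WIV);
//
// simplifyUsersOfIV() calls back into visitCast() for each sign/zero extend
// user it encounters, so WideIVInfo accumulates the widest extend type
// without this pass walking the def-use chains a second time.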
-/// CollectExtend - Update information about the induction variable that is
+/// visitCast - Update information about the induction variable that is
/// extended by this sign or zero extend operation. This is used to determine
/// the final width of the IV before actually widening it.
-static void CollectExtend(CastInst *Cast, bool IsSigned, WideIVInfo &WI,
- ScalarEvolution *SE, const TargetData *TD) {
- const Type *Ty = Cast->getType();
+void WideIVVisitor::visitCast(CastInst *Cast) {
+ bool IsSigned = Cast->getOpcode() == Instruction::SExt;
+ if (!IsSigned && Cast->getOpcode() != Instruction::ZExt)
+ return;
+
+ Type *Ty = Cast->getType();
uint64_t Width = SE->getTypeSizeInBits(Ty);
if (TD && !TD->isLegalInteger(Width))
return;
@@ -779,6 +772,21 @@ static void CollectExtend(CastInst *Cast, bool IsSigned, WideIVInfo &WI,
}
namespace {
+
+/// NarrowIVDefUse - Record a link in the Narrow IV def-use chain along with the
+/// WideIV that computes the same value as the Narrow IV def. This avoids
+/// caching Use* pointers.
+struct NarrowIVDefUse {
+ Instruction *NarrowDef;
+ Instruction *NarrowUse;
+ Instruction *WideDef;
+
+ NarrowIVDefUse(): NarrowDef(0), NarrowUse(0), WideDef(0) {}
+
+ NarrowIVDefUse(Instruction *ND, Instruction *NU, Instruction *WD):
+ NarrowDef(ND), NarrowUse(NU), WideDef(WD) {}
+};
+
/// WidenIV - The goal of this transform is to remove sign and zero extends
/// without creating any new induction variables. To do this, it creates a new
/// phi of the wider type and redirects all users, either removing extends or
@@ -787,7 +795,7 @@ namespace {
class WidenIV {
// Parameters
PHINode *OrigPhi;
- const Type *WideType;
+ Type *WideType;
bool IsSigned;
// Context
@@ -803,13 +811,13 @@ class WidenIV {
SmallVectorImpl<WeakVH> &DeadInsts;
SmallPtrSet<Instruction*,16> Widened;
- SmallVector<std::pair<Use *, Instruction *>, 8> NarrowIVUsers;
+ SmallVector<NarrowIVDefUse, 8> NarrowIVUsers;
public:
- WidenIV(PHINode *PN, const WideIVInfo &WI, LoopInfo *LInfo,
+ WidenIV(const WideIVInfo &WI, LoopInfo *LInfo,
ScalarEvolution *SEv, DominatorTree *DTree,
SmallVectorImpl<WeakVH> &DI) :
- OrigPhi(PN),
+ OrigPhi(WI.NarrowIV),
WideType(WI.WidestNativeType),
IsSigned(WI.IsSigned),
LI(LInfo),
@@ -826,21 +834,42 @@ public:
PHINode *CreateWideIV(SCEVExpander &Rewriter);
protected:
- Instruction *CloneIVUser(Instruction *NarrowUse,
- Instruction *NarrowDef,
- Instruction *WideDef);
+ Value *getExtend(Value *NarrowOper, Type *WideType, bool IsSigned,
+ Instruction *Use);
+
+ Instruction *CloneIVUser(NarrowIVDefUse DU);
const SCEVAddRecExpr *GetWideRecurrence(Instruction *NarrowUse);
- Instruction *WidenIVUse(Use &NarrowDefUse, Instruction *NarrowDef,
- Instruction *WideDef);
+ const SCEVAddRecExpr* GetExtendedOperandRecurrence(NarrowIVDefUse DU);
+
+ Instruction *WidenIVUse(NarrowIVDefUse DU);
void pushNarrowIVUsers(Instruction *NarrowDef, Instruction *WideDef);
};
} // anonymous namespace
-static Value *getExtend( Value *NarrowOper, const Type *WideType,
- bool IsSigned, IRBuilder<> &Builder) {
+/// isLoopInvariant - Perform a quick domtree based check for loop invariance
+/// assuming that V is used within the loop. LoopInfo::isLoopInvariant() seems
+/// gratuitous for this purpose.
+static bool isLoopInvariant(Value *V, const Loop *L, const DominatorTree *DT) {
+ Instruction *Inst = dyn_cast<Instruction>(V);
+ if (!Inst)
+ return true;
+
+ return DT->properlyDominates(Inst->getParent(), L->getHeader());
+}
+
+Value *WidenIV::getExtend(Value *NarrowOper, Type *WideType, bool IsSigned,
+ Instruction *Use) {
+ // Set the debug location and conservative insertion point.
+ IRBuilder<> Builder(Use);
+ // Hoist the insertion point into loop preheaders as far as possible.
+ for (const Loop *L = LI->getLoopFor(Use->getParent());
+ L && L->getLoopPreheader() && isLoopInvariant(NarrowOper, L, DT);
+ L = L->getParentLoop())
+ Builder.SetInsertPoint(L->getLoopPreheader()->getTerminator());
+
return IsSigned ? Builder.CreateSExt(NarrowOper, WideType) :
Builder.CreateZExt(NarrowOper, WideType);
}
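// For instance, if the narrow operand is defined outside a two-deep loop nest,
// the hoisting loop above walks out through both preheaders and the extend is
// emitted once, in the outermost qualifying preheader (illustrative IR):
//
//   outer.preheader:
//     %wide = sext i32 %narrow to i64   ; executed once, not per iteration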
@@ -848,10 +877,8 @@ static Value *getExtend( Value *NarrowOper, const Type *WideType,
/// CloneIVUser - Instantiate a wide operation to replace a narrow
 /// operation. This only needs to handle operations that can evaluate to
/// SCEVAddRec. It can safely return 0 for any operation we decide not to clone.
-Instruction *WidenIV::CloneIVUser(Instruction *NarrowUse,
- Instruction *NarrowDef,
- Instruction *WideDef) {
- unsigned Opcode = NarrowUse->getOpcode();
+Instruction *WidenIV::CloneIVUser(NarrowIVDefUse DU) {
+ unsigned Opcode = DU.NarrowUse->getOpcode();
switch (Opcode) {
default:
return 0;
@@ -865,24 +892,23 @@ Instruction *WidenIV::CloneIVUser(Instruction *NarrowUse,
case Instruction::Shl:
case Instruction::LShr:
case Instruction::AShr:
- DEBUG(dbgs() << "Cloning IVUser: " << *NarrowUse << "\n");
-
- IRBuilder<> Builder(NarrowUse);
+ DEBUG(dbgs() << "Cloning IVUser: " << *DU.NarrowUse << "\n");
// Replace NarrowDef operands with WideDef. Otherwise, we don't know
// anything about the narrow operand yet so must insert a [sz]ext. It is
// probably loop invariant and will be folded or hoisted. If it actually
// comes from a widened IV, it should be removed during a future call to
// WidenIVUse.
- Value *LHS = (NarrowUse->getOperand(0) == NarrowDef) ? WideDef :
- getExtend(NarrowUse->getOperand(0), WideType, IsSigned, Builder);
- Value *RHS = (NarrowUse->getOperand(1) == NarrowDef) ? WideDef :
- getExtend(NarrowUse->getOperand(1), WideType, IsSigned, Builder);
+ Value *LHS = (DU.NarrowUse->getOperand(0) == DU.NarrowDef) ? DU.WideDef :
+ getExtend(DU.NarrowUse->getOperand(0), WideType, IsSigned, DU.NarrowUse);
+ Value *RHS = (DU.NarrowUse->getOperand(1) == DU.NarrowDef) ? DU.WideDef :
+ getExtend(DU.NarrowUse->getOperand(1), WideType, IsSigned, DU.NarrowUse);
- BinaryOperator *NarrowBO = cast<BinaryOperator>(NarrowUse);
+ BinaryOperator *NarrowBO = cast<BinaryOperator>(DU.NarrowUse);
BinaryOperator *WideBO = BinaryOperator::Create(NarrowBO->getOpcode(),
LHS, RHS,
NarrowBO->getName());
+ IRBuilder<> Builder(DU.NarrowUse);
Builder.Insert(WideBO);
if (const OverflowingBinaryOperator *OBO =
dyn_cast<OverflowingBinaryOperator>(NarrowBO)) {
@@ -894,45 +920,46 @@ Instruction *WidenIV::CloneIVUser(Instruction *NarrowUse,
llvm_unreachable(0);
}
-/// HoistStep - Attempt to hoist an IV increment above a potential use.
-///
-/// To successfully hoist, two criteria must be met:
-/// - IncV operands dominate InsertPos and
-/// - InsertPos dominates IncV
-///
-/// Meeting the second condition means that we don't need to check all of IncV's
-/// existing uses (it's moving up in the domtree).
-///
-/// This does not yet recursively hoist the operands, although that would
-/// not be difficult.
-static bool HoistStep(Instruction *IncV, Instruction *InsertPos,
- const DominatorTree *DT)
-{
- if (DT->dominates(IncV, InsertPos))
- return true;
+/// No-wrap operations can transfer sign extension of their result to their
+/// operands. Generate the SCEV value for the widened operation without
+/// actually modifying the IR yet. If the expression after extending the
+/// operands is an AddRec for this loop, return it.
+const SCEVAddRecExpr* WidenIV::GetExtendedOperandRecurrence(NarrowIVDefUse DU) {
+ // Handle the common case of add<nsw/nuw>
+ if (DU.NarrowUse->getOpcode() != Instruction::Add)
+ return 0;
- if (!DT->dominates(InsertPos->getParent(), IncV->getParent()))
- return false;
+ // One operand (NarrowDef) has already been extended to WideDef. Now determine
+ // if extending the other will lead to a recurrence.
+ unsigned ExtendOperIdx = DU.NarrowUse->getOperand(0) == DU.NarrowDef ? 1 : 0;
+ assert(DU.NarrowUse->getOperand(1-ExtendOperIdx) == DU.NarrowDef && "bad DU");
+
+ const SCEV *ExtendOperExpr = 0;
+ const OverflowingBinaryOperator *OBO =
+ cast<OverflowingBinaryOperator>(DU.NarrowUse);
+ if (IsSigned && OBO->hasNoSignedWrap())
+ ExtendOperExpr = SE->getSignExtendExpr(
+ SE->getSCEV(DU.NarrowUse->getOperand(ExtendOperIdx)), WideType);
+ else if(!IsSigned && OBO->hasNoUnsignedWrap())
+ ExtendOperExpr = SE->getZeroExtendExpr(
+ SE->getSCEV(DU.NarrowUse->getOperand(ExtendOperIdx)), WideType);
+ else
+ return 0;
- if (IncV->mayHaveSideEffects())
- return false;
+ const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(
+ SE->getAddExpr(SE->getSCEV(DU.WideDef), ExtendOperExpr,
+ IsSigned ? SCEV::FlagNSW : SCEV::FlagNUW));
- // Attempt to hoist IncV
- for (User::op_iterator OI = IncV->op_begin(), OE = IncV->op_end();
- OI != OE; ++OI) {
- Instruction *OInst = dyn_cast<Instruction>(OI);
- if (OInst && !DT->dominates(OInst, InsertPos))
- return false;
- }
- IncV->moveBefore(InsertPos);
- return true;
+ if (!AddRec || AddRec->getLoop() != L)
+ return 0;
+ return AddRec;
}
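// The identity being exploited (signed case shown; unsigned is analogous with
// zext/nuw): for "%a = add nsw i32 %iv, %x", the absence of signed wrap
// guarantees
//
//   sext(%iv + %x) == sext(%iv) + sext(%x)
//
// so extending only the non-IV operand and adding it to the already-widened
// WideDef produces the same AddRec that widening the whole add would.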
-// GetWideRecurrence - Is this instruction potentially interesting from IVUsers'
-// perspective after widening it's type? In other words, can the extend be
-// safely hoisted out of the loop with SCEV reducing the value to a recurrence
-// on the same loop. If so, return the sign or zero extended
-// recurrence. Otherwise return NULL.
+/// GetWideRecurrence - Is this instruction potentially interesting from
+/// IVUsers' perspective after widening its type? In other words, can the
+/// extend be safely hoisted out of the loop with SCEV reducing the value to a
+/// recurrence on the same loop? If so, return the sign or zero extended
+/// recurrence. Otherwise return NULL.
const SCEVAddRecExpr *WidenIV::GetWideRecurrence(Instruction *NarrowUse) {
if (!SE->isSCEVable(NarrowUse->getType()))
return 0;
@@ -951,47 +978,45 @@ const SCEVAddRecExpr *WidenIV::GetWideRecurrence(Instruction *NarrowUse) {
const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(WideExpr);
if (!AddRec || AddRec->getLoop() != L)
return 0;
-
return AddRec;
}
/// WidenIVUse - Determine whether an individual user of the narrow IV can be
/// widened. If so, return the wide clone of the user.
-Instruction *WidenIV::WidenIVUse(Use &NarrowDefUse, Instruction *NarrowDef,
- Instruction *WideDef) {
- Instruction *NarrowUse = cast<Instruction>(NarrowDefUse.getUser());
+Instruction *WidenIV::WidenIVUse(NarrowIVDefUse DU) {
// Stop traversing the def-use chain at inner-loop phis or post-loop phis.
- if (isa<PHINode>(NarrowUse) && LI->getLoopFor(NarrowUse->getParent()) != L)
+ if (isa<PHINode>(DU.NarrowUse) &&
+ LI->getLoopFor(DU.NarrowUse->getParent()) != L)
return 0;
// Our raison d'etre! Eliminate sign and zero extension.
- if (IsSigned ? isa<SExtInst>(NarrowUse) : isa<ZExtInst>(NarrowUse)) {
- Value *NewDef = WideDef;
- if (NarrowUse->getType() != WideType) {
- unsigned CastWidth = SE->getTypeSizeInBits(NarrowUse->getType());
+ if (IsSigned ? isa<SExtInst>(DU.NarrowUse) : isa<ZExtInst>(DU.NarrowUse)) {
+ Value *NewDef = DU.WideDef;
+ if (DU.NarrowUse->getType() != WideType) {
+ unsigned CastWidth = SE->getTypeSizeInBits(DU.NarrowUse->getType());
unsigned IVWidth = SE->getTypeSizeInBits(WideType);
if (CastWidth < IVWidth) {
// The cast isn't as wide as the IV, so insert a Trunc.
- IRBuilder<> Builder(NarrowDefUse);
- NewDef = Builder.CreateTrunc(WideDef, NarrowUse->getType());
+ IRBuilder<> Builder(DU.NarrowUse);
+ NewDef = Builder.CreateTrunc(DU.WideDef, DU.NarrowUse->getType());
}
else {
// A wider extend was hidden behind a narrower one. This may induce
// another round of IV widening in which the intermediate IV becomes
// dead. It should be very rare.
DEBUG(dbgs() << "INDVARS: New IV " << *WidePhi
- << " not wide enough to subsume " << *NarrowUse << "\n");
- NarrowUse->replaceUsesOfWith(NarrowDef, WideDef);
- NewDef = NarrowUse;
+ << " not wide enough to subsume " << *DU.NarrowUse << "\n");
+ DU.NarrowUse->replaceUsesOfWith(DU.NarrowDef, DU.WideDef);
+ NewDef = DU.NarrowUse;
}
}
- if (NewDef != NarrowUse) {
- DEBUG(dbgs() << "INDVARS: eliminating " << *NarrowUse
- << " replaced by " << *WideDef << "\n");
+ if (NewDef != DU.NarrowUse) {
+ DEBUG(dbgs() << "INDVARS: eliminating " << *DU.NarrowUse
+ << " replaced by " << *DU.WideDef << "\n");
++NumElimExt;
- NarrowUse->replaceAllUsesWith(NewDef);
- DeadInsts.push_back(NarrowUse);
+ DU.NarrowUse->replaceAllUsesWith(NewDef);
+ DeadInsts.push_back(DU.NarrowUse);
}
  // Now that the extend is gone, we want to expose its uses for potential
// further simplification. We don't need to directly inform SimplifyIVUsers
@@ -1004,29 +1029,32 @@ Instruction *WidenIV::WidenIVUse(Use &NarrowDefUse, Instruction *NarrowDef,
}
// Does this user itself evaluate to a recurrence after widening?
- const SCEVAddRecExpr *WideAddRec = GetWideRecurrence(NarrowUse);
+ const SCEVAddRecExpr *WideAddRec = GetWideRecurrence(DU.NarrowUse);
+ if (!WideAddRec) {
+ WideAddRec = GetExtendedOperandRecurrence(DU);
+ }
if (!WideAddRec) {
    // This user does not evaluate to a recurrence after widening, so don't
// follow it. Instead insert a Trunc to kill off the original use,
// eventually isolating the original narrow IV so it can be removed.
- IRBuilder<> Builder(NarrowDefUse);
- Value *Trunc = Builder.CreateTrunc(WideDef, NarrowDef->getType());
- NarrowUse->replaceUsesOfWith(NarrowDef, Trunc);
+ IRBuilder<> Builder(getInsertPointForUses(DU.NarrowUse, DU.NarrowDef, DT));
+ Value *Trunc = Builder.CreateTrunc(DU.WideDef, DU.NarrowDef->getType());
+ DU.NarrowUse->replaceUsesOfWith(DU.NarrowDef, Trunc);
return 0;
}
- // We assume that block terminators are not SCEVable. We wouldn't want to
+  // Assume block terminators cannot evaluate to a recurrence. We can't
// insert a Trunc after a terminator if there happens to be a critical edge.
- assert(NarrowUse != NarrowUse->getParent()->getTerminator() &&
+ assert(DU.NarrowUse != DU.NarrowUse->getParent()->getTerminator() &&
"SCEV is not expected to evaluate a block terminator");
// Reuse the IV increment that SCEVExpander created as long as it dominates
// NarrowUse.
Instruction *WideUse = 0;
- if (WideAddRec == WideIncExpr && HoistStep(WideInc, NarrowUse, DT)) {
+ if (WideAddRec == WideIncExpr
+ && SCEVExpander::hoistStep(WideInc, DU.NarrowUse, DT))
WideUse = WideInc;
- }
else {
- WideUse = CloneIVUser(NarrowUse, NarrowDef, WideDef);
+ WideUse = CloneIVUser(DU);
if (!WideUse)
return 0;
}
@@ -1051,13 +1079,13 @@ Instruction *WidenIV::WidenIVUse(Use &NarrowDefUse, Instruction *NarrowDef,
void WidenIV::pushNarrowIVUsers(Instruction *NarrowDef, Instruction *WideDef) {
for (Value::use_iterator UI = NarrowDef->use_begin(),
UE = NarrowDef->use_end(); UI != UE; ++UI) {
- Use &U = UI.getUse();
+ Instruction *NarrowUse = cast<Instruction>(*UI);
// Handle data flow merges and bizarre phi cycles.
- if (!Widened.insert(cast<Instruction>(U.getUser())))
+ if (!Widened.insert(NarrowUse))
continue;
- NarrowIVUsers.push_back(std::make_pair(&UI.getUse(), WideDef));
+ NarrowIVUsers.push_back(NarrowIVDefUse(NarrowDef, NarrowUse, WideDef));
}
}
@@ -1124,23 +1152,19 @@ PHINode *WidenIV::CreateWideIV(SCEVExpander &Rewriter) {
pushNarrowIVUsers(OrigPhi, WidePhi);
while (!NarrowIVUsers.empty()) {
- Use *UsePtr;
- Instruction *WideDef;
- tie(UsePtr, WideDef) = NarrowIVUsers.pop_back_val();
- Use &NarrowDefUse = *UsePtr;
+ NarrowIVDefUse DU = NarrowIVUsers.pop_back_val();
// Process a def-use edge. This may replace the use, so don't hold a
// use_iterator across it.
- Instruction *NarrowDef = cast<Instruction>(NarrowDefUse.get());
- Instruction *WideUse = WidenIVUse(NarrowDefUse, NarrowDef, WideDef);
+ Instruction *WideUse = WidenIVUse(DU);
// Follow all def-use edges from the previous narrow use.
if (WideUse)
- pushNarrowIVUsers(cast<Instruction>(NarrowDefUse.getUser()), WideUse);
+ pushNarrowIVUsers(DU.NarrowUse, WideUse);
// WidenIVUse may have removed the def-use edge.
- if (NarrowDef->use_empty())
- DeadInsts.push_back(NarrowDef);
+ if (DU.NarrowDef->use_empty())
+ DeadInsts.push_back(DU.NarrowDef);
}
return WidePhi;
}
@@ -1149,187 +1173,17 @@ PHINode *WidenIV::CreateWideIV(SCEVExpander &Rewriter) {
// Simplification of IV users based on SCEV evaluation.
//===----------------------------------------------------------------------===//
-void IndVarSimplify::EliminateIVComparison(ICmpInst *ICmp, Value *IVOperand) {
- unsigned IVOperIdx = 0;
- ICmpInst::Predicate Pred = ICmp->getPredicate();
- if (IVOperand != ICmp->getOperand(0)) {
- // Swapped
- assert(IVOperand == ICmp->getOperand(1) && "Can't find IVOperand");
- IVOperIdx = 1;
- Pred = ICmpInst::getSwappedPredicate(Pred);
- }
-
- // Get the SCEVs for the ICmp operands.
- const SCEV *S = SE->getSCEV(ICmp->getOperand(IVOperIdx));
- const SCEV *X = SE->getSCEV(ICmp->getOperand(1 - IVOperIdx));
-
- // Simplify unnecessary loops away.
- const Loop *ICmpLoop = LI->getLoopFor(ICmp->getParent());
- S = SE->getSCEVAtScope(S, ICmpLoop);
- X = SE->getSCEVAtScope(X, ICmpLoop);
-
- // If the condition is always true or always false, replace it with
- // a constant value.
- if (SE->isKnownPredicate(Pred, S, X))
- ICmp->replaceAllUsesWith(ConstantInt::getTrue(ICmp->getContext()));
- else if (SE->isKnownPredicate(ICmpInst::getInversePredicate(Pred), S, X))
- ICmp->replaceAllUsesWith(ConstantInt::getFalse(ICmp->getContext()));
- else
- return;
-
- DEBUG(dbgs() << "INDVARS: Eliminated comparison: " << *ICmp << '\n');
- ++NumElimCmp;
- Changed = true;
- DeadInsts.push_back(ICmp);
-}
-
-void IndVarSimplify::EliminateIVRemainder(BinaryOperator *Rem,
- Value *IVOperand,
- bool IsSigned) {
- // We're only interested in the case where we know something about
- // the numerator.
- if (IVOperand != Rem->getOperand(0))
- return;
-
- // Get the SCEVs for the ICmp operands.
- const SCEV *S = SE->getSCEV(Rem->getOperand(0));
- const SCEV *X = SE->getSCEV(Rem->getOperand(1));
-
- // Simplify unnecessary loops away.
- const Loop *ICmpLoop = LI->getLoopFor(Rem->getParent());
- S = SE->getSCEVAtScope(S, ICmpLoop);
- X = SE->getSCEVAtScope(X, ICmpLoop);
-
- // i % n --> i if i is in [0,n).
- if ((!IsSigned || SE->isKnownNonNegative(S)) &&
- SE->isKnownPredicate(IsSigned ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT,
- S, X))
- Rem->replaceAllUsesWith(Rem->getOperand(0));
- else {
- // (i+1) % n --> (i+1)==n?0:(i+1) if i is in [0,n).
- const SCEV *LessOne =
- SE->getMinusSCEV(S, SE->getConstant(S->getType(), 1));
- if (IsSigned && !SE->isKnownNonNegative(LessOne))
- return;
-
- if (!SE->isKnownPredicate(IsSigned ?
- ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT,
- LessOne, X))
- return;
-
- ICmpInst *ICmp = new ICmpInst(Rem, ICmpInst::ICMP_EQ,
- Rem->getOperand(0), Rem->getOperand(1),
- "tmp");
- SelectInst *Sel =
- SelectInst::Create(ICmp,
- ConstantInt::get(Rem->getType(), 0),
- Rem->getOperand(0), "tmp", Rem);
- Rem->replaceAllUsesWith(Sel);
- }
-
- // Inform IVUsers about the new users.
- if (IU) {
- if (Instruction *I = dyn_cast<Instruction>(Rem->getOperand(0)))
- IU->AddUsersIfInteresting(I);
- }
- DEBUG(dbgs() << "INDVARS: Simplified rem: " << *Rem << '\n');
- ++NumElimRem;
- Changed = true;
- DeadInsts.push_back(Rem);
-}
-
-/// EliminateIVUser - Eliminate an operation that consumes a simple IV and has
-/// no observable side-effect given the range of IV values.
-bool IndVarSimplify::EliminateIVUser(Instruction *UseInst,
- Instruction *IVOperand) {
- if (ICmpInst *ICmp = dyn_cast<ICmpInst>(UseInst)) {
- EliminateIVComparison(ICmp, IVOperand);
- return true;
- }
- if (BinaryOperator *Rem = dyn_cast<BinaryOperator>(UseInst)) {
- bool IsSigned = Rem->getOpcode() == Instruction::SRem;
- if (IsSigned || Rem->getOpcode() == Instruction::URem) {
- EliminateIVRemainder(Rem, IVOperand, IsSigned);
- return true;
- }
- }
-
- // Eliminate any operation that SCEV can prove is an identity function.
- if (!SE->isSCEVable(UseInst->getType()) ||
- (UseInst->getType() != IVOperand->getType()) ||
- (SE->getSCEV(UseInst) != SE->getSCEV(IVOperand)))
- return false;
-
- DEBUG(dbgs() << "INDVARS: Eliminated identity: " << *UseInst << '\n');
-
- UseInst->replaceAllUsesWith(IVOperand);
- ++NumElimIdentity;
- Changed = true;
- DeadInsts.push_back(UseInst);
- return true;
-}
-
-/// pushIVUsers - Add all uses of Def to the current IV's worklist.
-///
-static void pushIVUsers(
- Instruction *Def,
- SmallPtrSet<Instruction*,16> &Simplified,
- SmallVectorImpl< std::pair<Instruction*,Instruction*> > &SimpleIVUsers) {
-
- for (Value::use_iterator UI = Def->use_begin(), E = Def->use_end();
- UI != E; ++UI) {
- Instruction *User = cast<Instruction>(*UI);
-
- // Avoid infinite or exponential worklist processing.
- // Also ensure unique worklist users.
- // If Def is a LoopPhi, it may not be in the Simplified set, so check for
- // self edges first.
- if (User != Def && Simplified.insert(User))
- SimpleIVUsers.push_back(std::make_pair(User, Def));
- }
-}
-
-/// isSimpleIVUser - Return true if this instruction generates a simple SCEV
-/// expression in terms of that IV.
-///
-/// This is similar to IVUsers' isInsteresting() but processes each instruction
-/// non-recursively when the operand is already known to be a simpleIVUser.
-///
-static bool isSimpleIVUser(Instruction *I, const Loop *L, ScalarEvolution *SE) {
- if (!SE->isSCEVable(I->getType()))
- return false;
-
- // Get the symbolic expression for this instruction.
- const SCEV *S = SE->getSCEV(I);
-
- // We assume that terminators are not SCEVable.
- assert((!S || I != I->getParent()->getTerminator()) &&
- "can't fold terminators");
-
- // Only consider affine recurrences.
- const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S);
- if (AR && AR->getLoop() == L)
- return true;
-
- return false;
-}
-/// SimplifyIVUsersNoRewrite - Iteratively perform simplification on a worklist
-/// of IV users. Each successive simplification may push more users which may
+/// SimplifyAndExtend - Iteratively perform simplification on a worklist of IV
+/// users. Each successive simplification may push more users which may
/// themselves be candidates for simplification.
///
-/// The "NoRewrite" algorithm does not require IVUsers analysis. Instead, it
-/// simplifies instructions in-place during analysis. Rather than rewriting
-/// induction variables bottom-up from their users, it transforms a chain of
-/// IVUsers top-down, updating the IR only when it encouters a clear
-/// optimization opportunitiy. A SCEVExpander "Rewriter" instance is still
-/// needed, but only used to generate a new IV (phi) of wider type for sign/zero
-/// extend elimination.
+/// Sign/Zero extend elimination is interleaved with IV simplification.
///
-/// Once DisableIVRewrite is default, LSR will be the only client of IVUsers.
-///
-void IndVarSimplify::SimplifyIVUsersNoRewrite(Loop *L, SCEVExpander &Rewriter) {
- std::map<PHINode *, WideIVInfo> WideIVMap;
+void IndVarSimplify::SimplifyAndExtend(Loop *L,
+ SCEVExpander &Rewriter,
+ LPPassManager &LPM) {
+ SmallVector<WideIVInfo, 8> WideIVs;
SmallVector<PHINode*, 8> LoopPhis;
for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I) {
@@ -1345,108 +1199,81 @@ void IndVarSimplify::SimplifyIVUsersNoRewrite(Loop *L, SCEVExpander &Rewriter) {
// extension. The first time SCEV attempts to normalize sign/zero extension,
// the result becomes final. So for the most predictable results, we delay
  // sign/zero extend evaluation until needed, and avoid running
- // other SCEV based analysis prior to SimplifyIVUsersNoRewrite.
+ // other SCEV based analysis prior to SimplifyAndExtend.
do {
PHINode *CurrIV = LoopPhis.pop_back_val();
// Information about sign/zero extensions of CurrIV.
- WideIVInfo WI;
-
- // Instructions processed by SimplifyIVUsers for CurrIV.
- SmallPtrSet<Instruction*,16> Simplified;
-
- // Use-def pairs if IV users waiting to be processed for CurrIV.
- SmallVector<std::pair<Instruction*, Instruction*>, 8> SimpleIVUsers;
-
- // Push users of the current LoopPhi. In rare cases, pushIVUsers may be
- // called multiple times for the same LoopPhi. This is the proper thing to
- // do for loop header phis that use each other.
- pushIVUsers(CurrIV, Simplified, SimpleIVUsers);
+ WideIVVisitor WIV(CurrIV, SE, TD);
- while (!SimpleIVUsers.empty()) {
- Instruction *UseInst, *Operand;
- tie(UseInst, Operand) = SimpleIVUsers.pop_back_val();
- // Bypass back edges to avoid extra work.
- if (UseInst == CurrIV) continue;
+ Changed |= simplifyUsersOfIV(CurrIV, SE, &LPM, DeadInsts, &WIV);
- if (EliminateIVUser(UseInst, Operand)) {
- pushIVUsers(Operand, Simplified, SimpleIVUsers);
- continue;
- }
- if (CastInst *Cast = dyn_cast<CastInst>(UseInst)) {
- bool IsSigned = Cast->getOpcode() == Instruction::SExt;
- if (IsSigned || Cast->getOpcode() == Instruction::ZExt) {
- CollectExtend(Cast, IsSigned, WI, SE, TD);
- }
- continue;
- }
- if (isSimpleIVUser(UseInst, L, SE)) {
- pushIVUsers(UseInst, Simplified, SimpleIVUsers);
- }
- }
- if (WI.WidestNativeType) {
- WideIVMap[CurrIV] = WI;
+ if (WIV.WI.WidestNativeType) {
+ WideIVs.push_back(WIV.WI);
}
} while(!LoopPhis.empty());
- for (std::map<PHINode *, WideIVInfo>::const_iterator I = WideIVMap.begin(),
- E = WideIVMap.end(); I != E; ++I) {
- WidenIV Widener(I->first, I->second, LI, SE, DT, DeadInsts);
+ for (; !WideIVs.empty(); WideIVs.pop_back()) {
+ WidenIV Widener(WideIVs.back(), LI, SE, DT, DeadInsts);
if (PHINode *WidePhi = Widener.CreateWideIV(Rewriter)) {
Changed = true;
LoopPhis.push_back(WidePhi);
}
}
- WideIVMap.clear();
}
}
-/// SimplifyCongruentIVs - Check for congruent phis in this loop header and
-/// populate ExprToIVMap for use later.
-///
-void IndVarSimplify::SimplifyCongruentIVs(Loop *L) {
- DenseMap<const SCEV *, PHINode *> ExprToIVMap;
- for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I) {
- PHINode *Phi = cast<PHINode>(I);
- if (!SE->isSCEVable(Phi->getType()))
- continue;
+//===----------------------------------------------------------------------===//
+// LinearFunctionTestReplace and its kin. Rewrite the loop exit condition.
+//===----------------------------------------------------------------------===//
- const SCEV *S = SE->getSCEV(Phi);
- DenseMap<const SCEV *, PHINode *>::const_iterator Pos;
- bool Inserted;
- tie(Pos, Inserted) = ExprToIVMap.insert(std::make_pair(S, Phi));
- if (Inserted)
- continue;
- PHINode *OrigPhi = Pos->second;
- // Replacing the congruent phi is sufficient because acyclic redundancy
- // elimination, CSE/GVN, should handle the rest. However, once SCEV proves
- // that a phi is congruent, it's almost certain to be the head of an IV
- // user cycle that is isomorphic with the original phi. So it's worth
- // eagerly cleaning up the common case of a single IV increment.
- if (BasicBlock *LatchBlock = L->getLoopLatch()) {
- Instruction *OrigInc =
- cast<Instruction>(OrigPhi->getIncomingValueForBlock(LatchBlock));
- Instruction *IsomorphicInc =
- cast<Instruction>(Phi->getIncomingValueForBlock(LatchBlock));
- if (OrigInc != IsomorphicInc &&
- SE->getSCEV(OrigInc) == SE->getSCEV(IsomorphicInc) &&
- HoistStep(OrigInc, IsomorphicInc, DT)) {
- DEBUG(dbgs() << "INDVARS: Eliminated congruent iv.inc: "
- << *IsomorphicInc << '\n');
- IsomorphicInc->replaceAllUsesWith(OrigInc);
- DeadInsts.push_back(IsomorphicInc);
- }
+/// Check for expressions that ScalarEvolution generates to compute
+/// BackedgeTakenInfo. If these expressions have not been reduced, then
+/// expanding them may incur additional cost (albeit in the loop preheader).
+static bool isHighCostExpansion(const SCEV *S, BranchInst *BI,
+ ScalarEvolution *SE) {
+ // If the backedge-taken count is a UDiv, it's very likely a UDiv that
+ // ScalarEvolution's HowFarToZero or HowManyLessThans produced to compute a
+ // precise expression, rather than a UDiv from the user's code. If we can't
+ // find a UDiv in the code with some simple searching, assume the former and
+ // forego rewriting the loop.
+ if (isa<SCEVUDivExpr>(S)) {
+ ICmpInst *OrigCond = dyn_cast<ICmpInst>(BI->getCondition());
+ if (!OrigCond) return true;
+ const SCEV *R = SE->getSCEV(OrigCond->getOperand(1));
+ R = SE->getMinusSCEV(R, SE->getConstant(R->getType(), 1));
+ if (R != S) {
+ const SCEV *L = SE->getSCEV(OrigCond->getOperand(0));
+ L = SE->getMinusSCEV(L, SE->getConstant(L->getType(), 1));
+ if (L != S)
+ return true;
}
- DEBUG(dbgs() << "INDVARS: Eliminated congruent iv: " << *Phi << '\n');
- ++NumElimIV;
- Phi->replaceAllUsesWith(OrigPhi);
- DeadInsts.push_back(Phi);
}
-}
-//===----------------------------------------------------------------------===//
-// LinearFunctionTestReplace and its kin. Rewrite the loop exit condition.
-//===----------------------------------------------------------------------===//
+ if (EnableIVRewrite)
+ return false;
+
+ // Recurse past add expressions, which commonly occur in the
+ // BackedgeTakenCount. They may already exist in program code, and if not,
+  // they are not too expensive to rematerialize.
+ if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) {
+ for (SCEVAddExpr::op_iterator I = Add->op_begin(), E = Add->op_end();
+ I != E; ++I) {
+ if (isHighCostExpansion(*I, BI, SE))
+ return true;
+ }
+ return false;
+ }
+
+ // HowManyLessThans uses a Max expression whenever the loop is not guarded by
+ // the exit condition.
+ if (isa<SCEVSMaxExpr>(S) || isa<SCEVUMaxExpr>(S))
+ return true;
+
+  // If we haven't recognized an expensive SCEV pattern, assume it's an
+  // expression produced by program code.
+ // produced by program code.
+ return false;
+}
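// Example of the UDiv heuristic (hypothetical loop): "while (n != 0) n -= s;"
// makes ScalarEvolution's HowFarToZero synthesize the backedge-taken count
// "n /u s". Neither operand of the original compare (%n, 0) equals that UDiv
// plus one, so the expansion is judged high cost and LFTR is skipped. Only
// when a compare operand's SCEV is exactly the UDiv plus one -- i.e. the
// division already exists in the user's code -- is it treated as cheap.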
/// canExpandBackedgeTakenCount - Return true if this loop's backedge taken
/// count expression can be safely and cheaply expanded into an instruction
@@ -1465,31 +1292,17 @@ static bool canExpandBackedgeTakenCount(Loop *L, ScalarEvolution *SE) {
if (!BI)
return false;
- // Special case: If the backedge-taken count is a UDiv, it's very likely a
- // UDiv that ScalarEvolution produced in order to compute a precise
- // expression, rather than a UDiv from the user's code. If we can't find a
- // UDiv in the code with some simple searching, assume the former and forego
- // rewriting the loop.
- if (isa<SCEVUDivExpr>(BackedgeTakenCount)) {
- ICmpInst *OrigCond = dyn_cast<ICmpInst>(BI->getCondition());
- if (!OrigCond) return false;
- const SCEV *R = SE->getSCEV(OrigCond->getOperand(1));
- R = SE->getMinusSCEV(R, SE->getConstant(R->getType(), 1));
- if (R != BackedgeTakenCount) {
- const SCEV *L = SE->getSCEV(OrigCond->getOperand(0));
- L = SE->getMinusSCEV(L, SE->getConstant(L->getType(), 1));
- if (L != BackedgeTakenCount)
- return false;
- }
- }
+ if (isHighCostExpansion(BackedgeTakenCount, BI, SE))
+ return false;
+
return true;
}
/// getBackedgeIVType - Get the widest type used by the loop test after peeking
/// through Truncs.
///
-/// TODO: Unnecessary if LFTR does not force a canonical IV.
-static const Type *getBackedgeIVType(Loop *L) {
+/// TODO: Unnecessary when ForceLFTR is removed.
+static Type *getBackedgeIVType(Loop *L) {
if (!L->getExitingBlock())
return 0;
@@ -1502,7 +1315,7 @@ static const Type *getBackedgeIVType(Loop *L) {
if (!Cond)
return 0;
- const Type *Ty = 0;
+ Type *Ty = 0;
for(User::op_iterator OI = Cond->op_begin(), OE = Cond->op_end();
OI != OE; ++OI) {
assert((!Ty || Ty == (*OI)->getType()) && "bad icmp operand types");
@@ -1515,12 +1328,187 @@ static const Type *getBackedgeIVType(Loop *L) {
return Ty;
}
+/// getLoopPhiForCounter - Return the loop header phi IFF IncV adds a loop
+/// invariant value to the phi.
+static PHINode *getLoopPhiForCounter(Value *IncV, Loop *L, DominatorTree *DT) {
+ Instruction *IncI = dyn_cast<Instruction>(IncV);
+ if (!IncI)
+ return 0;
+
+ switch (IncI->getOpcode()) {
+ case Instruction::Add:
+ case Instruction::Sub:
+ break;
+ case Instruction::GetElementPtr:
+ // An IV counter must preserve its type.
+ if (IncI->getNumOperands() == 2)
+ break;
+ default:
+ return 0;
+ }
+
+ PHINode *Phi = dyn_cast<PHINode>(IncI->getOperand(0));
+ if (Phi && Phi->getParent() == L->getHeader()) {
+ if (isLoopInvariant(IncI->getOperand(1), L, DT))
+ return Phi;
+ return 0;
+ }
+ if (IncI->getOpcode() == Instruction::GetElementPtr)
+ return 0;
+
+ // Allow add/sub to be commuted.
+ Phi = dyn_cast<PHINode>(IncI->getOperand(1));
+ if (Phi && Phi->getParent() == L->getHeader()) {
+ if (isLoopInvariant(IncI->getOperand(0), L, DT))
+ return Phi;
+ }
+ return 0;
+}
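// The shape being matched (illustrative IR): a header phi whose latch value
// adds a loop-invariant step back onto the phi itself,
//
//   loop:
//     %iv = phi i32 [ %start, %preheader ], [ %iv.next, %loop ]
//     %iv.next = add i32 %iv, %step          ; %step is loop-invariant
//
// Add/sub may have the phi in either operand; a GEP counter must keep the phi
// as its pointer operand and have exactly one index.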
+
+/// needsLFTR - LinearFunctionTestReplace policy. Return true unless we can show
+/// that the current exit test is already sufficiently canonical.
+static bool needsLFTR(Loop *L, DominatorTree *DT) {
+ assert(L->getExitingBlock() && "expected loop exit");
+
+ BasicBlock *LatchBlock = L->getLoopLatch();
+ // Don't bother with LFTR if the loop is not properly simplified.
+ if (!LatchBlock)
+ return false;
+
+ BranchInst *BI = dyn_cast<BranchInst>(L->getExitingBlock()->getTerminator());
+ assert(BI && "expected exit branch");
+
+ // Do LFTR to simplify the exit condition to an ICMP.
+ ICmpInst *Cond = dyn_cast<ICmpInst>(BI->getCondition());
+ if (!Cond)
+ return true;
+
+ // Do LFTR to simplify the exit ICMP to EQ/NE
+ ICmpInst::Predicate Pred = Cond->getPredicate();
+ if (Pred != ICmpInst::ICMP_NE && Pred != ICmpInst::ICMP_EQ)
+ return true;
+
+ // Look for a loop invariant RHS
+ Value *LHS = Cond->getOperand(0);
+ Value *RHS = Cond->getOperand(1);
+ if (!isLoopInvariant(RHS, L, DT)) {
+ if (!isLoopInvariant(LHS, L, DT))
+ return true;
+ std::swap(LHS, RHS);
+ }
+ // Look for a simple IV counter LHS
+ PHINode *Phi = dyn_cast<PHINode>(LHS);
+ if (!Phi)
+ Phi = getLoopPhiForCounter(LHS, L, DT);
+
+ if (!Phi)
+ return true;
+
+ // Do LFTR if the exit condition's IV is *not* a simple counter.
+ Value *IncV = Phi->getIncomingValueForBlock(L->getLoopLatch());
+ return Phi != getLoopPhiForCounter(IncV, L, DT);
+}
+
+/// AlmostDeadIV - Return true if this IV has no uses other than the (soon to
+/// be rewritten) loop exit test.
+static bool AlmostDeadIV(PHINode *Phi, BasicBlock *LatchBlock, Value *Cond) {
+ int LatchIdx = Phi->getBasicBlockIndex(LatchBlock);
+ Value *IncV = Phi->getIncomingValue(LatchIdx);
+
+ for (Value::use_iterator UI = Phi->use_begin(), UE = Phi->use_end();
+ UI != UE; ++UI) {
+ if (*UI != Cond && *UI != IncV) return false;
+ }
+
+ for (Value::use_iterator UI = IncV->use_begin(), UE = IncV->use_end();
+ UI != UE; ++UI) {
+ if (*UI != Cond && *UI != Phi) return false;
+ }
+ return true;
+}
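// E.g. a counter whose only remaining uses are its own increment and the exit
// compare (illustrative IR):
//
//   %iv = phi i64 [ 0, %ph ], [ %iv.next, %latch ]
//   %iv.next = add i64 %iv, 1
//   %exitcond = icmp eq i64 %iv.next, %n
//
// is "almost dead": once LFTR rewrites the exit test against another counter,
// the entire cycle becomes trivially dead code.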
+
+/// FindLoopCounter - Find an affine IV in canonical form.
+///
+/// FIXME: Accept -1 stride and set IVLimit = IVInit - BECount
+///
+/// FIXME: Accept non-unit stride as long as SCEV can reduce BECount * Stride.
+/// This is difficult in general for SCEV because of potential overflow. But we
+/// could at least handle constant BECounts.
+static PHINode *
+FindLoopCounter(Loop *L, const SCEV *BECount,
+ ScalarEvolution *SE, DominatorTree *DT, const TargetData *TD) {
+ // I'm not sure how BECount could be a pointer type, but we definitely don't
+ // want to LFTR that.
+ if (BECount->getType()->isPointerTy())
+ return 0;
+
+ uint64_t BCWidth = SE->getTypeSizeInBits(BECount->getType());
+
+ Value *Cond =
+ cast<BranchInst>(L->getExitingBlock()->getTerminator())->getCondition();
+
+ // Loop over all of the PHI nodes, looking for a simple counter.
+ PHINode *BestPhi = 0;
+ const SCEV *BestInit = 0;
+ BasicBlock *LatchBlock = L->getLoopLatch();
+ assert(LatchBlock && "needsLFTR should guarantee a loop latch");
+
+ for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I) {
+ PHINode *Phi = cast<PHINode>(I);
+ if (!SE->isSCEVable(Phi->getType()))
+ continue;
+
+ const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(Phi));
+ if (!AR || AR->getLoop() != L || !AR->isAffine())
+ continue;
+
+ // AR may be a pointer type, while BECount is an integer type.
+ // AR may be wider than BECount. With eq/ne tests overflow is immaterial.
+ // AR may not be a narrower type, or we may never exit.
+ uint64_t PhiWidth = SE->getTypeSizeInBits(AR->getType());
+ if (PhiWidth < BCWidth || (TD && !TD->isLegalInteger(PhiWidth)))
+ continue;
+
+ const SCEV *Step = dyn_cast<SCEVConstant>(AR->getStepRecurrence(*SE));
+ if (!Step || !Step->isOne())
+ continue;
+
+ int LatchIdx = Phi->getBasicBlockIndex(LatchBlock);
+ Value *IncV = Phi->getIncomingValue(LatchIdx);
+ if (getLoopPhiForCounter(IncV, L, DT) != Phi)
+ continue;
+
+ const SCEV *Init = AR->getStart();
+
+ if (BestPhi && !AlmostDeadIV(BestPhi, LatchBlock, Cond)) {
+ // Don't force a live loop counter if another IV can be used.
+ if (AlmostDeadIV(Phi, LatchBlock, Cond))
+ continue;
+
+      // Prefer to count from zero. This is a more "canonical" counter form. It
+ // also prefers integer to pointer IVs.
+ if (BestInit->isZero() != Init->isZero()) {
+ if (BestInit->isZero())
+ continue;
+ }
+ // If two IVs both count from zero or both count from nonzero then the
+ // narrower is likely a dead phi that has been widened. Use the wider phi
+ // to allow the other to be eliminated.
+ if (PhiWidth <= SE->getTypeSizeInBits(BestPhi->getType()))
+ continue;
+ }
+ BestPhi = Phi;
+ BestInit = Init;
+ }
+ return BestPhi;
+}
+
/// LinearFunctionTestReplace - This method rewrites the exit condition of the
/// loop to be a canonical != comparison against the incremented loop induction
/// variable. This pass is able to rewrite the exit tests of any loop where the
/// SCEV analysis can determine a loop-invariant trip count of the loop, which
/// is actually a much broader range than just linear tests.
-ICmpInst *IndVarSimplify::
+Value *IndVarSimplify::
LinearFunctionTestReplace(Loop *L,
const SCEV *BackedgeTakenCount,
PHINode *IndVar,
@@ -1528,62 +1516,117 @@ LinearFunctionTestReplace(Loop *L,
assert(canExpandBackedgeTakenCount(L, SE) && "precondition");
BranchInst *BI = cast<BranchInst>(L->getExitingBlock()->getTerminator());
+ // LFTR can ignore IV overflow and truncate to the width of
+ // BECount. This avoids materializing the add(zext(add)) expression.
+ Type *CntTy = !EnableIVRewrite ?
+ BackedgeTakenCount->getType() : IndVar->getType();
+
+ const SCEV *IVLimit = BackedgeTakenCount;
+
// If the exiting block is not the same as the backedge block, we must compare
// against the preincremented value, otherwise we prefer to compare against
// the post-incremented value.
Value *CmpIndVar;
- const SCEV *RHS = BackedgeTakenCount;
if (L->getExitingBlock() == L->getLoopLatch()) {
// Add one to the "backedge-taken" count to get the trip count.
// If this addition may overflow, we have to be more pessimistic and
// cast the induction variable before doing the add.
- const SCEV *Zero = SE->getConstant(BackedgeTakenCount->getType(), 0);
const SCEV *N =
- SE->getAddExpr(BackedgeTakenCount,
- SE->getConstant(BackedgeTakenCount->getType(), 1));
- if ((isa<SCEVConstant>(N) && !N->isZero()) ||
- SE->isLoopEntryGuardedByCond(L, ICmpInst::ICMP_NE, N, Zero)) {
- // No overflow. Cast the sum.
- RHS = SE->getTruncateOrZeroExtend(N, IndVar->getType());
- } else {
- // Potential overflow. Cast before doing the add.
- RHS = SE->getTruncateOrZeroExtend(BackedgeTakenCount,
- IndVar->getType());
- RHS = SE->getAddExpr(RHS,
- SE->getConstant(IndVar->getType(), 1));
+ SE->getAddExpr(IVLimit, SE->getConstant(IVLimit->getType(), 1));
+ if (CntTy == IVLimit->getType())
+ IVLimit = N;
+ else {
+ const SCEV *Zero = SE->getConstant(IVLimit->getType(), 0);
+ if ((isa<SCEVConstant>(N) && !N->isZero()) ||
+ SE->isLoopEntryGuardedByCond(L, ICmpInst::ICMP_NE, N, Zero)) {
+ // No overflow. Cast the sum.
+ IVLimit = SE->getTruncateOrZeroExtend(N, CntTy);
+ } else {
+ // Potential overflow. Cast before doing the add.
+ IVLimit = SE->getTruncateOrZeroExtend(IVLimit, CntTy);
+ IVLimit = SE->getAddExpr(IVLimit, SE->getConstant(CntTy, 1));
+ }
}
-
// The BackedgeTaken expression contains the number of times that the
// backedge branches to the loop header. This is one less than the
// number of times the loop executes, so use the incremented indvar.
CmpIndVar = IndVar->getIncomingValueForBlock(L->getExitingBlock());
} else {
// We have to use the preincremented value...
- RHS = SE->getTruncateOrZeroExtend(BackedgeTakenCount,
- IndVar->getType());
+ IVLimit = SE->getTruncateOrZeroExtend(IVLimit, CntTy);
CmpIndVar = IndVar;
}
+ // For unit stride, IVLimit = Start + BECount with 2's complement overflow.
+  // So for non-zero start, compute the IVLimit here.
+ bool isPtrIV = false;
+ Type *CmpTy = CntTy;
+ const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(IndVar));
+ assert(AR && AR->getLoop() == L && AR->isAffine() && "bad loop counter");
+ if (!AR->getStart()->isZero()) {
+ assert(AR->getStepRecurrence(*SE)->isOne() && "only handles unit stride");
+ const SCEV *IVInit = AR->getStart();
+
+ // For pointer types, sign extend BECount in order to materialize a GEP.
+  // Note that without EnableIVRewrite, we never run SCEVExpander on a
+ // pointer type, because we must preserve the existing GEPs. Instead we
+ // directly generate a GEP later.
+ if (IVInit->getType()->isPointerTy()) {
+ isPtrIV = true;
+ CmpTy = SE->getEffectiveSCEVType(IVInit->getType());
+ IVLimit = SE->getTruncateOrSignExtend(IVLimit, CmpTy);
+ }
+ // For integer types, truncate the IV before computing IVInit + BECount.
+ else {
+ if (SE->getTypeSizeInBits(IVInit->getType())
+ > SE->getTypeSizeInBits(CmpTy))
+ IVInit = SE->getTruncateExpr(IVInit, CmpTy);
+
+ IVLimit = SE->getAddExpr(IVInit, IVLimit);
+ }
+ }
// Expand the code for the iteration count.
- assert(SE->isLoopInvariant(RHS, L) &&
+ IRBuilder<> Builder(BI);
+
+ assert(SE->isLoopInvariant(IVLimit, L) &&
"Computed iteration count is not loop invariant!");
- Value *ExitCnt = Rewriter.expandCodeFor(RHS, IndVar->getType(), BI);
+ Value *ExitCnt = Rewriter.expandCodeFor(IVLimit, CmpTy, BI);
+
+  // Create a gep for IVInit + IVLimit from an existing pointer base.
+ assert(isPtrIV == IndVar->getType()->isPointerTy() &&
+ "IndVar type must match IVInit type");
+ if (isPtrIV) {
+ Value *IVStart = IndVar->getIncomingValueForBlock(L->getLoopPreheader());
+ assert(AR->getStart() == SE->getSCEV(IVStart) && "bad loop counter");
+ assert(SE->getSizeOfExpr(
+ cast<PointerType>(IVStart->getType())->getElementType())->isOne()
+ && "unit stride pointer IV must be i8*");
+
+ Builder.SetInsertPoint(L->getLoopPreheader()->getTerminator());
+ ExitCnt = Builder.CreateGEP(IVStart, ExitCnt, "lftr.limit");
+ Builder.SetInsertPoint(BI);
+ }
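  // Illustrative result for an i8* counter (hypothetical names): the limit is
  // materialized in the preheader as
  //
  //   %lftr.limit = getelementptr i8* %ivstart, i64 %exitcnt
  //
  // so the exit test below can compare the pointer IV directly against this
  // loop-invariant pointer bound.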
// Insert a new icmp_ne or icmp_eq instruction before the branch.
- ICmpInst::Predicate Opcode;
+ ICmpInst::Predicate P;
if (L->contains(BI->getSuccessor(0)))
- Opcode = ICmpInst::ICMP_NE;
+ P = ICmpInst::ICMP_NE;
else
- Opcode = ICmpInst::ICMP_EQ;
+ P = ICmpInst::ICMP_EQ;
DEBUG(dbgs() << "INDVARS: Rewriting loop exit condition to:\n"
<< " LHS:" << *CmpIndVar << '\n'
<< " op:\t"
- << (Opcode == ICmpInst::ICMP_NE ? "!=" : "==") << "\n"
- << " RHS:\t" << *RHS << "\n");
+ << (P == ICmpInst::ICMP_NE ? "!=" : "==") << "\n"
+ << " RHS:\t" << *ExitCnt << "\n"
+ << " Expr:\t" << *IVLimit << "\n");
+
+ if (SE->getTypeSizeInBits(CmpIndVar->getType())
+ > SE->getTypeSizeInBits(CmpTy)) {
+ CmpIndVar = Builder.CreateTrunc(CmpIndVar, CmpTy, "lftr.wideiv");
+ }
- ICmpInst *Cond = new ICmpInst(BI, Opcode, CmpIndVar, ExitCnt, "exitcond");
- Cond->setDebugLoc(BI->getDebugLoc());
+ Value *Cond = Builder.CreateICmp(P, CmpIndVar, ExitCnt, "exitcond");
Value *OrigCond = BI->getCondition();
// It's tempting to use replaceAllUsesWith here to fully replace the old
// comparison, but that's not immediately safe, since users of the old
@@ -1612,7 +1655,7 @@ void IndVarSimplify::SinkUnusedInvariants(Loop *L) {
BasicBlock *Preheader = L->getLoopPreheader();
if (!Preheader) return;
- Instruction *InsertPt = ExitBlock->getFirstNonPHI();
+ Instruction *InsertPt = ExitBlock->getFirstInsertionPt();
BasicBlock::iterator I = Preheader->getTerminator();
while (I != Preheader->begin()) {
--I;
@@ -1633,6 +1676,10 @@ void IndVarSimplify::SinkUnusedInvariants(Loop *L) {
if (isa<DbgInfoIntrinsic>(I))
continue;
+ // Skip landingpad instructions.
+ if (isa<LandingPadInst>(I))
+ continue;
+
// Don't sink static AllocaInsts out of the entry block, which would
// turn them into dynamic allocas!
if (AllocaInst *AI = dyn_cast<AllocaInst>(I))
@@ -1699,7 +1746,7 @@ bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) {
if (!L->isLoopSimplifyForm())
return false;
- if (!DisableIVRewrite)
+ if (EnableIVRewrite)
IU = &getAnalysis<IVUsers>();
LI = &getAnalysis<LoopInfo>();
SE = &getAnalysis<ScalarEvolution>();
@@ -1717,6 +1764,9 @@ bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) {
// Create a rewriter object which we'll use to transform the code with.
SCEVExpander Rewriter(*SE, "indvars");
+#ifndef NDEBUG
+ Rewriter.setDebugType(DEBUG_TYPE);
+#endif
// Eliminate redundant IV users.
//
@@ -1724,9 +1774,9 @@ bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) {
// attempt to avoid evaluating SCEVs for sign/zero extend operations until
// other expressions involving loop IVs have been evaluated. This helps SCEV
// set no-wrap flags before normalizing sign/zero extension.
- if (DisableIVRewrite) {
+ if (!EnableIVRewrite) {
Rewriter.disableCanonicalMode();
- SimplifyIVUsersNoRewrite(L, Rewriter);
+ SimplifyAndExtend(L, Rewriter, LPM);
}
// Check to see if this loop has a computable loop-invariant execution count.
@@ -1739,25 +1789,25 @@ bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) {
RewriteLoopExitValues(L, Rewriter);
// Eliminate redundant IV users.
- if (!DisableIVRewrite)
- SimplifyIVUsers(Rewriter);
+ if (EnableIVRewrite)
+ Changed |= simplifyIVUsers(IU, SE, &LPM, DeadInsts);
// Eliminate redundant IV cycles.
- if (DisableIVRewrite)
- SimplifyCongruentIVs(L);
+ if (!EnableIVRewrite)
+ NumElimIV += Rewriter.replaceCongruentIVs(L, DT, DeadInsts);
// Compute the type of the largest recurrence expression, and decide whether
// a canonical induction variable should be inserted.
- const Type *LargestType = 0;
+ Type *LargestType = 0;
bool NeedCannIV = false;
bool ExpandBECount = canExpandBackedgeTakenCount(L, SE);
- if (ExpandBECount) {
+ if (EnableIVRewrite && ExpandBECount) {
// If we have a known trip count and a single exit block, we'll be
// rewriting the loop exit test condition below, which requires a
// canonical induction variable.
NeedCannIV = true;
- const Type *Ty = BackedgeTakenCount->getType();
- if (DisableIVRewrite) {
+ Type *Ty = BackedgeTakenCount->getType();
+ if (!EnableIVRewrite) {
// In this mode, SimplifyIVUsers may have already widened the IV used by
// the backedge test and inserted a Trunc on the compare's operand. Get
// the wider type to avoid creating a redundant narrow IV only used by the
@@ -1769,10 +1819,10 @@ bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) {
SE->getTypeSizeInBits(LargestType))
LargestType = SE->getEffectiveSCEVType(Ty);
}
- if (!DisableIVRewrite) {
+ if (EnableIVRewrite) {
for (IVUsers::const_iterator I = IU->begin(), E = IU->end(); I != E; ++I) {
NeedCannIV = true;
- const Type *Ty =
+ Type *Ty =
SE->getEffectiveSCEVType(I->getOperandValToReplace()->getType());
if (!LargestType ||
SE->getTypeSizeInBits(Ty) >
@@ -1811,18 +1861,16 @@ bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) {
// the end of the pass.
while (!OldCannIVs.empty()) {
PHINode *OldCannIV = OldCannIVs.pop_back_val();
- OldCannIV->insertBefore(L->getHeader()->getFirstNonPHI());
+ OldCannIV->insertBefore(L->getHeader()->getFirstInsertionPt());
}
}
-
+ else if (!EnableIVRewrite && ExpandBECount && needsLFTR(L, DT)) {
+ IndVar = FindLoopCounter(L, BackedgeTakenCount, SE, DT, TD);
+ }
// If we have a trip count expression, rewrite the loop's exit condition
// using it. We can currently only handle loops with a single exit.
- ICmpInst *NewICmp = 0;
- if (ExpandBECount) {
- assert(canExpandBackedgeTakenCount(L, SE) &&
- "canonical IV disrupted BackedgeTaken expansion");
- assert(NeedCannIV &&
- "LinearFunctionTestReplace requires a canonical induction variable");
+ Value *NewICmp = 0;
+ if (ExpandBECount && IndVar) {
// Check preconditions for proper SCEVExpander operation. SCEV does not
// express SCEVExpander's dependencies, such as LoopSimplify. Instead any
// pass that uses the SCEVExpander must do it. This does not work well for
@@ -1837,7 +1885,7 @@ bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) {
LinearFunctionTestReplace(L, BackedgeTakenCount, IndVar, Rewriter);
}
// Rewrite IV-derived expressions.
- if (!DisableIVRewrite)
+ if (EnableIVRewrite)
RewriteIVExpressions(L, Rewriter);
// Clear the rewriter cache, because values that are in the rewriter's cache
@@ -1860,12 +1908,34 @@ bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) {
// For completeness, inform IVUsers of the IV use in the newly-created
// loop exit test instruction.
- if (NewICmp && IU)
- IU->AddUsersIfInteresting(cast<Instruction>(NewICmp->getOperand(0)));
-
+ if (IU && NewICmp) {
+ ICmpInst *NewICmpInst = dyn_cast<ICmpInst>(NewICmp);
+ if (NewICmpInst)
+ IU->AddUsersIfInteresting(cast<Instruction>(NewICmpInst->getOperand(0)));
+ }
// Clean up dead instructions.
Changed |= DeleteDeadPHIs(L->getHeader());
// Check a post-condition.
- assert(L->isLCSSAForm(*DT) && "Indvars did not leave the loop in lcssa form!");
+ assert(L->isLCSSAForm(*DT) &&
+ "Indvars did not leave the loop in lcssa form!");
+
+ // Verify that LFTR and any other changes have not interfered with SCEV's
+ // ability to compute a trip count.
+#ifndef NDEBUG
+ if (!EnableIVRewrite && VerifyIndvars &&
+ !isa<SCEVCouldNotCompute>(BackedgeTakenCount)) {
+ SE->forgetLoop(L);
+ const SCEV *NewBECount = SE->getBackedgeTakenCount(L);
+ if (SE->getTypeSizeInBits(BackedgeTakenCount->getType()) <
+ SE->getTypeSizeInBits(NewBECount->getType()))
+ NewBECount = SE->getTruncateOrNoop(NewBECount,
+ BackedgeTakenCount->getType());
+ else
+ BackedgeTakenCount = SE->getTruncateOrNoop(BackedgeTakenCount,
+ NewBECount->getType());
+ assert(BackedgeTakenCount == NewBECount && "indvars must preserve SCEV");
+ }
+#endif
+
return Changed;
}
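
For the LFTR hunks above: with a unit-stride i8* induction variable, the exit limit is simply the start pointer advanced by the expanded exit count, and the exit test becomes a pointer equality check. A standalone C++ sketch of the resulting semantics (illustrative names, not LLVM API):

    #include <cstddef>
    #include <cstdint>

    // Model of the rewritten exit test: the "lftr.limit" GEP computes
    // base + exitCount in the preheader; the latch then compares the IV
    // against it with eq/ne rather than an ordered comparison.
    const uint8_t *lftrLimit(const uint8_t *base, std::size_t exitCount) {
      return base + exitCount; // the GEP emitted in the preheader
    }

    bool exitTaken(const uint8_t *iv, const uint8_t *limit) {
      return iv == limit; // icmp eq form: taken when the IV reaches the limit
    }
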
diff --git a/lib/Transforms/Scalar/JumpThreading.cpp b/lib/Transforms/Scalar/JumpThreading.cpp
index b500d5b..f410af3 100644
--- a/lib/Transforms/Scalar/JumpThreading.cpp
+++ b/lib/Transforms/Scalar/JumpThreading.cpp
@@ -811,8 +811,8 @@ bool JumpThreading::ProcessBlock(BasicBlock *BB) {
/// important optimization that encourages jump threading, and needs to be run
/// interlaced with other jump threading tasks.
bool JumpThreading::SimplifyPartiallyRedundantLoad(LoadInst *LI) {
- // Don't hack volatile loads.
- if (LI->isVolatile()) return false;
+ // Don't hack volatile/atomic loads.
+ if (!LI->isSimple()) return false;
// If the load is defined in a block with exactly one predecessor, it can't be
// partially redundant.
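
The isVolatile() to isSimple() changes here and in the hunks below tighten the guard from "not volatile" to "not volatile and not atomic". A minimal sketch of the predicate as these passes use it (hypothetical struct, not the LLVM class):

    struct MemAccess { bool Volatile; bool Atomic; };

    // "Simple" accesses are the only ones these passes may freely
    // duplicate, hoist, or merge; volatile or atomic ones are rejected.
    bool isSimple(const MemAccess &A) { return !A.Volatile && !A.Atomic; }
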
diff --git a/lib/Transforms/Scalar/LICM.cpp b/lib/Transforms/Scalar/LICM.cpp
index 66add6c..b79bb13 100644
--- a/lib/Transforms/Scalar/LICM.cpp
+++ b/lib/Transforms/Scalar/LICM.cpp
@@ -151,6 +151,11 @@ namespace {
///
bool isSafeToExecuteUnconditionally(Instruction &I);
+ /// isGuaranteedToExecute - Check that the instruction is guaranteed to
+ /// execute.
+ ///
+ bool isGuaranteedToExecute(Instruction &I);
+
/// pointerInvalidatedByLoop - Return true if the body of this loop may
/// store into the memory location pointed to by V.
///
@@ -357,8 +362,8 @@ void LICM::HoistRegion(DomTreeNode *N) {
bool LICM::canSinkOrHoistInst(Instruction &I) {
// Loads have extra constraints we have to verify before we can hoist them.
if (LoadInst *LI = dyn_cast<LoadInst>(&I)) {
- if (LI->isVolatile())
- return false; // Don't hoist volatile loads!
+ if (!LI->isUnordered())
+ return false; // Don't hoist volatile/atomic loads!
// Loads from constant memory are always safe to move, even if they end up
// in the same alias set as something that ends up being modified.
@@ -461,7 +466,7 @@ void LICM::sink(Instruction &I) {
} else {
// Move the instruction to the start of the exit block, after any PHI
// nodes in it.
- I.moveBefore(ExitBlocks[0]->getFirstNonPHI());
+ I.moveBefore(ExitBlocks[0]->getFirstInsertionPt());
// This instruction is no longer in the AST for the current loop, because
// we just sunk it out of the loop. If we just sunk it into an outer
@@ -504,7 +509,7 @@ void LICM::sink(Instruction &I) {
continue;
// Insert the code after the last PHI node.
- BasicBlock::iterator InsertPt = ExitBlock->getFirstNonPHI();
+ BasicBlock::iterator InsertPt = ExitBlock->getFirstInsertionPt();
// If this is the first exit block processed, just move the original
// instruction, otherwise clone the original instruction and insert
@@ -577,6 +582,10 @@ bool LICM::isSafeToExecuteUnconditionally(Instruction &Inst) {
if (Inst.isSafeToSpeculativelyExecute())
return true;
+ return isGuaranteedToExecute(Inst);
+}
+
+bool LICM::isGuaranteedToExecute(Instruction &Inst) {
// Otherwise we have to check to make sure that the instruction dominates all
// of the exit blocks. If it doesn't, then there is a path out of the loop
// which does not execute this instruction, so we can't hoist it.
@@ -635,7 +644,7 @@ namespace {
for (unsigned i = 0, e = LoopExitBlocks.size(); i != e; ++i) {
BasicBlock *ExitBlock = LoopExitBlocks[i];
Value *LiveInValue = SSA.GetValueInMiddleOfBlock(ExitBlock);
- Instruction *InsertPos = ExitBlock->getFirstNonPHI();
+ Instruction *InsertPos = ExitBlock->getFirstInsertionPt();
StoreInst *NewSI = new StoreInst(LiveInValue, SomePtr, InsertPos);
NewSI->setAlignment(Alignment);
NewSI->setDebugLoc(DL);
@@ -713,34 +722,41 @@ void LICM::PromoteAliasSet(AliasSet &AS) {
// If there is a non-load/store instruction in the loop, we can't promote
// it.
- unsigned InstAlignment;
if (LoadInst *load = dyn_cast<LoadInst>(Use)) {
- assert(!cast<LoadInst>(Use)->isVolatile() && "AST broken");
- InstAlignment = load->getAlignment();
+ assert(!load->isVolatile() && "AST broken");
+ if (!load->isSimple())
+ return;
} else if (StoreInst *store = dyn_cast<StoreInst>(Use)) {
// Stores *of* the pointer are not interesting, only stores *to* the
// pointer.
if (Use->getOperand(1) != ASIV)
continue;
- InstAlignment = store->getAlignment();
- assert(!cast<StoreInst>(Use)->isVolatile() && "AST broken");
+ assert(!store->isVolatile() && "AST broken");
+ if (!store->isSimple())
+ return;
+
+ // Note that we only check GuaranteedToExecute inside the store case
+ // so that we do not introduce stores where they did not exist before
+ // (which would break the LLVM concurrency model).
+
+ // If the alignment of this instruction allows us to specify a more
+ // restrictive (and performant) alignment and if we are sure this
+ // instruction will be executed, update the alignment.
+ // Larger is better, with the exception of 0 being the best alignment.
+ unsigned InstAlignment = store->getAlignment();
+ if ((InstAlignment > Alignment || InstAlignment == 0)
+ && (Alignment != 0))
+ if (isGuaranteedToExecute(*Use)) {
+ GuaranteedToExecute = true;
+ Alignment = InstAlignment;
+ }
+
+ if (!GuaranteedToExecute)
+ GuaranteedToExecute = isGuaranteedToExecute(*Use);
+
} else
return; // Not a load or store.
- // If the alignment of this instruction allows us to specify a more
- // restrictive (and performant) alignment and if we are sure this
- // instruction will be executed, update the alignment.
- // Larger is better, with the exception of 0 being the best alignment.
- if ((InstAlignment > Alignment || InstAlignment == 0)
- && (Alignment != 0))
- if (isSafeToExecuteUnconditionally(*Use)) {
- GuaranteedToExecute = true;
- Alignment = InstAlignment;
- }
-
- if (!GuaranteedToExecute)
- GuaranteedToExecute = isSafeToExecuteUnconditionally(*Use);
-
LoopUses.push_back(Use);
}
}
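
The promotion code above now folds the alignment update into the store case so promotion never introduces stores that did not exist before. The alignment-merging rule itself, as a standalone sketch (assuming LLVM's convention that alignment 0 means the best, ABI-preferred alignment):

    // New promoted alignment after seeing a store with alignment
    // InstAlign that is guaranteed to execute.
    unsigned mergeAlignment(unsigned CurAlign, unsigned InstAlign,
                            bool GuaranteedToExecute) {
      if (!GuaranteedToExecute || CurAlign == 0)
        return CurAlign;                  // 0 is already the best choice
      if (InstAlign > CurAlign || InstAlign == 0)
        return InstAlign;                 // adopt the stricter alignment
      return CurAlign;
    }
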
diff --git a/lib/Transforms/Scalar/LoopIdiomRecognize.cpp b/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
index a0e41d9..ad15cbb 100644
--- a/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
+++ b/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
@@ -267,7 +267,7 @@ bool LoopIdiomRecognize::runOnLoopBlock(BasicBlock *BB, const SCEV *BECount,
/// processLoopStore - See if this store can be promoted to a memset or memcpy.
bool LoopIdiomRecognize::processLoopStore(StoreInst *SI, const SCEV *BECount) {
- if (SI->isVolatile()) return false;
+ if (!SI->isSimple()) return false;
Value *StoredVal = SI->getValueOperand();
Value *StorePtr = SI->getPointerOperand();
@@ -314,7 +314,7 @@ bool LoopIdiomRecognize::processLoopStore(StoreInst *SI, const SCEV *BECount) {
const SCEVAddRecExpr *LoadEv =
dyn_cast<SCEVAddRecExpr>(SE->getSCEV(LI->getOperand(0)));
if (LoadEv && LoadEv->getLoop() == CurLoop && LoadEv->isAffine() &&
- StoreEv->getOperand(1) == LoadEv->getOperand(1) && !LI->isVolatile())
+ StoreEv->getOperand(1) == LoadEv->getOperand(1) && LI->isSimple())
if (processLoopStoreOfLoopLoad(SI, StoreSize, StoreEv, LoadEv, BECount))
return true;
}
@@ -463,7 +463,7 @@ processLoopStridedStore(Value *DestPtr, unsigned StoreSize,
SplatValue = 0;
} else {
// Otherwise, this isn't an idiom we can transform. For example, we can't
- // do anything with a 3-byte store, for example.
+ // do anything with a 3-byte store.
return false;
}
@@ -498,7 +498,7 @@ processLoopStridedStore(Value *DestPtr, unsigned StoreSize,
// The # stored bytes is (BECount+1)*Size. Expand the trip count out to
// pointer size if it isn't already.
- const Type *IntPtr = TD->getIntPtrType(DestPtr->getContext());
+ Type *IntPtr = TD->getIntPtrType(DestPtr->getContext());
BECount = SE->getTruncateOrZeroExtend(BECount, IntPtr);
const SCEV *NumBytesS = SE->getAddExpr(BECount, SE->getConstant(IntPtr, 1),
@@ -604,7 +604,7 @@ processLoopStoreOfLoopLoad(StoreInst *SI, unsigned StoreSize,
// The # stored bytes is (BECount+1)*Size. Expand the trip count out to
// pointer size if it isn't already.
- const Type *IntPtr = TD->getIntPtrType(SI->getContext());
+ Type *IntPtr = TD->getIntPtrType(SI->getContext());
BECount = SE->getTruncateOrZeroExtend(BECount, IntPtr);
const SCEV *NumBytesS = SE->getAddExpr(BECount, SE->getConstant(IntPtr, 1),
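
As the comments in the hunks above note, the byte count expanded for the memset/memcpy is (BECount+1)*StoreSize, because the backedge-taken count is one less than the trip count. In plain arithmetic:

    #include <cstdint>

    // Bytes written by a loop storing StoreSize bytes per iteration
    // whose backedge runs BECount times (trip count BECount + 1).
    uint64_t storedBytes(uint64_t BECount, uint64_t StoreSize) {
      return (BECount + 1) * StoreSize;
    }
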
diff --git a/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/lib/Transforms/Scalar/LoopStrengthReduce.cpp
index 509d026..3e122c2 100644
--- a/lib/Transforms/Scalar/LoopStrengthReduce.cpp
+++ b/lib/Transforms/Scalar/LoopStrengthReduce.cpp
@@ -70,12 +70,27 @@
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ValueHandle.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetLowering.h"
#include <algorithm>
using namespace llvm;
+namespace llvm {
+cl::opt<bool> EnableNested(
+ "enable-lsr-nested", cl::Hidden, cl::desc("Enable LSR on nested loops"));
+
+cl::opt<bool> EnableRetry(
+ "enable-lsr-retry", cl::Hidden, cl::desc("Enable LSR retry"));
+
+// Temporary flag to clean up congruent phis after LSR phi expansion.
+// It's currently disabled until we can determine whether it's truly useful or
+// not. The flag should be removed after the v3.0 release.
+cl::opt<bool> EnablePhiElim(
+ "enable-lsr-phielim", cl::Hidden, cl::desc("Enable LSR phi elimination"));
+}
+
namespace {
/// RegSortData - This class holds data which is used to order reuse candidates.
@@ -219,7 +234,7 @@ struct Formula {
void InitialMatch(const SCEV *S, Loop *L, ScalarEvolution &SE);
unsigned getNumRegs() const;
- const Type *getType() const;
+ Type *getType() const;
void DeleteBaseReg(const SCEV *&S);
@@ -319,7 +334,7 @@ unsigned Formula::getNumRegs() const {
/// getType - Return the type of this formula, if it has one, or null
/// otherwise. This type is meaningless except for the bit size.
-const Type *Formula::getType() const {
+Type *Formula::getType() const {
return !BaseRegs.empty() ? BaseRegs.front()->getType() :
ScaledReg ? ScaledReg->getType() :
AM.BaseGV ? AM.BaseGV->getType() :
@@ -397,7 +412,7 @@ void Formula::dump() const {
/// isAddRecSExtable - Return true if the given addrec can be sign-extended
/// without changing its value.
static bool isAddRecSExtable(const SCEVAddRecExpr *AR, ScalarEvolution &SE) {
- const Type *WideTy =
+ Type *WideTy =
IntegerType::get(SE.getContext(), SE.getTypeSizeInBits(AR->getType()) + 1);
return isa<SCEVAddRecExpr>(SE.getSignExtendExpr(AR, WideTy));
}
@@ -405,7 +420,7 @@ static bool isAddRecSExtable(const SCEVAddRecExpr *AR, ScalarEvolution &SE) {
/// isAddSExtable - Return true if the given add can be sign-extended
/// without changing its value.
static bool isAddSExtable(const SCEVAddExpr *A, ScalarEvolution &SE) {
- const Type *WideTy =
+ Type *WideTy =
IntegerType::get(SE.getContext(), SE.getTypeSizeInBits(A->getType()) + 1);
return isa<SCEVAddExpr>(SE.getSignExtendExpr(A, WideTy));
}
@@ -413,7 +428,7 @@ static bool isAddSExtable(const SCEVAddExpr *A, ScalarEvolution &SE) {
/// isMulSExtable - Return true if the given mul can be sign-extended
/// without changing its value.
static bool isMulSExtable(const SCEVMulExpr *M, ScalarEvolution &SE) {
- const Type *WideTy =
+ Type *WideTy =
IntegerType::get(SE.getContext(),
SE.getTypeSizeInBits(M->getType()) * M->getNumOperands());
return isa<SCEVMulExpr>(SE.getSignExtendExpr(M, WideTy));
@@ -594,8 +609,8 @@ static bool isAddressUse(Instruction *Inst, Value *OperandVal) {
}
/// getAccessType - Return the type of the memory being accessed.
-static const Type *getAccessType(const Instruction *Inst) {
- const Type *AccessTy = Inst->getType();
+static Type *getAccessType(const Instruction *Inst) {
+ Type *AccessTy = Inst->getType();
if (const StoreInst *SI = dyn_cast<StoreInst>(Inst))
AccessTy = SI->getOperand(0)->getType();
else if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) {
@@ -614,7 +629,7 @@ static const Type *getAccessType(const Instruction *Inst) {
// All pointers have the same requirements, so canonicalize them to an
// arbitrary pointer type to minimize variation.
- if (const PointerType *PTy = dyn_cast<PointerType>(AccessTy))
+ if (PointerType *PTy = dyn_cast<PointerType>(AccessTy))
AccessTy = PointerType::get(IntegerType::get(PTy->getContext(), 1),
PTy->getAddressSpace());
@@ -670,6 +685,21 @@ public:
void Loose();
+#ifndef NDEBUG
+ // Once any of the metrics loses, they must all remain losers.
+ bool isValid() {
+ return ((NumRegs | AddRecCost | NumIVMuls | NumBaseAdds
+ | ImmCost | SetupCost) != ~0u)
+ || ((NumRegs & AddRecCost & NumIVMuls & NumBaseAdds
+ & ImmCost & SetupCost) == ~0u);
+ }
+#endif
+
+ bool isLoser() {
+ assert(isValid() && "invalid cost");
+ return NumRegs == ~0u;
+ }
+
void RateFormula(const Formula &F,
SmallPtrSet<const SCEV *, 16> &Regs,
const DenseSet<const SCEV *> &VisitedRegs,
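
The isValid/isLoser pair encodes a saturating cost: once Loose() sets any metric to ~0u it sets all of them, so isLoser() only needs to test one field. A self-contained version of the invariant (same bit trick as the hunk):

    #include <cstdint>

    struct CostSketch {
      uint32_t NumRegs, AddRecCost, NumIVMuls, NumBaseAdds, ImmCost,
               SetupCost;
      // Valid states: either the OR of all metrics is not all-ones
      // (hence no single metric is ~0u) or their AND is all-ones
      // (every metric is ~0u).
      bool isValid() const {
        return ((NumRegs | AddRecCost | NumIVMuls | NumBaseAdds
                 | ImmCost | SetupCost) != ~0u)
            || ((NumRegs & AddRecCost & NumIVMuls & NumBaseAdds
                 & ImmCost & SetupCost) == ~0u);
      }
      bool isLoser() const { return NumRegs == ~0u; }
    };
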
@@ -702,34 +732,48 @@ void Cost::RateRegister(const SCEV *Reg,
if (AR->getLoop() == L)
AddRecCost += 1; /// TODO: This should be a function of the stride.
- // If this is an addrec for a loop that's already been visited by LSR,
- // don't second-guess its addrec phi nodes. LSR isn't currently smart
- // enough to reason about more than one loop at a time. Consider these
- // registers free and leave them alone.
- else if (L->contains(AR->getLoop()) ||
+ // If this is an addrec for another loop, don't second-guess its addrec phi
+ // nodes. LSR isn't currently smart enough to reason about more than one
+ // loop at a time. LSR has either already run on inner loops or will not
+ // run on other loops, and cannot be expected to change sibling loops. If
+ // the AddRec exists, consider its register free and leave it alone.
+ // Otherwise, do not consider this formula at all.
+ // FIXME: Why do we need to generate such formulae?
+ else if (!EnableNested || L->contains(AR->getLoop()) ||
(!AR->getLoop()->contains(L) &&
DT.dominates(L->getHeader(), AR->getLoop()->getHeader()))) {
for (BasicBlock::iterator I = AR->getLoop()->getHeader()->begin();
- PHINode *PN = dyn_cast<PHINode>(I); ++I)
+ PHINode *PN = dyn_cast<PHINode>(I); ++I) {
if (SE.isSCEVable(PN->getType()) &&
(SE.getEffectiveSCEVType(PN->getType()) ==
SE.getEffectiveSCEVType(AR->getType())) &&
SE.getSCEV(PN) == AR)
return;
-
+ }
+ if (!EnableNested) {
+ Loose();
+ return;
+ }
// If this isn't one of the addrecs that the loop already has, it
// would require a costly new phi and add. TODO: This isn't
// precisely modeled right now.
++NumBaseAdds;
- if (!Regs.count(AR->getStart()))
+ if (!Regs.count(AR->getStart())) {
RateRegister(AR->getStart(), Regs, L, SE, DT);
+ if (isLoser())
+ return;
+ }
}
// Add the step value register, if it needs one.
// TODO: The non-affine case isn't precisely modeled here.
- if (!AR->isAffine() || !isa<SCEVConstant>(AR->getOperand(1)))
- if (!Regs.count(AR->getStart()))
+ if (!AR->isAffine() || !isa<SCEVConstant>(AR->getOperand(1))) {
+ if (!Regs.count(AR->getOperand(1))) {
RateRegister(AR->getOperand(1), Regs, L, SE, DT);
+ if (isLoser())
+ return;
+ }
+ }
}
++NumRegs;
@@ -769,6 +813,8 @@ void Cost::RateFormula(const Formula &F,
return;
}
RatePrimaryRegister(ScaledReg, Regs, L, SE, DT);
+ if (isLoser())
+ return;
}
for (SmallVectorImpl<const SCEV *>::const_iterator I = F.BaseRegs.begin(),
E = F.BaseRegs.end(); I != E; ++I) {
@@ -778,6 +824,8 @@ void Cost::RateFormula(const Formula &F,
return;
}
RatePrimaryRegister(BaseReg, Regs, L, SE, DT);
+ if (isLoser())
+ return;
}
// Determine how many (unfolded) adds we'll need inside the loop.
@@ -795,6 +843,7 @@ void Cost::RateFormula(const Formula &F,
else if (Offset != 0)
ImmCost += APInt(64, Offset, true).getMinSignedBits();
}
+ assert(isValid() && "invalid cost");
}
/// Loose - Set this cost to a losing value.
@@ -980,7 +1029,7 @@ public:
};
KindType Kind;
- const Type *AccessTy;
+ Type *AccessTy;
SmallVector<int64_t, 8> Offsets;
int64_t MinOffset;
@@ -995,7 +1044,7 @@ public:
/// this LSRUse. FindUseWithSimilarFormula can't consider uses with different
/// max fixup widths to be equivalent, because the narrower one may be relying
/// on the implicit truncation to truncate away bogus bits.
- const Type *WidestFixupType;
+ Type *WidestFixupType;
/// Formulae - A list of ways to build a value that can satisfy this user.
/// After the list is populated, one of these is selected heuristically and
@@ -1005,7 +1054,7 @@ public:
/// Regs - The set of register candidates used by all formulae in this LSRUse.
SmallPtrSet<const SCEV *, 4> Regs;
- LSRUse(KindType K, const Type *T) : Kind(K), AccessTy(T),
+ LSRUse(KindType K, Type *T) : Kind(K), AccessTy(T),
MinOffset(INT64_MAX),
MaxOffset(INT64_MIN),
AllFixupsOutsideLoop(true),
@@ -1127,7 +1176,7 @@ void LSRUse::dump() const {
/// be completely folded into the user instruction at isel time. This includes
/// address-mode folding and special icmp tricks.
static bool isLegalUse(const TargetLowering::AddrMode &AM,
- LSRUse::KindType Kind, const Type *AccessTy,
+ LSRUse::KindType Kind, Type *AccessTy,
const TargetLowering *TLI) {
switch (Kind) {
case LSRUse::Address:
@@ -1156,7 +1205,7 @@ static bool isLegalUse(const TargetLowering::AddrMode &AM,
// If we have low-level target information, ask the target if it can fold an
// integer immediate on an icmp.
if (AM.BaseOffs != 0) {
- if (TLI) return TLI->isLegalICmpImmediate(-AM.BaseOffs);
+ if (TLI) return TLI->isLegalICmpImmediate(-(uint64_t)AM.BaseOffs);
return false;
}
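
The -(uint64_t) cast here (and the matching one in Expand further down) sidesteps signed-negation overflow: negating INT64_MIN as a signed value is undefined behavior, while negating in unsigned arithmetic wraps cleanly. A minimal illustration:

    #include <cstdint>

    // Well-defined negation of a 64-bit offset, even for INT64_MIN,
    // where a plain -v on the signed type would overflow.
    uint64_t negateOffset(int64_t v) {
      return -(uint64_t)v;
    }
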
@@ -1176,7 +1225,7 @@ static bool isLegalUse(const TargetLowering::AddrMode &AM,
static bool isLegalUse(TargetLowering::AddrMode AM,
int64_t MinOffset, int64_t MaxOffset,
- LSRUse::KindType Kind, const Type *AccessTy,
+ LSRUse::KindType Kind, Type *AccessTy,
const TargetLowering *TLI) {
// Check for overflow.
if (((int64_t)((uint64_t)AM.BaseOffs + MinOffset) > AM.BaseOffs) !=
@@ -1198,7 +1247,7 @@ static bool isLegalUse(TargetLowering::AddrMode AM,
static bool isAlwaysFoldable(int64_t BaseOffs,
GlobalValue *BaseGV,
bool HasBaseReg,
- LSRUse::KindType Kind, const Type *AccessTy,
+ LSRUse::KindType Kind, Type *AccessTy,
const TargetLowering *TLI) {
// Fast-path: zero is always foldable.
if (BaseOffs == 0 && !BaseGV) return true;
@@ -1224,7 +1273,7 @@ static bool isAlwaysFoldable(int64_t BaseOffs,
static bool isAlwaysFoldable(const SCEV *S,
int64_t MinOffset, int64_t MaxOffset,
bool HasBaseReg,
- LSRUse::KindType Kind, const Type *AccessTy,
+ LSRUse::KindType Kind, Type *AccessTy,
const TargetLowering *TLI,
ScalarEvolution &SE) {
// Fast-path: zero is always foldable.
@@ -1299,7 +1348,7 @@ class LSRInstance {
SmallSetVector<int64_t, 8> Factors;
/// Types - Interesting use types, to facilitate truncation reuse.
- SmallSetVector<const Type *, 4> Types;
+ SmallSetVector<Type *, 4> Types;
/// Fixups - The list of operands which are to be replaced.
SmallVector<LSRFixup, 16> Fixups;
@@ -1330,11 +1379,11 @@ class LSRInstance {
UseMapTy UseMap;
bool reconcileNewOffset(LSRUse &LU, int64_t NewOffset, bool HasBaseReg,
- LSRUse::KindType Kind, const Type *AccessTy);
+ LSRUse::KindType Kind, Type *AccessTy);
std::pair<size_t, int64_t> getUse(const SCEV *&Expr,
LSRUse::KindType Kind,
- const Type *AccessTy);
+ Type *AccessTy);
void DeleteUse(LSRUse &LU, size_t LUIdx);
@@ -1426,7 +1475,8 @@ void LSRInstance::OptimizeShadowIV() {
IVUsers::const_iterator CandidateUI = UI;
++UI;
Instruction *ShadowUse = CandidateUI->getUser();
- const Type *DestTy = NULL;
+ Type *DestTy = NULL;
+ bool IsSigned = false;
/* If shadow use is a int->float cast then insert a second IV
to eliminate this cast.
@@ -1440,10 +1490,14 @@ void LSRInstance::OptimizeShadowIV() {
for (unsigned i = 0; i < n; ++i, ++d)
foo(d);
*/
- if (UIToFPInst *UCast = dyn_cast<UIToFPInst>(CandidateUI->getUser()))
+ if (UIToFPInst *UCast = dyn_cast<UIToFPInst>(CandidateUI->getUser())) {
+ IsSigned = false;
DestTy = UCast->getDestTy();
- else if (SIToFPInst *SCast = dyn_cast<SIToFPInst>(CandidateUI->getUser()))
+ }
+ else if (SIToFPInst *SCast = dyn_cast<SIToFPInst>(CandidateUI->getUser())) {
+ IsSigned = true;
DestTy = SCast->getDestTy();
+ }
if (!DestTy) continue;
if (TLI) {
@@ -1457,7 +1511,7 @@ void LSRInstance::OptimizeShadowIV() {
if (!PH) continue;
if (PH->getNumIncomingValues() != 2) continue;
- const Type *SrcTy = PH->getType();
+ Type *SrcTy = PH->getType();
int Mantissa = DestTy->getFPMantissaWidth();
if (Mantissa == -1) continue;
if ((int)SE.getTypeSizeInBits(SrcTy) > Mantissa)
@@ -1474,7 +1528,9 @@ void LSRInstance::OptimizeShadowIV() {
ConstantInt *Init = dyn_cast<ConstantInt>(PH->getIncomingValue(Entry));
if (!Init) continue;
- Constant *NewInit = ConstantFP::get(DestTy, Init->getZExtValue());
+ Constant *NewInit = ConstantFP::get(DestTy, IsSigned ?
+ (double)Init->getSExtValue() :
+ (double)Init->getZExtValue());
BinaryOperator *Incr =
dyn_cast<BinaryOperator>(PH->getIncomingValue(Latch));
@@ -1776,7 +1832,7 @@ LSRInstance::OptimizeLoopTermCond() {
if (!TLI)
goto decline_post_inc;
// Check for possible scaled-address reuse.
- const Type *AccessTy = getAccessType(UI->getUser());
+ Type *AccessTy = getAccessType(UI->getUser());
TargetLowering::AddrMode AM;
AM.Scale = C->getSExtValue();
if (TLI->isLegalAddressingMode(AM, AccessTy))
@@ -1840,10 +1896,10 @@ LSRInstance::OptimizeLoopTermCond() {
/// return true.
bool
LSRInstance::reconcileNewOffset(LSRUse &LU, int64_t NewOffset, bool HasBaseReg,
- LSRUse::KindType Kind, const Type *AccessTy) {
+ LSRUse::KindType Kind, Type *AccessTy) {
int64_t NewMinOffset = LU.MinOffset;
int64_t NewMaxOffset = LU.MaxOffset;
- const Type *NewAccessTy = AccessTy;
+ Type *NewAccessTy = AccessTy;
// Check for a mismatched kind. It's tempting to collapse mismatched kinds to
// something conservative, however this can pessimize in the case that one of
@@ -1882,7 +1938,7 @@ LSRInstance::reconcileNewOffset(LSRUse &LU, int64_t NewOffset, bool HasBaseReg,
/// Either reuse an existing use or create a new one, as needed.
std::pair<size_t, int64_t>
LSRInstance::getUse(const SCEV *&Expr,
- LSRUse::KindType Kind, const Type *AccessTy) {
+ LSRUse::KindType Kind, Type *AccessTy) {
const SCEV *Copy = Expr;
int64_t Offset = ExtractImmediate(Expr, SE);
@@ -2044,7 +2100,7 @@ void LSRInstance::CollectFixupsAndInitialFormulae() {
LF.PostIncLoops = UI->getPostIncLoops();
LSRUse::KindType Kind = LSRUse::Basic;
- const Type *AccessTy = 0;
+ Type *AccessTy = 0;
if (isAddressUse(LF.UserInst, LF.OperandValToReplace)) {
Kind = LSRUse::Address;
AccessTy = getAccessType(LF.UserInst);
@@ -2464,7 +2520,7 @@ void LSRInstance::GenerateICmpZeroScales(LSRUse &LU, unsigned LUIdx,
if (LU.Kind != LSRUse::ICmpZero) return;
// Determine the integer type for the base formula.
- const Type *IntTy = Base.getType();
+ Type *IntTy = Base.getType();
if (!IntTy) return;
if (SE.getTypeSizeInBits(IntTy) > 64) return;
@@ -2538,7 +2594,7 @@ void LSRInstance::GenerateICmpZeroScales(LSRUse &LU, unsigned LUIdx,
/// scaled-offset address modes, for example.
void LSRInstance::GenerateScales(LSRUse &LU, unsigned LUIdx, Formula Base) {
// Determine the integer type for the base formula.
- const Type *IntTy = Base.getType();
+ Type *IntTy = Base.getType();
if (!IntTy) return;
// If this Formula already has a scaled register, we can't add another one.
@@ -2598,13 +2654,13 @@ void LSRInstance::GenerateTruncates(LSRUse &LU, unsigned LUIdx, Formula Base) {
if (Base.AM.BaseGV) return;
// Determine the integer type for the base formula.
- const Type *DstTy = Base.getType();
+ Type *DstTy = Base.getType();
if (!DstTy) return;
DstTy = SE.getEffectiveSCEVType(DstTy);
- for (SmallSetVector<const Type *, 4>::const_iterator
+ for (SmallSetVector<Type *, 4>::const_iterator
I = Types.begin(), E = Types.end(); I != E; ++I) {
- const Type *SrcTy = *I;
+ Type *SrcTy = *I;
if (SrcTy != DstTy && TLI->isTruncateFree(SrcTy, DstTy)) {
Formula F = Base;
@@ -2741,7 +2797,7 @@ void LSRInstance::GenerateCrossUseConstantOffsets() {
int64_t Imm = WI.Imm;
const SCEV *OrigReg = WI.OrigReg;
- const Type *IntTy = SE.getEffectiveSCEVType(OrigReg->getType());
+ Type *IntTy = SE.getEffectiveSCEVType(OrigReg->getType());
const SCEV *NegImmS = SE.getSCEV(ConstantInt::get(IntTy, -(uint64_t)Imm));
unsigned BitWidth = SE.getTypeSizeInBits(IntTy);
@@ -3275,6 +3331,9 @@ retry:
skip:;
}
+ if (!EnableRetry && !AnySatisfiedReqRegs)
+ return;
+
// If none of the formulae had all of the required registers, relax the
// constraint so that we don't exclude all formulae.
if (!AnySatisfiedReqRegs) {
@@ -3298,6 +3357,10 @@ void LSRInstance::Solve(SmallVectorImpl<const Formula *> &Solution) const {
// SolveRecurse does all the work.
SolveRecurse(Solution, SolutionCost, Workspace, CurCost,
CurRegs, VisitedRegs);
+ if (Solution.empty()) {
+ DEBUG(dbgs() << "\nNo Satisfactory Solution\n");
+ return;
+ }
// Ok, we've now made all our decisions.
DEBUG(dbgs() << "\n"
@@ -3416,6 +3479,9 @@ LSRInstance::AdjustInsertPositionForExpand(BasicBlock::iterator IP,
// Don't insert instructions before PHI nodes.
while (isa<PHINode>(IP)) ++IP;
+ // Ignore landingpad instructions.
+ while (isa<LandingPadInst>(IP)) ++IP;
+
// Ignore debug intrinsics.
while (isa<DbgInfoIntrinsic>(IP)) ++IP;
@@ -3440,9 +3506,9 @@ Value *LSRInstance::Expand(const LSRFixup &LF,
Rewriter.setPostInc(LF.PostIncLoops);
// This is the type that the user actually needs.
- const Type *OpTy = LF.OperandValToReplace->getType();
+ Type *OpTy = LF.OperandValToReplace->getType();
// This will be the type that we'll initially expand to.
- const Type *Ty = F.getType();
+ Type *Ty = F.getType();
if (!Ty)
// No type known; just expand directly to the ultimate type.
Ty = OpTy;
@@ -3450,7 +3516,7 @@ Value *LSRInstance::Expand(const LSRFixup &LF,
// Expand directly to the ultimate type if it's the right size.
Ty = OpTy;
// This is the type to do integer arithmetic in.
- const Type *IntTy = SE.getEffectiveSCEVType(Ty);
+ Type *IntTy = SE.getEffectiveSCEVType(Ty);
// Build up a list of operands to add together to form the full base.
SmallVector<const SCEV *, 8> Ops;
@@ -3527,7 +3593,7 @@ Value *LSRInstance::Expand(const LSRFixup &LF,
// The other interesting way of "folding" with an ICmpZero is to use a
// negated immediate.
if (!ICmpScaledV)
- ICmpScaledV = ConstantInt::get(IntTy, -Offset);
+ ICmpScaledV = ConstantInt::get(IntTy, -(uint64_t)Offset);
else {
Ops.push_back(SE.getUnknown(ICmpScaledV));
ICmpScaledV = ConstantInt::get(IntTy, Offset);
@@ -3611,10 +3677,20 @@ void LSRInstance::RewriteForPHI(PHINode *PN,
// users.
if (e != 1 && BB->getTerminator()->getNumSuccessors() > 1 &&
!isa<IndirectBrInst>(BB->getTerminator())) {
- Loop *PNLoop = LI.getLoopFor(PN->getParent());
- if (!PNLoop || PN->getParent() != PNLoop->getHeader()) {
+ BasicBlock *Parent = PN->getParent();
+ Loop *PNLoop = LI.getLoopFor(Parent);
+ if (!PNLoop || Parent != PNLoop->getHeader()) {
// Split the critical edge.
- BasicBlock *NewBB = SplitCriticalEdge(BB, PN->getParent(), P);
+ BasicBlock *NewBB = 0;
+ if (!Parent->isLandingPad()) {
+ NewBB = SplitCriticalEdge(BB, Parent, P,
+ /*MergeIdenticalEdges=*/true,
+ /*DontDeleteUselessPhis=*/true);
+ } else {
+ SmallVector<BasicBlock*, 2> NewBBs;
+ SplitLandingPadPredecessors(Parent, BB, "", "", P, NewBBs);
+ NewBB = NewBBs[0];
+ }
// If PN is outside of the loop and BB is in the loop, we want to
// move the block to be immediately before the PHI block, not
@@ -3637,7 +3713,7 @@ void LSRInstance::RewriteForPHI(PHINode *PN,
Value *FullV = Expand(LF, F, BB->getTerminator(), Rewriter, DeadInsts);
// If this is reuse-by-noop-cast, insert the noop cast.
- const Type *OpTy = LF.OperandValToReplace->getType();
+ Type *OpTy = LF.OperandValToReplace->getType();
if (FullV->getType() != OpTy)
FullV =
CastInst::Create(CastInst::getCastOpcode(FullV, false,
@@ -3667,7 +3743,7 @@ void LSRInstance::Rewrite(const LSRFixup &LF,
Value *FullV = Expand(LF, F, LF.UserInst, Rewriter, DeadInsts);
// If this is reuse-by-noop-cast, insert the noop cast.
- const Type *OpTy = LF.OperandValToReplace->getType();
+ Type *OpTy = LF.OperandValToReplace->getType();
if (FullV->getType() != OpTy) {
Instruction *Cast =
CastInst::Create(CastInst::getCastOpcode(FullV, false, OpTy, false),
@@ -3700,6 +3776,7 @@ LSRInstance::ImplementSolution(const SmallVectorImpl<const Formula *> &Solution,
SCEVExpander Rewriter(SE, "lsr");
Rewriter.disableCanonicalMode();
+ Rewriter.enableLSRMode();
Rewriter.setIVIncInsertPos(L, IVIncInsertPos);
// Expand the new value definitions and update the users.
@@ -3740,6 +3817,23 @@ LSRInstance::LSRInstance(const TargetLowering *tli, Loop *l, Pass *P)
OptimizeShadowIV();
OptimizeLoopTermCond();
+ // If loop preparation eliminates all interesting IV users, bail.
+ if (IU.empty()) return;
+
+ // Skip nested loops until we can model them better with formulae.
+ if (!EnableNested && !L->empty()) {
+
+ if (EnablePhiElim) {
+ // Remove any extra phis created by processing inner loops.
+ SmallVector<WeakVH, 16> DeadInsts;
+ SCEVExpander Rewriter(SE, "lsr");
+ Changed |= Rewriter.replaceCongruentIVs(L, &DT, DeadInsts);
+ Changed |= DeleteTriviallyDeadInstructions(DeadInsts);
+ }
+ DEBUG(dbgs() << "LSR skipping outer loop " << *L << "\n");
+ return;
+ }
+
// Start collecting data and preparing for the solver.
CollectInterestingTypesAndFactors();
CollectFixupsAndInitialFormulae();
@@ -3763,6 +3857,9 @@ LSRInstance::LSRInstance(const TargetLowering *tli, Loop *l, Pass *P)
Types.clear();
RegUses.clear();
+ if (Solution.empty())
+ return;
+
#ifndef NDEBUG
// Formulae should be legal.
for (SmallVectorImpl<LSRUse>::const_iterator I = Uses.begin(),
@@ -3778,6 +3875,14 @@ LSRInstance::LSRInstance(const TargetLowering *tli, Loop *l, Pass *P)
// Now that we've decided what we want, make it so.
ImplementSolution(Solution, P);
+
+ if (EnablePhiElim) {
+ // Remove any extra phis created by processing inner loops.
+ SmallVector<WeakVH, 16> DeadInsts;
+ SCEVExpander Rewriter(SE, "lsr");
+ Changed |= Rewriter.replaceCongruentIVs(L, &DT, DeadInsts);
+ Changed |= DeleteTriviallyDeadInstructions(DeadInsts);
+ }
}
void LSRInstance::print_factors_and_types(raw_ostream &OS) const {
@@ -3793,7 +3898,7 @@ void LSRInstance::print_factors_and_types(raw_ostream &OS) const {
OS << '*' << *I;
}
- for (SmallSetVector<const Type *, 4>::const_iterator
+ for (SmallSetVector<Type *, 4>::const_iterator
I = Types.begin(), E = Types.end(); I != E; ++I) {
if (!First) OS << ", ";
First = false;
diff --git a/lib/Transforms/Scalar/LoopUnrollPass.cpp b/lib/Transforms/Scalar/LoopUnrollPass.cpp
index fef6bc3..91395b2 100644
--- a/lib/Transforms/Scalar/LoopUnrollPass.cpp
+++ b/lib/Transforms/Scalar/LoopUnrollPass.cpp
@@ -22,6 +22,7 @@
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils/UnrollLoop.h"
+#include "llvm/Target/TargetData.h"
#include <climits>
using namespace llvm;
@@ -39,6 +40,11 @@ UnrollAllowPartial("unroll-allow-partial", cl::init(false), cl::Hidden,
cl::desc("Allows loops to be partially unrolled until "
"-unroll-threshold loop size is reached."));
+// Temporary flag to be removed in 3.0
+static cl::opt<bool>
+NoSCEVUnroll("disable-unroll-scev", cl::init(false), cl::Hidden,
+ cl::desc("Use ScalarEvolution to analyze loop trip counts for unrolling"));
+
namespace {
class LoopUnroll : public LoopPass {
public:
@@ -49,7 +55,7 @@ namespace {
CurrentAllowPartial = (P == -1) ? UnrollAllowPartial : (bool)P;
UserThreshold = (T != -1) || (UnrollThreshold.getNumOccurrences() > 0);
-
+
initializeLoopUnrollPass(*PassRegistry::getPassRegistry());
}
@@ -57,11 +63,11 @@ namespace {
/// that the loop unroll should be performed regardless of how much
/// code expansion would result.
static const unsigned NoThreshold = UINT_MAX;
-
+
// Threshold to use when optsize is specified (and there is no
// explicit -unroll-threshold).
static const unsigned OptSizeUnrollThreshold = 50;
-
+
unsigned CurrentCount;
unsigned CurrentThreshold;
bool CurrentAllowPartial;
@@ -79,6 +85,7 @@ namespace {
AU.addPreservedID(LoopSimplifyID);
AU.addRequiredID(LCSSAID);
AU.addPreservedID(LCSSAID);
+ AU.addRequired<ScalarEvolution>();
AU.addPreserved<ScalarEvolution>();
// FIXME: Loop unroll requires LCSSA. And LCSSA requires dom info.
// If loop unroll does not preserve dom info then LCSSA pass on next
@@ -101,45 +108,62 @@ Pass *llvm::createLoopUnrollPass(int Threshold, int Count, int AllowPartial) {
}
/// ApproximateLoopSize - Approximate the size of the loop.
-static unsigned ApproximateLoopSize(const Loop *L, unsigned &NumCalls) {
+static unsigned ApproximateLoopSize(const Loop *L, unsigned &NumCalls,
+ const TargetData *TD) {
CodeMetrics Metrics;
for (Loop::block_iterator I = L->block_begin(), E = L->block_end();
I != E; ++I)
- Metrics.analyzeBasicBlock(*I);
+ Metrics.analyzeBasicBlock(*I, TD);
NumCalls = Metrics.NumInlineCandidates;
-
+
unsigned LoopSize = Metrics.NumInsts;
-
+
// Don't allow an estimate of size zero. This would allow unrolling of loops
// with huge iteration counts, which is a compile time problem even if it's
// not a problem for code quality.
if (LoopSize == 0) LoopSize = 1;
-
+
return LoopSize;
}
bool LoopUnroll::runOnLoop(Loop *L, LPPassManager &LPM) {
LoopInfo *LI = &getAnalysis<LoopInfo>();
+ ScalarEvolution *SE = &getAnalysis<ScalarEvolution>();
BasicBlock *Header = L->getHeader();
DEBUG(dbgs() << "Loop Unroll: F[" << Header->getParent()->getName()
<< "] Loop %" << Header->getName() << "\n");
(void)Header;
-
+
// Determine the current unrolling threshold. While this is normally set
// from UnrollThreshold, it is overridden to a smaller value if the current
// function is marked as optimize-for-size, and the unroll threshold was
// not user specified.
unsigned Threshold = CurrentThreshold;
- if (!UserThreshold &&
+ if (!UserThreshold &&
Header->getParent()->hasFnAttr(Attribute::OptimizeForSize))
Threshold = OptSizeUnrollThreshold;
- // Find trip count
- unsigned TripCount = L->getSmallConstantTripCount();
- unsigned Count = CurrentCount;
-
+ // Find the trip count, and the trip multiple if the count is not available.
+ unsigned TripCount = 0;
+ unsigned TripMultiple = 1;
+ if (!NoSCEVUnroll) {
+ // Find "latch trip count". UnrollLoop assumes that control cannot exit
+ // via the loop latch on any iteration prior to TripCount. The loop may exit
+ // early via an earlier branch.
+ BasicBlock *LatchBlock = L->getLoopLatch();
+ if (LatchBlock) {
+ TripCount = SE->getSmallConstantTripCount(L, LatchBlock);
+ TripMultiple = SE->getSmallConstantTripMultiple(L, LatchBlock);
+ }
+ }
+ else {
+ TripCount = L->getSmallConstantTripCount();
+ if (TripCount == 0)
+ TripMultiple = L->getSmallConstantTripMultiple();
+ }
// Automatically select an unroll count.
+ unsigned Count = CurrentCount;
if (Count == 0) {
// Conservative heuristic: if we know the trip count, see if we can
// completely unroll (subject to the threshold, checked below); otherwise
@@ -152,8 +176,9 @@ bool LoopUnroll::runOnLoop(Loop *L, LPPassManager &LPM) {
// Enforce the threshold.
if (Threshold != NoThreshold) {
+ const TargetData *TD = getAnalysisIfAvailable<TargetData>();
unsigned NumInlineCandidates;
- unsigned LoopSize = ApproximateLoopSize(L, NumInlineCandidates);
+ unsigned LoopSize = ApproximateLoopSize(L, NumInlineCandidates, TD);
DEBUG(dbgs() << " Loop Size = " << LoopSize << "\n");
if (NumInlineCandidates != 0) {
DEBUG(dbgs() << " Not unrolling loop with inlinable calls.\n");
@@ -182,12 +207,8 @@ bool LoopUnroll::runOnLoop(Loop *L, LPPassManager &LPM) {
}
// Unroll the loop.
- Function *F = L->getHeader()->getParent();
- if (!UnrollLoop(L, Count, LI, &LPM))
+ if (!UnrollLoop(L, Count, TripCount, TripMultiple, LI, &LPM))
return false;
- // FIXME: Reconstruct dom info, because it is not preserved properly.
- if (DominatorTree *DT = getAnalysisIfAvailable<DominatorTree>())
- DT->runOnFunction(*F);
return true;
}
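
The new SCEV path queries both a latch trip count and a trip multiple: when the exact count is unknown but provably a multiple of M, unrolling by a factor that divides M leaves no remainder iterations. A sketch of that decision under the assumed meaning of the two values (not the UnrollLoop API):

    // TripCount == 0 means "unknown"; TripMultiple is then a proven
    // divisor of the real trip count (at least 1). Count is the
    // unroll factor, assumed nonzero.
    bool needsRemainderLoop(unsigned TripCount, unsigned TripMultiple,
                            unsigned Count) {
      if (TripCount != 0)
        return TripCount % Count != 0;
      return TripMultiple % Count != 0;
    }
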
diff --git a/lib/Transforms/Scalar/LoopUnswitch.cpp b/lib/Transforms/Scalar/LoopUnswitch.cpp
index 840c4b6..458949c 100644
--- a/lib/Transforms/Scalar/LoopUnswitch.cpp
+++ b/lib/Transforms/Scalar/LoopUnswitch.cpp
@@ -492,7 +492,7 @@ void LoopUnswitch::EmitPreheaderBranchOnCondition(Value *LIC, Constant *Val,
Value *BranchVal = LIC;
if (!isa<ConstantInt>(Val) ||
Val->getType() != Type::getInt1Ty(LIC->getContext()))
- BranchVal = new ICmpInst(InsertPt, ICmpInst::ICMP_EQ, LIC, Val, "tmp");
+ BranchVal = new ICmpInst(InsertPt, ICmpInst::ICMP_EQ, LIC, Val);
else if (Val != ConstantInt::getTrue(Val->getContext()))
// We want to enter the new loop when the condition is true.
std::swap(TrueDest, FalseDest);
@@ -561,10 +561,17 @@ void LoopUnswitch::SplitExitEdges(Loop *L,
BasicBlock *ExitBlock = ExitBlocks[i];
SmallVector<BasicBlock *, 4> Preds(pred_begin(ExitBlock),
pred_end(ExitBlock));
+
// Although SplitBlockPredecessors doesn't preserve loop-simplify in
// general, if we call it on all predecessors of all exits then it does.
- SplitBlockPredecessors(ExitBlock, Preds.data(), Preds.size(),
- ".us-lcssa", this);
+ if (!ExitBlock->isLandingPad()) {
+ SplitBlockPredecessors(ExitBlock, Preds.data(), Preds.size(),
+ ".us-lcssa", this);
+ } else {
+ SmallVector<BasicBlock*, 2> NewBBs;
+ SplitLandingPadPredecessors(ExitBlock, Preds, ".us-lcssa", ".us-lcssa",
+ this, NewBBs);
+ }
}
}
@@ -632,7 +639,7 @@ void LoopUnswitch::UnswitchNontrivialCondition(Value *LIC, Constant *Val,
// as well.
ParentLoop->addBasicBlockToLoop(NewBlocks[0], LI->getBase());
}
-
+
for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i) {
BasicBlock *NewExit = cast<BasicBlock>(VMap[ExitBlocks[i]]);
// The new exit block should be in the same loop as the old one.
@@ -653,6 +660,19 @@ void LoopUnswitch::UnswitchNontrivialCondition(Value *LIC, Constant *Val,
if (It != VMap.end()) V = It->second;
PN->addIncoming(V, NewExit);
}
+
+ if (LandingPadInst *LPad = NewExit->getLandingPadInst()) {
+ PN = PHINode::Create(LPad->getType(), 0, "",
+ ExitSucc->getFirstInsertionPt());
+
+ for (pred_iterator I = pred_begin(ExitSucc), E = pred_end(ExitSucc);
+ I != E; ++I) {
+ BasicBlock *BB = *I;
+ LandingPadInst *LPI = BB->getLandingPadInst();
+ LPI->replaceAllUsesWith(PN);
+ PN->addIncoming(LPI, BB);
+ }
+ }
}
// Rewrite the code to refer to itself.
diff --git a/lib/Transforms/Scalar/LowerAtomic.cpp b/lib/Transforms/Scalar/LowerAtomic.cpp
index 9087b46..689bbe9 100644
--- a/lib/Transforms/Scalar/LowerAtomic.cpp
+++ b/lib/Transforms/Scalar/LowerAtomic.cpp
@@ -20,98 +20,88 @@
#include "llvm/Support/IRBuilder.h"
using namespace llvm;
-static bool LowerAtomicIntrinsic(IntrinsicInst *II) {
- IRBuilder<> Builder(II->getParent(), II);
- unsigned IID = II->getIntrinsicID();
- switch (IID) {
- case Intrinsic::memory_barrier:
- break;
+static bool LowerAtomicCmpXchgInst(AtomicCmpXchgInst *CXI) {
+ IRBuilder<> Builder(CXI->getParent(), CXI);
+ Value *Ptr = CXI->getPointerOperand();
+ Value *Cmp = CXI->getCompareOperand();
+ Value *Val = CXI->getNewValOperand();
+
+ LoadInst *Orig = Builder.CreateLoad(Ptr);
+ Value *Equal = Builder.CreateICmpEQ(Orig, Cmp);
+ Value *Res = Builder.CreateSelect(Equal, Val, Orig);
+ Builder.CreateStore(Res, Ptr);
+
+ CXI->replaceAllUsesWith(Orig);
+ CXI->eraseFromParent();
+ return true;
+}
- case Intrinsic::atomic_load_add:
- case Intrinsic::atomic_load_sub:
- case Intrinsic::atomic_load_and:
- case Intrinsic::atomic_load_nand:
- case Intrinsic::atomic_load_or:
- case Intrinsic::atomic_load_xor:
- case Intrinsic::atomic_load_max:
- case Intrinsic::atomic_load_min:
- case Intrinsic::atomic_load_umax:
- case Intrinsic::atomic_load_umin: {
- Value *Ptr = II->getArgOperand(0), *Delta = II->getArgOperand(1);
+static bool LowerAtomicRMWInst(AtomicRMWInst *RMWI) {
+ IRBuilder<> Builder(RMWI->getParent(), RMWI);
+ Value *Ptr = RMWI->getPointerOperand();
+ Value *Val = RMWI->getValOperand();
- LoadInst *Orig = Builder.CreateLoad(Ptr);
- Value *Res = NULL;
- switch (IID) {
- default: assert(0 && "Unrecognized atomic modify operation");
- case Intrinsic::atomic_load_add:
- Res = Builder.CreateAdd(Orig, Delta);
- break;
- case Intrinsic::atomic_load_sub:
- Res = Builder.CreateSub(Orig, Delta);
- break;
- case Intrinsic::atomic_load_and:
- Res = Builder.CreateAnd(Orig, Delta);
- break;
- case Intrinsic::atomic_load_nand:
- Res = Builder.CreateNot(Builder.CreateAnd(Orig, Delta));
- break;
- case Intrinsic::atomic_load_or:
- Res = Builder.CreateOr(Orig, Delta);
- break;
- case Intrinsic::atomic_load_xor:
- Res = Builder.CreateXor(Orig, Delta);
- break;
- case Intrinsic::atomic_load_max:
- Res = Builder.CreateSelect(Builder.CreateICmpSLT(Orig, Delta),
- Delta, Orig);
- break;
- case Intrinsic::atomic_load_min:
- Res = Builder.CreateSelect(Builder.CreateICmpSLT(Orig, Delta),
- Orig, Delta);
- break;
- case Intrinsic::atomic_load_umax:
- Res = Builder.CreateSelect(Builder.CreateICmpULT(Orig, Delta),
- Delta, Orig);
- break;
- case Intrinsic::atomic_load_umin:
- Res = Builder.CreateSelect(Builder.CreateICmpULT(Orig, Delta),
- Orig, Delta);
- break;
- }
- Builder.CreateStore(Res, Ptr);
+ LoadInst *Orig = Builder.CreateLoad(Ptr);
+ Value *Res = NULL;
- II->replaceAllUsesWith(Orig);
+ switch (RMWI->getOperation()) {
+ default: llvm_unreachable("Unexpected RMW operation");
+ case AtomicRMWInst::Xchg:
+ Res = Val;
break;
- }
-
- case Intrinsic::atomic_swap: {
- Value *Ptr = II->getArgOperand(0), *Val = II->getArgOperand(1);
- LoadInst *Orig = Builder.CreateLoad(Ptr);
- Builder.CreateStore(Val, Ptr);
- II->replaceAllUsesWith(Orig);
+ case AtomicRMWInst::Add:
+ Res = Builder.CreateAdd(Orig, Val);
break;
- }
-
- case Intrinsic::atomic_cmp_swap: {
- Value *Ptr = II->getArgOperand(0), *Cmp = II->getArgOperand(1);
- Value *Val = II->getArgOperand(2);
-
- LoadInst *Orig = Builder.CreateLoad(Ptr);
- Value *Equal = Builder.CreateICmpEQ(Orig, Cmp);
- Value *Res = Builder.CreateSelect(Equal, Val, Orig);
- Builder.CreateStore(Res, Ptr);
- II->replaceAllUsesWith(Orig);
+ case AtomicRMWInst::Sub:
+ Res = Builder.CreateSub(Orig, Val);
+ break;
+ case AtomicRMWInst::And:
+ Res = Builder.CreateAnd(Orig, Val);
+ break;
+ case AtomicRMWInst::Nand:
+ Res = Builder.CreateNot(Builder.CreateAnd(Orig, Val));
+ break;
+ case AtomicRMWInst::Or:
+ Res = Builder.CreateOr(Orig, Val);
+ break;
+ case AtomicRMWInst::Xor:
+ Res = Builder.CreateXor(Orig, Val);
+ break;
+ case AtomicRMWInst::Max:
+ Res = Builder.CreateSelect(Builder.CreateICmpSLT(Orig, Val),
+ Val, Orig);
+ break;
+ case AtomicRMWInst::Min:
+ Res = Builder.CreateSelect(Builder.CreateICmpSLT(Orig, Val),
+ Orig, Val);
+ break;
+ case AtomicRMWInst::UMax:
+ Res = Builder.CreateSelect(Builder.CreateICmpULT(Orig, Val),
+ Val, Orig);
+ break;
+ case AtomicRMWInst::UMin:
+ Res = Builder.CreateSelect(Builder.CreateICmpULT(Orig, Val),
+ Orig, Val);
break;
}
+ Builder.CreateStore(Res, Ptr);
+ RMWI->replaceAllUsesWith(Orig);
+ RMWI->eraseFromParent();
+ return true;
+}
- default:
- return false;
- }
+static bool LowerFenceInst(FenceInst *FI) {
+ FI->eraseFromParent();
+ return true;
+}
- assert(II->use_empty() &&
- "Lowering should have eliminated any uses of the intrinsic call!");
- II->eraseFromParent();
+static bool LowerLoadInst(LoadInst *LI) {
+ LI->setAtomic(NotAtomic);
+ return true;
+}
+static bool LowerStoreInst(StoreInst *SI) {
+ SI->setAtomic(NotAtomic);
return true;
}
@@ -123,9 +113,22 @@ namespace {
}
bool runOnBasicBlock(BasicBlock &BB) {
bool Changed = false;
- for (BasicBlock::iterator DI = BB.begin(), DE = BB.end(); DI != DE; )
- if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(DI++))
- Changed |= LowerAtomicIntrinsic(II);
+ for (BasicBlock::iterator DI = BB.begin(), DE = BB.end(); DI != DE; ) {
+ Instruction *Inst = DI++;
+ if (FenceInst *FI = dyn_cast<FenceInst>(Inst))
+ Changed |= LowerFenceInst(FI);
+ else if (AtomicCmpXchgInst *CXI = dyn_cast<AtomicCmpXchgInst>(Inst))
+ Changed |= LowerAtomicCmpXchgInst(CXI);
+ else if (AtomicRMWInst *RMWI = dyn_cast<AtomicRMWInst>(Inst))
+ Changed |= LowerAtomicRMWInst(RMWI);
+ else if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) {
+ if (LI->isAtomic())
+ LowerLoadInst(LI);
+ } else if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
+ if (SI->isAtomic())
+ LowerStoreInst(SI);
+ }
+ }
return Changed;
}
};
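
Because this pass runs only where atomicity is not actually required (e.g. provably single-threaded code), each new atomic instruction lowers to a plain load/compute/store sequence. The cmpxchg case, modeled as ordinary C++:

    #include <cstdint>

    // What LowerAtomicCmpXchgInst emits: load the original value,
    // select between the new and original value on equality, store
    // the result, and hand the original value to the cmpxchg's users.
    uint32_t loweredCmpXchg(uint32_t *ptr, uint32_t cmp, uint32_t val) {
      uint32_t orig = *ptr;                      // LoadInst
      uint32_t res = (orig == cmp) ? val : orig; // icmp eq + select
      *ptr = res;                                // StoreInst
      return orig;                               // cmpxchg result
    }
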
diff --git a/lib/Transforms/Scalar/MemCpyOptimizer.cpp b/lib/Transforms/Scalar/MemCpyOptimizer.cpp
index 7ed3db6..eeb8931 100644
--- a/lib/Transforms/Scalar/MemCpyOptimizer.cpp
+++ b/lib/Transforms/Scalar/MemCpyOptimizer.cpp
@@ -54,7 +54,7 @@ static int64_t GetOffsetFromIndex(const GetElementPtrInst *GEP, unsigned Idx,
if (OpC->isZero()) continue; // No offset.
// Handle struct indices, which add their field offset to the pointer.
- if (const StructType *STy = dyn_cast<StructType>(*GTI)) {
+ if (StructType *STy = dyn_cast<StructType>(*GTI)) {
Offset += TD.getStructLayout(STy)->getElementOffset(OpC->getZExtValue());
continue;
}
@@ -384,7 +384,7 @@ Instruction *MemCpyOpt::tryMergingIntoMemset(Instruction *StartInst,
if (StoreInst *NextStore = dyn_cast<StoreInst>(BI)) {
// If this is a store, see if we can merge it in.
- if (NextStore->isVolatile()) break;
+ if (!NextStore->isSimple()) break;
// Check to see if this stored value is of the same byte-splattable value.
if (ByteVal != isBytewiseValue(NextStore->getOperand(0)))
@@ -448,7 +448,7 @@ Instruction *MemCpyOpt::tryMergingIntoMemset(Instruction *StartInst,
// Determine alignment
unsigned Alignment = Range.Alignment;
if (Alignment == 0) {
- const Type *EltType =
+ Type *EltType =
cast<PointerType>(StartPtr->getType())->getElementType();
Alignment = TD->getABITypeAlignment(EltType);
}
@@ -479,7 +479,7 @@ Instruction *MemCpyOpt::tryMergingIntoMemset(Instruction *StartInst,
bool MemCpyOpt::processStore(StoreInst *SI, BasicBlock::iterator &BBI) {
- if (SI->isVolatile()) return false;
+ if (!SI->isSimple()) return false;
if (TD == 0) return false;
@@ -487,7 +487,7 @@ bool MemCpyOpt::processStore(StoreInst *SI, BasicBlock::iterator &BBI) {
// happen to be using a load-store pair to implement it, rather than
// a memcpy.
if (LoadInst *LI = dyn_cast<LoadInst>(SI->getOperand(0))) {
- if (!LI->isVolatile() && LI->hasOneUse() &&
+ if (LI->isSimple() && LI->hasOneUse() &&
LI->getParent() == SI->getParent()) {
MemDepResult ldep = MD->getDependency(LI);
CallInst *C = 0;
@@ -616,7 +616,7 @@ bool MemCpyOpt::performCallSlotOptzn(Instruction *cpy,
if (!A->hasStructRetAttr())
return false;
- const Type *StructTy = cast<PointerType>(A->getType())->getElementType();
+ Type *StructTy = cast<PointerType>(A->getType())->getElementType();
uint64_t destSize = TD->getTypeAllocSize(StructTy);
if (destSize < srcSize)
@@ -860,7 +860,7 @@ bool MemCpyOpt::processByValArgument(CallSite CS, unsigned ArgNo) {
// Find out what feeds this byval argument.
Value *ByValArg = CS.getArgument(ArgNo);
- const Type *ByValTy =cast<PointerType>(ByValArg->getType())->getElementType();
+ Type *ByValTy = cast<PointerType>(ByValArg->getType())->getElementType();
uint64_t ByValSize = TD->getTypeAllocSize(ByValTy);
MemDepResult DepInfo =
MD->getPointerDependencyFrom(AliasAnalysis::Location(ByValArg, ByValSize),
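
tryMergingIntoMemset (touched above) only folds stores whose value is a single repeated byte, which is what isBytewiseValue checks. A minimal 32-bit version of that test, illustrating the idea:

    #include <cstdint>

    // True when v is one byte repeated four times, e.g. 0x00000000 or
    // 0xABABABAB; that byte is the value memset would write.
    bool isBytewiseValue32(uint32_t v, uint8_t &byteOut) {
      byteOut = (uint8_t)v;
      return v == 0x01010101u * byteOut;
    }
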
diff --git a/lib/Transforms/Scalar/ObjCARC.cpp b/lib/Transforms/Scalar/ObjCARC.cpp
index ee132d3..da74e9c 100644
--- a/lib/Transforms/Scalar/ObjCARC.cpp
+++ b/lib/Transforms/Scalar/ObjCARC.cpp
@@ -180,7 +180,7 @@ static bool IsPotentialUse(const Value *Op) {
Arg->hasStructRetAttr())
return false;
// Only consider values with pointer types, and not function pointers.
- const PointerType *Ty = dyn_cast<PointerType>(Op->getType());
+ PointerType *Ty = dyn_cast<PointerType>(Op->getType());
if (!Ty || isa<FunctionType>(Ty->getElementType()))
return false;
// Conservatively assume anything else is a potential use.
@@ -213,8 +213,8 @@ static InstructionClass GetFunctionClass(const Function *F) {
const Argument *A0 = AI++;
if (AI == AE)
// Argument is a pointer.
- if (const PointerType *PTy = dyn_cast<PointerType>(A0->getType())) {
- const Type *ETy = PTy->getElementType();
+ if (PointerType *PTy = dyn_cast<PointerType>(A0->getType())) {
+ Type *ETy = PTy->getElementType();
// Argument is i8*.
if (ETy->isIntegerTy(8))
return StringSwitch<InstructionClass>(F->getName())
@@ -234,7 +234,7 @@ static InstructionClass GetFunctionClass(const Function *F) {
.Default(IC_CallOrUser);
// Argument is i8**
- if (const PointerType *Pte = dyn_cast<PointerType>(ETy))
+ if (PointerType *Pte = dyn_cast<PointerType>(ETy))
if (Pte->getElementType()->isIntegerTy(8))
return StringSwitch<InstructionClass>(F->getName())
.Case("objc_loadWeakRetained", IC_LoadWeakRetained)
@@ -246,11 +246,11 @@ static InstructionClass GetFunctionClass(const Function *F) {
// Two arguments, first is i8**.
const Argument *A1 = AI++;
if (AI == AE)
- if (const PointerType *PTy = dyn_cast<PointerType>(A0->getType()))
- if (const PointerType *Pte = dyn_cast<PointerType>(PTy->getElementType()))
+ if (PointerType *PTy = dyn_cast<PointerType>(A0->getType()))
+ if (PointerType *Pte = dyn_cast<PointerType>(PTy->getElementType()))
if (Pte->getElementType()->isIntegerTy(8))
- if (const PointerType *PTy1 = dyn_cast<PointerType>(A1->getType())) {
- const Type *ETy1 = PTy1->getElementType();
+ if (PointerType *PTy1 = dyn_cast<PointerType>(A1->getType())) {
+ Type *ETy1 = PTy1->getElementType();
// Second argument is i8*
if (ETy1->isIntegerTy(8))
return StringSwitch<InstructionClass>(F->getName())
@@ -258,7 +258,7 @@ static InstructionClass GetFunctionClass(const Function *F) {
.Case("objc_initWeak", IC_InitWeak)
.Default(IC_CallOrUser);
// Second argument is i8**.
- if (const PointerType *Pte1 = dyn_cast<PointerType>(ETy1))
+ if (PointerType *Pte1 = dyn_cast<PointerType>(ETy1))
if (Pte1->getElementType()->isIntegerTy(8))
return StringSwitch<InstructionClass>(F->getName())
.Case("objc_moveWeak", IC_MoveWeak)
@@ -344,6 +344,10 @@ static InstructionClass GetInstructionClass(const Value *V) {
break;
default:
// For anything else, check all the operands.
+ // Note that this includes both operands of a Store: while the first
+ // operand isn't actually being dereferenced, it is being stored to
+ // memory where we can no longer track who might read it and dereference
+ // it, so we have to consider it potentially used.
for (User::const_op_iterator OI = I->op_begin(), OE = I->op_end();
OI != OE; ++OI)
if (IsPotentialUse(*OI))
@@ -421,9 +425,10 @@ static bool IsAlwaysTail(InstructionClass Class) {
/// IsNoThrow - Test if the given class represents instructions which are always
/// safe to mark with the nounwind attribute.
static bool IsNoThrow(InstructionClass Class) {
+ // objc_retainBlock is not nounwind because it calls user copy constructors
+ // which could theoretically throw.
return Class == IC_Retain ||
Class == IC_RetainRV ||
- Class == IC_RetainBlock ||
Class == IC_Release ||
Class == IC_Autorelease ||
Class == IC_AutoreleaseRV ||
@@ -515,6 +520,10 @@ static bool IsObjCIdentifiedObject(const Value *V) {
const Value *Pointer =
StripPointerCastsAndObjCCalls(LI->getPointerOperand());
if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(Pointer)) {
+ // A constant pointer can't be pointing to an object on the heap. It may
+ // be reference-counted, but it won't be deleted.
+ if (GV->isConstant())
+ return true;
StringRef Name = GV->getName();
// These special variables are known to hold values which are not
// reference-counted pointers.
@@ -738,7 +747,6 @@ ObjCARCAliasAnalysis::getModRefInfo(ImmutableCallSite CS, const Location &Loc) {
switch (GetBasicInstructionClass(CS.getInstruction())) {
case IC_Retain:
case IC_RetainRV:
- case IC_RetainBlock:
case IC_Autorelease:
case IC_AutoreleaseRV:
case IC_NoopCast:
@@ -746,6 +754,8 @@ ObjCARCAliasAnalysis::getModRefInfo(ImmutableCallSite CS, const Location &Loc) {
case IC_FusedRetainAutorelease:
case IC_FusedRetainAutoreleaseRV:
// These functions don't access any memory visible to the compiler.
+ // Note that this doesn't include objc_retainBlock, because it updates
+ // pointers when it copies block data.
return NoModRef;
default:
break;
@@ -877,7 +887,9 @@ bool ObjCARCExpand::runOnFunction(Function &F) {
// usually can't sink them past other calls, which would be the main
// case where it would be useful.
-/// TODO: The pointer returned from objc_loadWeakRetained is retained.
+// TODO: The pointer returned from objc_loadWeakRetained is retained.
+
+// TODO: Delete release+retain pairs (rare).
#include "llvm/GlobalAlias.h"
#include "llvm/Constants.h"
@@ -1098,16 +1110,16 @@ static Sequence MergeSeqs(Sequence A, Sequence B, bool TopDown) {
if (A == S_None || B == S_None)
return S_None;
- // Note that we can't merge S_CanRelease and S_Use.
if (A > B) std::swap(A, B);
if (TopDown) {
// Choose the side which is further along in the sequence.
- if (A == S_Retain && (B == S_CanRelease || B == S_Use))
+ if ((A == S_Retain || A == S_CanRelease) &&
+ (B == S_CanRelease || B == S_Use))
return B;
} else {
// Choose the side which is further along in the sequence.
if ((A == S_Use || A == S_CanRelease) &&
- (B == S_Release || B == S_Stop || B == S_MovableRelease))
+ (B == S_Use || B == S_Release || B == S_Stop || B == S_MovableRelease))
return A;
// If both sides are releases, choose the more conservative one.
if (A == S_Stop && (B == S_Release || B == S_MovableRelease))
@@ -1124,13 +1136,19 @@ namespace {
/// retain-decrement-use-release sequence or release-use-decrement-retain
/// reverse sequence.
struct RRInfo {
- /// KnownIncremented - After an objc_retain, the reference count of the
- /// referenced object is known to be positive. Similarly, before an
- /// objc_release, the reference count of the referenced object is known to
- /// be positive. If there are retain-release pairs in code regions where the
- /// retain count is known to be positive, they can be eliminated, regardless
- /// of any side effects between them.
- bool KnownIncremented;
+ /// KnownSafe - After an objc_retain, the reference count of the referenced
+ /// object is known to be positive. Similarly, before an objc_release, the
+ /// reference count of the referenced object is known to be positive. If
+ /// there are retain-release pairs in code regions where the retain count
+ /// is known to be positive, they can be eliminated, regardless of any side
+ /// effects between them.
+ ///
+ /// Also, a retain+release pair nested within another retain+release
+ /// pair all on the known same pointer value can be eliminated, regardless
+ /// of any intervening side effects.
+ ///
+ /// KnownSafe is true when either of these conditions is satisfied.
+ bool KnownSafe;
/// IsRetainBlock - True if the Calls are objc_retainBlock calls (as
/// opposed to objc_retain calls).
@@ -1153,7 +1171,7 @@ namespace {
SmallPtrSet<Instruction *, 2> ReverseInsertPts;
RRInfo() :
- KnownIncremented(false), IsRetainBlock(false), IsTailCallRelease(false),
+ KnownSafe(false), IsRetainBlock(false), IsTailCallRelease(false),
ReleaseMetadata(0) {}
void clear();
@@ -1161,7 +1179,7 @@ namespace {
}
void RRInfo::clear() {
- KnownIncremented = false;
+ KnownSafe = false;
IsRetainBlock = false;
IsTailCallRelease = false;
ReleaseMetadata = 0;
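The nesting condition described in the KnownSafe comment above covers shapes like this (signatures simplified to void*; a hedged illustration, not compiler output):

    // Illustration: the inner retain/release pair on 'p' is nested inside an
    // outer pair on the same pointer, so the count is known positive across
    // Work() and the inner pair is removable whatever Work() does.
    extern "C" void *objc_retain(void *);
    extern "C" void objc_release(void *);
    void Work();

    void Example(void *p) {
      objc_retain(p);   // outer retain
      objc_retain(p);   // inner retain   \
      Work();           //                 > removable: count known > 0
      objc_release(p);  // inner release  /
      objc_release(p);  // outer release
    }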
@@ -1176,6 +1194,9 @@ namespace {
/// RefCount - The known minimum number of reference count increments.
unsigned RefCount;
+ /// NestCount - The known minimum level of retain+release nesting.
+ unsigned NestCount;
+
/// Seq - The current position in the sequence.
Sequence Seq;
@@ -1184,7 +1205,11 @@ namespace {
/// TODO: Encapsulate this better.
RRInfo RRI;
- PtrState() : RefCount(0), Seq(S_None) {}
+ PtrState() : RefCount(0), NestCount(0), Seq(S_None) {}
+
+ void SetAtLeastOneRefCount() {
+ if (RefCount == 0) RefCount = 1;
+ }
void IncrementRefCount() {
if (RefCount != UINT_MAX) ++RefCount;
@@ -1194,14 +1219,22 @@ namespace {
if (RefCount != 0) --RefCount;
}
- void ClearRefCount() {
- RefCount = 0;
- }
-
bool IsKnownIncremented() const {
return RefCount > 0;
}
+ void IncrementNestCount() {
+ if (NestCount != UINT_MAX) ++NestCount;
+ }
+
+ void DecrementNestCount() {
+ if (NestCount != 0) --NestCount;
+ }
+
+ bool IsKnownNested() const {
+ return NestCount > 0;
+ }
+
void SetSeq(Sequence NewSeq) {
Seq = NewSeq;
}
@@ -1233,6 +1266,7 @@ void
PtrState::Merge(const PtrState &Other, bool TopDown) {
Seq = MergeSeqs(Seq, Other.Seq, TopDown);
RefCount = std::min(RefCount, Other.RefCount);
+ NestCount = std::min(NestCount, Other.NestCount);
// We can't merge a plain objc_retain with an objc_retainBlock.
if (RRI.IsRetainBlock != Other.RRI.IsRetainBlock)
@@ -1245,7 +1279,7 @@ PtrState::Merge(const PtrState &Other, bool TopDown) {
if (RRI.ReleaseMetadata != Other.RRI.ReleaseMetadata)
RRI.ReleaseMetadata = 0;
- RRI.KnownIncremented = RRI.KnownIncremented && Other.RRI.KnownIncremented;
+ RRI.KnownSafe = RRI.KnownSafe && Other.RRI.KnownSafe;
RRI.IsTailCallRelease = RRI.IsTailCallRelease && Other.RRI.IsTailCallRelease;
RRI.Calls.insert(Other.RRI.Calls.begin(), Other.RRI.Calls.end());
RRI.ReverseInsertPts.insert(Other.RRI.ReverseInsertPts.begin(),
@@ -1316,7 +1350,7 @@ namespace {
}
void clearBottomUpPointers() {
- PerPtrTopDown.clear();
+ PerPtrBottomUp.clear();
}
void clearTopDownPointers() {
@@ -1334,6 +1368,12 @@ namespace {
unsigned GetAllPathCount() const {
return TopDownPathCount * BottomUpPathCount;
}
+
+ /// isVisitedTopDown - Test whether the block for this BBState has been
+ /// visited by the top-down portion of the algorithm.
+ bool isVisitedTopDown() const {
+ return TopDownPathCount != 0;
+ }
};
}
@@ -1364,7 +1404,7 @@ void BBState::MergePred(const BBState &Other) {
/*TopDown=*/true);
}
- // For each entry in our set, if the other set doens't have an entry with the
+ // For each entry in our set, if the other set doesn't have an entry with the
// same key, force it to merge with an empty entry.
for (ptr_iterator MI = top_down_ptr_begin(),
ME = top_down_ptr_end(); MI != ME; ++MI)
@@ -1389,7 +1429,7 @@ void BBState::MergeSucc(const BBState &Other) {
/*TopDown=*/false);
}
- // For each entry in our set, if the other set doens't have an entry
+ // For each entry in our set, if the other set doesn't have an entry
// with the same key, force it to merge with an empty entry.
for (ptr_iterator MI = bottom_up_ptr_begin(),
ME = bottom_up_ptr_end(); MI != ME; ++MI)
@@ -1406,15 +1446,11 @@ namespace {
/// Run - A flag indicating whether this optimization pass should run.
bool Run;
- /// RetainFunc, RelaseFunc - Declarations for objc_retain,
- /// objc_retainBlock, and objc_release.
- Function *RetainFunc, *RetainBlockFunc, *RetainRVFunc, *ReleaseFunc;
-
/// RetainRVCallee, etc. - Declarations for ObjC runtime
/// functions, for use in creating calls to them. These are initialized
/// lazily to avoid cluttering up the Module with unused declarations.
Constant *RetainRVCallee, *AutoreleaseRVCallee, *ReleaseCallee,
- *RetainCallee, *AutoreleaseCallee;
+ *RetainCallee, *RetainBlockCallee, *AutoreleaseCallee;
/// UsedInThisFunction - Flags which determine whether each of the
/// interesting runtime functions is in fact used in the current function.
@@ -1428,6 +1464,7 @@ namespace {
Constant *getAutoreleaseRVCallee(Module *M);
Constant *getReleaseCallee(Module *M);
Constant *getRetainCallee(Module *M);
+ Constant *getRetainBlockCallee(Module *M);
Constant *getAutoreleaseCallee(Module *M);
void OptimizeRetainCall(Function &F, Instruction *Retain);
@@ -1452,11 +1489,13 @@ namespace {
void MoveCalls(Value *Arg, RRInfo &RetainsToMove, RRInfo &ReleasesToMove,
MapVector<Value *, RRInfo> &Retains,
DenseMap<Value *, RRInfo> &Releases,
- SmallVectorImpl<Instruction *> &DeadInsts);
+ SmallVectorImpl<Instruction *> &DeadInsts,
+ Module *M);
bool PerformCodePlacement(DenseMap<const BasicBlock *, BBState> &BBStates,
MapVector<Value *, RRInfo> &Retains,
- DenseMap<Value *, RRInfo> &Releases);
+ DenseMap<Value *, RRInfo> &Releases,
+ Module *M);
void OptimizeWeakCalls(Function &F);
@@ -1501,7 +1540,7 @@ Constant *ObjCARCOpt::getRetainRVCallee(Module *M) {
Type *I8X = PointerType::getUnqual(Type::getInt8Ty(C));
std::vector<Type *> Params;
Params.push_back(I8X);
- const FunctionType *FTy =
+ FunctionType *FTy =
FunctionType::get(I8X, Params, /*isVarArg=*/false);
AttrListPtr Attributes;
Attributes.addAttr(~0u, Attribute::NoUnwind);
@@ -1518,7 +1557,7 @@ Constant *ObjCARCOpt::getAutoreleaseRVCallee(Module *M) {
Type *I8X = PointerType::getUnqual(Type::getInt8Ty(C));
std::vector<Type *> Params;
Params.push_back(I8X);
- const FunctionType *FTy =
+ FunctionType *FTy =
FunctionType::get(I8X, Params, /*isVarArg=*/false);
AttrListPtr Attributes;
Attributes.addAttr(~0u, Attribute::NoUnwind);
@@ -1561,6 +1600,23 @@ Constant *ObjCARCOpt::getRetainCallee(Module *M) {
return RetainCallee;
}
+Constant *ObjCARCOpt::getRetainBlockCallee(Module *M) {
+ if (!RetainBlockCallee) {
+ LLVMContext &C = M->getContext();
+ std::vector<Type *> Params;
+ Params.push_back(PointerType::getUnqual(Type::getInt8Ty(C)));
+ AttrListPtr Attributes;
+ // objc_retainBlock is not nounwind because it calls user copy constructors
+ // which could theoretically throw.
+ RetainBlockCallee =
+ M->getOrInsertFunction(
+ "objc_retainBlock",
+ FunctionType::get(Params[0], Params, /*isVarArg=*/false),
+ Attributes);
+ }
+ return RetainBlockCallee;
+}
+
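The lazy-declaration pattern shared by these getters generalizes; a minimal standalone sketch (the runtime function name is a placeholder):

    // Sketch: create the runtime declaration only on first use, so unused
    // functions never clutter the Module with dead declarations.
    #include "llvm/DerivedTypes.h"
    #include "llvm/LLVMContext.h"
    #include "llvm/Module.h"
    #include <vector>
    using namespace llvm;

    static Constant *getLazyCallee(Module *M, Constant *&Cache) {
      if (!Cache) {
        LLVMContext &C = M->getContext();
        Type *I8X = PointerType::getUnqual(Type::getInt8Ty(C));
        std::vector<Type *> Params(1, I8X);
        Cache = M->getOrInsertFunction(
            "my_runtime_fn",
            FunctionType::get(I8X, Params, /*isVarArg=*/false));
      }
      return Cache;
    }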
Constant *ObjCARCOpt::getAutoreleaseCallee(Module *M) {
if (!AutoreleaseCallee) {
LLVMContext &C = M->getContext();
@@ -1904,12 +1960,19 @@ void
ObjCARCOpt::OptimizeAutoreleaseRVCall(Function &F, Instruction *AutoreleaseRV) {
// Check for a return of the pointer value.
const Value *Ptr = GetObjCArg(AutoreleaseRV);
- for (Value::const_use_iterator UI = Ptr->use_begin(), UE = Ptr->use_end();
- UI != UE; ++UI) {
- const User *I = *UI;
- if (isa<ReturnInst>(I) || GetBasicInstructionClass(I) == IC_RetainRV)
- return;
- }
+ SmallVector<const Value *, 2> Users;
+ Users.push_back(Ptr);
+ do {
+ Ptr = Users.pop_back_val();
+ for (Value::const_use_iterator UI = Ptr->use_begin(), UE = Ptr->use_end();
+ UI != UE; ++UI) {
+ const User *I = *UI;
+ if (isa<ReturnInst>(I) || GetBasicInstructionClass(I) == IC_RetainRV)
+ return;
+ if (isa<BitCastInst>(I))
+ Users.push_back(I);
+ }
+ } while (!Users.empty());
Changed = true;
++NumPeeps;
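The bitcast-transparent user walk introduced above is a small reusable idiom; restated standalone (Pred stands in for the return/retainRV test):

    // Sketch: scan all users of V, looking through bitcasts, and report
    // whether any transitive user satisfies Pred.
    #include "llvm/Instructions.h"
    #include "llvm/ADT/SmallVector.h"
    using namespace llvm;

    static bool AnyUserThroughBitCasts(const Value *V,
                                       bool (*Pred)(const User *)) {
      SmallVector<const Value *, 4> Worklist;
      Worklist.push_back(V);
      do {
        const Value *Cur = Worklist.pop_back_val();
        for (Value::const_use_iterator UI = Cur->use_begin(),
             UE = Cur->use_end(); UI != UE; ++UI) {
          const User *U = *UI;
          if (Pred(U))
            return true;
          if (isa<BitCastInst>(U))
            Worklist.push_back(U);
        }
      } while (!Worklist.empty());
      return false;
    }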
@@ -1953,7 +2016,7 @@ void ObjCARCOpt::OptimizeIndividualCalls(Function &F) {
case IC_DestroyWeak: {
CallInst *CI = cast<CallInst>(Inst);
if (isNullOrUndef(CI->getArgOperand(0))) {
- const Type *Ty = CI->getArgOperand(0)->getType();
+ Type *Ty = CI->getArgOperand(0)->getType();
new StoreInst(UndefValue::get(cast<PointerType>(Ty)->getElementType()),
Constant::getNullValue(Ty),
CI);
@@ -1968,7 +2031,7 @@ void ObjCARCOpt::OptimizeIndividualCalls(Function &F) {
CallInst *CI = cast<CallInst>(Inst);
if (isNullOrUndef(CI->getArgOperand(0)) ||
isNullOrUndef(CI->getArgOperand(1))) {
- const Type *Ty = CI->getArgOperand(0)->getType();
+ Type *Ty = CI->getArgOperand(0)->getType();
new StoreInst(UndefValue::get(cast<PointerType>(Ty)->getElementType()),
Constant::getNullValue(Ty),
CI);
@@ -2090,7 +2153,7 @@ void ObjCARCOpt::OptimizeIndividualCalls(Function &F) {
++NumPartialNoops;
// Clone the call into each predecessor that has a non-null value.
CallInst *CInst = cast<CallInst>(Inst);
- const Type *ParamTy = CInst->getArgOperand(0)->getType();
+ Type *ParamTy = CInst->getArgOperand(0)->getType();
for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
Value *Incoming =
StripPointerCastsAndObjCCalls(PN->getIncomingValue(i));
@@ -2132,41 +2195,49 @@ ObjCARCOpt::CheckForCFGHazards(const BasicBlock *BB,
const TerminatorInst *TI = cast<TerminatorInst>(&BB->back());
bool SomeSuccHasSame = false;
bool AllSuccsHaveSame = true;
- for (succ_const_iterator SI(TI), SE(TI, false); SI != SE; ++SI)
- switch (BBStates[*SI].getPtrBottomUpState(Arg).GetSeq()) {
+ PtrState &S = MyStates.getPtrTopDownState(Arg);
+ for (succ_const_iterator SI(TI), SE(TI, false); SI != SE; ++SI) {
+ PtrState &SuccS = BBStates[*SI].getPtrBottomUpState(Arg);
+ switch (SuccS.GetSeq()) {
case S_None:
- case S_CanRelease:
- MyStates.getPtrTopDownState(Arg).ClearSequenceProgress();
- SomeSuccHasSame = false;
- break;
+ case S_CanRelease: {
+ if (!S.RRI.KnownSafe && !SuccS.RRI.KnownSafe)
+ S.ClearSequenceProgress();
+ continue;
+ }
case S_Use:
SomeSuccHasSame = true;
break;
case S_Stop:
case S_Release:
case S_MovableRelease:
- AllSuccsHaveSame = false;
+ if (!S.RRI.KnownSafe && !SuccS.RRI.KnownSafe)
+ AllSuccsHaveSame = false;
break;
case S_Retain:
llvm_unreachable("bottom-up pointer in retain state!");
}
+ }
// If the state at the other end of any of the successor edges
// matches the current state, require all edges to match. This
// guards against loops in the middle of a sequence.
if (SomeSuccHasSame && !AllSuccsHaveSame)
- MyStates.getPtrTopDownState(Arg).ClearSequenceProgress();
+ S.ClearSequenceProgress();
}
case S_CanRelease: {
const Value *Arg = I->first;
const TerminatorInst *TI = cast<TerminatorInst>(&BB->back());
bool SomeSuccHasSame = false;
bool AllSuccsHaveSame = true;
- for (succ_const_iterator SI(TI), SE(TI, false); SI != SE; ++SI)
- switch (BBStates[*SI].getPtrBottomUpState(Arg).GetSeq()) {
- case S_None:
- MyStates.getPtrTopDownState(Arg).ClearSequenceProgress();
- SomeSuccHasSame = false;
- break;
+ PtrState &S = MyStates.getPtrTopDownState(Arg);
+ for (succ_const_iterator SI(TI), SE(TI, false); SI != SE; ++SI) {
+ PtrState &SuccS = BBStates[*SI].getPtrBottomUpState(Arg);
+ switch (SuccS.GetSeq()) {
+ case S_None: {
+ if (!S.RRI.KnownSafe && !SuccS.RRI.KnownSafe)
+ S.ClearSequenceProgress();
+ continue;
+ }
case S_CanRelease:
SomeSuccHasSame = true;
break;
@@ -2174,16 +2245,18 @@ ObjCARCOpt::CheckForCFGHazards(const BasicBlock *BB,
case S_Release:
case S_MovableRelease:
case S_Use:
- AllSuccsHaveSame = false;
+ if (!S.RRI.KnownSafe && !SuccS.RRI.KnownSafe)
+ AllSuccsHaveSame = false;
break;
case S_Retain:
llvm_unreachable("bottom-up pointer in retain state!");
}
+ }
// If the state at the other end of any of the successor edges
// matches the current state, require all edges to match. This
// guards against loops in the middle of a sequence.
if (SomeSuccHasSame && !AllSuccsHaveSame)
- MyStates.getPtrTopDownState(Arg).ClearSequenceProgress();
+ S.ClearSequenceProgress();
}
}
}
@@ -2207,6 +2280,8 @@ ObjCARCOpt::VisitBottomUp(BasicBlock *BB,
if (Succ == BB)
continue;
DenseMap<const BasicBlock *, BBState>::iterator I = BBStates.find(Succ);
+ // If we haven't seen this node yet, then we've found a CFG cycle.
+ // Be optimistic here; it's CheckForCFGHazards' job to detect trouble.
if (I == BBStates.end())
continue;
MyStates.InitFromSucc(I->second);
@@ -2245,11 +2320,12 @@ ObjCARCOpt::VisitBottomUp(BasicBlock *BB,
S.SetSeqToRelease(Inst->getMetadata(ImpreciseReleaseMDKind));
S.RRI.clear();
- S.RRI.KnownIncremented = S.IsKnownIncremented();
+ S.RRI.KnownSafe = S.IsKnownNested() || S.IsKnownIncremented();
S.RRI.IsTailCallRelease = cast<CallInst>(Inst)->isTailCall();
S.RRI.Calls.insert(Inst);
S.IncrementRefCount();
+ S.IncrementNestCount();
break;
}
case IC_RetainBlock:
@@ -2259,6 +2335,13 @@ ObjCARCOpt::VisitBottomUp(BasicBlock *BB,
PtrState &S = MyStates.getPtrBottomUpState(Arg);
S.DecrementRefCount();
+ S.SetAtLeastOneRefCount();
+ S.DecrementNestCount();
+
+ // An objc_retainBlock call with just a use still needs to be kept,
+ // because it may be copying a block from the stack to the heap.
+ if (Class == IC_RetainBlock && S.GetSeq() == S_Use)
+ S.SetSeq(S_CanRelease);
switch (S.GetSeq()) {
case S_Stop:
@@ -2281,7 +2364,7 @@ ObjCARCOpt::VisitBottomUp(BasicBlock *BB,
case S_Retain:
llvm_unreachable("bottom-up pointer in retain state!");
}
- break;
+ continue;
}
case IC_AutoreleasepoolPop:
// Conservatively, clear MyStates for all known pointers.
@@ -2305,26 +2388,22 @@ ObjCARCOpt::VisitBottomUp(BasicBlock *BB,
PtrState &S = MI->second;
Sequence Seq = S.GetSeq();
- // Check for possible retains and releases.
+ // Check for possible releases.
if (CanAlterRefCount(Inst, Ptr, PA, Class)) {
- // Check for a retain (we're going bottom-up here).
S.DecrementRefCount();
-
- // Check for a release.
- if (!IsRetain(Class) && Class != IC_RetainBlock)
- switch (Seq) {
- case S_Use:
- S.SetSeq(S_CanRelease);
- continue;
- case S_CanRelease:
- case S_Release:
- case S_MovableRelease:
- case S_Stop:
- case S_None:
- break;
- case S_Retain:
- llvm_unreachable("bottom-up pointer in retain state!");
- }
+ switch (Seq) {
+ case S_Use:
+ S.SetSeq(S_CanRelease);
+ continue;
+ case S_CanRelease:
+ case S_Release:
+ case S_MovableRelease:
+ case S_Stop:
+ case S_None:
+ break;
+ case S_Retain:
+ llvm_unreachable("bottom-up pointer in retain state!");
+ }
}
// Check for possible direct uses.
@@ -2332,14 +2411,14 @@ ObjCARCOpt::VisitBottomUp(BasicBlock *BB,
case S_Release:
case S_MovableRelease:
if (CanUse(Inst, Ptr, PA, Class)) {
- S.RRI.ReverseInsertPts.clear();
+ assert(S.RRI.ReverseInsertPts.empty());
S.RRI.ReverseInsertPts.insert(Inst);
S.SetSeq(S_Use);
} else if (Seq == S_Release &&
(Class == IC_User || Class == IC_CallOrUser)) {
// Non-movable releases depend on any possible objc pointer use.
S.SetSeq(S_Stop);
- S.RRI.ReverseInsertPts.clear();
+ assert(S.RRI.ReverseInsertPts.empty());
S.RRI.ReverseInsertPts.insert(Inst);
}
break;
@@ -2378,14 +2457,18 @@ ObjCARCOpt::VisitTopDown(BasicBlock *BB,
if (Pred == BB)
continue;
DenseMap<const BasicBlock *, BBState>::iterator I = BBStates.find(Pred);
- if (I == BBStates.end())
+ assert(I != BBStates.end());
+ // If we haven't seen this node yet, then we've found a CFG cycle.
+ // Be optimistic here; it's CheckForCFGHazards' job to detect trouble.
+ if (!I->second.isVisitedTopDown())
continue;
MyStates.InitFromPred(I->second);
while (PI != PE) {
Pred = *PI++;
if (Pred != BB) {
I = BBStates.find(Pred);
- if (I != BBStates.end())
+ assert(I != BBStates.end());
+ if (I->second.isVisitedTopDown())
MyStates.MergePred(I->second);
}
}
@@ -2422,18 +2505,23 @@ ObjCARCOpt::VisitTopDown(BasicBlock *BB,
S.SetSeq(S_Retain);
S.RRI.clear();
S.RRI.IsRetainBlock = Class == IC_RetainBlock;
- S.RRI.KnownIncremented = S.IsKnownIncremented();
+ // Don't check S.IsKnownIncremented() here because it's not
+ // sufficient.
+ S.RRI.KnownSafe = S.IsKnownNested();
S.RRI.Calls.insert(Inst);
}
+ S.SetAtLeastOneRefCount();
S.IncrementRefCount();
- break;
+ S.IncrementNestCount();
+ continue;
}
case IC_Release: {
Arg = GetObjCArg(Inst);
PtrState &S = MyStates.getPtrTopDownState(Arg);
S.DecrementRefCount();
+ S.DecrementNestCount();
switch (S.GetSeq()) {
case S_Retain:
@@ -2478,16 +2566,12 @@ ObjCARCOpt::VisitTopDown(BasicBlock *BB,
Sequence Seq = S.GetSeq();
// Check for possible releases.
- if (!IsRetain(Class) && Class != IC_RetainBlock &&
- CanAlterRefCount(Inst, Ptr, PA, Class)) {
- // Check for a release.
+ if (CanAlterRefCount(Inst, Ptr, PA, Class)) {
S.DecrementRefCount();
-
- // Check for a release.
switch (Seq) {
case S_Retain:
S.SetSeq(S_CanRelease);
- S.RRI.ReverseInsertPts.clear();
+ assert(S.RRI.ReverseInsertPts.empty());
S.RRI.ReverseInsertPts.insert(Inst);
// One call can't cause a transition from S_Retain to S_CanRelease
@@ -2511,8 +2595,18 @@ ObjCARCOpt::VisitTopDown(BasicBlock *BB,
if (CanUse(Inst, Ptr, PA, Class))
S.SetSeq(S_Use);
break;
- case S_Use:
case S_Retain:
+ // An objc_retainBlock call may be responsible for copying the block
+ // data from the stack to the heap. Model this by moving it straight
+ // from S_Retain to S_Use.
+ if (S.RRI.IsRetainBlock &&
+ CanUse(Inst, Ptr, PA, Class)) {
+ assert(S.RRI.ReverseInsertPts.empty());
+ S.RRI.ReverseInsertPts.insert(Inst);
+ S.SetSeq(S_Use);
+ }
+ break;
+ case S_Use:
case S_None:
break;
case S_Stop:
@@ -2533,28 +2627,43 @@ ObjCARCOpt::Visit(Function &F,
DenseMap<const BasicBlock *, BBState> &BBStates,
MapVector<Value *, RRInfo> &Retains,
DenseMap<Value *, RRInfo> &Releases) {
- // Use postorder for bottom-up, and reverse-postorder for top-down, because we
+ // Use reverse-postorder on the reverse CFG for bottom-up, because we
// magically know that loops will be well behaved, i.e. they won't repeatedly
- // call retain on a single pointer without doing a release.
+ // call retain on a single pointer without doing a release. We can't use
+ // ReversePostOrderTraversal here because we want to walk up from each
+ // function exit point.
+ SmallPtrSet<BasicBlock *, 16> Visited;
+ SmallVector<std::pair<BasicBlock *, pred_iterator>, 16> Stack;
+ SmallVector<BasicBlock *, 16> Order;
+ for (Function::iterator I = F.begin(), E = F.end(); I != E; ++I) {
+ BasicBlock *BB = I;
+ if (BB->getTerminator()->getNumSuccessors() == 0)
+ Stack.push_back(std::make_pair(BB, pred_begin(BB)));
+ }
+ while (!Stack.empty()) {
+ pred_iterator End = pred_end(Stack.back().first);
+ while (Stack.back().second != End) {
+ BasicBlock *BB = *Stack.back().second++;
+ if (Visited.insert(BB))
+ Stack.push_back(std::make_pair(BB, pred_begin(BB)));
+ }
+ Order.push_back(Stack.pop_back_val().first);
+ }
bool BottomUpNestingDetected = false;
- SmallVector<BasicBlock *, 8> PostOrder;
- for (po_iterator<Function *> I = po_begin(&F), E = po_end(&F); I != E; ++I) {
+ for (SmallVectorImpl<BasicBlock *>::const_reverse_iterator I =
+ Order.rbegin(), E = Order.rend(); I != E; ++I) {
BasicBlock *BB = *I;
- PostOrder.push_back(BB);
-
BottomUpNestingDetected |= VisitBottomUp(BB, BBStates, Retains);
}
- // Iterate through the post-order in reverse order, achieving a
- // reverse-postorder traversal. We don't use the ReversePostOrderTraversal
- // class here because it works by computing its own full postorder iteration,
- // recording the sequence, and playing it back in reverse. Since we're already
- // doing a full iteration above, we can just record the sequence manually and
- // avoid the cost of having ReversePostOrderTraversal compute it.
+ // Use regular reverse-postorder for top-down.
bool TopDownNestingDetected = false;
- for (SmallVectorImpl<BasicBlock *>::const_reverse_iterator
- RI = PostOrder.rbegin(), RE = PostOrder.rend(); RI != RE; ++RI)
- TopDownNestingDetected |= VisitTopDown(*RI, BBStates, Releases);
+ typedef ReversePostOrderTraversal<Function *> RPOTType;
+ RPOTType RPOT(&F);
+ for (RPOTType::rpo_iterator I = RPOT.begin(), E = RPOT.end(); I != E; ++I) {
+ BasicBlock *BB = *I;
+ TopDownNestingDetected |= VisitTopDown(BB, BBStates, Releases);
+ }
return TopDownNestingDetected && BottomUpNestingDetected;
}
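The hand-rolled traversal above computes a postorder of the reverse CFG seeded at every exit block. A standalone restatement, recomputing pred_end() at each step so the comparison always refers to the current stack top:

    // Sketch: iterative DFS over predecessor edges from every exit block
    // (a block with no successors). Walking Order in reverse then gives
    // the reverse-postorder on the reverse CFG used by the bottom-up pass.
    #include "llvm/Function.h"
    #include "llvm/Support/CFG.h"
    #include "llvm/ADT/SmallPtrSet.h"
    #include "llvm/ADT/SmallVector.h"
    using namespace llvm;

    static void ReverseCFGPostOrder(Function &F,
                                    SmallVectorImpl<BasicBlock *> &Order) {
      SmallPtrSet<BasicBlock *, 16> Visited;
      SmallVector<std::pair<BasicBlock *, pred_iterator>, 16> Stack;
      for (Function::iterator I = F.begin(), E = F.end(); I != E; ++I) {
        BasicBlock *BB = I;
        if (BB->getTerminator()->getNumSuccessors() == 0 &&
            Visited.insert(BB))
          Stack.push_back(std::make_pair(BB, pred_begin(BB)));
      }
      while (!Stack.empty()) {
        if (Stack.back().second == pred_end(Stack.back().first)) {
          Order.push_back(Stack.pop_back_val().first);
          continue;
        }
        BasicBlock *Pred = *Stack.back().second++;
        if (Visited.insert(Pred))
          Stack.push_back(std::make_pair(Pred, pred_begin(Pred)));
      }
    }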
@@ -2565,12 +2674,10 @@ void ObjCARCOpt::MoveCalls(Value *Arg,
RRInfo &ReleasesToMove,
MapVector<Value *, RRInfo> &Retains,
DenseMap<Value *, RRInfo> &Releases,
- SmallVectorImpl<Instruction *> &DeadInsts) {
- const Type *ArgTy = Arg->getType();
- const Type *ParamTy =
- (RetainRVFunc ? RetainRVFunc :
- RetainFunc ? RetainFunc :
- RetainBlockFunc)->arg_begin()->getType();
+ SmallVectorImpl<Instruction *> &DeadInsts,
+ Module *M) {
+ Type *ArgTy = Arg->getType();
+ Type *ParamTy = PointerType::getUnqual(Type::getInt8Ty(ArgTy->getContext()));
// Insert the new retain and release calls.
for (SmallPtrSet<Instruction *, 2>::const_iterator
@@ -2581,7 +2688,7 @@ void ObjCARCOpt::MoveCalls(Value *Arg,
new BitCastInst(Arg, ParamTy, "", InsertPt);
CallInst *Call =
CallInst::Create(RetainsToMove.IsRetainBlock ?
- RetainBlockFunc : RetainFunc,
+ getRetainBlockCallee(M) : getRetainCallee(M),
MyArg, "", InsertPt);
Call->setDoesNotThrow();
if (!RetainsToMove.IsRetainBlock)
@@ -2598,8 +2705,8 @@ void ObjCARCOpt::MoveCalls(Value *Arg,
// The invoke's return value isn't available in the unwind block,
// but our releases will never depend on it, because they must be
// paired with retains from before the invoke.
- InsertPts[0] = II->getNormalDest()->getFirstNonPHI();
- InsertPts[1] = II->getUnwindDest()->getFirstNonPHI();
+ InsertPts[0] = II->getNormalDest()->getFirstInsertionPt();
+ InsertPts[1] = II->getUnwindDest()->getFirstInsertionPt();
} else {
// Insert code immediately after the last use.
InsertPts[0] = llvm::next(BasicBlock::iterator(LastUse));
@@ -2609,7 +2716,8 @@ void ObjCARCOpt::MoveCalls(Value *Arg,
Instruction *InsertPt = *I;
Value *MyArg = ArgTy == ParamTy ? Arg :
new BitCastInst(Arg, ParamTy, "", InsertPt);
- CallInst *Call = CallInst::Create(ReleaseFunc, MyArg, "", InsertPt);
+ CallInst *Call = CallInst::Create(getReleaseCallee(M), MyArg,
+ "", InsertPt);
// Attach a clang.imprecise_release metadata tag, if appropriate.
if (MDNode *M = ReleasesToMove.ReleaseMetadata)
Call->setMetadata(ImpreciseReleaseMDKind, M);
@@ -2640,7 +2748,8 @@ bool
ObjCARCOpt::PerformCodePlacement(DenseMap<const BasicBlock *, BBState>
&BBStates,
MapVector<Value *, RRInfo> &Retains,
- DenseMap<Value *, RRInfo> &Releases) {
+ DenseMap<Value *, RRInfo> &Releases,
+ Module *M) {
bool AnyPairsCompletelyEliminated = false;
RRInfo RetainsToMove;
RRInfo ReleasesToMove;
@@ -2649,21 +2758,36 @@ ObjCARCOpt::PerformCodePlacement(DenseMap<const BasicBlock *, BBState>
SmallVector<Instruction *, 8> DeadInsts;
for (MapVector<Value *, RRInfo>::const_iterator I = Retains.begin(),
- E = Retains.end(); I != E; ) {
- Value *V = (I++)->first;
+ E = Retains.end(); I != E; ++I) {
+ Value *V = I->first;
if (!V) continue; // blotted
Instruction *Retain = cast<Instruction>(V);
Value *Arg = GetObjCArg(Retain);
- // If the object being released is in static or stack storage, we know it's
+ // If the object being released is in static storage, we know it's
// not being managed by ObjC reference counting, so we can delete pairs
// regardless of what possible decrements or uses lie between them.
- bool KnownSafe = isa<Constant>(Arg) || isa<AllocaInst>(Arg);
+ bool KnownSafe = isa<Constant>(Arg);
+
+ // Same for stack storage, unless this is an objc_retainBlock call,
+ // which is responsible for copying the block data from the stack to
+ // the heap.
+ if (!I->second.IsRetainBlock && isa<AllocaInst>(Arg))
+ KnownSafe = true;
+
+ // A constant pointer can't be pointing to an object on the heap. It may
+ // be reference-counted, but it won't be deleted.
+ if (const LoadInst *LI = dyn_cast<LoadInst>(Arg))
+ if (const GlobalVariable *GV =
+ dyn_cast<GlobalVariable>(
+ StripPointerCastsAndObjCCalls(LI->getPointerOperand())))
+ if (GV->isConstant())
+ KnownSafe = true;
// If a pair happens in a region where it is known that the reference count
// is already incremented, we can similarly ignore possible decrements.
- bool KnownIncrementedTD = true, KnownIncrementedBU = true;
+ bool KnownSafeTD = true, KnownSafeBU = true;
// Connect the dots between the top-down-collected RetainsToMove and
// bottom-up-collected ReleasesToMove to form sets of related calls.
@@ -2683,7 +2807,7 @@ ObjCARCOpt::PerformCodePlacement(DenseMap<const BasicBlock *, BBState>
MapVector<Value *, RRInfo>::const_iterator It = Retains.find(NewRetain);
assert(It != Retains.end());
const RRInfo &NewRetainRRI = It->second;
- KnownIncrementedTD &= NewRetainRRI.KnownIncremented;
+ KnownSafeTD &= NewRetainRRI.KnownSafe;
for (SmallPtrSet<Instruction *, 2>::const_iterator
LI = NewRetainRRI.Calls.begin(),
LE = NewRetainRRI.Calls.end(); LI != LE; ++LI) {
@@ -2739,7 +2863,7 @@ ObjCARCOpt::PerformCodePlacement(DenseMap<const BasicBlock *, BBState>
Releases.find(NewRelease);
assert(It != Releases.end());
const RRInfo &NewReleaseRRI = It->second;
- KnownIncrementedBU &= NewReleaseRRI.KnownIncremented;
+ KnownSafeBU &= NewReleaseRRI.KnownSafe;
for (SmallPtrSet<Instruction *, 2>::const_iterator
LI = NewReleaseRRI.Calls.begin(),
LE = NewReleaseRRI.Calls.end(); LI != LE; ++LI) {
@@ -2787,12 +2911,19 @@ ObjCARCOpt::PerformCodePlacement(DenseMap<const BasicBlock *, BBState>
if (NewRetains.empty()) break;
}
- // If the pointer is known incremented, we can safely delete the pair
- // regardless of what's between them.
- if (KnownIncrementedTD || KnownIncrementedBU) {
+ // If the pointer is known incremented or nested, we can safely delete the
+ // pair regardless of what's between them.
+ if (KnownSafeTD || KnownSafeBU) {
RetainsToMove.ReverseInsertPts.clear();
ReleasesToMove.ReverseInsertPts.clear();
NewCount = 0;
+ } else {
+ // Determine whether the new insertion points we computed preserve the
+ // balance of retain and release calls through the program.
+ // TODO: If the fully aggressive solution isn't valid, try to find a
+ // less aggressive solution which is.
+ if (NewDelta != 0)
+ goto next_retain;
}
// Determine whether the original call points are balanced in the retain and
@@ -2803,18 +2934,12 @@ ObjCARCOpt::PerformCodePlacement(DenseMap<const BasicBlock *, BBState>
if (OldDelta != 0)
goto next_retain;
- // Determine whether the new insertion points we computed preserve the
- // balance of retain and release calls through the program.
- // TODO: If the fully aggressive solution isn't valid, try to find a
- // less aggressive solution which is.
- if (NewDelta != 0)
- goto next_retain;
-
// Ok, everything checks out and we're all set. Let's move some code!
Changed = true;
AnyPairsCompletelyEliminated = NewCount == 0;
NumRRs += OldCount - NewCount;
- MoveCalls(Arg, RetainsToMove, ReleasesToMove, Retains, Releases, DeadInsts);
+ MoveCalls(Arg, RetainsToMove, ReleasesToMove,
+ Retains, Releases, DeadInsts, M);
next_retain:
NewReleases.clear();
@@ -2993,7 +3118,8 @@ bool ObjCARCOpt::OptimizeSequences(Function &F) {
bool NestingDetected = Visit(F, BBStates, Retains, Releases);
// Transform.
- return PerformCodePlacement(BBStates, Retains, Releases) && NestingDetected;
+ return PerformCodePlacement(BBStates, Retains, Releases, F.getParent()) &&
+ NestingDetected;
}
/// OptimizeReturns - Look for this pattern:
@@ -3072,7 +3198,8 @@ void ObjCARCOpt::OptimizeReturns(Function &F) {
// Check that there is nothing that can affect the reference
// count between the retain and the call.
- FindDependencies(CanChangeRetainCount, Arg, BB, Retain,
+ // Note that Retain need not be in BB.
+ FindDependencies(CanChangeRetainCount, Arg, Retain->getParent(), Retain,
DependingInstructions, Visited, PA);
if (DependingInstructions.size() != 1)
goto next_block;
@@ -3117,12 +3244,6 @@ bool ObjCARCOpt::doInitialization(Module &M) {
ImpreciseReleaseMDKind =
M.getContext().getMDKindID("clang.imprecise_release");
- // Identify the declarations for objc_retain and friends.
- RetainFunc = M.getFunction("objc_retain");
- RetainBlockFunc = M.getFunction("objc_retainBlock");
- RetainRVFunc = M.getFunction("objc_retainAutoreleasedReturnValue");
- ReleaseFunc = M.getFunction("objc_release");
-
// Intuitively, objc_retain and others are nocapture; however, in practice
// they are not, because they return their argument value. And objc_release
// calls finalizers.
@@ -3132,6 +3253,7 @@ bool ObjCARCOpt::doInitialization(Module &M) {
AutoreleaseRVCallee = 0;
ReleaseCallee = 0;
RetainCallee = 0;
+ RetainBlockCallee = 0;
AutoreleaseCallee = 0;
return false;
@@ -3294,7 +3416,7 @@ Constant *ObjCARCContract::getRetainAutoreleaseCallee(Module *M) {
Type *I8X = PointerType::getUnqual(Type::getInt8Ty(C));
std::vector<Type *> Params;
Params.push_back(I8X);
- const FunctionType *FTy =
+ FunctionType *FTy =
FunctionType::get(I8X, Params, /*isVarArg=*/false);
AttrListPtr Attributes;
Attributes.addAttr(~0u, Attribute::NoUnwind);
@@ -3310,7 +3432,7 @@ Constant *ObjCARCContract::getRetainAutoreleaseRVCallee(Module *M) {
Type *I8X = PointerType::getUnqual(Type::getInt8Ty(C));
std::vector<Type *> Params;
Params.push_back(I8X);
- const FunctionType *FTy =
+ FunctionType *FTy =
FunctionType::get(I8X, Params, /*isVarArg=*/false);
AttrListPtr Attributes;
Attributes.addAttr(~0u, Attribute::NoUnwind);
@@ -3377,7 +3499,7 @@ ObjCARCContract::ContractAutorelease(Function &F, Instruction *Autorelease,
void ObjCARCContract::ContractRelease(Instruction *Release,
inst_iterator &Iter) {
LoadInst *Load = dyn_cast<LoadInst>(GetObjCArg(Release));
- if (!Load || Load->isVolatile()) return;
+ if (!Load || !Load->isSimple()) return;
// For now, require everything to be in one basic block.
BasicBlock *BB = Release->getParent();
@@ -3393,7 +3515,7 @@ void ObjCARCContract::ContractRelease(Instruction *Release,
!(AA->getModRefInfo(I, Loc) & AliasAnalysis::Mod)))
++I;
StoreInst *Store = dyn_cast<StoreInst>(I);
- if (!Store || Store->isVolatile()) return;
+ if (!Store || !Store->isSimple()) return;
if (Store->getPointerOperand() != Loc.Ptr) return;
Value *New = StripPointerCastsAndObjCCalls(Store->getValueOperand());
@@ -3411,8 +3533,8 @@ void ObjCARCContract::ContractRelease(Instruction *Release,
++NumStoreStrongs;
LLVMContext &C = Release->getContext();
- const Type *I8X = PointerType::getUnqual(Type::getInt8Ty(C));
- const Type *I8XX = PointerType::getUnqual(I8X);
+ Type *I8X = PointerType::getUnqual(Type::getInt8Ty(C));
+ Type *I8XX = PointerType::getUnqual(I8X);
Value *Args[] = { Load->getPointerOperand(), New };
if (Args[0]->getType() != I8XX)
@@ -3548,7 +3670,7 @@ bool ObjCARCContract::runOnFunction(Function &F) {
if (Inst != UserInst && DT->dominates(Inst, UserInst)) {
Changed = true;
Instruction *Replacement = Inst;
- const Type *UseTy = U.get()->getType();
+ Type *UseTy = U.get()->getType();
if (PHINode *PHI = dyn_cast<PHINode>(UserInst)) {
// For PHI nodes, insert the bitcast in the predecessor block.
unsigned ValNo =
diff --git a/lib/Transforms/Scalar/Reassociate.cpp b/lib/Transforms/Scalar/Reassociate.cpp
index e6341ae..8f98a5b 100644
--- a/lib/Transforms/Scalar/Reassociate.cpp
+++ b/lib/Transforms/Scalar/Reassociate.cpp
@@ -309,7 +309,7 @@ void Reassociate::LinearizeExprTree(BinaryOperator *I,
std::swap(LHS, RHS);
bool Success = !I->swapOperands();
assert(Success && "swapOperands failed");
- Success = false;
+ (void)Success;
MadeChange = true;
} else if (RHSBO) {
// Turn (A+B)+(C+D) -> (((A+B)+C)+D). This guarantees the RHS is not
diff --git a/lib/Transforms/Scalar/SCCP.cpp b/lib/Transforms/Scalar/SCCP.cpp
index 083412e..196a847 100644
--- a/lib/Transforms/Scalar/SCCP.cpp
+++ b/lib/Transforms/Scalar/SCCP.cpp
@@ -156,7 +156,7 @@ namespace {
///
class SCCPSolver : public InstVisitor<SCCPSolver> {
const TargetData *TD;
- SmallPtrSet<BasicBlock*, 8> BBExecutable;// The BBs that are executable.
+ SmallPtrSet<BasicBlock*, 8> BBExecutable; // The BBs that are executable.
DenseMap<Value*, LatticeVal> ValueState; // The state each value is in.
/// StructValueState - This maintains ValueState for values that have
@@ -241,7 +241,7 @@ public:
/// this method must be called.
void AddTrackedFunction(Function *F) {
// Add an entry, F -> undef.
- if (const StructType *STy = dyn_cast<StructType>(F->getReturnType())) {
+ if (StructType *STy = dyn_cast<StructType>(F->getReturnType())) {
MRVFunctionsTracked.insert(F);
for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i)
TrackedMultipleRetVals.insert(std::make_pair(std::make_pair(F, i),
@@ -302,7 +302,7 @@ public:
/// markAnythingOverdefined - Mark the specified value overdefined. This
/// works with both scalars and structs.
void markAnythingOverdefined(Value *V) {
- if (const StructType *STy = dyn_cast<StructType>(V->getType()))
+ if (StructType *STy = dyn_cast<StructType>(V->getType()))
for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i)
markOverdefined(getStructValueState(V, i), V);
else
@@ -417,7 +417,7 @@ private:
else if (ConstantStruct *CS = dyn_cast<ConstantStruct>(C))
LV.markConstant(CS->getOperand(i)); // Constants are constant.
else if (isa<ConstantAggregateZero>(C)) {
- const Type *FieldTy = cast<StructType>(V->getType())->getElementType(i);
+ Type *FieldTy = cast<StructType>(V->getType())->getElementType(i);
LV.markConstant(Constant::getNullValue(FieldTy));
} else
LV.markOverdefined(); // Unknown sort of constant.
@@ -471,9 +471,9 @@ private:
/// UsersOfOverdefinedPHIs map for PN, remove them now.
void RemoveFromOverdefinedPHIs(Instruction *I, PHINode *PN) {
if (UsersOfOverdefinedPHIs.empty()) return;
- std::multimap<PHINode*, Instruction*>::iterator It, E;
- tie(It, E) = UsersOfOverdefinedPHIs.equal_range(PN);
- while (It != E) {
+ typedef std::multimap<PHINode*, Instruction*>::iterator ItTy;
+ std::pair<ItTy, ItTy> Range = UsersOfOverdefinedPHIs.equal_range(PN);
+ for (ItTy It = Range.first, E = Range.second; It != E;) {
if (It->second == I)
UsersOfOverdefinedPHIs.erase(It++);
else
@@ -486,9 +486,9 @@ private:
/// (Duplicate entries do not break anything directly, but can lead to
/// exponential growth of the table in rare cases.)
void InsertInOverdefinedPHIs(Instruction *I, PHINode *PN) {
- std::multimap<PHINode*, Instruction*>::iterator J, E;
- tie(J, E) = UsersOfOverdefinedPHIs.equal_range(PN);
- for (; J != E; ++J)
+ typedef std::multimap<PHINode*, Instruction*>::iterator ItTy;
+ std::pair<ItTy, ItTy> Range = UsersOfOverdefinedPHIs.equal_range(PN);
+ for (ItTy J = Range.first, E = Range.second; J != E; ++J)
if (J->second == I)
return;
UsersOfOverdefinedPHIs.insert(std::make_pair(PN, I));
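The equal_range rewrite in these two helpers is the standard multimap scan; in isolation (a generic sketch, not part of the pass):

    // Sketch: visit every value stored under Key in a std::multimap,
    // using the iterator pair from equal_range instead of llvm::tie.
    #include <map>

    template <typename K, typename V, typename Fn>
    void ForEachValue(std::multimap<K, V> &M, const K &Key, Fn Visit) {
      typedef typename std::multimap<K, V>::iterator ItTy;
      std::pair<ItTy, ItTy> Range = M.equal_range(Key);
      for (ItTy I = Range.first, E = Range.second; I != E; ++I)
        Visit(I->second);
    }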
@@ -515,6 +515,7 @@ private:
void visitShuffleVectorInst(ShuffleVectorInst &I);
void visitExtractValueInst(ExtractValueInst &EVI);
void visitInsertValueInst(InsertValueInst &IVI);
+ void visitLandingPadInst(LandingPadInst &I) { markAnythingOverdefined(&I); }
// Instructions that cannot be folded away.
void visitStoreInst (StoreInst &I);
@@ -528,8 +529,12 @@ private:
visitTerminatorInst(II);
}
void visitCallSite (CallSite CS);
+ void visitResumeInst (TerminatorInst &I) { /*returns void*/ }
void visitUnwindInst (TerminatorInst &I) { /*returns void*/ }
void visitUnreachableInst(TerminatorInst &I) { /*returns void*/ }
+ void visitFenceInst (FenceInst &I) { /*returns void*/ }
+ void visitAtomicCmpXchgInst (AtomicCmpXchgInst &I) { markOverdefined(&I); }
+ void visitAtomicRMWInst (AtomicRMWInst &I) { markOverdefined(&I); }
void visitAllocaInst (Instruction &I) { markOverdefined(&I); }
void visitVAArgInst (Instruction &I) { markAnythingOverdefined(&I); }
@@ -577,6 +582,10 @@ void SCCPSolver::getFeasibleSuccessors(TerminatorInst &TI,
}
if (SwitchInst *SI = dyn_cast<SwitchInst>(&TI)) {
+ if (TI.getNumSuccessors() < 2) {
+ Succs[0] = true;
+ return;
+ }
LatticeVal SCValue = getValueState(SI->getCondition());
ConstantInt *CI = SCValue.getConstantInt();
@@ -637,6 +646,9 @@ bool SCCPSolver::isEdgeFeasible(BasicBlock *From, BasicBlock *To) {
return true;
if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
+ if (SI->getNumSuccessors() < 2)
+ return true;
+
LatticeVal SCValue = getValueState(SI->getCondition());
ConstantInt *CI = SCValue.getConstantInt();
@@ -692,13 +704,14 @@ void SCCPSolver::visitPHINode(PHINode &PN) {
// There may be instructions using this PHI node that are not overdefined
// themselves. If so, make sure that they know that the PHI node operand
// changed.
- std::multimap<PHINode*, Instruction*>::iterator I, E;
- tie(I, E) = UsersOfOverdefinedPHIs.equal_range(&PN);
- if (I == E)
+ typedef std::multimap<PHINode*, Instruction*>::iterator ItTy;
+ std::pair<ItTy, ItTy> Range = UsersOfOverdefinedPHIs.equal_range(&PN);
+
+ if (Range.first == Range.second)
return;
SmallVector<Instruction*, 16> Users;
- for (; I != E; ++I)
+ for (ItTy I = Range.first, E = Range.second; I != E; ++I)
Users.push_back(I->second);
while (!Users.empty())
visit(Users.pop_back_val());
@@ -772,7 +785,7 @@ void SCCPSolver::visitReturnInst(ReturnInst &I) {
// Handle functions that return multiple values.
if (!TrackedMultipleRetVals.empty()) {
- if (const StructType *STy = dyn_cast<StructType>(ResultOp->getType()))
+ if (StructType *STy = dyn_cast<StructType>(ResultOp->getType()))
if (MRVFunctionsTracked.count(F))
for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i)
mergeInValue(TrackedMultipleRetVals[std::make_pair(F, i)], F,
@@ -825,7 +838,7 @@ void SCCPSolver::visitExtractValueInst(ExtractValueInst &EVI) {
}
void SCCPSolver::visitInsertValueInst(InsertValueInst &IVI) {
- const StructType *STy = dyn_cast<StructType>(IVI.getType());
+ StructType *STy = dyn_cast<StructType>(IVI.getType());
if (STy == 0)
return markOverdefined(&IVI);
@@ -925,7 +938,7 @@ void SCCPSolver::visitBinaryOperator(Instruction &I) {
// Could annihilate value.
if (I.getOpcode() == Instruction::And)
markConstant(IV, &I, Constant::getNullValue(I.getType()));
- else if (const VectorType *PT = dyn_cast<VectorType>(I.getType()))
+ else if (VectorType *PT = dyn_cast<VectorType>(I.getType()))
markConstant(IV, &I, Constant::getAllOnesValue(PT));
else
markConstant(IV, &I,
@@ -1179,8 +1192,8 @@ void SCCPSolver::visitGetElementPtrInst(GetElementPtrInst &I) {
}
Constant *Ptr = Operands[0];
- markConstant(&I, ConstantExpr::getGetElementPtr(Ptr, &Operands[0]+1,
- Operands.size()-1));
+ ArrayRef<Constant *> Indices(Operands.begin() + 1, Operands.end());
+ markConstant(&I, ConstantExpr::getGetElementPtr(Ptr, Indices));
}
void SCCPSolver::visitStoreInst(StoreInst &SI) {
@@ -1278,7 +1291,7 @@ CallOverdefined:
// If we can constant fold this, mark the result of the call as a
// constant.
- if (Constant *C = ConstantFoldCall(F, Operands.data(), Operands.size()))
+ if (Constant *C = ConstantFoldCall(F, Operands))
return markConstant(I, C);
}
@@ -1303,7 +1316,7 @@ CallOverdefined:
continue;
}
- if (const StructType *STy = dyn_cast<StructType>(AI->getType())) {
+ if (StructType *STy = dyn_cast<StructType>(AI->getType())) {
for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
LatticeVal CallArg = getStructValueState(*CAI, i);
mergeInValue(getStructValueState(AI, i), AI, CallArg);
@@ -1315,7 +1328,7 @@ CallOverdefined:
}
// If this is a single/zero retval case, see if we're tracking the function.
- if (const StructType *STy = dyn_cast<StructType>(F->getReturnType())) {
+ if (StructType *STy = dyn_cast<StructType>(F->getReturnType())) {
if (!MRVFunctionsTracked.count(F))
goto CallOverdefined; // Not tracking this callee.
@@ -1419,67 +1432,116 @@ bool SCCPSolver::ResolvedUndefsIn(Function &F) {
// Look for instructions which produce undef values.
if (I->getType()->isVoidTy()) continue;
- if (const StructType *STy = dyn_cast<StructType>(I->getType())) {
- // Only a few things that can be structs matter for undef. Just send
- // all their results to overdefined. We could be more precise than this
- // but it isn't worth bothering.
- if (isa<CallInst>(I) || isa<SelectInst>(I)) {
- for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
- LatticeVal &LV = getStructValueState(I, i);
- if (LV.isUndefined())
- markOverdefined(LV, I);
- }
+ if (StructType *STy = dyn_cast<StructType>(I->getType())) {
+ // Only a few things that can be structs matter for undef.
+
+ // Tracked calls must never be marked overdefined in ResolvedUndefsIn.
+ if (CallSite CS = CallSite(I))
+ if (Function *F = CS.getCalledFunction())
+ if (MRVFunctionsTracked.count(F))
+ continue;
+
+ // extractvalue and insertvalue don't need to be marked; they are
+ // tracked as precisely as their operands.
+ if (isa<ExtractValueInst>(I) || isa<InsertValueInst>(I))
+ continue;
+
+ // Send the results of everything else to overdefined. We could be
+ // more precise than this but it isn't worth bothering.
+ for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
+ LatticeVal &LV = getStructValueState(I, i);
+ if (LV.isUndefined())
+ markOverdefined(LV, I);
}
continue;
}
-
+
LatticeVal &LV = getValueState(I);
if (!LV.isUndefined()) continue;
- // No instructions using structs need disambiguation.
- if (I->getOperand(0)->getType()->isStructTy())
+ // extractvalue is safe; check here because the argument is a struct.
+ if (isa<ExtractValueInst>(I))
continue;
- // Get the lattice values of the first two operands for use below.
+ // Compute the operand LatticeVals, for convenience below.
+ // Anything taking a struct is conservatively assumed to require
+ // overdefined markings.
+ if (I->getOperand(0)->getType()->isStructTy()) {
+ markOverdefined(I);
+ return true;
+ }
LatticeVal Op0LV = getValueState(I->getOperand(0));
LatticeVal Op1LV;
if (I->getNumOperands() == 2) {
- // No instructions using structs need disambiguation.
- if (I->getOperand(1)->getType()->isStructTy())
- continue;
-
- // If this is a two-operand instruction, and if both operands are
- // undefs, the result stays undef.
+ if (I->getOperand(1)->getType()->isStructTy()) {
+ markOverdefined(I);
+ return true;
+ }
+
Op1LV = getValueState(I->getOperand(1));
- if (Op0LV.isUndefined() && Op1LV.isUndefined())
- continue;
}
-
// If this is an instruction whose result is defined even if the input is
// not fully defined, propagate the information.
- const Type *ITy = I->getType();
+ Type *ITy = I->getType();
switch (I->getOpcode()) {
- default: break; // Leave the instruction as an undef.
+ case Instruction::Add:
+ case Instruction::Sub:
+ case Instruction::Trunc:
+ case Instruction::FPTrunc:
+ case Instruction::BitCast:
+ break; // Any undef -> undef
+ case Instruction::FSub:
+ case Instruction::FAdd:
+ case Instruction::FMul:
+ case Instruction::FDiv:
+ case Instruction::FRem:
+ // Floating-point binary operation: be conservative.
+ if (Op0LV.isUndefined() && Op1LV.isUndefined())
+ markForcedConstant(I, Constant::getNullValue(ITy));
+ else
+ markOverdefined(I);
+ return true;
case Instruction::ZExt:
- // After a zero extend, we know the top part is zero. SExt doesn't have
- // to be handled here, because we don't know whether the top part is 1's
- // or 0's.
- case Instruction::SIToFP: // some FP values are not possible, just use 0.
- case Instruction::UIToFP: // some FP values are not possible, just use 0.
+ case Instruction::SExt:
+ case Instruction::FPToUI:
+ case Instruction::FPToSI:
+ case Instruction::FPExt:
+ case Instruction::PtrToInt:
+ case Instruction::IntToPtr:
+ case Instruction::SIToFP:
+ case Instruction::UIToFP:
+ // undef -> 0; some outputs are impossible
markForcedConstant(I, Constant::getNullValue(ITy));
return true;
case Instruction::Mul:
case Instruction::And:
+ // Both operands undef -> undef
+ if (Op0LV.isUndefined() && Op1LV.isUndefined())
+ break;
// undef * X -> 0. X could be zero.
// undef & X -> 0. X could be zero.
markForcedConstant(I, Constant::getNullValue(ITy));
return true;
case Instruction::Or:
+ // Both operands undef -> undef
+ if (Op0LV.isUndefined() && Op1LV.isUndefined())
+ break;
// undef | X -> -1. X could be -1.
markForcedConstant(I, Constant::getAllOnesValue(ITy));
return true;
+ case Instruction::Xor:
+ // undef ^ undef -> 0; strictly speaking, this is not
+ // necessary, but we try to be nice to people who expect this
+ // behavior in simple cases.
+ if (Op0LV.isUndefined() && Op1LV.isUndefined()) {
+ markForcedConstant(I, Constant::getNullValue(ITy));
+ return true;
+ }
+ // undef ^ X -> undef
+ break;
+
case Instruction::SDiv:
case Instruction::UDiv:
case Instruction::SRem:
@@ -1494,26 +1556,24 @@ bool SCCPSolver::ResolvedUndefsIn(Function &F) {
return true;
case Instruction::AShr:
- // undef >>s X -> undef. No change.
- if (Op0LV.isUndefined()) break;
-
- // X >>s undef -> X. X could be 0, X could have the high-bit known set.
- if (Op0LV.isConstant())
- markForcedConstant(I, Op0LV.getConstant());
- else
- markOverdefined(I);
+ // X >>a undef -> undef.
+ if (Op1LV.isUndefined()) break;
+
+ // undef >>a X -> all ones
+ markForcedConstant(I, Constant::getAllOnesValue(ITy));
return true;
case Instruction::LShr:
case Instruction::Shl:
- // undef >> X -> undef. No change.
- // undef << X -> undef. No change.
- if (Op0LV.isUndefined()) break;
-
- // X >> undef -> 0. X could be 0.
- // X << undef -> 0. X could be 0.
+ // X << undef -> undef.
+ // X >> undef -> undef.
+ if (Op1LV.isUndefined()) break;
+
+ // undef << X -> 0
+ // undef >> X -> 0
markForcedConstant(I, Constant::getNullValue(ITy));
return true;
case Instruction::Select:
+ Op1LV = getValueState(I->getOperand(1));
// undef ? X : Y -> X or Y. There could be commonality between X/Y.
if (Op0LV.isUndefined()) {
if (!Op1LV.isConstant()) // Pick the constant one if there is any.
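For quick reference, the shift-related rules introduced above reduce to the following table (a reading aid, not source text):

    instruction    undef shift amount    undef value operand
    shl, lshr      result stays undef    forced to 0
    ashr           result stays undef    forced to all-ones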
@@ -1533,9 +1593,35 @@ bool SCCPSolver::ResolvedUndefsIn(Function &F) {
else
markOverdefined(I);
return true;
+ case Instruction::Load:
+ // A load here means one of two things: a load of undef from a global,
+ // or a load from an unknown pointer. Either way, having it return undef
+ // is okay.
+ break;
+ case Instruction::ICmp:
+ // X == undef -> undef. Other comparisons get more complicated.
+ if (cast<ICmpInst>(I)->isEquality())
+ break;
+ markOverdefined(I);
+ return true;
case Instruction::Call:
- // If a call has an undef result, it is because it is constant foldable
- // but one of the inputs was undef. Just force the result to
+ case Instruction::Invoke: {
+ // There are two reasons a call can have an undef result:
+ // 1. It could be tracked.
+ // 2. It could be constant-foldable.
+ // Because of the way we solve return values, tracked calls must
+ // never be marked overdefined in ResolvedUndefsIn.
+ if (Function *F = CallSite(I).getCalledFunction())
+ if (TrackedRetVals.count(F))
+ break;
+
+ // If the call is constant-foldable, we mark it overdefined because
+ // we do not know what return values are valid.
+ markOverdefined(I);
+ return true;
+ }
+ default:
+ // If we don't know what should happen here, conservatively mark it
// overdefined.
markOverdefined(I);
return true;
@@ -1621,15 +1707,25 @@ FunctionPass *llvm::createSCCPPass() {
static void DeleteInstructionInBlock(BasicBlock *BB) {
DEBUG(dbgs() << " BasicBlock Dead:" << *BB);
++NumDeadBlocks;
-
- // Delete the instructions backwards, as it has a reduced likelihood of
- // having to update as many def-use and use-def chains.
- while (!isa<TerminatorInst>(BB->begin())) {
- Instruction *I = --BasicBlock::iterator(BB->getTerminator());
-
- if (!I->use_empty())
- I->replaceAllUsesWith(UndefValue::get(I->getType()));
- BB->getInstList().erase(I);
+
+ // Check to see if there are non-terminating instructions to delete.
+ if (isa<TerminatorInst>(BB->begin()))
+ return;
+
+ // Delete the instructions backwards, as doing so reduces the likelihood of
+ // having to update as many def-use and use-def chains.
+ Instruction *EndInst = BB->getTerminator(); // Last not to be deleted.
+ while (EndInst != BB->begin()) {
+ // Delete the next to last instruction.
+ BasicBlock::iterator I = EndInst;
+ Instruction *Inst = --I;
+ if (!Inst->use_empty())
+ Inst->replaceAllUsesWith(UndefValue::get(Inst->getType()));
+ if (isa<LandingPadInst>(Inst)) {
+ EndInst = Inst;
+ continue;
+ }
+ BB->getInstList().erase(Inst);
++NumInstRemoved;
}
}
diff --git a/lib/Transforms/Scalar/Scalar.cpp b/lib/Transforms/Scalar/Scalar.cpp
index 302c287..f6918de 100644
--- a/lib/Transforms/Scalar/Scalar.cpp
+++ b/lib/Transforms/Scalar/Scalar.cpp
@@ -63,7 +63,6 @@ void llvm::initializeScalarOpts(PassRegistry &Registry) {
initializeCFGSimplifyPassPass(Registry);
initializeSimplifyLibCallsPass(Registry);
initializeSinkingPass(Registry);
- initializeTailDupPass(Registry);
initializeTailCallElimPass(Registry);
}
@@ -187,3 +186,7 @@ void LLVMAddTypeBasedAliasAnalysisPass(LLVMPassManagerRef PM) {
void LLVMAddBasicAliasAnalysisPass(LLVMPassManagerRef PM) {
unwrap(PM)->add(createBasicAliasAnalysisPass());
}
+
+void LLVMAddLowerExpectIntrinsicPass(LLVMPassManagerRef PM) {
+ unwrap(PM)->add(createLowerExpectIntrinsicPass());
+}
diff --git a/lib/Transforms/Scalar/ScalarReplAggregates.cpp b/lib/Transforms/Scalar/ScalarReplAggregates.cpp
index 7d6349c..c6d9123 100644
--- a/lib/Transforms/Scalar/ScalarReplAggregates.cpp
+++ b/lib/Transforms/Scalar/ScalarReplAggregates.cpp
@@ -129,11 +129,11 @@ namespace {
AllocaInfo &Info);
void isSafeGEP(GetElementPtrInst *GEPI, uint64_t &Offset, AllocaInfo &Info);
void isSafeMemAccess(uint64_t Offset, uint64_t MemSize,
- const Type *MemOpType, bool isStore, AllocaInfo &Info,
+ Type *MemOpType, bool isStore, AllocaInfo &Info,
Instruction *TheAccess, bool AllowWholeAccess);
- bool TypeHasComponent(const Type *T, uint64_t Offset, uint64_t Size);
- uint64_t FindElementAndOffset(const Type *&T, uint64_t &Offset,
- const Type *&IdxTy);
+ bool TypeHasComponent(Type *T, uint64_t Offset, uint64_t Size);
+ uint64_t FindElementAndOffset(Type *&T, uint64_t &Offset,
+ Type *&IdxTy);
void DoScalarReplacement(AllocaInst *AI,
std::vector<AllocaInst*> &WorkList);
@@ -145,6 +145,9 @@ namespace {
SmallVector<AllocaInst*, 32> &NewElts);
void RewriteGEP(GetElementPtrInst *GEPI, AllocaInst *AI, uint64_t Offset,
SmallVector<AllocaInst*, 32> &NewElts);
+ void RewriteLifetimeIntrinsic(IntrinsicInst *II, AllocaInst *AI,
+ uint64_t Offset,
+ SmallVector<AllocaInst*, 32> &NewElts);
void RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *Inst,
AllocaInst *AI,
SmallVector<AllocaInst*, 32> &NewElts);
@@ -253,7 +256,7 @@ class ConvertToScalarInfo {
/// VectorTy - This tracks the type that we should promote the vector to if
/// it is possible to turn it into a vector. This starts out null, and if it
/// isn't possible to turn into a vector type, it gets set to VoidTy.
- const VectorType *VectorTy;
+ VectorType *VectorTy;
/// HadNonMemTransferAccess - True if there is at least one access to the
/// alloca that is not a MemTransferInst. We don't want to turn structs into
@@ -269,11 +272,11 @@ public:
private:
bool CanConvertToScalar(Value *V, uint64_t Offset);
- void MergeInTypeForLoadOrStore(const Type *In, uint64_t Offset);
- bool MergeInVectorType(const VectorType *VInTy, uint64_t Offset);
+ void MergeInTypeForLoadOrStore(Type *In, uint64_t Offset);
+ bool MergeInVectorType(VectorType *VInTy, uint64_t Offset);
void ConvertUsesToScalar(Value *Ptr, AllocaInst *NewAI, uint64_t Offset);
- Value *ConvertScalar_ExtractValue(Value *NV, const Type *ToType,
+ Value *ConvertScalar_ExtractValue(Value *NV, Type *ToType,
uint64_t Offset, IRBuilder<> &Builder);
Value *ConvertScalar_InsertValue(Value *StoredVal, Value *ExistingVal,
uint64_t Offset, IRBuilder<> &Builder);
@@ -295,8 +298,6 @@ AllocaInst *ConvertToScalarInfo::TryConvert(AllocaInst *AI) {
if (ScalarKind == Unknown)
ScalarKind = Integer;
- // FIXME: It should be possible to promote the vector type up to the alloca's
- // size.
if (ScalarKind == Vector && VectorTy->getBitWidth() != AllocaSize * 8)
ScalarKind = Integer;
@@ -306,7 +307,7 @@ AllocaInst *ConvertToScalarInfo::TryConvert(AllocaInst *AI) {
// random stuff that doesn't use vectors (e.g. <9 x double>) because then
// we just get a lot of insert/extracts. If at least one vector is
// involved, then we probably really do have a union of vector/array.
- const Type *NewTy;
+ Type *NewTy;
if (ScalarKind == Vector) {
assert(VectorTy && "Missing type for vector scalar.");
DEBUG(dbgs() << "CONVERT TO VECTOR: " << *AI << "\n TYPE = "
@@ -331,20 +332,16 @@ AllocaInst *ConvertToScalarInfo::TryConvert(AllocaInst *AI) {
/// (VectorTy) so far at the offset specified by Offset (which is specified in
/// bytes).
///
-/// There are three cases we handle here:
+/// There are two cases we handle here:
/// 1) A union of vector types of the same size and potentially its elements.
/// Here we turn element accesses into insert/extract element operations.
/// This promotes a <4 x float> with a store of float to the third element
/// into a <4 x float> that uses insert element.
-/// 2) A union of vector types with power-of-2 size differences, e.g. a float,
-/// <2 x float> and <4 x float>. Here we turn element accesses into insert
-/// and extract element operations, and <2 x float> accesses into a cast to
-/// <2 x double>, an extract, and a cast back to <2 x float>.
-/// 3) A fully general blob of memory, which we turn into some (potentially
+/// 2) A fully general blob of memory, which we turn into some (potentially
/// large) integer type with extract and insert operations where the loads
/// and stores would mutate the memory. We mark this by setting VectorTy
/// to VoidTy.
-void ConvertToScalarInfo::MergeInTypeForLoadOrStore(const Type *In,
+void ConvertToScalarInfo::MergeInTypeForLoadOrStore(Type *In,
uint64_t Offset) {
// If we already decided to turn this into a blob of integer memory, there is
// nothing to be done.
@@ -355,7 +352,7 @@ void ConvertToScalarInfo::MergeInTypeForLoadOrStore(const Type *In,
// If the In type is a vector that is the same size as the alloca, see if it
// matches the existing VecTy.
- if (const VectorType *VInTy = dyn_cast<VectorType>(In)) {
+ if (VectorType *VInTy = dyn_cast<VectorType>(In)) {
if (MergeInVectorType(VInTy, Offset))
return;
} else if (In->isFloatTy() || In->isDoubleTy() ||
@@ -371,20 +368,13 @@ void ConvertToScalarInfo::MergeInTypeForLoadOrStore(const Type *In,
// if the implied vector agrees with what we already have and if Offset is
// compatible with it.
if (Offset % EltSize == 0 && AllocaSize % EltSize == 0 &&
- (!VectorTy || Offset * 8 < VectorTy->getPrimitiveSizeInBits())) {
+ (!VectorTy || EltSize == VectorTy->getElementType()
+ ->getPrimitiveSizeInBits()/8)) {
if (!VectorTy) {
ScalarKind = ImplicitVector;
VectorTy = VectorType::get(In, AllocaSize/EltSize);
- return;
}
-
- unsigned CurrentEltSize = VectorTy->getElementType()
- ->getPrimitiveSizeInBits()/8;
- if (EltSize == CurrentEltSize)
- return;
-
- if (In->isIntegerTy() && isPowerOf2_32(AllocaSize / EltSize))
- return;
+ return;
}
}
@@ -395,74 +385,21 @@ void ConvertToScalarInfo::MergeInTypeForLoadOrStore(const Type *In,
/// MergeInVectorType - Handles the vector case of MergeInTypeForLoadOrStore,
/// returning true if the type was successfully merged and false otherwise.
-bool ConvertToScalarInfo::MergeInVectorType(const VectorType *VInTy,
+bool ConvertToScalarInfo::MergeInVectorType(VectorType *VInTy,
uint64_t Offset) {
- // TODO: Support nonzero offsets?
- if (Offset != 0)
- return false;
-
- // Only allow vectors that are a power-of-2 away from the size of the alloca.
- if (!isPowerOf2_64(AllocaSize / (VInTy->getBitWidth() / 8)))
- return false;
-
- // If this the first vector we see, remember the type so that we know the
- // element size.
- if (!VectorTy) {
+ if (VInTy->getBitWidth()/8 == AllocaSize && Offset == 0) {
+ // If we're storing/loading a vector of the right size, allow it as a
+    // vector. If this is the first vector we see, remember the type so that
+ // we know the element size. If this is a subsequent access, ignore it
+ // even if it is a differing type but the same size. Worst case we can
+ // bitcast the resultant vectors.
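+    // For example (illustrative): on a 16-byte alloca, both <4 x float> and
+    // <2 x i64> accesses are accepted here; the non-matching accesses are
+    // later bitcast to the recorded VectorTy.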
+ if (!VectorTy)
+ VectorTy = VInTy;
ScalarKind = Vector;
- VectorTy = VInTy;
return true;
}
- unsigned BitWidth = VectorTy->getBitWidth();
- unsigned InBitWidth = VInTy->getBitWidth();
-
- // Vectors of the same size can be converted using a simple bitcast.
- if (InBitWidth == BitWidth && AllocaSize == (InBitWidth / 8)) {
- ScalarKind = Vector;
- return true;
- }
-
- const Type *ElementTy = VectorTy->getElementType();
- const Type *InElementTy = VInTy->getElementType();
-
- // Do not allow mixed integer and floating-point accesses from vectors of
- // different sizes.
- if (ElementTy->isFloatingPointTy() != InElementTy->isFloatingPointTy())
- return false;
-
- if (ElementTy->isFloatingPointTy()) {
- // Only allow floating-point vectors of different sizes if they have the
- // same element type.
- // TODO: This could be loosened a bit, but would anything benefit?
- if (ElementTy != InElementTy)
- return false;
-
- // There are no arbitrary-precision floating-point types, which limits the
- // number of legal vector types with larger element types that we can form
- // to bitcast and extract a subvector.
- // TODO: We could support some more cases with mixed fp128 and double here.
- if (!(BitWidth == 64 || BitWidth == 128) ||
- !(InBitWidth == 64 || InBitWidth == 128))
- return false;
- } else {
- assert(ElementTy->isIntegerTy() && "Vector elements must be either integer "
- "or floating-point.");
- unsigned BitWidth = ElementTy->getPrimitiveSizeInBits();
- unsigned InBitWidth = InElementTy->getPrimitiveSizeInBits();
-
- // Do not allow integer types smaller than a byte or types whose widths are
- // not a multiple of a byte.
- if (BitWidth < 8 || InBitWidth < 8 ||
- BitWidth % 8 != 0 || InBitWidth % 8 != 0)
- return false;
- }
-
- // Pick the largest of the two vector types.
- ScalarKind = Vector;
- if (InBitWidth > BitWidth)
- VectorTy = VInTy;
-
- return true;
+ return false;
}
/// CanConvertToScalar - V is a pointer. If we can convert the pointee and all
@@ -480,7 +417,7 @@ bool ConvertToScalarInfo::CanConvertToScalar(Value *V, uint64_t Offset) {
if (LoadInst *LI = dyn_cast<LoadInst>(User)) {
// Don't break volatile or atomic loads.
- if (LI->isVolatile())
+ if (!LI->isSimple())
return false;
// Don't touch MMX operations.
if (LI->getType()->isX86_MMXTy())
@@ -492,7 +429,7 @@ bool ConvertToScalarInfo::CanConvertToScalar(Value *V, uint64_t Offset) {
if (StoreInst *SI = dyn_cast<StoreInst>(User)) {
// Storing the pointer, not into the value?
- if (SI->getOperand(0) == V || SI->isVolatile()) return false;
+ if (SI->getOperand(0) == V || !SI->isSimple()) return false;
// Don't touch MMX operations.
if (SI->getOperand(0)->getType()->isX86_MMXTy())
return false;
@@ -502,7 +439,8 @@ bool ConvertToScalarInfo::CanConvertToScalar(Value *V, uint64_t Offset) {
}
if (BitCastInst *BCI = dyn_cast<BitCastInst>(User)) {
- IsNotTrivial = true; // Can't be mem2reg'd.
+ if (!onlyUsedByLifetimeMarkers(BCI))
+ IsNotTrivial = true; // Can't be mem2reg'd.
if (!CanConvertToScalar(BCI, Offset))
return false;
continue;
@@ -516,7 +454,7 @@ bool ConvertToScalarInfo::CanConvertToScalar(Value *V, uint64_t Offset) {
// Compute the offset that this GEP adds to the pointer.
SmallVector<Value*, 8> Indices(GEP->op_begin()+1, GEP->op_end());
uint64_t GEPOffset = TD.getIndexedOffset(GEP->getPointerOperandType(),
- &Indices[0], Indices.size());
+ Indices);
// See if all uses can be converted.
if (!CanConvertToScalar(GEP, Offset+GEPOffset))
return false;
@@ -560,6 +498,14 @@ bool ConvertToScalarInfo::CanConvertToScalar(Value *V, uint64_t Offset) {
continue;
}
+ // If this is a lifetime intrinsic, we can handle it.
+ if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(User)) {
+ if (II->getIntrinsicID() == Intrinsic::lifetime_start ||
+ II->getIntrinsicID() == Intrinsic::lifetime_end) {
+ continue;
+ }
+ }
+
// Otherwise, we cannot handle this!
return false;
}
@@ -589,7 +535,7 @@ void ConvertToScalarInfo::ConvertUsesToScalar(Value *Ptr, AllocaInst *NewAI,
// Compute the offset that this GEP adds to the pointer.
SmallVector<Value*, 8> Indices(GEP->op_begin()+1, GEP->op_end());
uint64_t GEPOffset = TD.getIndexedOffset(GEP->getPointerOperandType(),
- &Indices[0], Indices.size());
+ Indices);
ConvertUsesToScalar(GEP, NewAI, Offset+GEPOffset*8);
GEP->eraseFromParent();
continue;
@@ -599,7 +545,7 @@ void ConvertToScalarInfo::ConvertUsesToScalar(Value *Ptr, AllocaInst *NewAI,
if (LoadInst *LI = dyn_cast<LoadInst>(User)) {
// The load is a bit extract from NewAI shifted right by Offset bits.
- Value *LoadedVal = Builder.CreateLoad(NewAI, "tmp");
+ Value *LoadedVal = Builder.CreateLoad(NewAI);
Value *NewLoadVal
= ConvertScalar_ExtractValue(LoadedVal, LI->getType(), Offset, Builder);
LI->replaceAllUsesWith(NewLoadVal);
@@ -668,8 +614,8 @@ void ConvertToScalarInfo::ConvertUsesToScalar(Value *Ptr, AllocaInst *NewAI,
// pointer (bitcasted), then a store to our new alloca.
assert(MTI->getRawDest() == Ptr && "Neither use is of pointer?");
Value *SrcPtr = MTI->getSource();
- const PointerType* SPTy = cast<PointerType>(SrcPtr->getType());
- const PointerType* AIPTy = cast<PointerType>(NewAI->getType());
+ PointerType* SPTy = cast<PointerType>(SrcPtr->getType());
+ PointerType* AIPTy = cast<PointerType>(NewAI->getType());
if (SPTy->getAddressSpace() != AIPTy->getAddressSpace()) {
AIPTy = PointerType::get(AIPTy->getElementType(),
SPTy->getAddressSpace());
@@ -685,8 +631,8 @@ void ConvertToScalarInfo::ConvertUsesToScalar(Value *Ptr, AllocaInst *NewAI,
assert(MTI->getRawSource() == Ptr && "Neither use is of pointer?");
LoadInst *SrcVal = Builder.CreateLoad(NewAI, "srcval");
- const PointerType* DPTy = cast<PointerType>(MTI->getDest()->getType());
- const PointerType* AIPTy = cast<PointerType>(NewAI->getType());
+ PointerType* DPTy = cast<PointerType>(MTI->getDest()->getType());
+ PointerType* AIPTy = cast<PointerType>(NewAI->getType());
if (DPTy->getAddressSpace() != AIPTy->getAddressSpace()) {
AIPTy = PointerType::get(AIPTy->getElementType(),
DPTy->getAddressSpace());
@@ -703,65 +649,18 @@ void ConvertToScalarInfo::ConvertUsesToScalar(Value *Ptr, AllocaInst *NewAI,
continue;
}
- llvm_unreachable("Unsupported operation!");
- }
-}
+ if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(User)) {
+ if (II->getIntrinsicID() == Intrinsic::lifetime_start ||
+ II->getIntrinsicID() == Intrinsic::lifetime_end) {
+ // There's no need to preserve these, as the resulting alloca will be
+        // converted to a register anyway.
+ II->eraseFromParent();
+ continue;
+ }
+ }
-/// getScaledElementType - Gets a scaled element type for a partial vector
-/// access of an alloca. The input types must be integer or floating-point
-/// scalar or vector types, and the resulting type is an integer, float or
-/// double.
-static const Type *getScaledElementType(const Type *Ty1, const Type *Ty2,
- unsigned NewBitWidth) {
- bool IsFP1 = Ty1->isFloatingPointTy() ||
- (Ty1->isVectorTy() &&
- cast<VectorType>(Ty1)->getElementType()->isFloatingPointTy());
- bool IsFP2 = Ty2->isFloatingPointTy() ||
- (Ty2->isVectorTy() &&
- cast<VectorType>(Ty2)->getElementType()->isFloatingPointTy());
-
- LLVMContext &Context = Ty1->getContext();
-
- // Prefer floating-point types over integer types, as integer types may have
- // been created by earlier scalar replacement.
- if (IsFP1 || IsFP2) {
- if (NewBitWidth == 32)
- return Type::getFloatTy(Context);
- if (NewBitWidth == 64)
- return Type::getDoubleTy(Context);
+ llvm_unreachable("Unsupported operation!");
}
-
- return Type::getIntNTy(Context, NewBitWidth);
-}
-
-/// CreateShuffleVectorCast - Creates a shuffle vector to convert one vector
-/// to another vector of the same element type which has the same allocation
-/// size but different primitive sizes (e.g. <3 x i32> and <4 x i32>).
-static Value *CreateShuffleVectorCast(Value *FromVal, const Type *ToType,
- IRBuilder<> &Builder) {
- const Type *FromType = FromVal->getType();
- const VectorType *FromVTy = cast<VectorType>(FromType);
- const VectorType *ToVTy = cast<VectorType>(ToType);
- assert((ToVTy->getElementType() == FromVTy->getElementType()) &&
- "Vectors must have the same element type");
- Value *UnV = UndefValue::get(FromType);
- unsigned numEltsFrom = FromVTy->getNumElements();
- unsigned numEltsTo = ToVTy->getNumElements();
-
- SmallVector<Constant*, 3> Args;
- const Type* Int32Ty = Builder.getInt32Ty();
- unsigned minNumElts = std::min(numEltsFrom, numEltsTo);
- unsigned i;
- for (i=0; i != minNumElts; ++i)
- Args.push_back(ConstantInt::get(Int32Ty, i));
-
- if (i < numEltsTo) {
- Constant* UnC = UndefValue::get(Int32Ty);
- for (; i != numEltsTo; ++i)
- Args.push_back(UnC);
- }
- Constant *Mask = ConstantVector::get(Args);
- return Builder.CreateShuffleVector(FromVal, UnV, Mask, "tmpV");
}
/// ConvertScalar_ExtractValue - Extract a value of type ToType from an integer
@@ -775,50 +674,20 @@ static Value *CreateShuffleVectorCast(Value *FromVal, const Type *ToType,
/// Offset is an offset from the original alloca, in bits that need to be
/// shifted to the right.
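+/// For instance (illustrative), on a little-endian target an i16 load at bit
+/// offset 16 of an i32-backed alloca becomes roughly:
+///   %shr = lshr i32 %FromVal, 16
+///   %res = trunc i32 %shr to i16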
Value *ConvertToScalarInfo::
-ConvertScalar_ExtractValue(Value *FromVal, const Type *ToType,
+ConvertScalar_ExtractValue(Value *FromVal, Type *ToType,
uint64_t Offset, IRBuilder<> &Builder) {
// If the load is of the whole new alloca, no conversion is needed.
- const Type *FromType = FromVal->getType();
+ Type *FromType = FromVal->getType();
if (FromType == ToType && Offset == 0)
return FromVal;
// If the result alloca is a vector type, this is either an element
// access or a bitcast to another vector type of the same size.
- if (const VectorType *VTy = dyn_cast<VectorType>(FromType)) {
+ if (VectorType *VTy = dyn_cast<VectorType>(FromType)) {
unsigned FromTypeSize = TD.getTypeAllocSize(FromType);
unsigned ToTypeSize = TD.getTypeAllocSize(ToType);
- if (FromTypeSize == ToTypeSize) {
- // If the two types have the same primitive size, use a bit cast.
- // Otherwise, it is two vectors with the same element type that has
- // the same allocation size but different number of elements so use
- // a shuffle vector.
- if (FromType->getPrimitiveSizeInBits() ==
- ToType->getPrimitiveSizeInBits())
- return Builder.CreateBitCast(FromVal, ToType, "tmp");
- else
- return CreateShuffleVectorCast(FromVal, ToType, Builder);
- }
-
- if (isPowerOf2_64(FromTypeSize / ToTypeSize)) {
- assert(!(ToType->isVectorTy() && Offset != 0) && "Can't extract a value "
- "of a smaller vector type at a nonzero offset.");
-
- const Type *CastElementTy = getScaledElementType(FromType, ToType,
- ToTypeSize * 8);
- unsigned NumCastVectorElements = FromTypeSize / ToTypeSize;
-
- LLVMContext &Context = FromVal->getContext();
- const Type *CastTy = VectorType::get(CastElementTy,
- NumCastVectorElements);
- Value *Cast = Builder.CreateBitCast(FromVal, CastTy, "tmp");
-
- unsigned EltSize = TD.getTypeAllocSizeInBits(CastElementTy);
- unsigned Elt = Offset/EltSize;
- assert(EltSize*Elt == Offset && "Invalid modulus in validity checking");
- Value *Extract = Builder.CreateExtractElement(Cast, ConstantInt::get(
- Type::getInt32Ty(Context), Elt), "tmp");
- return Builder.CreateBitCast(Extract, ToType, "tmp");
- }
+ if (FromTypeSize == ToTypeSize)
+ return Builder.CreateBitCast(FromVal, ToType);
// Otherwise it must be an element access.
unsigned Elt = 0;
@@ -828,40 +697,39 @@ ConvertScalar_ExtractValue(Value *FromVal, const Type *ToType,
assert(EltSize*Elt == Offset && "Invalid modulus in validity checking");
}
// Return the element extracted out of it.
- Value *V = Builder.CreateExtractElement(FromVal, ConstantInt::get(
- Type::getInt32Ty(FromVal->getContext()), Elt), "tmp");
+ Value *V = Builder.CreateExtractElement(FromVal, Builder.getInt32(Elt));
if (V->getType() != ToType)
- V = Builder.CreateBitCast(V, ToType, "tmp");
+ V = Builder.CreateBitCast(V, ToType);
return V;
}
// If ToType is a first class aggregate, extract out each of the pieces and
// use insertvalue's to form the FCA.
- if (const StructType *ST = dyn_cast<StructType>(ToType)) {
+ if (StructType *ST = dyn_cast<StructType>(ToType)) {
const StructLayout &Layout = *TD.getStructLayout(ST);
Value *Res = UndefValue::get(ST);
for (unsigned i = 0, e = ST->getNumElements(); i != e; ++i) {
Value *Elt = ConvertScalar_ExtractValue(FromVal, ST->getElementType(i),
Offset+Layout.getElementOffsetInBits(i),
Builder);
- Res = Builder.CreateInsertValue(Res, Elt, i, "tmp");
+ Res = Builder.CreateInsertValue(Res, Elt, i);
}
return Res;
}
- if (const ArrayType *AT = dyn_cast<ArrayType>(ToType)) {
+ if (ArrayType *AT = dyn_cast<ArrayType>(ToType)) {
uint64_t EltSize = TD.getTypeAllocSizeInBits(AT->getElementType());
Value *Res = UndefValue::get(AT);
for (unsigned i = 0, e = AT->getNumElements(); i != e; ++i) {
Value *Elt = ConvertScalar_ExtractValue(FromVal, AT->getElementType(),
Offset+i*EltSize, Builder);
- Res = Builder.CreateInsertValue(Res, Elt, i, "tmp");
+ Res = Builder.CreateInsertValue(Res, Elt, i);
}
return Res;
}
// Otherwise, this must be a union that was converted to an integer value.
- const IntegerType *NTy = cast<IntegerType>(FromVal->getType());
+ IntegerType *NTy = cast<IntegerType>(FromVal->getType());
// If this is a big-endian system and the load is narrower than the
// full alloca type, we need to do a shift to get the right bits.
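+  // For example (illustrative): on a big-endian target, loading the i8 at
+  // byte offset 0 of an i32-backed alloca requires an lshr by 24 before the
+  // truncation below.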
@@ -881,33 +749,31 @@ ConvertScalar_ExtractValue(Value *FromVal, const Type *ToType,
// only some bits are used.
if (ShAmt > 0 && (unsigned)ShAmt < NTy->getBitWidth())
FromVal = Builder.CreateLShr(FromVal,
- ConstantInt::get(FromVal->getType(),
- ShAmt), "tmp");
+ ConstantInt::get(FromVal->getType(), ShAmt));
else if (ShAmt < 0 && (unsigned)-ShAmt < NTy->getBitWidth())
FromVal = Builder.CreateShl(FromVal,
- ConstantInt::get(FromVal->getType(),
- -ShAmt), "tmp");
+ ConstantInt::get(FromVal->getType(), -ShAmt));
// Finally, unconditionally truncate the integer to the right width.
unsigned LIBitWidth = TD.getTypeSizeInBits(ToType);
if (LIBitWidth < NTy->getBitWidth())
FromVal =
Builder.CreateTrunc(FromVal, IntegerType::get(FromVal->getContext(),
- LIBitWidth), "tmp");
+ LIBitWidth));
else if (LIBitWidth > NTy->getBitWidth())
FromVal =
Builder.CreateZExt(FromVal, IntegerType::get(FromVal->getContext(),
- LIBitWidth), "tmp");
+ LIBitWidth));
// If the result is an integer, this is a trunc or bitcast.
if (ToType->isIntegerTy()) {
// Should be done.
} else if (ToType->isFloatingPointTy() || ToType->isVectorTy()) {
// Just do a bitcast, we know the sizes match up.
- FromVal = Builder.CreateBitCast(FromVal, ToType, "tmp");
+ FromVal = Builder.CreateBitCast(FromVal, ToType);
} else {
// Otherwise must be a pointer.
- FromVal = Builder.CreateIntToPtr(FromVal, ToType, "tmp");
+ FromVal = Builder.CreateIntToPtr(FromVal, ToType);
}
assert(FromVal->getType() == ToType && "Didn't convert right?");
return FromVal;
@@ -927,65 +793,30 @@ ConvertScalar_InsertValue(Value *SV, Value *Old,
uint64_t Offset, IRBuilder<> &Builder) {
// Convert the stored type to the actual type, shift it left to insert
// then 'or' into place.
- const Type *AllocaType = Old->getType();
+ Type *AllocaType = Old->getType();
LLVMContext &Context = Old->getContext();
- if (const VectorType *VTy = dyn_cast<VectorType>(AllocaType)) {
+ if (VectorType *VTy = dyn_cast<VectorType>(AllocaType)) {
uint64_t VecSize = TD.getTypeAllocSizeInBits(VTy);
uint64_t ValSize = TD.getTypeAllocSizeInBits(SV->getType());
// Changing the whole vector with memset or with an access of a different
// vector type?
- if (ValSize == VecSize) {
- // If the two types have the same primitive size, use a bit cast.
- // Otherwise, it is two vectors with the same element type that has
- // the same allocation size but different number of elements so use
- // a shuffle vector.
- if (VTy->getPrimitiveSizeInBits() ==
- SV->getType()->getPrimitiveSizeInBits())
- return Builder.CreateBitCast(SV, AllocaType, "tmp");
- else
- return CreateShuffleVectorCast(SV, VTy, Builder);
- }
-
- if (isPowerOf2_64(VecSize / ValSize)) {
- assert(!(SV->getType()->isVectorTy() && Offset != 0) && "Can't insert a "
- "value of a smaller vector type at a nonzero offset.");
-
- const Type *CastElementTy = getScaledElementType(VTy, SV->getType(),
- ValSize);
- unsigned NumCastVectorElements = VecSize / ValSize;
-
- LLVMContext &Context = SV->getContext();
- const Type *OldCastTy = VectorType::get(CastElementTy,
- NumCastVectorElements);
- Value *OldCast = Builder.CreateBitCast(Old, OldCastTy, "tmp");
-
- Value *SVCast = Builder.CreateBitCast(SV, CastElementTy, "tmp");
-
- unsigned EltSize = TD.getTypeAllocSizeInBits(CastElementTy);
- unsigned Elt = Offset/EltSize;
- assert(EltSize*Elt == Offset && "Invalid modulus in validity checking");
- Value *Insert =
- Builder.CreateInsertElement(OldCast, SVCast, ConstantInt::get(
- Type::getInt32Ty(Context), Elt), "tmp");
- return Builder.CreateBitCast(Insert, AllocaType, "tmp");
- }
+ if (ValSize == VecSize)
+ return Builder.CreateBitCast(SV, AllocaType);
// Must be an element insertion.
assert(SV->getType() == VTy->getElementType());
uint64_t EltSize = TD.getTypeAllocSizeInBits(VTy->getElementType());
unsigned Elt = Offset/EltSize;
- return Builder.CreateInsertElement(Old, SV,
- ConstantInt::get(Type::getInt32Ty(SV->getContext()), Elt),
- "tmp");
+ return Builder.CreateInsertElement(Old, SV, Builder.getInt32(Elt));
}
// If SV is a first-class aggregate value, insert each value recursively.
- if (const StructType *ST = dyn_cast<StructType>(SV->getType())) {
+ if (StructType *ST = dyn_cast<StructType>(SV->getType())) {
const StructLayout &Layout = *TD.getStructLayout(ST);
for (unsigned i = 0, e = ST->getNumElements(); i != e; ++i) {
- Value *Elt = Builder.CreateExtractValue(SV, i, "tmp");
+ Value *Elt = Builder.CreateExtractValue(SV, i);
Old = ConvertScalar_InsertValue(Elt, Old,
Offset+Layout.getElementOffsetInBits(i),
Builder);
@@ -993,10 +824,10 @@ ConvertScalar_InsertValue(Value *SV, Value *Old,
return Old;
}
- if (const ArrayType *AT = dyn_cast<ArrayType>(SV->getType())) {
+ if (ArrayType *AT = dyn_cast<ArrayType>(SV->getType())) {
uint64_t EltSize = TD.getTypeAllocSizeInBits(AT->getElementType());
for (unsigned i = 0, e = AT->getNumElements(); i != e; ++i) {
- Value *Elt = Builder.CreateExtractValue(SV, i, "tmp");
+ Value *Elt = Builder.CreateExtractValue(SV, i);
Old = ConvertScalar_InsertValue(Elt, Old, Offset+i*EltSize, Builder);
}
return Old;
@@ -1009,20 +840,19 @@ ConvertScalar_InsertValue(Value *SV, Value *Old,
unsigned SrcStoreWidth = TD.getTypeStoreSizeInBits(SV->getType());
unsigned DestStoreWidth = TD.getTypeStoreSizeInBits(AllocaType);
if (SV->getType()->isFloatingPointTy() || SV->getType()->isVectorTy())
- SV = Builder.CreateBitCast(SV,
- IntegerType::get(SV->getContext(),SrcWidth), "tmp");
+ SV = Builder.CreateBitCast(SV, IntegerType::get(SV->getContext(),SrcWidth));
else if (SV->getType()->isPointerTy())
- SV = Builder.CreatePtrToInt(SV, TD.getIntPtrType(SV->getContext()), "tmp");
+ SV = Builder.CreatePtrToInt(SV, TD.getIntPtrType(SV->getContext()));
// Zero extend or truncate the value if needed.
if (SV->getType() != AllocaType) {
if (SV->getType()->getPrimitiveSizeInBits() <
AllocaType->getPrimitiveSizeInBits())
- SV = Builder.CreateZExt(SV, AllocaType, "tmp");
+ SV = Builder.CreateZExt(SV, AllocaType);
else {
// Truncation may be needed if storing more than the alloca can hold
// (undefined behavior).
- SV = Builder.CreateTrunc(SV, AllocaType, "tmp");
+ SV = Builder.CreateTrunc(SV, AllocaType);
SrcWidth = DestWidth;
SrcStoreWidth = DestStoreWidth;
}
@@ -1045,12 +875,10 @@ ConvertScalar_InsertValue(Value *SV, Value *Old,
// only some bits in the structure are set.
APInt Mask(APInt::getLowBitsSet(DestWidth, SrcWidth));
if (ShAmt > 0 && (unsigned)ShAmt < DestWidth) {
- SV = Builder.CreateShl(SV, ConstantInt::get(SV->getType(),
- ShAmt), "tmp");
+ SV = Builder.CreateShl(SV, ConstantInt::get(SV->getType(), ShAmt));
Mask <<= ShAmt;
} else if (ShAmt < 0 && (unsigned)-ShAmt < DestWidth) {
- SV = Builder.CreateLShr(SV, ConstantInt::get(SV->getType(),
- -ShAmt), "tmp");
+ SV = Builder.CreateLShr(SV, ConstantInt::get(SV->getType(), -ShAmt));
Mask = Mask.lshr(-ShAmt);
}
@@ -1196,7 +1024,7 @@ static bool isSafeSelectToSpeculate(SelectInst *SI, const TargetData *TD) {
for (Value::use_iterator UI = SI->use_begin(), UE = SI->use_end();
UI != UE; ++UI) {
LoadInst *LI = dyn_cast<LoadInst>(*UI);
- if (LI == 0 || LI->isVolatile()) return false;
+ if (LI == 0 || !LI->isSimple()) return false;
// Both operands to the select need to be dereferenceable, either absolutely
// (e.g. allocas) or at this point because we can see other accesses to it.
@@ -1237,7 +1065,7 @@ static bool isSafePHIToSpeculate(PHINode *PN, const TargetData *TD) {
for (Value::use_iterator UI = PN->use_begin(), UE = PN->use_end();
UI != UE; ++UI) {
LoadInst *LI = dyn_cast<LoadInst>(*UI);
- if (LI == 0 || LI->isVolatile()) return false;
+ if (LI == 0 || !LI->isSimple()) return false;
// For now we only allow loads in the same block as the PHI. This is a
// common case that happens when instcombine merges two loads through a PHI.
@@ -1258,17 +1086,21 @@ static bool isSafePHIToSpeculate(PHINode *PN, const TargetData *TD) {
// trapping load in the predecessor if it is a critical edge.
for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
BasicBlock *Pred = PN->getIncomingBlock(i);
+ Value *InVal = PN->getIncomingValue(i);
+
+ // If the terminator of the predecessor has side-effects (an invoke),
+ // there is no safe place to put a load in the predecessor.
+ if (Pred->getTerminator()->mayHaveSideEffects())
+ return false;
+
+ // If the value is produced by the terminator of the predecessor
+ // (an invoke), there is no valid place to put a load in the predecessor.
+ if (Pred->getTerminator() == InVal)
+ return false;
// If the predecessor has a single successor, then the edge isn't critical.
if (Pred->getTerminator()->getNumSuccessors() == 1)
continue;
-
- Value *InVal = PN->getIncomingValue(i);
-
- // If the InVal is an invoke in the pred, we can't put a load on the edge.
- if (InvokeInst *II = dyn_cast<InvokeInst>(InVal))
- if (II->getParent() == Pred)
- return false;
// If this pointer is always safe to load, or if we can prove that there is
// already a load in the block, then we can move the load to the pred block.
@@ -1295,13 +1127,13 @@ static bool tryToMakeAllocaBePromotable(AllocaInst *AI, const TargetData *TD) {
UI != UE; ++UI) {
User *U = *UI;
if (LoadInst *LI = dyn_cast<LoadInst>(U)) {
- if (LI->isVolatile())
+ if (!LI->isSimple())
return false;
continue;
}
if (StoreInst *SI = dyn_cast<StoreInst>(U)) {
- if (SI->getOperand(0) == AI || SI->isVolatile())
+ if (SI->getOperand(0) == AI || !SI->isSimple())
return false; // Don't allow a store OF the AI, only INTO the AI.
continue;
}
@@ -1343,6 +1175,13 @@ static bool tryToMakeAllocaBePromotable(AllocaInst *AI, const TargetData *TD) {
continue;
}
+ if (BitCastInst *BCI = dyn_cast<BitCastInst>(U)) {
+ if (onlyUsedByLifetimeMarkers(BCI)) {
+ InstsToRewrite.insert(BCI);
+ continue;
+ }
+ }
+
return false;
}
@@ -1354,6 +1193,18 @@ static bool tryToMakeAllocaBePromotable(AllocaInst *AI, const TargetData *TD) {
// If we have instructions that need to be rewritten for this to be promotable
// take care of it now.
for (unsigned i = 0, e = InstsToRewrite.size(); i != e; ++i) {
+ if (BitCastInst *BCI = dyn_cast<BitCastInst>(InstsToRewrite[i])) {
+ // This could only be a bitcast used by nothing but lifetime intrinsics.
+ for (BitCastInst::use_iterator I = BCI->use_begin(), E = BCI->use_end();
+ I != E;) {
+ Use &U = I.getUse();
+ ++I;
+ cast<Instruction>(U.getUser())->eraseFromParent();
+ }
+ BCI->eraseFromParent();
+ continue;
+ }
+
if (SelectInst *SI = dyn_cast<SelectInst>(InstsToRewrite[i])) {
// Selects in InstsToRewrite only have load uses. Rewrite each as two
// loads with a new select.
@@ -1393,7 +1244,7 @@ static bool tryToMakeAllocaBePromotable(AllocaInst *AI, const TargetData *TD) {
continue;
}
- const Type *LoadTy = cast<PointerType>(PN->getType())->getElementType();
+ Type *LoadTy = cast<PointerType>(PN->getType())->getElementType();
PHINode *NewPN = PHINode::Create(LoadTy, PN->getNumIncomingValues(),
PN->getName()+".ld", PN);
@@ -1483,13 +1334,13 @@ bool SROA::performPromotion(Function &F) {
/// ShouldAttemptScalarRepl - Decide if an alloca is a good candidate for
/// SROA. It must be a struct or array type with a small number of elements.
static bool ShouldAttemptScalarRepl(AllocaInst *AI) {
- const Type *T = AI->getAllocatedType();
+ Type *T = AI->getAllocatedType();
// Do not promote any struct into more than 32 separate vars.
- if (const StructType *ST = dyn_cast<StructType>(T))
+ if (StructType *ST = dyn_cast<StructType>(T))
return ST->getNumElements() <= 32;
// Arrays are much less likely to be safe for SROA; only consider
// them if they are very small.
- if (const ArrayType *AT = dyn_cast<ArrayType>(T))
+ if (ArrayType *AT = dyn_cast<ArrayType>(T))
return AT->getNumElements() <= 8;
return false;
}
@@ -1594,7 +1445,7 @@ void SROA::DoScalarReplacement(AllocaInst *AI,
std::vector<AllocaInst*> &WorkList) {
DEBUG(dbgs() << "Found inst to SROA: " << *AI << '\n');
SmallVector<AllocaInst*, 32> ElementAllocas;
- if (const StructType *ST = dyn_cast<StructType>(AI->getAllocatedType())) {
+ if (StructType *ST = dyn_cast<StructType>(AI->getAllocatedType())) {
ElementAllocas.reserve(ST->getNumContainedTypes());
for (unsigned i = 0, e = ST->getNumContainedTypes(); i != e; ++i) {
AllocaInst *NA = new AllocaInst(ST->getContainedType(i), 0,
@@ -1604,9 +1455,9 @@ void SROA::DoScalarReplacement(AllocaInst *AI,
WorkList.push_back(NA); // Add to worklist for recursive processing
}
} else {
- const ArrayType *AT = cast<ArrayType>(AI->getAllocatedType());
+ ArrayType *AT = cast<ArrayType>(AI->getAllocatedType());
ElementAllocas.reserve(AT->getNumElements());
- const Type *ElTy = AT->getElementType();
+ Type *ElTy = AT->getElementType();
for (unsigned i = 0, e = AT->getNumElements(); i != e; ++i) {
AllocaInst *NA = new AllocaInst(ElTy, 0, AI->getAlignment(),
AI->getName() + "." + Twine(i), AI);
@@ -1670,22 +1521,26 @@ void SROA::isSafeForScalarRepl(Instruction *I, uint64_t Offset,
UI.getOperandNo() == 0, Info, MI,
true /*AllowWholeAccess*/);
} else if (LoadInst *LI = dyn_cast<LoadInst>(User)) {
- if (LI->isVolatile())
+ if (!LI->isSimple())
return MarkUnsafe(Info, User);
- const Type *LIType = LI->getType();
+ Type *LIType = LI->getType();
isSafeMemAccess(Offset, TD->getTypeAllocSize(LIType),
LIType, false, Info, LI, true /*AllowWholeAccess*/);
Info.hasALoadOrStore = true;
} else if (StoreInst *SI = dyn_cast<StoreInst>(User)) {
// Store is ok if storing INTO the pointer, not storing the pointer
- if (SI->isVolatile() || SI->getOperand(0) == I)
+ if (!SI->isSimple() || SI->getOperand(0) == I)
return MarkUnsafe(Info, User);
- const Type *SIType = SI->getOperand(0)->getType();
+ Type *SIType = SI->getOperand(0)->getType();
isSafeMemAccess(Offset, TD->getTypeAllocSize(SIType),
SIType, true, Info, SI, true /*AllowWholeAccess*/);
Info.hasALoadOrStore = true;
+ } else if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(User)) {
+ if (II->getIntrinsicID() != Intrinsic::lifetime_start &&
+ II->getIntrinsicID() != Intrinsic::lifetime_end)
+ return MarkUnsafe(Info, User);
} else if (isa<PHINode>(User) || isa<SelectInst>(User)) {
isSafePHISelectUseForScalarRepl(User, Offset, Info);
} else {
@@ -1725,19 +1580,19 @@ void SROA::isSafePHISelectUseForScalarRepl(Instruction *I, uint64_t Offset,
return MarkUnsafe(Info, User);
isSafePHISelectUseForScalarRepl(GEPI, Offset, Info);
} else if (LoadInst *LI = dyn_cast<LoadInst>(User)) {
- if (LI->isVolatile())
+ if (!LI->isSimple())
return MarkUnsafe(Info, User);
- const Type *LIType = LI->getType();
+ Type *LIType = LI->getType();
isSafeMemAccess(Offset, TD->getTypeAllocSize(LIType),
LIType, false, Info, LI, false /*AllowWholeAccess*/);
Info.hasALoadOrStore = true;
} else if (StoreInst *SI = dyn_cast<StoreInst>(User)) {
// Store is ok if storing INTO the pointer, not storing the pointer
- if (SI->isVolatile() || SI->getOperand(0) == I)
+ if (!SI->isSimple() || SI->getOperand(0) == I)
return MarkUnsafe(Info, User);
- const Type *SIType = SI->getOperand(0)->getType();
+ Type *SIType = SI->getOperand(0)->getType();
isSafeMemAccess(Offset, TD->getTypeAllocSize(SIType),
SIType, true, Info, SI, false /*AllowWholeAccess*/);
Info.hasALoadOrStore = true;
@@ -1776,8 +1631,7 @@ void SROA::isSafeGEP(GetElementPtrInst *GEPI,
// Compute the offset due to this GEP and check if the alloca has a
// component element at that offset.
SmallVector<Value*, 8> Indices(GEPI->op_begin() + 1, GEPI->op_end());
- Offset += TD->getIndexedOffset(GEPI->getPointerOperandType(),
- &Indices[0], Indices.size());
+ Offset += TD->getIndexedOffset(GEPI->getPointerOperandType(), Indices);
if (!TypeHasComponent(Info.AI->getAllocatedType(), Offset, 0))
MarkUnsafe(Info, GEPI);
}
@@ -1786,14 +1640,14 @@ void SROA::isSafeGEP(GetElementPtrInst *GEPI,
/// elements of the same type (which is always true for arrays). If so,
/// return true with NumElts and EltTy set to the number of elements and the
/// element type, respectively.
-static bool isHomogeneousAggregate(const Type *T, unsigned &NumElts,
- const Type *&EltTy) {
- if (const ArrayType *AT = dyn_cast<ArrayType>(T)) {
+static bool isHomogeneousAggregate(Type *T, unsigned &NumElts,
+ Type *&EltTy) {
+ if (ArrayType *AT = dyn_cast<ArrayType>(T)) {
NumElts = AT->getNumElements();
EltTy = (NumElts == 0 ? 0 : AT->getElementType());
return true;
}
- if (const StructType *ST = dyn_cast<StructType>(T)) {
+ if (StructType *ST = dyn_cast<StructType>(T)) {
NumElts = ST->getNumContainedTypes();
EltTy = (NumElts == 0 ? 0 : ST->getContainedType(0));
for (unsigned n = 1; n < NumElts; ++n) {
@@ -1807,12 +1661,12 @@ static bool isHomogeneousAggregate(const Type *T, unsigned &NumElts,
/// isCompatibleAggregate - Check if T1 and T2 are either the same type or are
/// "homogeneous" aggregates with the same element type and number of elements.
-static bool isCompatibleAggregate(const Type *T1, const Type *T2) {
+static bool isCompatibleAggregate(Type *T1, Type *T2) {
if (T1 == T2)
return true;
unsigned NumElts1, NumElts2;
- const Type *EltTy1, *EltTy2;
+ Type *EltTy1, *EltTy2;
if (isHomogeneousAggregate(T1, NumElts1, EltTy1) &&
isHomogeneousAggregate(T2, NumElts2, EltTy2) &&
NumElts1 == NumElts2 &&
@@ -1830,7 +1684,7 @@ static bool isCompatibleAggregate(const Type *T1, const Type *T2) {
/// If AllowWholeAccess is true, then this allows uses of the entire alloca as a
/// unit. If false, it only allows accesses known to be in a single element.
void SROA::isSafeMemAccess(uint64_t Offset, uint64_t MemSize,
- const Type *MemOpType, bool isStore,
+ Type *MemOpType, bool isStore,
AllocaInfo &Info, Instruction *TheAccess,
bool AllowWholeAccess) {
// Check if this is a load/store of the entire alloca.
@@ -1857,7 +1711,7 @@ void SROA::isSafeMemAccess(uint64_t Offset, uint64_t MemSize,
}
}
// Check if the offset/size correspond to a component within the alloca type.
- const Type *T = Info.AI->getAllocatedType();
+ Type *T = Info.AI->getAllocatedType();
if (TypeHasComponent(T, Offset, MemSize)) {
Info.hasSubelementAccess = true;
return;
@@ -1868,16 +1722,16 @@ void SROA::isSafeMemAccess(uint64_t Offset, uint64_t MemSize,
/// TypeHasComponent - Return true if T has a component type with the
/// specified offset and size. If Size is zero, do not check the size.
-bool SROA::TypeHasComponent(const Type *T, uint64_t Offset, uint64_t Size) {
- const Type *EltTy;
+bool SROA::TypeHasComponent(Type *T, uint64_t Offset, uint64_t Size) {
+ Type *EltTy;
uint64_t EltSize;
- if (const StructType *ST = dyn_cast<StructType>(T)) {
+ if (StructType *ST = dyn_cast<StructType>(T)) {
const StructLayout *Layout = TD->getStructLayout(ST);
unsigned EltIdx = Layout->getElementContainingOffset(Offset);
EltTy = ST->getContainedType(EltIdx);
EltSize = TD->getTypeAllocSize(EltTy);
Offset -= Layout->getElementOffset(EltIdx);
- } else if (const ArrayType *AT = dyn_cast<ArrayType>(T)) {
+ } else if (ArrayType *AT = dyn_cast<ArrayType>(T)) {
EltTy = AT->getElementType();
EltSize = TD->getTypeAllocSize(EltTy);
if (Offset >= AT->getNumElements() * EltSize)
@@ -1924,9 +1778,17 @@ void SROA::RewriteForScalarRepl(Instruction *I, AllocaInst *AI, uint64_t Offset,
// address operand will be updated, so nothing else needs to be done.
continue;
}
+
+ if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(User)) {
+ if (II->getIntrinsicID() == Intrinsic::lifetime_start ||
+ II->getIntrinsicID() == Intrinsic::lifetime_end) {
+ RewriteLifetimeIntrinsic(II, AI, Offset, NewElts);
+ }
+ continue;
+ }
if (LoadInst *LI = dyn_cast<LoadInst>(User)) {
- const Type *LIType = LI->getType();
+ Type *LIType = LI->getType();
if (isCompatibleAggregate(LIType, AI->getAllocatedType())) {
// Replace:
@@ -1956,7 +1818,7 @@ void SROA::RewriteForScalarRepl(Instruction *I, AllocaInst *AI, uint64_t Offset,
if (StoreInst *SI = dyn_cast<StoreInst>(User)) {
Value *Val = SI->getOperand(0);
- const Type *SIType = Val->getType();
+ Type *SIType = Val->getType();
if (isCompatibleAggregate(SIType, AI->getAllocatedType())) {
// Replace:
// store { i32, i32 } %val, { i32, i32 }* %alloc
@@ -2026,10 +1888,10 @@ void SROA::RewriteBitCast(BitCastInst *BC, AllocaInst *AI, uint64_t Offset,
/// Sets T to the type of the element and Offset to the offset within that
/// element. IdxTy is set to the type of the index result to be used in a
/// GEP instruction.
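+/// For example (illustrative): for T = { i32, [4 x i8] } and Offset = 6 this
+/// returns 1 and sets T to [4 x i8], Offset to 2 and IdxTy to i32.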
-uint64_t SROA::FindElementAndOffset(const Type *&T, uint64_t &Offset,
- const Type *&IdxTy) {
+uint64_t SROA::FindElementAndOffset(Type *&T, uint64_t &Offset,
+ Type *&IdxTy) {
uint64_t Idx = 0;
- if (const StructType *ST = dyn_cast<StructType>(T)) {
+ if (StructType *ST = dyn_cast<StructType>(T)) {
const StructLayout *Layout = TD->getStructLayout(ST);
Idx = Layout->getElementContainingOffset(Offset);
T = ST->getContainedType(Idx);
@@ -2037,7 +1899,7 @@ uint64_t SROA::FindElementAndOffset(const Type *&T, uint64_t &Offset,
IdxTy = Type::getInt32Ty(T->getContext());
return Idx;
}
- const ArrayType *AT = cast<ArrayType>(T);
+ ArrayType *AT = cast<ArrayType>(T);
T = AT->getElementType();
uint64_t EltSize = TD->getTypeAllocSize(T);
Idx = Offset / EltSize;
@@ -2053,13 +1915,12 @@ void SROA::RewriteGEP(GetElementPtrInst *GEPI, AllocaInst *AI, uint64_t Offset,
SmallVector<AllocaInst*, 32> &NewElts) {
uint64_t OldOffset = Offset;
SmallVector<Value*, 8> Indices(GEPI->op_begin() + 1, GEPI->op_end());
- Offset += TD->getIndexedOffset(GEPI->getPointerOperandType(),
- &Indices[0], Indices.size());
+ Offset += TD->getIndexedOffset(GEPI->getPointerOperandType(), Indices);
RewriteForScalarRepl(GEPI, AI, Offset, NewElts);
- const Type *T = AI->getAllocatedType();
- const Type *IdxTy;
+ Type *T = AI->getAllocatedType();
+ Type *IdxTy;
uint64_t OldIdx = FindElementAndOffset(T, OldOffset, IdxTy);
if (GEPI->getOperand(0) == AI)
OldIdx = ~0ULL; // Force the GEP to be rewritten.
@@ -2073,7 +1934,7 @@ void SROA::RewriteGEP(GetElementPtrInst *GEPI, AllocaInst *AI, uint64_t Offset,
if (Idx == OldIdx)
return;
- const Type *i32Ty = Type::getInt32Ty(AI->getContext());
+ Type *i32Ty = Type::getInt32Ty(AI->getContext());
SmallVector<Value*, 8> NewArgs;
NewArgs.push_back(Constant::getNullValue(i32Ty));
while (EltOffset != 0) {
@@ -2082,8 +1943,7 @@ void SROA::RewriteGEP(GetElementPtrInst *GEPI, AllocaInst *AI, uint64_t Offset,
}
Instruction *Val = NewElts[Idx];
if (NewArgs.size() > 1) {
- Val = GetElementPtrInst::CreateInBounds(Val, NewArgs.begin(),
- NewArgs.end(), "", GEPI);
+ Val = GetElementPtrInst::CreateInBounds(Val, NewArgs, "", GEPI);
Val->takeName(GEPI);
}
if (Val->getType() != GEPI->getType())
@@ -2092,6 +1952,62 @@ void SROA::RewriteGEP(GetElementPtrInst *GEPI, AllocaInst *AI, uint64_t Offset,
DeadInsts.push_back(GEPI);
}
+/// RewriteLifetimeIntrinsic - II is a lifetime.start/lifetime.end. Rewrite it
+/// to mark the lifetime of the scalarized memory.
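+/// For example (illustrative): a lifetime.start of 8 bytes over an alloca of
+/// type { i32, i32 } that was split into two i32 allocas becomes a
+/// lifetime.start of 4 bytes on each of the element allocas.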
+void SROA::RewriteLifetimeIntrinsic(IntrinsicInst *II, AllocaInst *AI,
+ uint64_t Offset,
+ SmallVector<AllocaInst*, 32> &NewElts) {
+ ConstantInt *OldSize = cast<ConstantInt>(II->getArgOperand(0));
+ // Put matching lifetime markers on everything from Offset up to
+ // Offset+OldSize.
+ Type *AIType = AI->getAllocatedType();
+ uint64_t NewOffset = Offset;
+ Type *IdxTy;
+ uint64_t Idx = FindElementAndOffset(AIType, NewOffset, IdxTy);
+
+ IRBuilder<> Builder(II);
+ uint64_t Size = OldSize->getLimitedValue();
+
+ if (NewOffset) {
+ // Splice the first element and index 'NewOffset' bytes in. SROA will
+ // split the alloca again later.
+ Value *V = Builder.CreateBitCast(NewElts[Idx], Builder.getInt8PtrTy());
+ V = Builder.CreateGEP(V, Builder.getInt64(NewOffset));
+
+ IdxTy = NewElts[Idx]->getAllocatedType();
+ uint64_t EltSize = TD->getTypeAllocSize(IdxTy) - NewOffset;
+ if (EltSize > Size) {
+ EltSize = Size;
+ Size = 0;
+ } else {
+ Size -= EltSize;
+ }
+ if (II->getIntrinsicID() == Intrinsic::lifetime_start)
+ Builder.CreateLifetimeStart(V, Builder.getInt64(EltSize));
+ else
+ Builder.CreateLifetimeEnd(V, Builder.getInt64(EltSize));
+ ++Idx;
+ }
+
+ for (; Idx != NewElts.size() && Size; ++Idx) {
+ IdxTy = NewElts[Idx]->getAllocatedType();
+ uint64_t EltSize = TD->getTypeAllocSize(IdxTy);
+ if (EltSize > Size) {
+ EltSize = Size;
+ Size = 0;
+ } else {
+ Size -= EltSize;
+ }
+ if (II->getIntrinsicID() == Intrinsic::lifetime_start)
+ Builder.CreateLifetimeStart(NewElts[Idx],
+ Builder.getInt64(EltSize));
+ else
+ Builder.CreateLifetimeEnd(NewElts[Idx],
+ Builder.getInt64(EltSize));
+ }
+ DeadInsts.push_back(II);
+}
+
/// RewriteMemIntrinUserOfAlloca - MI is a memcpy/memset/memmove from or to AI.
/// Rewrite it to copy or set the elements of the scalarized memory.
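+/// For example (illustrative): a memset of 8 zero bytes over a { i32, i32 }
+/// alloca split into two i32 allocas becomes a store of i32 0 to each one.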
void SROA::RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *Inst,
@@ -2139,7 +2055,7 @@ void SROA::RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *Inst,
// If the pointer is not the right type, insert a bitcast to the right
// type.
- const Type *NewTy =
+ Type *NewTy =
PointerType::get(AI->getType()->getElementType(), AddrSpace);
if (OtherPtr->getType() != NewTy)
@@ -2159,16 +2075,16 @@ void SROA::RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *Inst,
if (OtherPtr) {
Value *Idx[2] = { Zero,
ConstantInt::get(Type::getInt32Ty(MI->getContext()), i) };
- OtherElt = GetElementPtrInst::CreateInBounds(OtherPtr, Idx, Idx + 2,
+ OtherElt = GetElementPtrInst::CreateInBounds(OtherPtr, Idx,
OtherPtr->getName()+"."+Twine(i),
MI);
uint64_t EltOffset;
- const PointerType *OtherPtrTy = cast<PointerType>(OtherPtr->getType());
- const Type *OtherTy = OtherPtrTy->getElementType();
- if (const StructType *ST = dyn_cast<StructType>(OtherTy)) {
+ PointerType *OtherPtrTy = cast<PointerType>(OtherPtr->getType());
+ Type *OtherTy = OtherPtrTy->getElementType();
+ if (StructType *ST = dyn_cast<StructType>(OtherTy)) {
EltOffset = TD->getStructLayout(ST)->getElementOffset(i);
} else {
- const Type *EltTy = cast<SequentialType>(OtherTy)->getElementType();
+ Type *EltTy = cast<SequentialType>(OtherTy)->getElementType();
EltOffset = TD->getTypeAllocSize(EltTy)*i;
}
@@ -2181,7 +2097,7 @@ void SROA::RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *Inst,
}
Value *EltPtr = NewElts[i];
- const Type *EltTy = cast<PointerType>(EltPtr->getType())->getElementType();
+ Type *EltTy = cast<PointerType>(EltPtr->getType())->getElementType();
// If we got down to a scalar, insert a load or store as appropriate.
if (EltTy->isSingleValueType()) {
@@ -2207,7 +2123,7 @@ void SROA::RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *Inst,
StoreVal = Constant::getNullValue(EltTy); // 0.0, null, 0, <0,0>
} else {
// If EltTy is a vector type, get the element type.
- const Type *ValTy = EltTy->getScalarType();
+ Type *ValTy = EltTy->getScalarType();
// Construct an integer with the right value.
unsigned EltSize = TD->getTypeSizeInBits(ValTy);
@@ -2228,8 +2144,8 @@ void SROA::RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *Inst,
assert(StoreVal->getType() == ValTy && "Type mismatch!");
// If the requested value was a vector constant, create it.
- if (EltTy != ValTy) {
- unsigned NumElts = cast<VectorType>(ValTy)->getNumElements();
+ if (EltTy->isVectorTy()) {
+ unsigned NumElts = cast<VectorType>(EltTy)->getNumElements();
SmallVector<Constant*, 16> Elts(NumElts, StoreVal);
StoreVal = ConstantVector::get(Elts);
}
@@ -2271,7 +2187,7 @@ void SROA::RewriteStoreUserOfWholeAlloca(StoreInst *SI, AllocaInst *AI,
// Extract each element out of the integer according to its structure offset
// and store the element value to the individual alloca.
Value *SrcVal = SI->getOperand(0);
- const Type *AllocaEltTy = AI->getAllocatedType();
+ Type *AllocaEltTy = AI->getAllocatedType();
uint64_t AllocaSizeBits = TD->getTypeAllocSizeInBits(AllocaEltTy);
IRBuilder<> Builder(SI);
@@ -2286,12 +2202,12 @@ void SROA::RewriteStoreUserOfWholeAlloca(StoreInst *SI, AllocaInst *AI,
// There are two forms here: AI could be an array or struct. Both cases
// have different ways to compute the element offset.
- if (const StructType *EltSTy = dyn_cast<StructType>(AllocaEltTy)) {
+ if (StructType *EltSTy = dyn_cast<StructType>(AllocaEltTy)) {
const StructLayout *Layout = TD->getStructLayout(EltSTy);
for (unsigned i = 0, e = NewElts.size(); i != e; ++i) {
// Get the number of bits to shift SrcVal to get the value.
- const Type *FieldTy = EltSTy->getElementType(i);
+ Type *FieldTy = EltSTy->getElementType(i);
uint64_t Shift = Layout->getElementOffsetInBits(i);
if (TD->isBigEndian())
@@ -2327,8 +2243,8 @@ void SROA::RewriteStoreUserOfWholeAlloca(StoreInst *SI, AllocaInst *AI,
}
} else {
- const ArrayType *ATy = cast<ArrayType>(AllocaEltTy);
- const Type *ArrayEltTy = ATy->getElementType();
+ ArrayType *ATy = cast<ArrayType>(AllocaEltTy);
+ Type *ArrayEltTy = ATy->getElementType();
uint64_t ElementOffset = TD->getTypeAllocSizeInBits(ArrayEltTy);
uint64_t ElementSizeBits = TD->getTypeSizeInBits(ArrayEltTy);
@@ -2384,7 +2300,7 @@ void SROA::RewriteLoadUserOfWholeAlloca(LoadInst *LI, AllocaInst *AI,
SmallVector<AllocaInst*, 32> &NewElts) {
// Extract each element out of the NewElts according to its structure offset
// and form the result value.
- const Type *AllocaEltTy = AI->getAllocatedType();
+ Type *AllocaEltTy = AI->getAllocatedType();
uint64_t AllocaSizeBits = TD->getTypeAllocSizeInBits(AllocaEltTy);
DEBUG(dbgs() << "PROMOTING LOAD OF WHOLE ALLOCA: " << *AI << '\n' << *LI
@@ -2394,10 +2310,10 @@ void SROA::RewriteLoadUserOfWholeAlloca(LoadInst *LI, AllocaInst *AI,
// have different ways to compute the element offset.
const StructLayout *Layout = 0;
uint64_t ArrayEltBitOffset = 0;
- if (const StructType *EltSTy = dyn_cast<StructType>(AllocaEltTy)) {
+ if (StructType *EltSTy = dyn_cast<StructType>(AllocaEltTy)) {
Layout = TD->getStructLayout(EltSTy);
} else {
- const Type *ArrayEltTy = cast<ArrayType>(AllocaEltTy)->getElementType();
+ Type *ArrayEltTy = cast<ArrayType>(AllocaEltTy)->getElementType();
ArrayEltBitOffset = TD->getTypeAllocSizeInBits(ArrayEltTy);
}
@@ -2408,14 +2324,14 @@ void SROA::RewriteLoadUserOfWholeAlloca(LoadInst *LI, AllocaInst *AI,
// Load the value from the alloca. If the NewElt is an aggregate, cast
// the pointer to an integer of the same size before doing the load.
Value *SrcField = NewElts[i];
- const Type *FieldTy =
+ Type *FieldTy =
cast<PointerType>(SrcField->getType())->getElementType();
uint64_t FieldSizeBits = TD->getTypeSizeInBits(FieldTy);
// Ignore zero sized fields like {}, they obviously contain no data.
if (FieldSizeBits == 0) continue;
- const IntegerType *FieldIntTy = IntegerType::get(LI->getContext(),
+ IntegerType *FieldIntTy = IntegerType::get(LI->getContext(),
FieldSizeBits);
if (!FieldTy->isIntegerTy() && !FieldTy->isFloatingPointTy() &&
!FieldTy->isVectorTy())
@@ -2468,14 +2384,14 @@ void SROA::RewriteLoadUserOfWholeAlloca(LoadInst *LI, AllocaInst *AI,
/// HasPadding - Return true if the specified type has any structure or
/// alignment padding in between the elements that would be split apart
/// by SROA; return false otherwise.
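+/// For example (illustrative): { i8, i32 } normally has three bytes of
+/// padding after the i8 field, so HasPadding returns true for it.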
-static bool HasPadding(const Type *Ty, const TargetData &TD) {
- if (const ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
+static bool HasPadding(Type *Ty, const TargetData &TD) {
+ if (ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
Ty = ATy->getElementType();
return TD.getTypeSizeInBits(Ty) != TD.getTypeAllocSizeInBits(Ty);
}
// SROA currently handles only Arrays and Structs.
- const StructType *STy = cast<StructType>(Ty);
+ StructType *STy = cast<StructType>(Ty);
const StructLayout *SL = TD.getStructLayout(STy);
unsigned PrevFieldBitOffset = 0;
for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
@@ -2530,7 +2446,7 @@ bool SROA::isSafeAllocaToScalarRepl(AllocaInst *AI) {
// and fusion code.
if (!Info.hasSubelementAccess && Info.hasALoadOrStore) {
// If the struct/array just has one element, use basic SRoA.
- if (const StructType *ST = dyn_cast<StructType>(AI->getAllocatedType())) {
+ if (StructType *ST = dyn_cast<StructType>(AI->getAllocatedType())) {
if (ST->getNumElements() > 1) return false;
} else {
if (cast<ArrayType>(AI->getAllocatedType())->getNumElements() > 1)
@@ -2576,7 +2492,7 @@ isOnlyCopiedFromConstantGlobal(Value *V, MemTransferInst *&TheCopy,
if (LoadInst *LI = dyn_cast<LoadInst>(U)) {
// Simple (non-volatile, non-atomic) loads are always ok; ignore them.
- if (LI->isVolatile()) return false;
+ if (!LI->isSimple()) return false;
continue;
}
diff --git a/lib/Transforms/Scalar/SimplifyLibCalls.cpp b/lib/Transforms/Scalar/SimplifyLibCalls.cpp
index 7c415e5..fbb9465 100644
--- a/lib/Transforms/Scalar/SimplifyLibCalls.cpp
+++ b/lib/Transforms/Scalar/SimplifyLibCalls.cpp
@@ -134,7 +134,7 @@ namespace {
struct StrCatOpt : public LibCallOptimization {
virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
// Verify the "strcat" function prototype.
- const FunctionType *FT = Callee->getFunctionType();
+ FunctionType *FT = Callee->getFunctionType();
if (FT->getNumParams() != 2 ||
FT->getReturnType() != B.getInt8PtrTy() ||
FT->getParamType(0) != FT->getReturnType() ||
@@ -184,7 +184,7 @@ struct StrCatOpt : public LibCallOptimization {
struct StrNCatOpt : public StrCatOpt {
virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
// Verify the "strncat" function prototype.
- const FunctionType *FT = Callee->getFunctionType();
+ FunctionType *FT = Callee->getFunctionType();
if (FT->getNumParams() != 3 ||
FT->getReturnType() != B.getInt8PtrTy() ||
FT->getParamType(0) != FT->getReturnType() ||
@@ -232,7 +232,7 @@ struct StrNCatOpt : public StrCatOpt {
struct StrChrOpt : public LibCallOptimization {
virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
// Verify the "strchr" function prototype.
- const FunctionType *FT = Callee->getFunctionType();
+ FunctionType *FT = Callee->getFunctionType();
if (FT->getNumParams() != 2 ||
FT->getReturnType() != B.getInt8PtrTy() ||
FT->getParamType(0) != FT->getReturnType() ||
@@ -282,7 +282,7 @@ struct StrChrOpt : public LibCallOptimization {
struct StrRChrOpt : public LibCallOptimization {
virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
// Verify the "strrchr" function prototype.
- const FunctionType *FT = Callee->getFunctionType();
+ FunctionType *FT = Callee->getFunctionType();
if (FT->getNumParams() != 2 ||
FT->getReturnType() != B.getInt8PtrTy() ||
FT->getParamType(0) != FT->getReturnType() ||
@@ -323,7 +323,7 @@ struct StrRChrOpt : public LibCallOptimization {
struct StrCmpOpt : public LibCallOptimization {
virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
// Verify the "strcmp" function prototype.
- const FunctionType *FT = Callee->getFunctionType();
+ FunctionType *FT = Callee->getFunctionType();
if (FT->getNumParams() != 2 ||
!FT->getReturnType()->isIntegerTy(32) ||
FT->getParamType(0) != FT->getParamType(1) ||
@@ -338,16 +338,17 @@ struct StrCmpOpt : public LibCallOptimization {
bool HasStr1 = GetConstantStringInfo(Str1P, Str1);
bool HasStr2 = GetConstantStringInfo(Str2P, Str2);
- if (HasStr1 && Str1.empty()) // strcmp("", x) -> *x
- return B.CreateZExt(B.CreateLoad(Str2P, "strcmpload"), CI->getType());
-
- if (HasStr2 && Str2.empty()) // strcmp(x,"") -> *x
- return B.CreateZExt(B.CreateLoad(Str1P, "strcmpload"), CI->getType());
-
// strcmp(x, y) -> cnst (if both x and y are constant strings)
if (HasStr1 && HasStr2)
return ConstantInt::get(CI->getType(),
- strcmp(Str1.c_str(),Str2.c_str()));
+ StringRef(Str1).compare(Str2));
+
+ if (HasStr1 && Str1.empty()) // strcmp("", x) -> -*x
+ return B.CreateNeg(B.CreateZExt(B.CreateLoad(Str2P, "strcmpload"),
+ CI->getType()));
+
+ if (HasStr2 && Str2.empty()) // strcmp(x,"") -> *x
+ return B.CreateZExt(B.CreateLoad(Str1P, "strcmpload"), CI->getType());
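+    // (The empty string contributes only '\0', so the comparison reduces to
+    // the first byte of the other string, negated when "" is the left
+    // operand.)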
// strcmp(P, "x") -> memcmp(P, "x", 2)
uint64_t Len1 = GetStringLength(Str1P);
@@ -371,7 +372,7 @@ struct StrCmpOpt : public LibCallOptimization {
struct StrNCmpOpt : public LibCallOptimization {
virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
// Verify the "strncmp" function prototype.
- const FunctionType *FT = Callee->getFunctionType();
+ FunctionType *FT = Callee->getFunctionType();
if (FT->getNumParams() != 3 ||
!FT->getReturnType()->isIntegerTy(32) ||
FT->getParamType(0) != FT->getParamType(1) ||
@@ -400,16 +401,20 @@ struct StrNCmpOpt : public LibCallOptimization {
bool HasStr1 = GetConstantStringInfo(Str1P, Str1);
bool HasStr2 = GetConstantStringInfo(Str2P, Str2);
- if (HasStr1 && Str1.empty()) // strncmp("", x, n) -> *x
- return B.CreateZExt(B.CreateLoad(Str2P, "strcmpload"), CI->getType());
+ // strncmp(x, y) -> cnst (if both x and y are constant strings)
+ if (HasStr1 && HasStr2) {
+ StringRef SubStr1 = StringRef(Str1).substr(0, Length);
+ StringRef SubStr2 = StringRef(Str2).substr(0, Length);
+ return ConstantInt::get(CI->getType(), SubStr1.compare(SubStr2));
+ }
+
+ if (HasStr1 && Str1.empty()) // strncmp("", x, n) -> -*x
+ return B.CreateNeg(B.CreateZExt(B.CreateLoad(Str2P, "strcmpload"),
+ CI->getType()));
if (HasStr2 && Str2.empty()) // strncmp(x, "", n) -> *x
return B.CreateZExt(B.CreateLoad(Str1P, "strcmpload"), CI->getType());
- // strncmp(x, y) -> cnst (if both x and y are constant strings)
- if (HasStr1 && HasStr2)
- return ConstantInt::get(CI->getType(),
- strncmp(Str1.c_str(), Str2.c_str(), Length));
return 0;
}
};
@@ -426,7 +431,7 @@ struct StrCpyOpt : public LibCallOptimization {
virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
// Verify the "strcpy" function prototype.
unsigned NumParams = OptChkCall ? 3 : 2;
- const FunctionType *FT = Callee->getFunctionType();
+ FunctionType *FT = Callee->getFunctionType();
if (FT->getNumParams() != NumParams ||
FT->getReturnType() != FT->getParamType(0) ||
FT->getParamType(0) != FT->getParamType(1) ||
@@ -462,7 +467,7 @@ struct StrCpyOpt : public LibCallOptimization {
struct StrNCpyOpt : public LibCallOptimization {
virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
- const FunctionType *FT = Callee->getFunctionType();
+ FunctionType *FT = Callee->getFunctionType();
if (FT->getNumParams() != 3 || FT->getReturnType() != FT->getParamType(0) ||
FT->getParamType(0) != FT->getParamType(1) ||
FT->getParamType(0) != B.getInt8PtrTy() ||
@@ -511,7 +516,7 @@ struct StrNCpyOpt : public LibCallOptimization {
struct StrLenOpt : public LibCallOptimization {
virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
- const FunctionType *FT = Callee->getFunctionType();
+ FunctionType *FT = Callee->getFunctionType();
if (FT->getNumParams() != 1 ||
FT->getParamType(0) != B.getInt8PtrTy() ||
!FT->getReturnType()->isIntegerTy())
@@ -537,7 +542,7 @@ struct StrLenOpt : public LibCallOptimization {
struct StrPBrkOpt : public LibCallOptimization {
virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
- const FunctionType *FT = Callee->getFunctionType();
+ FunctionType *FT = Callee->getFunctionType();
if (FT->getNumParams() != 2 ||
FT->getParamType(0) != B.getInt8PtrTy() ||
FT->getParamType(1) != FT->getParamType(0) ||
@@ -575,7 +580,7 @@ struct StrPBrkOpt : public LibCallOptimization {
struct StrToOpt : public LibCallOptimization {
virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
- const FunctionType *FT = Callee->getFunctionType();
+ FunctionType *FT = Callee->getFunctionType();
if ((FT->getNumParams() != 2 && FT->getNumParams() != 3) ||
!FT->getParamType(0)->isPointerTy() ||
!FT->getParamType(1)->isPointerTy())
@@ -597,7 +602,7 @@ struct StrToOpt : public LibCallOptimization {
struct StrSpnOpt : public LibCallOptimization {
virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
- const FunctionType *FT = Callee->getFunctionType();
+ FunctionType *FT = Callee->getFunctionType();
if (FT->getNumParams() != 2 ||
FT->getParamType(0) != B.getInt8PtrTy() ||
FT->getParamType(1) != FT->getParamType(0) ||
@@ -626,7 +631,7 @@ struct StrSpnOpt : public LibCallOptimization {
struct StrCSpnOpt : public LibCallOptimization {
virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
- const FunctionType *FT = Callee->getFunctionType();
+ FunctionType *FT = Callee->getFunctionType();
if (FT->getNumParams() != 2 ||
FT->getParamType(0) != B.getInt8PtrTy() ||
FT->getParamType(1) != FT->getParamType(0) ||
@@ -658,7 +663,7 @@ struct StrCSpnOpt : public LibCallOptimization {
struct StrStrOpt : public LibCallOptimization {
virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
- const FunctionType *FT = Callee->getFunctionType();
+ FunctionType *FT = Callee->getFunctionType();
if (FT->getNumParams() != 2 ||
!FT->getParamType(0)->isPointerTy() ||
!FT->getParamType(1)->isPointerTy() ||
@@ -722,7 +727,7 @@ struct StrStrOpt : public LibCallOptimization {
struct MemCmpOpt : public LibCallOptimization {
virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
- const FunctionType *FT = Callee->getFunctionType();
+ FunctionType *FT = Callee->getFunctionType();
if (FT->getNumParams() != 3 || !FT->getParamType(0)->isPointerTy() ||
!FT->getParamType(1)->isPointerTy() ||
!FT->getReturnType()->isIntegerTy(32))
@@ -773,7 +778,7 @@ struct MemCpyOpt : public LibCallOptimization {
// These optimizations require TargetData.
if (!TD) return 0;
- const FunctionType *FT = Callee->getFunctionType();
+ FunctionType *FT = Callee->getFunctionType();
if (FT->getNumParams() != 3 || FT->getReturnType() != FT->getParamType(0) ||
!FT->getParamType(0)->isPointerTy() ||
!FT->getParamType(1)->isPointerTy() ||
@@ -795,7 +800,7 @@ struct MemMoveOpt : public LibCallOptimization {
// These optimizations require TargetData.
if (!TD) return 0;
- const FunctionType *FT = Callee->getFunctionType();
+ FunctionType *FT = Callee->getFunctionType();
if (FT->getNumParams() != 3 || FT->getReturnType() != FT->getParamType(0) ||
!FT->getParamType(0)->isPointerTy() ||
!FT->getParamType(1)->isPointerTy() ||
@@ -817,7 +822,7 @@ struct MemSetOpt : public LibCallOptimization {
// These optimizations require TargetData.
if (!TD) return 0;
- const FunctionType *FT = Callee->getFunctionType();
+ FunctionType *FT = Callee->getFunctionType();
if (FT->getNumParams() != 3 || FT->getReturnType() != FT->getParamType(0) ||
!FT->getParamType(0)->isPointerTy() ||
!FT->getParamType(1)->isIntegerTy() ||
@@ -840,7 +845,7 @@ struct MemSetOpt : public LibCallOptimization {
struct PowOpt : public LibCallOptimization {
virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
- const FunctionType *FT = Callee->getFunctionType();
+ FunctionType *FT = Callee->getFunctionType();
// Just make sure this has 2 arguments of the same FP type, which match the
// result type.
if (FT->getNumParams() != 2 || FT->getReturnType() != FT->getParamType(0) ||
@@ -874,8 +879,8 @@ struct PowOpt : public LibCallOptimization {
Callee->getAttributes());
Value *FAbs = EmitUnaryFloatFnCall(Sqrt, "fabs", B,
Callee->getAttributes());
- Value *FCmp = B.CreateFCmpOEQ(Op1, NegInf, "tmp");
- Value *Sel = B.CreateSelect(FCmp, Inf, FAbs, "tmp");
+ Value *FCmp = B.CreateFCmpOEQ(Op1, NegInf);
+ Value *Sel = B.CreateSelect(FCmp, Inf, FAbs);
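+    // Per C99 Annex F, pow(-0.0, 0.5) is +0.0 and pow(-inf, 0.5) is +inf,
+    // while sqrt gives -0.0 and NaN respectively; the fabs and select above
+    // restore the pow results.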
return Sel;
}
@@ -895,7 +900,7 @@ struct PowOpt : public LibCallOptimization {
struct Exp2Opt : public LibCallOptimization {
virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
- const FunctionType *FT = Callee->getFunctionType();
+ FunctionType *FT = Callee->getFunctionType();
// Just make sure this has 1 argument of FP type, which matches the
// result type.
if (FT->getNumParams() != 1 || FT->getReturnType() != FT->getParamType(0) ||
@@ -908,10 +913,10 @@ struct Exp2Opt : public LibCallOptimization {
Value *LdExpArg = 0;
if (SIToFPInst *OpC = dyn_cast<SIToFPInst>(Op)) {
if (OpC->getOperand(0)->getType()->getPrimitiveSizeInBits() <= 32)
- LdExpArg = B.CreateSExt(OpC->getOperand(0), B.getInt32Ty(), "tmp");
+ LdExpArg = B.CreateSExt(OpC->getOperand(0), B.getInt32Ty());
} else if (UIToFPInst *OpC = dyn_cast<UIToFPInst>(Op)) {
if (OpC->getOperand(0)->getType()->getPrimitiveSizeInBits() < 32)
- LdExpArg = B.CreateZExt(OpC->getOperand(0), B.getInt32Ty(), "tmp");
+ LdExpArg = B.CreateZExt(OpC->getOperand(0), B.getInt32Ty());
}
if (LdExpArg) {
@@ -946,7 +951,7 @@ struct Exp2Opt : public LibCallOptimization {
struct UnaryDoubleFPOpt : public LibCallOptimization {
virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
- const FunctionType *FT = Callee->getFunctionType();
+ FunctionType *FT = Callee->getFunctionType();
if (FT->getNumParams() != 1 || !FT->getReturnType()->isDoubleTy() ||
!FT->getParamType(0)->isDoubleTy())
return 0;
@@ -973,7 +978,7 @@ struct UnaryDoubleFPOpt : public LibCallOptimization {
struct FFSOpt : public LibCallOptimization {
virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
- const FunctionType *FT = Callee->getFunctionType();
+ FunctionType *FT = Callee->getFunctionType();
// Just make sure this has one argument of integer type (the ffs
// family always returns i32).
if (FT->getNumParams() != 1 ||
@@ -996,10 +1001,10 @@ struct FFSOpt : public LibCallOptimization {
Value *F = Intrinsic::getDeclaration(Callee->getParent(),
Intrinsic::cttz, ArgType);
Value *V = B.CreateCall(F, Op, "cttz");
- V = B.CreateAdd(V, ConstantInt::get(V->getType(), 1), "tmp");
- V = B.CreateIntCast(V, B.getInt32Ty(), false, "tmp");
+ V = B.CreateAdd(V, ConstantInt::get(V->getType(), 1));
+ V = B.CreateIntCast(V, B.getInt32Ty(), false);
- Value *Cond = B.CreateICmpNE(Op, Constant::getNullValue(ArgType), "tmp");
+ Value *Cond = B.CreateICmpNE(Op, Constant::getNullValue(ArgType));
return B.CreateSelect(Cond, V, B.getInt32(0));
}
};
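
The dropped "tmp" names in the hunks above follow the 3.0 IRBuilder convention: omitting the Name argument leaves the value unnamed (printed as %1, %2, ...), saving a string allocation per instruction. A minimal sketch under that convention, with an illustrative helper name:

    #include "llvm/Constants.h"
    #include "llvm/Support/IRBuilder.h"

    using namespace llvm;

    // Emit (Op != 0) zero-extended to i32; both values stay unnamed.
    static Value *emitIsNonZero(IRBuilder<> &B, Value *Op) {
      Value *Cmp = B.CreateICmpNE(Op, Constant::getNullValue(Op->getType()));
      return B.CreateZExt(Cmp, B.getInt32Ty());
    }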
@@ -1009,7 +1014,7 @@ struct FFSOpt : public LibCallOptimization {
struct IsDigitOpt : public LibCallOptimization {
virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
- const FunctionType *FT = Callee->getFunctionType();
+ FunctionType *FT = Callee->getFunctionType();
// We require integer(i32)
if (FT->getNumParams() != 1 || !FT->getReturnType()->isIntegerTy() ||
!FT->getParamType(0)->isIntegerTy(32))
@@ -1028,7 +1033,7 @@ struct IsDigitOpt : public LibCallOptimization {
struct IsAsciiOpt : public LibCallOptimization {
virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
- const FunctionType *FT = Callee->getFunctionType();
+ FunctionType *FT = Callee->getFunctionType();
// We require integer(i32)
if (FT->getNumParams() != 1 || !FT->getReturnType()->isIntegerTy() ||
!FT->getParamType(0)->isIntegerTy(32))
@@ -1046,7 +1051,7 @@ struct IsAsciiOpt : public LibCallOptimization {
struct AbsOpt : public LibCallOptimization {
virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
- const FunctionType *FT = Callee->getFunctionType();
+ FunctionType *FT = Callee->getFunctionType();
// We require integer(integer) where the types agree.
if (FT->getNumParams() != 1 || !FT->getReturnType()->isIntegerTy() ||
FT->getParamType(0) != FT->getReturnType())
@@ -1067,7 +1072,7 @@ struct AbsOpt : public LibCallOptimization {
struct ToAsciiOpt : public LibCallOptimization {
virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
- const FunctionType *FT = Callee->getFunctionType();
+ FunctionType *FT = Callee->getFunctionType();
// We require i32(i32)
if (FT->getNumParams() != 1 || FT->getReturnType() != FT->getParamType(0) ||
!FT->getParamType(0)->isIntegerTy(32))
@@ -1147,7 +1152,7 @@ struct PrintFOpt : public LibCallOptimization {
virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
// Require one fixed pointer argument and an integer/void result.
- const FunctionType *FT = Callee->getFunctionType();
+ FunctionType *FT = Callee->getFunctionType();
if (FT->getNumParams() < 1 || !FT->getParamType(0)->isPointerTy() ||
!(FT->getReturnType()->isIntegerTy() ||
FT->getReturnType()->isVoidTy()))
@@ -1241,7 +1246,7 @@ struct SPrintFOpt : public LibCallOptimization {
virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
// Require two fixed pointer arguments and an integer result.
- const FunctionType *FT = Callee->getFunctionType();
+ FunctionType *FT = Callee->getFunctionType();
if (FT->getNumParams() != 2 || !FT->getParamType(0)->isPointerTy() ||
!FT->getParamType(1)->isPointerTy() ||
!FT->getReturnType()->isIntegerTy())
@@ -1272,7 +1277,7 @@ struct SPrintFOpt : public LibCallOptimization {
struct FWriteOpt : public LibCallOptimization {
virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
// Require a pointer, an integer, an integer, a pointer, returning integer.
- const FunctionType *FT = Callee->getFunctionType();
+ FunctionType *FT = Callee->getFunctionType();
if (FT->getNumParams() != 4 || !FT->getParamType(0)->isPointerTy() ||
!FT->getParamType(1)->isIntegerTy() ||
!FT->getParamType(2)->isIntegerTy() ||
@@ -1310,7 +1315,7 @@ struct FPutsOpt : public LibCallOptimization {
if (!TD) return 0;
// Require two pointers. Also, we can't optimize if return value is used.
- const FunctionType *FT = Callee->getFunctionType();
+ FunctionType *FT = Callee->getFunctionType();
if (FT->getNumParams() != 2 || !FT->getParamType(0)->isPointerTy() ||
!FT->getParamType(1)->isPointerTy() ||
!CI->use_empty())
@@ -1379,7 +1384,7 @@ struct FPrintFOpt : public LibCallOptimization {
virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
// Require two fixed parameters as pointers and an integer result.
- const FunctionType *FT = Callee->getFunctionType();
+ FunctionType *FT = Callee->getFunctionType();
if (FT->getNumParams() != 2 || !FT->getParamType(0)->isPointerTy() ||
!FT->getParamType(1)->isPointerTy() ||
!FT->getReturnType()->isIntegerTy())
@@ -1410,7 +1415,7 @@ struct FPrintFOpt : public LibCallOptimization {
struct PutsOpt : public LibCallOptimization {
virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
// Require one fixed pointer argument and an integer/void result.
- const FunctionType *FT = Callee->getFunctionType();
+ FunctionType *FT = Callee->getFunctionType();
if (FT->getNumParams() < 1 || !FT->getParamType(0)->isPointerTy() ||
!(FT->getReturnType()->isIntegerTy() ||
FT->getReturnType()->isVoidTy()))
@@ -1685,7 +1690,7 @@ void SimplifyLibCalls::setDoesNotAlias(Function &F, unsigned n) {
void SimplifyLibCalls::inferPrototypeAttributes(Function &F) {
- const FunctionType *FTy = F.getFunctionType();
+ FunctionType *FTy = F.getFunctionType();
StringRef Name = F.getName();
switch (Name[0]) {
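
The repeated const FunctionType -> FunctionType change in this file (and in AddrModeMatcher.cpp, BuildLibCalls.cpp, and CodeExtractor.cpp below) tracks the LLVM 3.0 type-system rewrite: Type became mutable so that named struct bodies can be filled in after creation, and the const qualifier was dropped from the IR APIs. A minimal sketch of the new convention, assuming the 3.0 headers:

    #include "llvm/DerivedTypes.h"
    #include "llvm/LLVMContext.h"

    using namespace llvm;

    // Build the i32 (i8*, i8*) prototype a strcmp-style optimizer checks
    // for; note the unqualified Type* and FunctionType* throughout.
    static FunctionType *strcmpPrototype(LLVMContext &Ctx) {
      Type *Params[] = { Type::getInt8PtrTy(Ctx), Type::getInt8PtrTy(Ctx) };
      return FunctionType::get(Type::getInt32Ty(Ctx), Params,
                               /*isVarArg=*/false);
    }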
diff --git a/lib/Transforms/Scalar/Sink.cpp b/lib/Transforms/Scalar/Sink.cpp
index 705f442..c83f56c 100644
--- a/lib/Transforms/Scalar/Sink.cpp
+++ b/lib/Transforms/Scalar/Sink.cpp
@@ -153,9 +153,13 @@ bool Sinking::ProcessBlock(BasicBlock &BB) {
static bool isSafeToMove(Instruction *Inst, AliasAnalysis *AA,
SmallPtrSet<Instruction *, 8> &Stores) {
- if (LoadInst *L = dyn_cast<LoadInst>(Inst)) {
- if (L->isVolatile()) return false;
+ if (Inst->mayWriteToMemory()) {
+ Stores.insert(Inst);
+ return false;
+ }
+
+ if (LoadInst *L = dyn_cast<LoadInst>(Inst)) {
AliasAnalysis::Location Loc = AA->getLocation(L);
for (SmallPtrSet<Instruction *, 8>::iterator I = Stores.begin(),
E = Stores.end(); I != E; ++I)
@@ -163,11 +167,6 @@ static bool isSafeToMove(Instruction *Inst, AliasAnalysis *AA,
return false;
}
- if (Inst->mayWriteToMemory()) {
- Stores.insert(Inst);
- return false;
- }
-
if (isa<TerminatorInst>(Inst) || isa<PHINode>(Inst))
return false;
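
The reordering in this hunk is a correctness fix rather than a cleanup: LoadInst::mayWriteToMemory() returns true for volatile loads, so the hoisted check subsumes the old isVolatile() test and also guarantees every memory-writing instruction is recorded in Stores before any later load is tested against the set. A sketch of the fixed predicate, with illustrative names:

    #include "llvm/Instructions.h"
    #include "llvm/Analysis/AliasAnalysis.h"
    #include "llvm/ADT/SmallPtrSet.h"

    using namespace llvm;

    // Record writers first; only then may a load be checked for clobbers.
    static bool safeToSink(Instruction *I, AliasAnalysis *AA,
                           SmallPtrSet<Instruction *, 8> &Writers) {
      if (I->mayWriteToMemory()) {          // also catches volatile loads
        Writers.insert(I);
        return false;
      }
      if (LoadInst *L = dyn_cast<LoadInst>(I)) {
        AliasAnalysis::Location Loc = AA->getLocation(L);
        for (SmallPtrSet<Instruction *, 8>::iterator W = Writers.begin(),
             E = Writers.end(); W != E; ++W)
          if (AA->getModRefInfo(*W, Loc) & AliasAnalysis::Mod)
            return false;
      }
      return !isa<TerminatorInst>(I) && !isa<PHINode>(I);
    }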
diff --git a/lib/Transforms/Scalar/TailDuplication.cpp b/lib/Transforms/Scalar/TailDuplication.cpp
deleted file mode 100644
index 9dd83c0..0000000
--- a/lib/Transforms/Scalar/TailDuplication.cpp
+++ /dev/null
@@ -1,373 +0,0 @@
-//===- TailDuplication.cpp - Simplify CFG through tail duplication --------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This pass performs a limited form of tail duplication, intended to simplify
-// CFGs by removing some unconditional branches. This pass is necessary to
-// straighten out loops created by the C front-end, but also is capable of
-// making other code nicer. After this pass is run, the CFG simplify pass
-// should be run to clean up the mess.
-//
-// This pass could be enhanced in the future to use profile information to be
-// more aggressive.
-//
-//===----------------------------------------------------------------------===//
-
-#define DEBUG_TYPE "tailduplicate"
-#include "llvm/Transforms/Scalar.h"
-#include "llvm/Constant.h"
-#include "llvm/Function.h"
-#include "llvm/Instructions.h"
-#include "llvm/IntrinsicInst.h"
-#include "llvm/Pass.h"
-#include "llvm/Type.h"
-#include "llvm/ADT/Statistic.h"
-#include "llvm/ADT/SmallPtrSet.h"
-#include "llvm/Analysis/InstructionSimplify.h"
-#include "llvm/Support/CFG.h"
-#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/raw_ostream.h"
-#include "llvm/Transforms/Utils/Local.h"
-#include <map>
-using namespace llvm;
-
-STATISTIC(NumEliminated, "Number of unconditional branches eliminated");
-
-static cl::opt<unsigned>
-TailDupThreshold("taildup-threshold",
- cl::desc("Max block size to tail duplicate"),
- cl::init(1), cl::Hidden);
-
-namespace {
- class TailDup : public FunctionPass {
- bool runOnFunction(Function &F);
- public:
- static char ID; // Pass identification, replacement for typeid
- TailDup() : FunctionPass(ID) {
- initializeTailDupPass(*PassRegistry::getPassRegistry());
- }
-
- private:
- inline bool shouldEliminateUnconditionalBranch(TerminatorInst *, unsigned);
- inline void eliminateUnconditionalBranch(BranchInst *BI);
- SmallPtrSet<BasicBlock*, 4> CycleDetector;
- };
-}
-
-char TailDup::ID = 0;
-INITIALIZE_PASS(TailDup, "tailduplicate", "Tail Duplication", false, false)
-
-// Public interface to the Tail Duplication pass
-FunctionPass *llvm::createTailDuplicationPass() { return new TailDup(); }
-
-/// runOnFunction - Top level algorithm - Loop over each unconditional branch in
-/// the function, eliminating it if it looks attractive enough. CycleDetector
-/// prevents infinite loops by checking that we aren't redirecting a branch to
-/// a place it already pointed to earlier; see PR 2323.
-bool TailDup::runOnFunction(Function &F) {
- bool Changed = false;
- CycleDetector.clear();
- for (Function::iterator I = F.begin(), E = F.end(); I != E; ) {
- if (shouldEliminateUnconditionalBranch(I->getTerminator(),
- TailDupThreshold)) {
- eliminateUnconditionalBranch(cast<BranchInst>(I->getTerminator()));
- Changed = true;
- } else {
- ++I;
- CycleDetector.clear();
- }
- }
- return Changed;
-}
-
-/// shouldEliminateUnconditionalBranch - Return true if this branch looks
-/// attractive to eliminate. We eliminate the branch if the destination basic
-/// block has <= 5 instructions in it, not counting PHI nodes. In practice,
-/// since one of these is a terminator instruction, this means that we will add
-/// up to 4 instructions to the new block.
-///
-/// We don't count PHI nodes in the count since they will be removed when the
-/// contents of the block are copied over.
-///
-bool TailDup::shouldEliminateUnconditionalBranch(TerminatorInst *TI,
- unsigned Threshold) {
- BranchInst *BI = dyn_cast<BranchInst>(TI);
- if (!BI || !BI->isUnconditional()) return false; // Not an uncond branch!
-
- BasicBlock *Dest = BI->getSuccessor(0);
- if (Dest == BI->getParent()) return false; // Do not loop infinitely!
-
- // Do not inline a block if we will just get another branch to the same block!
- TerminatorInst *DTI = Dest->getTerminator();
- if (BranchInst *DBI = dyn_cast<BranchInst>(DTI))
- if (DBI->isUnconditional() && DBI->getSuccessor(0) == Dest)
- return false; // Do not loop infinitely!
-
- // FIXME: DemoteRegToStack cannot yet demote invoke instructions to the stack,
- // because doing so would require breaking critical edges. This should be
- // fixed eventually.
- if (!DTI->use_empty())
- return false;
-
- // Do not bother with blocks with only a single predecessor: simplify
- // CFG will fold these two blocks together!
- pred_iterator PI = pred_begin(Dest), PE = pred_end(Dest);
- ++PI;
- if (PI == PE) return false; // Exactly one predecessor!
-
- BasicBlock::iterator I = Dest->getFirstNonPHI();
-
- for (unsigned Size = 0; I != Dest->end(); ++I) {
- if (Size == Threshold) return false; // The block is too large.
-
- // Don't tail duplicate call instructions. They are very large compared to
- // other instructions.
- if (isa<CallInst>(I) || isa<InvokeInst>(I)) return false;
-
- // Also alloca and malloc.
- if (isa<AllocaInst>(I)) return false;
-
- // Some vector instructions can expand into a number of instructions.
- if (isa<ShuffleVectorInst>(I) || isa<ExtractElementInst>(I) ||
- isa<InsertElementInst>(I)) return false;
-
- // Only count instructions that are not debugger intrinsics.
- if (!isa<DbgInfoIntrinsic>(I)) ++Size;
- }
-
- // Do not tail duplicate a block that has thousands of successors into a block
- // with a single successor if the block has many other predecessors. This can
- // cause an N^2 explosion in CFG edges (and PHI node entries), as seen in
- // cases that have a large number of indirect gotos.
- unsigned NumSuccs = DTI->getNumSuccessors();
- if (NumSuccs > 8) {
- unsigned TooMany = 128;
- if (NumSuccs >= TooMany) return false;
- TooMany = TooMany/NumSuccs;
- for (; PI != PE; ++PI)
- if (TooMany-- == 0) return false;
- }
-
- // If this unconditional branch is a fall-through, be careful about
- // tail duplicating it. In particular, we don't want to taildup it if the
- // original block will still be there after taildup is completed: doing so
- // would eliminate the fall-through, requiring unconditional branches.
- Function::iterator DestI = Dest;
- if (&*--DestI == BI->getParent()) {
- // The uncond branch is a fall-through. Tail duplication of the block
- // will eliminate the fall-through-ness and end up cloning the terminator
- // at the end of the Dest block. Since the original Dest block will
- // continue to exist, this means that one or the other will not be able to
- // fall through. One typical example that this helps with is code like:
- // if (a)
- // foo();
- // if (b)
- // foo();
- // Cloning the 'if b' block into the end of the first foo block is messy.
-
- // The messy case is when the fall-through block falls through to other
- // blocks. This is what we would be preventing if we cloned the block.
- DestI = Dest;
- if (++DestI != Dest->getParent()->end()) {
- BasicBlock *DestSucc = DestI;
- // If any of Dest's successors are fall-throughs, don't do this xform.
- for (succ_iterator SI = succ_begin(Dest), SE = succ_end(Dest);
- SI != SE; ++SI)
- if (*SI == DestSucc)
- return false;
- }
- }
-
- // Finally, check that we haven't redirected to this target block earlier;
- // there are cases where we loop forever if we don't check this (PR 2323).
- if (!CycleDetector.insert(Dest))
- return false;
-
- return true;
-}
-
-/// FindObviousSharedDomOf - We know there is a branch from SrcBlock to
-/// DestBlock, and that SrcBlock is not the only predecessor of DstBlock. If we
-/// can find a predecessor of SrcBlock that is a dominator of both SrcBlock and
-/// DstBlock, return it.
-static BasicBlock *FindObviousSharedDomOf(BasicBlock *SrcBlock,
- BasicBlock *DstBlock) {
- // SrcBlock must have a single predecessor.
- pred_iterator PI = pred_begin(SrcBlock), PE = pred_end(SrcBlock);
- if (PI == PE || ++PI != PE) return 0;
-
- BasicBlock *SrcPred = *pred_begin(SrcBlock);
-
- // Look at the predecessors of DstBlock. One of them will be SrcBlock. If
- // there is only one other pred, get it, otherwise we can't handle it.
- PI = pred_begin(DstBlock); PE = pred_end(DstBlock);
- BasicBlock *DstOtherPred = 0;
- BasicBlock *P = *PI;
- if (P == SrcBlock) {
- if (++PI == PE) return 0;
- DstOtherPred = *PI;
- if (++PI != PE) return 0;
- } else {
- DstOtherPred = P;
- if (++PI == PE || *PI != SrcBlock || ++PI != PE) return 0;
- }
-
- // We can handle two situations here: "if then" and "if then else" blocks. An
- // 'if then' situation is just where DstOtherPred == SrcPred.
- if (DstOtherPred == SrcPred)
- return SrcPred;
-
- // Check to see if we have an "if then else" situation, which means that
- // DstOtherPred will have a single predecessor and it will be SrcPred.
- PI = pred_begin(DstOtherPred); PE = pred_end(DstOtherPred);
- if (PI != PE && *PI == SrcPred) {
- if (++PI != PE) return 0; // Not a single pred.
- return SrcPred; // Otherwise, it's an "if then" situation. Return the if.
- }
-
- // Otherwise, this is something we can't handle.
- return 0;
-}
-
-
-/// eliminateUnconditionalBranch - Clone the instructions from the destination
-/// block into the source block, eliminating the specified unconditional branch.
-/// If the destination block defines values used by successors of the dest
-/// block, we may need to insert PHI nodes.
-///
-void TailDup::eliminateUnconditionalBranch(BranchInst *Branch) {
- BasicBlock *SourceBlock = Branch->getParent();
- BasicBlock *DestBlock = Branch->getSuccessor(0);
- assert(SourceBlock != DestBlock && "Our predicate is broken!");
-
- DEBUG(dbgs() << "TailDuplication[" << SourceBlock->getParent()->getName()
- << "]: Eliminating branch: " << *Branch);
-
- // See if we can avoid duplicating code by moving it up to a dominator of both
- // blocks.
- if (BasicBlock *DomBlock = FindObviousSharedDomOf(SourceBlock, DestBlock)) {
- DEBUG(dbgs() << "Found shared dominator: " << DomBlock->getName() << "\n");
-
- // If there are non-phi instructions in DestBlock that have no operands
- // defined in DestBlock, and if the instruction has no side effects, we can
- // move the instruction to DomBlock instead of duplicating it.
- BasicBlock::iterator BBI = DestBlock->getFirstNonPHI();
- while (!isa<TerminatorInst>(BBI)) {
- Instruction *I = BBI++;
-
- bool CanHoist = I->isSafeToSpeculativelyExecute() &&
- !I->mayReadFromMemory();
- if (CanHoist) {
- for (unsigned op = 0, e = I->getNumOperands(); op != e; ++op)
- if (Instruction *OpI = dyn_cast<Instruction>(I->getOperand(op)))
- if (OpI->getParent() == DestBlock ||
- (isa<InvokeInst>(OpI) && OpI->getParent() == DomBlock)) {
- CanHoist = false;
- break;
- }
- if (CanHoist) {
- // Remove from DestBlock, move right before the term in DomBlock.
- DestBlock->getInstList().remove(I);
- DomBlock->getInstList().insert(DomBlock->getTerminator(), I);
- DEBUG(dbgs() << "Hoisted: " << *I);
- }
- }
- }
- }
-
- // Tail duplication can not update SSA properties correctly if the values
- // defined in the duplicated tail are used outside of the tail itself. For
- // this reason, we spill all values that are used outside of the tail to the
- // stack.
- for (BasicBlock::iterator I = DestBlock->begin(); I != DestBlock->end(); ++I)
- if (I->isUsedOutsideOfBlock(DestBlock)) {
- // We found a use outside of the tail. Create a new stack slot to
- // break this inter-block usage pattern.
- DemoteRegToStack(*I);
- }
-
- // We are going to have to map operands from the original block B to the new
- // copy of the block B'. If there are PHI nodes in the DestBlock, these PHI
- // nodes also define part of this mapping. Loop over these PHI nodes, adding
- // them to our mapping.
- //
- std::map<Value*, Value*> ValueMapping;
-
- BasicBlock::iterator BI = DestBlock->begin();
- bool HadPHINodes = isa<PHINode>(BI);
- for (; PHINode *PN = dyn_cast<PHINode>(BI); ++BI)
- ValueMapping[PN] = PN->getIncomingValueForBlock(SourceBlock);
-
- // Clone the non-phi instructions of the dest block into the source block,
- // keeping track of the mapping...
- //
- for (; BI != DestBlock->end(); ++BI) {
- Instruction *New = BI->clone();
- New->setName(BI->getName());
- SourceBlock->getInstList().push_back(New);
- ValueMapping[BI] = New;
- }
-
- // Now that we have built the mapping information and cloned all of the
- // instructions (giving us a new terminator, among other things), walk the new
- // instructions, rewriting references of old instructions to use new
- // instructions.
- //
- BI = Branch; ++BI; // Get an iterator to the first new instruction
- for (; BI != SourceBlock->end(); ++BI)
- for (unsigned i = 0, e = BI->getNumOperands(); i != e; ++i) {
- std::map<Value*, Value*>::const_iterator I =
- ValueMapping.find(BI->getOperand(i));
- if (I != ValueMapping.end())
- BI->setOperand(i, I->second);
- }
-
- // Next we check to see if any of the successors of DestBlock had PHI nodes.
- // If so, we need to add entries to the PHI nodes for SourceBlock now.
- for (succ_iterator SI = succ_begin(DestBlock), SE = succ_end(DestBlock);
- SI != SE; ++SI) {
- BasicBlock *Succ = *SI;
- for (BasicBlock::iterator PNI = Succ->begin(); isa<PHINode>(PNI); ++PNI) {
- PHINode *PN = cast<PHINode>(PNI);
- // Ok, we have a PHI node. Figure out what the incoming value was for the
- // DestBlock.
- Value *IV = PN->getIncomingValueForBlock(DestBlock);
-
- // Remap the value if necessary...
- std::map<Value*, Value*>::const_iterator I = ValueMapping.find(IV);
- if (I != ValueMapping.end())
- IV = I->second;
- PN->addIncoming(IV, SourceBlock);
- }
- }
-
- // Next, remove the old branch instruction, and any PHI node entries that we
- // had.
- BI = Branch; ++BI; // Get an iterator to the first new instruction
- DestBlock->removePredecessor(SourceBlock); // Remove entries in PHI nodes...
- SourceBlock->getInstList().erase(Branch); // Destroy the uncond branch...
-
- // Final step: now that we have finished everything up, walk the cloned
- // instructions one last time, constant propagating and DCE'ing them, because
- // they may not be needed anymore.
- //
- if (HadPHINodes) {
- while (BI != SourceBlock->end()) {
- Instruction *Inst = BI++;
- if (isInstructionTriviallyDead(Inst))
- Inst->eraseFromParent();
- else if (Value *V = SimplifyInstruction(Inst)) {
- Inst->replaceAllUsesWith(V);
- Inst->eraseFromParent();
- }
- }
- }
-
- ++NumEliminated; // We just killed a branch!
-}
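
With that, the IR-level tail duplication pass is gone from the 3.0 tree (machine-level tail duplication in lib/CodeGen is unaffected). For reference, a minimal reconstruction of the size heuristic the deleted pass applied, assuming the 3.0 headers; the helper name is illustrative and the vector-instruction exclusions are omitted:

    #include "llvm/BasicBlock.h"
    #include "llvm/Instructions.h"
    #include "llvm/IntrinsicInst.h"

    using namespace llvm;

    // Count non-PHI, non-debug instructions; refuse blocks that are too
    // large or contain calls, invokes, or allocas (expensive to clone).
    static bool smallEnoughToDuplicate(BasicBlock *Dest, unsigned Threshold) {
      unsigned Size = 0;
      for (BasicBlock::iterator I = Dest->getFirstNonPHI(), E = Dest->end();
           I != E; ++I) {
        if (isa<CallInst>(I) || isa<InvokeInst>(I) || isa<AllocaInst>(I))
          return false;
        if (!isa<DbgInfoIntrinsic>(I) && ++Size > Threshold)
          return false;
      }
      return true;
    }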
diff --git a/lib/Transforms/Utils/AddrModeMatcher.cpp b/lib/Transforms/Utils/AddrModeMatcher.cpp
index be7bed1..8e5a1eb 100644
--- a/lib/Transforms/Utils/AddrModeMatcher.cpp
+++ b/lib/Transforms/Utils/AddrModeMatcher.cpp
@@ -222,7 +222,7 @@ bool AddressingModeMatcher::MatchOperationAddr(User *AddrInst, unsigned Opcode,
const TargetData *TD = TLI.getTargetData();
gep_type_iterator GTI = gep_type_begin(AddrInst);
for (unsigned i = 1, e = AddrInst->getNumOperands(); i != e; ++i, ++GTI) {
- if (const StructType *STy = dyn_cast<StructType>(*GTI)) {
+ if (StructType *STy = dyn_cast<StructType>(*GTI)) {
const StructLayout *SL = TD->getStructLayout(STy);
unsigned Idx =
cast<ConstantInt>(AddrInst->getOperand(i))->getZExtValue();
@@ -557,7 +557,7 @@ IsProfitableToFoldIntoAddressingMode(Instruction *I, ExtAddrMode &AMBefore,
Value *Address = User->getOperand(OpNo);
if (!Address->getType()->isPointerTy())
return false;
- const Type *AddressAccessTy =
+ Type *AddressAccessTy =
cast<PointerType>(Address->getType())->getElementType();
// Do a match against the root of this address, ignoring profitability. This
diff --git a/lib/Transforms/Utils/BasicBlockUtils.cpp b/lib/Transforms/Utils/BasicBlockUtils.cpp
index b4f74f9..a7f9efd 100644
--- a/lib/Transforms/Utils/BasicBlockUtils.cpp
+++ b/lib/Transforms/Utils/BasicBlockUtils.cpp
@@ -287,7 +287,7 @@ BasicBlock *llvm::SplitEdge(BasicBlock *BB, BasicBlock *Succ, Pass *P) {
///
BasicBlock *llvm::SplitBlock(BasicBlock *Old, Instruction *SplitPt, Pass *P) {
BasicBlock::iterator SplitIt = SplitPt;
- while (isa<PHINode>(SplitIt))
+ while (isa<PHINode>(SplitIt) || isa<LandingPadInst>(SplitIt))
++SplitIt;
BasicBlock *New = Old->splitBasicBlock(SplitIt, Old->getName()+".split");
@@ -299,138 +299,114 @@ BasicBlock *llvm::SplitBlock(BasicBlock *Old, Instruction *SplitPt, Pass *P) {
if (DominatorTree *DT = P->getAnalysisIfAvailable<DominatorTree>()) {
// Old dominates New. New node dominates all other nodes dominated by Old.
- DomTreeNode *OldNode = DT->getNode(Old);
- std::vector<DomTreeNode *> Children;
- for (DomTreeNode::iterator I = OldNode->begin(), E = OldNode->end();
- I != E; ++I)
- Children.push_back(*I);
+ if (DomTreeNode *OldNode = DT->getNode(Old)) {
+ std::vector<DomTreeNode *> Children;
+ for (DomTreeNode::iterator I = OldNode->begin(), E = OldNode->end();
+ I != E; ++I)
+ Children.push_back(*I);
DomTreeNode *NewNode = DT->addNewBlock(New,Old);
for (std::vector<DomTreeNode *>::iterator I = Children.begin(),
E = Children.end(); I != E; ++I)
DT->changeImmediateDominator(*I, NewNode);
+ }
}
return New;
}
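
Two independent fixes land in the SplitBlock hunks above: the split point is now advanced past a LandingPadInst as well as PHIs (an EH pad must keep its landingpad at the top of the block), and the dominator-tree update tolerates Old having no DomTree node, as happens for unreachable blocks. A sketch of the split-point rule, assuming the 3.0 headers:

    #include "llvm/BasicBlock.h"
    #include "llvm/Instructions.h"

    using namespace llvm;

    // The earliest point a block may be split at: after all PHIs and
    // after the landingpad, if any.
    static BasicBlock::iterator firstSplittablePoint(BasicBlock *BB) {
      BasicBlock::iterator It = BB->begin();
      while (isa<PHINode>(It) || isa<LandingPadInst>(It))
        ++It;
      return It;
    }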
+/// UpdateAnalysisInformation - Update DominatorTree, LoopInfo, and LCSSA
+/// analysis information.
+static void UpdateAnalysisInformation(BasicBlock *OldBB, BasicBlock *NewBB,
+ ArrayRef<BasicBlock *> Preds,
+ Pass *P, bool &HasLoopExit) {
+ if (!P) return;
-/// SplitBlockPredecessors - This method transforms BB by introducing a new
-/// basic block into the function, and moving some of the predecessors of BB to
-/// be predecessors of the new block. The new predecessors are indicated by the
-/// Preds array, which has NumPreds elements in it. The new block is given a
-/// suffix of 'Suffix'.
-///
-/// This currently updates the LLVM IR, AliasAnalysis, DominatorTree,
-/// LoopInfo, and LCCSA but no other analyses. In particular, it does not
-/// preserve LoopSimplify (because it's complicated to handle the case where one
-/// of the edges being split is an exit of a loop with other exits).
-///
-BasicBlock *llvm::SplitBlockPredecessors(BasicBlock *BB,
- BasicBlock *const *Preds,
- unsigned NumPreds, const char *Suffix,
- Pass *P) {
- // Create new basic block, insert right before the original block.
- BasicBlock *NewBB = BasicBlock::Create(BB->getContext(), BB->getName()+Suffix,
- BB->getParent(), BB);
-
- // The new block unconditionally branches to the old block.
- BranchInst *BI = BranchInst::Create(BB, NewBB);
-
- LoopInfo *LI = P ? P->getAnalysisIfAvailable<LoopInfo>() : 0;
- Loop *L = LI ? LI->getLoopFor(BB) : 0;
- bool PreserveLCSSA = P->mustPreserveAnalysisID(LCSSAID);
+ LoopInfo *LI = P->getAnalysisIfAvailable<LoopInfo>();
+ Loop *L = LI ? LI->getLoopFor(OldBB) : 0;
- // Move the edges from Preds to point to NewBB instead of BB.
- // While here, if we need to preserve loop analyses, collect
- // some information about how this split will affect loops.
- bool HasLoopExit = false;
+ // If we need to preserve loop analyses, collect some information about how
+ // this split will affect loops.
bool IsLoopEntry = !!L;
bool SplitMakesNewLoopHeader = false;
- for (unsigned i = 0; i != NumPreds; ++i) {
- // This is slightly more strict than necessary; the minimum requirement
- // is that there be no more than one indirectbr branching to BB. And
- // all BlockAddress uses would need to be updated.
- assert(!isa<IndirectBrInst>(Preds[i]->getTerminator()) &&
- "Cannot split an edge from an IndirectBrInst");
-
- Preds[i]->getTerminator()->replaceUsesOfWith(BB, NewBB);
-
- if (LI) {
- // If we need to preserve LCSSA, determine if any of
- // the preds is a loop exit.
+ if (LI) {
+ bool PreserveLCSSA = P->mustPreserveAnalysisID(LCSSAID);
+ for (ArrayRef<BasicBlock*>::iterator
+ i = Preds.begin(), e = Preds.end(); i != e; ++i) {
+ BasicBlock *Pred = *i;
+
+ // If we need to preserve LCSSA, determine if any of the preds is a loop
+ // exit.
if (PreserveLCSSA)
- if (Loop *PL = LI->getLoopFor(Preds[i]))
- if (!PL->contains(BB))
+ if (Loop *PL = LI->getLoopFor(Pred))
+ if (!PL->contains(OldBB))
HasLoopExit = true;
- // If we need to preserve LoopInfo, note whether any of the
- // preds crosses an interesting loop boundary.
- if (L) {
- if (L->contains(Preds[i]))
- IsLoopEntry = false;
- else
- SplitMakesNewLoopHeader = true;
- }
+
+ // If we need to preserve LoopInfo, note whether any of the preds crosses
+ // an interesting loop boundary.
+ if (!L) continue;
+ if (L->contains(Pred))
+ IsLoopEntry = false;
+ else
+ SplitMakesNewLoopHeader = true;
}
}
// Update dominator tree if available.
- DominatorTree *DT = P ? P->getAnalysisIfAvailable<DominatorTree>() : 0;
+ DominatorTree *DT = P->getAnalysisIfAvailable<DominatorTree>();
if (DT)
DT->splitBlock(NewBB);
- // Insert a new PHI node into NewBB for every PHI node in BB and that new PHI
- // node becomes an incoming value for BB's phi node. However, if the Preds
- // list is empty, we need to insert dummy entries into the PHI nodes in BB to
- // account for the newly created predecessor.
- if (NumPreds == 0) {
- // Insert dummy values as the incoming value.
- for (BasicBlock::iterator I = BB->begin(); isa<PHINode>(I); ++I)
- cast<PHINode>(I)->addIncoming(UndefValue::get(I->getType()), NewBB);
- return NewBB;
+ if (!L) return;
+
+ if (IsLoopEntry) {
+ // Add the new block to the nearest enclosing loop (and not an adjacent
+ // loop). To find this, examine each of the predecessors and determine which
+ // loops enclose them, and select the most-nested loop which contains the
+ // loop containing the block being split.
+ Loop *InnermostPredLoop = 0;
+ for (ArrayRef<BasicBlock*>::iterator
+ i = Preds.begin(), e = Preds.end(); i != e; ++i) {
+ BasicBlock *Pred = *i;
+ if (Loop *PredLoop = LI->getLoopFor(Pred)) {
+ // Seek a loop which actually contains the block being split (to avoid
+ // adjacent loops).
+ while (PredLoop && !PredLoop->contains(OldBB))
+ PredLoop = PredLoop->getParentLoop();
+
+ // Select the most-nested of these loops which contains the block.
+ if (PredLoop && PredLoop->contains(OldBB) &&
+ (!InnermostPredLoop ||
+ InnermostPredLoop->getLoopDepth() < PredLoop->getLoopDepth()))
+ InnermostPredLoop = PredLoop;
+ }
+ }
+
+ if (InnermostPredLoop)
+ InnermostPredLoop->addBasicBlockToLoop(NewBB, LI->getBase());
+ } else {
+ L->addBasicBlockToLoop(NewBB, LI->getBase());
+ if (SplitMakesNewLoopHeader)
+ L->moveToHeader(NewBB);
}
+}
+/// UpdatePHINodes - Update the PHI nodes in OrigBB to include the values coming
+/// from NewBB. This also updates AliasAnalysis, if available.
+static void UpdatePHINodes(BasicBlock *OrigBB, BasicBlock *NewBB,
+ ArrayRef<BasicBlock*> Preds, BranchInst *BI,
+ Pass *P, bool HasLoopExit) {
+ // Otherwise, create a new PHI node in NewBB for each PHI node in OrigBB.
AliasAnalysis *AA = P ? P->getAnalysisIfAvailable<AliasAnalysis>() : 0;
-
- if (L) {
- if (IsLoopEntry) {
- // Add the new block to the nearest enclosing loop (and not an
- // adjacent loop). To find this, examine each of the predecessors and
- // determine which loops enclose them, and select the most-nested loop
- // which contains the loop containing the block being split.
- Loop *InnermostPredLoop = 0;
- for (unsigned i = 0; i != NumPreds; ++i)
- if (Loop *PredLoop = LI->getLoopFor(Preds[i])) {
- // Seek a loop which actually contains the block being split (to
- // avoid adjacent loops).
- while (PredLoop && !PredLoop->contains(BB))
- PredLoop = PredLoop->getParentLoop();
- // Select the most-nested of these loops which contains the block.
- if (PredLoop &&
- PredLoop->contains(BB) &&
- (!InnermostPredLoop ||
- InnermostPredLoop->getLoopDepth() < PredLoop->getLoopDepth()))
- InnermostPredLoop = PredLoop;
- }
- if (InnermostPredLoop)
- InnermostPredLoop->addBasicBlockToLoop(NewBB, LI->getBase());
- } else {
- L->addBasicBlockToLoop(NewBB, LI->getBase());
- if (SplitMakesNewLoopHeader)
- L->moveToHeader(NewBB);
- }
- }
-
- // Otherwise, create a new PHI node in NewBB for each PHI node in BB.
- for (BasicBlock::iterator I = BB->begin(); isa<PHINode>(I); ) {
+ for (BasicBlock::iterator I = OrigBB->begin(); isa<PHINode>(I); ) {
PHINode *PN = cast<PHINode>(I++);
-
+
// Check to see if all of the values coming in are the same. If so, we
// don't need to create a new PHI node, unless it's needed for LCSSA.
Value *InVal = 0;
if (!HasLoopExit) {
InVal = PN->getIncomingValueForBlock(Preds[0]);
- for (unsigned i = 1; i != NumPreds; ++i)
+ for (unsigned i = 1, e = Preds.size(); i != e; ++i)
if (InVal != PN->getIncomingValueForBlock(Preds[i])) {
InVal = 0;
break;
@@ -441,31 +417,191 @@ BasicBlock *llvm::SplitBlockPredecessors(BasicBlock *BB,
// If all incoming values for the new PHI would be the same, just don't
// make a new PHI. Instead, just remove the incoming values from the old
// PHI.
- for (unsigned i = 0; i != NumPreds; ++i)
+ for (unsigned i = 0, e = Preds.size(); i != e; ++i)
PN->removeIncomingValue(Preds[i], false);
} else {
// If the values coming into the block are not the same, we need a PHI.
// Create the new PHI node, insert it into NewBB at the end of the block
PHINode *NewPHI =
- PHINode::Create(PN->getType(), NumPreds, PN->getName()+".ph", BI);
+ PHINode::Create(PN->getType(), Preds.size(), PN->getName() + ".ph", BI);
if (AA) AA->copyValue(PN, NewPHI);
// Move all of the PHI values for 'Preds' to the new PHI.
- for (unsigned i = 0; i != NumPreds; ++i) {
+ for (unsigned i = 0, e = Preds.size(); i != e; ++i) {
Value *V = PN->removeIncomingValue(Preds[i], false);
NewPHI->addIncoming(V, Preds[i]);
}
+
InVal = NewPHI;
}
-
+
// Add an incoming value to the PHI node in the loop for the preheader
// edge.
PN->addIncoming(InVal, NewBB);
}
+}
+
+/// SplitBlockPredecessors - This method transforms BB by introducing a new
+/// basic block into the function, and moving some of the predecessors of BB to
+/// be predecessors of the new block. The new predecessors are indicated by the
+/// Preds array, which has NumPreds elements in it. The new block is given a
+/// suffix of 'Suffix'.
+///
+/// This currently updates the LLVM IR, AliasAnalysis, DominatorTree,
+/// LoopInfo, and LCSSA but no other analyses. In particular, it does not
+/// preserve LoopSimplify (because it's complicated to handle the case where one
+/// of the edges being split is an exit of a loop with other exits).
+///
+BasicBlock *llvm::SplitBlockPredecessors(BasicBlock *BB,
+ BasicBlock *const *Preds,
+ unsigned NumPreds, const char *Suffix,
+ Pass *P) {
+ // Create new basic block, insert right before the original block.
+ BasicBlock *NewBB = BasicBlock::Create(BB->getContext(), BB->getName()+Suffix,
+ BB->getParent(), BB);
+ // The new block unconditionally branches to the old block.
+ BranchInst *BI = BranchInst::Create(BB, NewBB);
+
+ // Move the edges from Preds to point to NewBB instead of BB.
+ for (unsigned i = 0; i != NumPreds; ++i) {
+ // This is slightly more strict than necessary; the minimum requirement
+ // is that there be no more than one indirectbr branching to BB. And
+ // all BlockAddress uses would need to be updated.
+ assert(!isa<IndirectBrInst>(Preds[i]->getTerminator()) &&
+ "Cannot split an edge from an IndirectBrInst");
+ Preds[i]->getTerminator()->replaceUsesOfWith(BB, NewBB);
+ }
+
+ // Insert a new PHI node into NewBB for every PHI node in BB and that new PHI
+ // node becomes an incoming value for BB's phi node. However, if the Preds
+ // list is empty, we need to insert dummy entries into the PHI nodes in BB to
+ // account for the newly created predecessor.
+ if (NumPreds == 0) {
+ // Insert dummy values as the incoming value.
+ for (BasicBlock::iterator I = BB->begin(); isa<PHINode>(I); ++I)
+ cast<PHINode>(I)->addIncoming(UndefValue::get(I->getType()), NewBB);
+ return NewBB;
+ }
+
+ // Update DominatorTree, LoopInfo, and LCSSA analysis information.
+ bool HasLoopExit = false;
+ UpdateAnalysisInformation(BB, NewBB, ArrayRef<BasicBlock*>(Preds, NumPreds),
+ P, HasLoopExit);
+
+ // Update the PHI nodes in BB with the values coming from NewBB.
+ UpdatePHINodes(BB, NewBB, ArrayRef<BasicBlock*>(Preds, NumPreds), BI,
+ P, HasLoopExit);
return NewBB;
}
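
For reference, a usage sketch of the refactored entry point, which now delegates analysis and PHI bookkeeping to the two helpers above (names here are illustrative):

    #include "llvm/Transforms/Utils/BasicBlockUtils.h"

    using namespace llvm;

    // Peel two predecessors of BB into a fresh ".split" block; the call
    // updates DominatorTree/LoopInfo/LCSSA through the Pass, if provided.
    static BasicBlock *peelTwoPreds(BasicBlock *BB, BasicBlock *P0,
                                    BasicBlock *P1, Pass *P) {
      BasicBlock *Preds[] = { P0, P1 };
      return SplitBlockPredecessors(BB, Preds, 2, ".split", P);
    }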
+/// SplitLandingPadPredecessors - This method transforms the landing pad,
+/// OrigBB, by introducing two new basic blocks into the function. One of those
+/// new basic blocks gets the predecessors listed in Preds. The other basic
+/// block gets the remaining predecessors of OrigBB. The landingpad instruction
+/// in OrigBB is cloned into both of the new basic blocks. The new blocks are given
+/// the suffixes 'Suffix1' and 'Suffix2', and are returned in the NewBBs vector.
+///
+/// This currently updates the LLVM IR, AliasAnalysis, DominatorTree,
+/// DominanceFrontier, LoopInfo, and LCSSA but no other analyses. In particular,
+/// it does not preserve LoopSimplify (because it's complicated to handle the
+/// case where one of the edges being split is an exit of a loop with other
+/// exits).
+///
+void llvm::SplitLandingPadPredecessors(BasicBlock *OrigBB,
+ ArrayRef<BasicBlock*> Preds,
+ const char *Suffix1, const char *Suffix2,
+ Pass *P,
+ SmallVectorImpl<BasicBlock*> &NewBBs) {
+ assert(OrigBB->isLandingPad() && "Trying to split a non-landing pad!");
+
+ // Create a new basic block for OrigBB's predecessors listed in Preds. Insert
+ // it right before the original block.
+ BasicBlock *NewBB1 = BasicBlock::Create(OrigBB->getContext(),
+ OrigBB->getName() + Suffix1,
+ OrigBB->getParent(), OrigBB);
+ NewBBs.push_back(NewBB1);
+
+ // The new block unconditionally branches to the old block.
+ BranchInst *BI1 = BranchInst::Create(OrigBB, NewBB1);
+
+ // Move the edges from Preds to point to NewBB1 instead of OrigBB.
+ for (unsigned i = 0, e = Preds.size(); i != e; ++i) {
+ // This is slightly more strict than necessary; the minimum requirement
+ // is that there be no more than one indirectbr branching to BB. And
+ // all BlockAddress uses would need to be updated.
+ assert(!isa<IndirectBrInst>(Preds[i]->getTerminator()) &&
+ "Cannot split an edge from an IndirectBrInst");
+ Preds[i]->getTerminator()->replaceUsesOfWith(OrigBB, NewBB1);
+ }
+
+ // Update DominatorTree, LoopInfo, and LCSSA analysis information.
+ bool HasLoopExit = false;
+ UpdateAnalysisInformation(OrigBB, NewBB1, Preds, P, HasLoopExit);
+
+ // Update the PHI nodes in OrigBB with the values coming from NewBB1.
+ UpdatePHINodes(OrigBB, NewBB1, Preds, BI1, P, HasLoopExit);
+
+ // Move the remaining edges from OrigBB to point to NewBB2.
+ SmallVector<BasicBlock*, 8> NewBB2Preds;
+ for (pred_iterator i = pred_begin(OrigBB), e = pred_end(OrigBB);
+ i != e; ) {
+ BasicBlock *Pred = *i++;
+ if (Pred == NewBB1) continue;
+ assert(!isa<IndirectBrInst>(Pred->getTerminator()) &&
+ "Cannot split an edge from an IndirectBrInst");
+ NewBB2Preds.push_back(Pred);
+ e = pred_end(OrigBB);
+ }
+
+ BasicBlock *NewBB2 = 0;
+ if (!NewBB2Preds.empty()) {
+ // Create another basic block for the rest of OrigBB's predecessors.
+ NewBB2 = BasicBlock::Create(OrigBB->getContext(),
+ OrigBB->getName() + Suffix2,
+ OrigBB->getParent(), OrigBB);
+ NewBBs.push_back(NewBB2);
+
+ // The new block unconditionally branches to the old block.
+ BranchInst *BI2 = BranchInst::Create(OrigBB, NewBB2);
+
+ // Move the remaining edges from OrigBB to point to NewBB2.
+ for (SmallVectorImpl<BasicBlock*>::iterator
+ i = NewBB2Preds.begin(), e = NewBB2Preds.end(); i != e; ++i)
+ (*i)->getTerminator()->replaceUsesOfWith(OrigBB, NewBB2);
+
+ // Update DominatorTree, LoopInfo, and LCSSA analysis information.
+ HasLoopExit = false;
+ UpdateAnalysisInformation(OrigBB, NewBB2, NewBB2Preds, P, HasLoopExit);
+
+ // Update the PHI nodes in OrigBB with the values coming from NewBB2.
+ UpdatePHINodes(OrigBB, NewBB2, NewBB2Preds, BI2, P, HasLoopExit);
+ }
+
+ LandingPadInst *LPad = OrigBB->getLandingPadInst();
+ Instruction *Clone1 = LPad->clone();
+ Clone1->setName(Twine("lpad") + Suffix1);
+ NewBB1->getInstList().insert(NewBB1->getFirstInsertionPt(), Clone1);
+
+ if (NewBB2) {
+ Instruction *Clone2 = LPad->clone();
+ Clone2->setName(Twine("lpad") + Suffix2);
+ NewBB2->getInstList().insert(NewBB2->getFirstInsertionPt(), Clone2);
+
+ // Create a PHI node for the two cloned landingpad instructions.
+ PHINode *PN = PHINode::Create(LPad->getType(), 2, "lpad.phi", LPad);
+ PN->addIncoming(Clone1, NewBB1);
+ PN->addIncoming(Clone2, NewBB2);
+ LPad->replaceAllUsesWith(PN);
+ LPad->eraseFromParent();
+ } else {
+ // There is no second clone. Just replace the landing pad with the first
+ // clone.
+ LPad->replaceAllUsesWith(Clone1);
+ LPad->eraseFromParent();
+ }
+}
+
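
A usage sketch of the new function: isolating a single invoke's unwind edge into its own clone of the landing pad (block names are illustrative; the lone predecessor converts implicitly to an ArrayRef):

    #include "llvm/Transforms/Utils/BasicBlockUtils.h"
    #include "llvm/ADT/SmallVector.h"

    using namespace llvm;

    static void isolateUnwindEdge(BasicBlock *LPadBB, BasicBlock *InvokeBB,
                                  Pass *P) {
      // NewBBs receives one block, or two if LPadBB had other preds.
      SmallVector<BasicBlock*, 2> NewBBs;
      SplitLandingPadPredecessors(LPadBB, InvokeBB, ".split1", ".split2",
                                  P, NewBBs);
    }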
/// FindFunctionBackedges - Analyze the specified function to find all of the
/// loop backedges in the function and return them. This is a relatively cheap
/// (compared to computing dominators and loop info) analysis.
diff --git a/lib/Transforms/Utils/BreakCriticalEdges.cpp b/lib/Transforms/Utils/BreakCriticalEdges.cpp
index 92ce500..c052910 100644
--- a/lib/Transforms/Utils/BreakCriticalEdges.cpp
+++ b/lib/Transforms/Utils/BreakCriticalEdges.cpp
@@ -102,7 +102,7 @@ bool llvm::isCriticalEdge(const TerminatorInst *TI, unsigned SuccNum,
++I; // Skip one edge due to the incoming arc from TI.
if (!AllowIdenticalEdges)
return I != E;
-
+
// If AllowIdenticalEdges is true, then we allow this edge to be considered
// non-critical iff all preds come from TI's block.
while (I != E) {
@@ -155,10 +155,10 @@ static void CreatePHIsForSplitLoopExit(SmallVectorImpl<BasicBlock *> &Preds,
/// This returns the new block if the edge was split, null otherwise.
///
/// If MergeIdenticalEdges is true (not the default), *all* edges from TI to the
-/// specified successor will be merged into the same critical edge block.
-/// This is most commonly interesting with switch instructions, which may
+/// specified successor will be merged into the same critical edge block.
+/// This is most commonly interesting with switch instructions, which may
/// have many edges to any one destination. This ensures that all edges to that
-/// dest go to one block instead of each going to a different block, but isn't
+/// dest go to one block instead of each going to a different block, but isn't
/// the standard definition of a "critical edge".
///
/// It is invalid to call this function on a critical edge that starts at an
@@ -167,15 +167,20 @@ static void CreatePHIsForSplitLoopExit(SmallVectorImpl<BasicBlock *> &Preds,
/// to.
///
BasicBlock *llvm::SplitCriticalEdge(TerminatorInst *TI, unsigned SuccNum,
- Pass *P, bool MergeIdenticalEdges) {
+ Pass *P, bool MergeIdenticalEdges,
+ bool DontDeleteUselessPhis) {
if (!isCriticalEdge(TI, SuccNum, MergeIdenticalEdges)) return 0;
-
+
assert(!isa<IndirectBrInst>(TI) &&
"Cannot split critical edge from IndirectBrInst");
-
+
BasicBlock *TIBB = TI->getParent();
BasicBlock *DestBB = TI->getSuccessor(SuccNum);
+ // Splitting the critical edge to a landing pad block is non-trivial. Don't do
+ // it in this generic function.
+ if (DestBB->isLandingPad()) return 0;
+
// Create a new basic block, linking it into the CFG.
BasicBlock *NewBB = BasicBlock::Create(TI->getContext(),
TIBB->getName() + "." + DestBB->getName() + "_crit_edge");
@@ -190,7 +195,7 @@ BasicBlock *llvm::SplitCriticalEdge(TerminatorInst *TI, unsigned SuccNum,
Function &F = *TIBB->getParent();
Function::iterator FBBI = TIBB;
F.getBasicBlockList().insert(++FBBI, NewBB);
-
+
// If there are any PHI nodes in DestBB, we need to update them so that they
// merge incoming values from NewBB instead of from TIBB.
{
@@ -207,35 +212,35 @@ BasicBlock *llvm::SplitCriticalEdge(TerminatorInst *TI, unsigned SuccNum,
// happens because the BB list of PHI nodes are usually in the same
// order.
if (PN->getIncomingBlock(BBIdx) != TIBB)
- BBIdx = PN->getBasicBlockIndex(TIBB);
+ BBIdx = PN->getBasicBlockIndex(TIBB);
PN->setIncomingBlock(BBIdx, NewBB);
}
}
-
+
// If there are any other edges from TIBB to DestBB, update those to go
// through the split block, making those edges non-critical as well (and
// reducing the number of phi entries in the DestBB if relevant).
if (MergeIdenticalEdges) {
for (unsigned i = SuccNum+1, e = TI->getNumSuccessors(); i != e; ++i) {
if (TI->getSuccessor(i) != DestBB) continue;
-
+
// Remove an entry for TIBB from DestBB phi nodes.
- DestBB->removePredecessor(TIBB);
-
+ DestBB->removePredecessor(TIBB, DontDeleteUselessPhis);
+
// We found another edge to DestBB, go to NewBB instead.
TI->setSuccessor(i, NewBB);
}
}
-
-
+
+
// If we don't have a pass object, we can't update anything...
if (P == 0) return NewBB;
-
+
DominatorTree *DT = P->getAnalysisIfAvailable<DominatorTree>();
LoopInfo *LI = P->getAnalysisIfAvailable<LoopInfo>();
ProfileInfo *PI = P->getAnalysisIfAvailable<ProfileInfo>();
-
+
// If we have nothing to update, just return.
if (DT == 0 && LI == 0 && PI == 0)
return NewBB;
@@ -263,7 +268,7 @@ BasicBlock *llvm::SplitCriticalEdge(TerminatorInst *TI, unsigned SuccNum,
}
bool NewBBDominatesDestBB = true;
-
+
// Should we update DominatorTree information?
if (DT) {
DomTreeNode *TINode = DT->getNode(TIBB);
@@ -274,7 +279,7 @@ BasicBlock *llvm::SplitCriticalEdge(TerminatorInst *TI, unsigned SuccNum,
if (TINode) { // Don't break unreachable code!
DomTreeNode *NewBBNode = DT->addNewBlock(NewBB, TIBB);
DomTreeNode *DestBBNode = 0;
-
+
// If NewBBDominatesDestBB hasn't been computed yet, do so with DT.
if (!OtherPreds.empty()) {
DestBBNode = DT->getNode(DestBB);
@@ -285,7 +290,7 @@ BasicBlock *llvm::SplitCriticalEdge(TerminatorInst *TI, unsigned SuccNum,
}
OtherPreds.clear();
}
-
+
// If NewBBDominatesDestBB, then NewBB dominates DestBB, otherwise it
// doesn't dominate anything.
if (NewBBDominatesDestBB) {
@@ -337,6 +342,8 @@ BasicBlock *llvm::SplitCriticalEdge(TerminatorInst *TI, unsigned SuccNum,
}
// For each unique exit block...
+ // FIXME: This code is functionally equivalent to the corresponding
+ // loop in LoopSimplify.
SmallVector<BasicBlock *, 4> ExitBlocks;
TIL->getExitBlocks(ExitBlocks);
for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i) {
@@ -348,10 +355,15 @@ BasicBlock *llvm::SplitCriticalEdge(TerminatorInst *TI, unsigned SuccNum,
for (pred_iterator I = pred_begin(Exit), E = pred_end(Exit);
I != E; ++I) {
BasicBlock *P = *I;
- if (TIL->contains(P))
+ if (TIL->contains(P)) {
+ if (isa<IndirectBrInst>(P->getTerminator())) {
+ Preds.clear();
+ break;
+ }
Preds.push_back(P);
- else
+ } else {
HasPredOutsideOfLoop = true;
+ }
}
// If there are any preds not in the loop, we'll need to split
// the edges. The Preds.empty() check is needed because a block
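
Two behavioral changes ride along with the whitespace cleanup in this file: SplitCriticalEdge now refuses edges into landing pads (returning 0, as in the not-critical case), and the new DontDeleteUselessPhis flag lets callers keep the single-entry PHIs that LCSSA form relies on. A usage sketch of the extended signature:

    #include "llvm/Transforms/Utils/BasicBlockUtils.h"

    using namespace llvm;

    static BasicBlock *splitKeepingPhis(TerminatorInst *TI, unsigned SuccNum,
                                        Pass *P) {
      // Returns 0 if the edge is not critical or targets a landing pad.
      return SplitCriticalEdge(TI, SuccNum, P, /*MergeIdenticalEdges=*/false,
                               /*DontDeleteUselessPhis=*/true);
    }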
diff --git a/lib/Transforms/Utils/BuildLibCalls.cpp b/lib/Transforms/Utils/BuildLibCalls.cpp
index 14bb17f..4b5f45b 100644
--- a/lib/Transforms/Utils/BuildLibCalls.cpp
+++ b/lib/Transforms/Utils/BuildLibCalls.cpp
@@ -58,8 +58,8 @@ Value *llvm::EmitStrChr(Value *Ptr, char C, IRBuilder<> &B,
AttributeWithIndex AWI =
AttributeWithIndex::get(~0u, Attribute::ReadOnly | Attribute::NoUnwind);
- const Type *I8Ptr = B.getInt8PtrTy();
- const Type *I32Ty = B.getInt32Ty();
+ Type *I8Ptr = B.getInt8PtrTy();
+ Type *I32Ty = B.getInt32Ty();
Constant *StrChr = M->getOrInsertFunction("strchr", AttrListPtr::get(&AWI, 1),
I8Ptr, I8Ptr, I32Ty, NULL);
CallInst *CI = B.CreateCall2(StrChr, CastToCStr(Ptr, B),
@@ -102,7 +102,7 @@ Value *llvm::EmitStrCpy(Value *Dst, Value *Src, IRBuilder<> &B,
AttributeWithIndex AWI[2];
AWI[0] = AttributeWithIndex::get(2, Attribute::NoCapture);
AWI[1] = AttributeWithIndex::get(~0u, Attribute::NoUnwind);
- const Type *I8Ptr = B.getInt8PtrTy();
+ Type *I8Ptr = B.getInt8PtrTy();
Value *StrCpy = M->getOrInsertFunction(Name, AttrListPtr::get(AWI, 2),
I8Ptr, I8Ptr, I8Ptr, NULL);
CallInst *CI = B.CreateCall2(StrCpy, CastToCStr(Dst, B), CastToCStr(Src, B),
@@ -120,7 +120,7 @@ Value *llvm::EmitStrNCpy(Value *Dst, Value *Src, Value *Len,
AttributeWithIndex AWI[2];
AWI[0] = AttributeWithIndex::get(2, Attribute::NoCapture);
AWI[1] = AttributeWithIndex::get(~0u, Attribute::NoUnwind);
- const Type *I8Ptr = B.getInt8PtrTy();
+ Type *I8Ptr = B.getInt8PtrTy();
Value *StrNCpy = M->getOrInsertFunction(Name, AttrListPtr::get(AWI, 2),
I8Ptr, I8Ptr, I8Ptr,
Len->getType(), NULL);
@@ -361,7 +361,7 @@ bool SimplifyFortifiedLibCalls::fold(CallInst *CI, const TargetData *TD) {
this->CI = CI;
Function *Callee = CI->getCalledFunction();
StringRef Name = Callee->getName();
- const FunctionType *FT = Callee->getFunctionType();
+ FunctionType *FT = Callee->getFunctionType();
LLVMContext &Context = CI->getParent()->getContext();
IRBuilder<> B(CI);
diff --git a/lib/Transforms/Utils/CMakeLists.txt b/lib/Transforms/Utils/CMakeLists.txt
index 204c2c6..7adc5f1 100644
--- a/lib/Transforms/Utils/CMakeLists.txt
+++ b/lib/Transforms/Utils/CMakeLists.txt
@@ -21,9 +21,17 @@ add_llvm_library(LLVMTransformUtils
PromoteMemoryToRegister.cpp
SSAUpdater.cpp
SimplifyCFG.cpp
+ SimplifyIndVar.cpp
SimplifyInstructions.cpp
UnifyFunctionExitNodes.cpp
Utils.cpp
ValueMapper.cpp
)
+add_llvm_library_dependencies(LLVMTransformUtils
+ LLVMAnalysis
+ LLVMCore
+ LLVMSupport
+ LLVMTarget
+ LLVMipa
+ )
diff --git a/lib/Transforms/Utils/CloneFunction.cpp b/lib/Transforms/Utils/CloneFunction.cpp
index 6ea831f..cf21f1e 100644
--- a/lib/Transforms/Utils/CloneFunction.cpp
+++ b/lib/Transforms/Utils/CloneFunction.cpp
@@ -331,15 +331,10 @@ ConstantFoldMappedInstruction(const Instruction *I) {
TD);
if (const LoadInst *LI = dyn_cast<LoadInst>(I))
- if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Ops[0]))
- if (!LI->isVolatile() && CE->getOpcode() == Instruction::GetElementPtr)
- if (GlobalVariable *GV = dyn_cast<GlobalVariable>(CE->getOperand(0)))
- if (GV->isConstant() && GV->hasDefinitiveInitializer())
- return ConstantFoldLoadThroughGEPConstantExpr(GV->getInitializer(),
- CE);
-
- return ConstantFoldInstOperands(I->getOpcode(), I->getType(), &Ops[0],
- Ops.size(), TD);
+ if (!LI->isVolatile())
+ return ConstantFoldLoadFromConstPtr(Ops[0], TD);
+
+ return ConstantFoldInstOperands(I->getOpcode(), I->getType(), Ops, TD);
}
/// CloneAndPruneFunctionInto - This works exactly like CloneFunctionInto,
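
The fold above replaces a hand-rolled load-through-constant-GEP pattern with the general ConstantFoldLoadFromConstPtr helper, which covers the same global-initializer case and more. A sketch of the call, assuming 3.0's Analysis/ConstantFolding.h:

    #include "llvm/Analysis/ConstantFolding.h"
    #include "llvm/Constants.h"
    #include "llvm/Target/TargetData.h"

    using namespace llvm;

    // Returns the folded value, or 0 when the pointee is not provably
    // constant; TD may be 0.
    static Constant *tryFoldLoad(Constant *Ptr, const TargetData *TD) {
      return ConstantFoldLoadFromConstPtr(Ptr, TD);
    }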
diff --git a/lib/Transforms/Utils/CloneModule.cpp b/lib/Transforms/Utils/CloneModule.cpp
index a08fa35..a0e027b 100644
--- a/lib/Transforms/Utils/CloneModule.cpp
+++ b/lib/Transforms/Utils/CloneModule.cpp
@@ -50,10 +50,12 @@ Module *llvm::CloneModule(const Module *M, ValueToValueMapTy &VMap) {
I != E; ++I) {
GlobalVariable *GV = new GlobalVariable(*New,
I->getType()->getElementType(),
- false,
- GlobalValue::ExternalLinkage, 0,
- I->getName());
- GV->setAlignment(I->getAlignment());
+ I->isConstant(), I->getLinkage(),
+ (Constant*) 0, I->getName(),
+ (GlobalVariable*) 0,
+ I->isThreadLocal(),
+ I->getType()->getAddressSpace());
+ GV->copyAttributesFrom(I);
VMap[I] = GV;
}
@@ -61,16 +63,19 @@ Module *llvm::CloneModule(const Module *M, ValueToValueMapTy &VMap) {
for (Module::const_iterator I = M->begin(), E = M->end(); I != E; ++I) {
Function *NF =
Function::Create(cast<FunctionType>(I->getType()->getElementType()),
- GlobalValue::ExternalLinkage, I->getName(), New);
+ I->getLinkage(), I->getName(), New);
NF->copyAttributesFrom(I);
VMap[I] = NF;
}
// Loop over the aliases in the module
for (Module::const_alias_iterator I = M->alias_begin(), E = M->alias_end();
- I != E; ++I)
- VMap[I] = new GlobalAlias(I->getType(), GlobalAlias::ExternalLinkage,
- I->getName(), NULL, New);
+ I != E; ++I) {
+ GlobalAlias *GA = new GlobalAlias(I->getType(), I->getLinkage(),
+ I->getName(), NULL, New);
+ GA->copyAttributesFrom(I);
+ VMap[I] = GA;
+ }
// Now that all of the things that global variable initializer can refer to
// have been created, loop through and copy the global variable referrers
@@ -81,9 +86,6 @@ Module *llvm::CloneModule(const Module *M, ValueToValueMapTy &VMap) {
GlobalVariable *GV = cast<GlobalVariable>(VMap[I]);
if (I->hasInitializer())
GV->setInitializer(MapValue(I->getInitializer(), VMap));
- GV->setLinkage(I->getLinkage());
- GV->setThreadLocal(I->isThreadLocal());
- GV->setConstant(I->isConstant());
}
// Similarly, copy over function bodies now...
@@ -101,15 +103,12 @@ Module *llvm::CloneModule(const Module *M, ValueToValueMapTy &VMap) {
SmallVector<ReturnInst*, 8> Returns; // Ignore returns cloned.
CloneFunctionInto(F, I, VMap, /*ModuleLevelChanges=*/true, Returns);
}
-
- F->setLinkage(I->getLinkage());
}
// And aliases
for (Module::const_alias_iterator I = M->alias_begin(), E = M->alias_end();
I != E; ++I) {
GlobalAlias *GA = cast<GlobalAlias>(VMap[I]);
- GA->setLinkage(I->getLinkage());
if (const Constant *C = I->getAliasee())
GA->setAliasee(MapValue(C, VMap));
}
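
The CloneModule changes above stop cloned globals from silently losing their properties: constness, linkage, thread-locality, and address space are set at creation, and copyAttributesFrom carries section, alignment, and visibility, making the later fix-up stores redundant (hence their deletion). A sketch of the convention, with an illustrative helper:

    #include "llvm/GlobalVariable.h"
    #include "llvm/Module.h"

    using namespace llvm;

    static GlobalVariable *cloneGlobalShell(Module &New, GlobalVariable *I) {
      GlobalVariable *GV = new GlobalVariable(
          New, I->getType()->getElementType(), I->isConstant(),
          I->getLinkage(), /*Initializer=*/0, I->getName(),
          /*InsertBefore=*/0, I->isThreadLocal(),
          I->getType()->getAddressSpace());
      GV->copyAttributesFrom(I);   // section, alignment, visibility, ...
      return GV;
    }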
diff --git a/lib/Transforms/Utils/CodeExtractor.cpp b/lib/Transforms/Utils/CodeExtractor.cpp
index 0813523..5f47ebb 100644
--- a/lib/Transforms/Utils/CodeExtractor.cpp
+++ b/lib/Transforms/Utils/CodeExtractor.cpp
@@ -50,14 +50,14 @@ namespace {
DominatorTree* DT;
bool AggregateArgs;
unsigned NumExitBlocks;
- const Type *RetTy;
+ Type *RetTy;
public:
CodeExtractor(DominatorTree* dt = 0, bool AggArgs = false)
: DT(dt), AggregateArgs(AggArgs||AggregateArgsOpt), NumExitBlocks(~0U) {}
- Function *ExtractCodeRegion(const std::vector<BasicBlock*> &code);
+ Function *ExtractCodeRegion(ArrayRef<BasicBlock*> code);
- bool isEligible(const std::vector<BasicBlock*> &code);
+ bool isEligible(ArrayRef<BasicBlock*> code);
private:
/// definedInRegion - Return true if the specified value is defined in the
@@ -290,7 +290,7 @@ Function *CodeExtractor::constructFunction(const Values &inputs,
paramTy.clear();
paramTy.push_back(StructPtr);
}
- const FunctionType *funcType =
+ FunctionType *funcType =
FunctionType::get(RetTy, paramTy, false);
// Create the new function
@@ -317,8 +317,7 @@ Function *CodeExtractor::constructFunction(const Values &inputs,
Idx[1] = ConstantInt::get(Type::getInt32Ty(header->getContext()), i);
TerminatorInst *TI = newFunction->begin()->getTerminator();
GetElementPtrInst *GEP =
- GetElementPtrInst::Create(AI, Idx, Idx+2,
- "gep_" + inputs[i]->getName(), TI);
+ GetElementPtrInst::Create(AI, Idx, "gep_" + inputs[i]->getName(), TI);
RewriteVal = new LoadInst(GEP, "loadgep_" + inputs[i]->getName(), TI);
} else
RewriteVal = AI++;
@@ -420,7 +419,7 @@ emitCallAndSwitchStatement(Function *newFunction, BasicBlock *codeReplacer,
Idx[0] = Constant::getNullValue(Type::getInt32Ty(Context));
Idx[1] = ConstantInt::get(Type::getInt32Ty(Context), i);
GetElementPtrInst *GEP =
- GetElementPtrInst::Create(Struct, Idx, Idx + 2,
+ GetElementPtrInst::Create(Struct, Idx,
"gep_" + StructValues[i]->getName());
codeReplacer->getInstList().push_back(GEP);
StoreInst *SI = new StoreInst(StructValues[i], GEP);
@@ -446,7 +445,7 @@ emitCallAndSwitchStatement(Function *newFunction, BasicBlock *codeReplacer,
Idx[0] = Constant::getNullValue(Type::getInt32Ty(Context));
Idx[1] = ConstantInt::get(Type::getInt32Ty(Context), FirstOut + i);
GetElementPtrInst *GEP
- = GetElementPtrInst::Create(Struct, Idx, Idx + 2,
+ = GetElementPtrInst::Create(Struct, Idx,
"gep_reload_" + outputs[i]->getName());
codeReplacer->getInstList().push_back(GEP);
Output = GEP;
@@ -561,7 +560,7 @@ emitCallAndSwitchStatement(Function *newFunction, BasicBlock *codeReplacer,
Idx[1] = ConstantInt::get(Type::getInt32Ty(Context),
FirstOut+out);
GetElementPtrInst *GEP =
- GetElementPtrInst::Create(OAI, Idx, Idx + 2,
+ GetElementPtrInst::Create(OAI, Idx,
"gep_" + outputs[out]->getName(),
NTRet);
new StoreInst(outputs[out], GEP, NTRet);
@@ -580,7 +579,7 @@ emitCallAndSwitchStatement(Function *newFunction, BasicBlock *codeReplacer,
}
// Now that we've done the deed, simplify the switch instruction.
- const Type *OldFnRetTy = TheSwitch->getParent()->getParent()->getReturnType();
+ Type *OldFnRetTy = TheSwitch->getParent()->getParent()->getReturnType();
switch (NumExitBlocks) {
case 0:
// There are no successors (the block containing the switch itself), which
@@ -655,7 +654,7 @@ void CodeExtractor::moveCodeToFunction(Function *newFunction) {
/// computed result back into memory.
///
Function *CodeExtractor::
-ExtractCodeRegion(const std::vector<BasicBlock*> &code) {
+ExtractCodeRegion(ArrayRef<BasicBlock*> code) {
if (!isEligible(code))
return 0;
@@ -755,9 +754,13 @@ ExtractCodeRegion(const std::vector<BasicBlock*> &code) {
return newFunction;
}
-bool CodeExtractor::isEligible(const std::vector<BasicBlock*> &code) {
+bool CodeExtractor::isEligible(ArrayRef<BasicBlock*> code) {
+ // Deny a single basic block that's a landing pad block.
+ if (code.size() == 1 && code[0]->isLandingPad())
+ return false;
+
// Deny code region if it contains allocas or vastarts.
- for (std::vector<BasicBlock*>::const_iterator BB = code.begin(), e=code.end();
+ for (ArrayRef<BasicBlock*>::iterator BB = code.begin(), e=code.end();
BB != e; ++BB)
for (BasicBlock::const_iterator I = (*BB)->begin(), Ie = (*BB)->end();
I != Ie; ++I)
@@ -771,25 +774,23 @@ bool CodeExtractor::isEligible(const std::vector<BasicBlock*> &code) {
}
-/// ExtractCodeRegion - slurp a sequence of basic blocks into a brand new
-/// function
+/// ExtractCodeRegion - Slurp a sequence of basic blocks into a brand new
+/// function.
///
Function* llvm::ExtractCodeRegion(DominatorTree &DT,
- const std::vector<BasicBlock*> &code,
+ ArrayRef<BasicBlock*> code,
bool AggregateArgs) {
return CodeExtractor(&DT, AggregateArgs).ExtractCodeRegion(code);
}
-/// ExtractBasicBlock - slurp a natural loop into a brand new function
+/// ExtractLoop - Slurp a natural loop into a brand new function.
///
Function* llvm::ExtractLoop(DominatorTree &DT, Loop *L, bool AggregateArgs) {
return CodeExtractor(&DT, AggregateArgs).ExtractCodeRegion(L->getBlocks());
}
-/// ExtractBasicBlock - slurp a basic block into a brand new function
+/// ExtractBasicBlock - Slurp a basic block into a brand new function.
///
-Function* llvm::ExtractBasicBlock(BasicBlock *BB, bool AggregateArgs) {
- std::vector<BasicBlock*> Blocks;
- Blocks.push_back(BB);
- return CodeExtractor(0, AggregateArgs).ExtractCodeRegion(Blocks);
+Function* llvm::ExtractBasicBlock(ArrayRef<BasicBlock*> BBs, bool AggregateArgs){
+ return CodeExtractor(0, AggregateArgs).ExtractCodeRegion(BBs);
}
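The ArrayRef change above is an API cleanup: callers can now pass a single BasicBlock* or a whole container without building a temporary std::vector. A toy stand-in for the idea (illustrative C++ only, not LLVM's actual ArrayRef class):

    #include <cstddef>
    #include <cstdio>
    #include <vector>

    // Non-owning view over contiguous elements; the implicit single-element
    // constructor is what lets the new ExtractBasicBlock take one block
    // directly.
    template <typename T>
    struct ViewRef {
      const T *Data;
      size_t Len;
      ViewRef(const T &OneElt) : Data(&OneElt), Len(1) {}
      ViewRef(const std::vector<T> &V)
          : Data(V.empty() ? 0 : &V[0]), Len(V.size()) {}
    };

    static int sum(ViewRef<int> Code) {
      int S = 0;
      for (size_t i = 0; i != Code.Len; ++i)
        S += Code.Data[i];
      return S;
    }

    int main() {
      int One = 7;
      std::vector<int> Many;
      Many.push_back(1);
      Many.push_back(2);
      printf("%d %d\n", sum(One), sum(Many)); // both call forms work unchanged
      return 0;
    }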
diff --git a/lib/Transforms/Utils/InlineFunction.cpp b/lib/Transforms/Utils/InlineFunction.cpp
index d5b382e..5464dbc 100644
--- a/lib/Transforms/Utils/InlineFunction.cpp
+++ b/lib/Transforms/Utils/InlineFunction.cpp
@@ -45,6 +45,9 @@ bool llvm::InlineFunction(InvokeInst *II, InlineFunctionInfo &IFI) {
return InlineFunction(CallSite(II), IFI);
}
+// FIXME: New EH - Remove the functions marked [LIBUNWIND] when new EH is
+// turned on.
+
/// [LIBUNWIND] Look for an llvm.eh.exception call in the given block.
static EHExceptionInst *findExceptionInBlock(BasicBlock *bb) {
for (BasicBlock::iterator i = bb->begin(), e = bb->end(); i != e; i++) {
@@ -250,20 +253,32 @@ namespace {
PHINode *InnerSelectorPHI;
SmallVector<Value*, 8> UnwindDestPHIValues;
+ // FIXME: New EH - These will replace the analogous ones above.
+ BasicBlock *OuterResumeDest; ///< Destination of the invoke's unwind.
+ BasicBlock *InnerResumeDest; ///< Destination for the callee's resume.
+ LandingPadInst *CallerLPad; ///< LandingPadInst associated with the invoke.
+ PHINode *InnerEHValuesPHI; ///< PHI for EH values from landingpad insts.
+
public:
- InvokeInliningInfo(InvokeInst *II) :
- OuterUnwindDest(II->getUnwindDest()), OuterSelector(0),
- InnerUnwindDest(0), InnerExceptionPHI(0), InnerSelectorPHI(0) {
-
- // If there are PHI nodes in the unwind destination block, we
- // need to keep track of which values came into them from the
- // invoke before removing the edge from this block.
- llvm::BasicBlock *invokeBB = II->getParent();
- for (BasicBlock::iterator I = OuterUnwindDest->begin();
- isa<PHINode>(I); ++I) {
+ InvokeInliningInfo(InvokeInst *II)
+ : OuterUnwindDest(II->getUnwindDest()), OuterSelector(0),
+ InnerUnwindDest(0), InnerExceptionPHI(0), InnerSelectorPHI(0),
+ OuterResumeDest(II->getUnwindDest()), InnerResumeDest(0),
+ CallerLPad(0), InnerEHValuesPHI(0) {
+ // If there are PHI nodes in the unwind destination block, we need to keep
+ // track of which values came into them from the invoke before removing
+ // the edge from this block.
+ llvm::BasicBlock *InvokeBB = II->getParent();
+ BasicBlock::iterator I = OuterUnwindDest->begin();
+ for (; isa<PHINode>(I); ++I) {
// Save the value to use for this edge.
- PHINode *phi = cast<PHINode>(I);
- UnwindDestPHIValues.push_back(phi->getIncomingValueForBlock(invokeBB));
+ PHINode *PHI = cast<PHINode>(I);
+ UnwindDestPHIValues.push_back(PHI->getIncomingValueForBlock(InvokeBB));
+ }
+
+ // FIXME: With the new EH, this if/dyn_cast should be a 'cast'.
+ if (LandingPadInst *LPI = dyn_cast<LandingPadInst>(I)) {
+ CallerLPad = LPI;
}
}
@@ -281,11 +296,23 @@ namespace {
BasicBlock *getInnerUnwindDest();
+ // FIXME: New EH - Rename when new EH is turned on.
+ BasicBlock *getInnerUnwindDestNewEH();
+
+ LandingPadInst *getLandingPadInst() const { return CallerLPad; }
+
bool forwardEHResume(CallInst *call, BasicBlock *src);
- /// Add incoming-PHI values to the unwind destination block for
- /// the given basic block, using the values for the original
- /// invoke's source block.
+ /// forwardResume - Forward the 'resume' instruction to the caller's landing
+ /// pad block. When the landing pad block has only one predecessor, this is
+ /// a simple branch. When there is more than one predecessor, we need to
+ /// split the landing pad block after the landingpad instruction and jump
+ /// to there.
+ void forwardResume(ResumeInst *RI);
+
+ /// addIncomingPHIValuesFor - Add incoming-PHI values to the unwind
+ /// destination block for the given basic block, using the values for the
+ /// original invoke's source block.
void addIncomingPHIValuesFor(BasicBlock *BB) const {
addIncomingPHIValuesForInto(BB, OuterUnwindDest);
}
@@ -300,7 +327,7 @@ namespace {
};
}
-/// Get or create a target for the branch out of rewritten calls to
+/// [LIBUNWIND] Get or create a target for the branch out of rewritten calls to
/// llvm.eh.resume.
BasicBlock *InvokeInliningInfo::getInnerUnwindDest() {
if (InnerUnwindDest) return InnerUnwindDest;
@@ -404,6 +431,60 @@ bool InvokeInliningInfo::forwardEHResume(CallInst *call, BasicBlock *src) {
return true;
}
+/// Get or create a target for the branch from ResumeInsts.
+BasicBlock *InvokeInliningInfo::getInnerUnwindDestNewEH() {
+ // FIXME: New EH - rename this function when new EH is turned on.
+ if (InnerResumeDest) return InnerResumeDest;
+
+ // Split the landing pad.
+ BasicBlock::iterator SplitPoint = CallerLPad; ++SplitPoint;
+ InnerResumeDest =
+ OuterResumeDest->splitBasicBlock(SplitPoint,
+ OuterResumeDest->getName() + ".body");
+
+ // The number of incoming edges we expect to the inner landing pad.
+ const unsigned PHICapacity = 2;
+
+ // Create corresponding new PHIs for all the PHIs in the outer landing pad.
+ BasicBlock::iterator InsertPoint = InnerResumeDest->begin();
+ BasicBlock::iterator I = OuterResumeDest->begin();
+ for (unsigned i = 0, e = UnwindDestPHIValues.size(); i != e; ++i, ++I) {
+ PHINode *OuterPHI = cast<PHINode>(I);
+ PHINode *InnerPHI = PHINode::Create(OuterPHI->getType(), PHICapacity,
+ OuterPHI->getName() + ".lpad-body",
+ InsertPoint);
+ OuterPHI->replaceAllUsesWith(InnerPHI);
+ InnerPHI->addIncoming(OuterPHI, OuterResumeDest);
+ }
+
+ // Create a PHI for the exception values.
+ InnerEHValuesPHI = PHINode::Create(CallerLPad->getType(), PHICapacity,
+ "eh.lpad-body", InsertPoint);
+ CallerLPad->replaceAllUsesWith(InnerEHValuesPHI);
+ InnerEHValuesPHI->addIncoming(CallerLPad, OuterResumeDest);
+
+ // All done.
+ return InnerResumeDest;
+}
+
+/// forwardResume - Forward the 'resume' instruction to the caller's landing pad
+/// block. When the landing pad block has only one predecessor, this is a simple
+/// branch. When there is more than one predecessor, we need to split the
+/// landing pad block after the landingpad instruction and jump to there.
+void InvokeInliningInfo::forwardResume(ResumeInst *RI) {
+ BasicBlock *Dest = getInnerUnwindDestNewEH();
+ BasicBlock *Src = RI->getParent();
+
+ BranchInst::Create(Dest, Src);
+
+ // Update the PHIs in the destination. They were inserted in an order which
+ // makes this work.
+ addIncomingPHIValuesForInto(Src, Dest);
+
+ InnerEHValuesPHI->addIncoming(RI->getOperand(0), Src);
+ RI->eraseFromParent();
+}
+
/// [LIBUNWIND] Check whether this selector is "only cleanups":
/// call i32 @llvm.eh.selector(blah, blah, i32 0)
static bool isCleanupOnlySelector(EHSelectorInst *selector) {
@@ -421,9 +502,19 @@ static bool isCleanupOnlySelector(EHSelectorInst *selector) {
/// Returns true to indicate that the next block should be skipped.
static bool HandleCallsInBlockInlinedThroughInvoke(BasicBlock *BB,
InvokeInliningInfo &Invoke) {
+ LandingPadInst *LPI = Invoke.getLandingPadInst();
+
for (BasicBlock::iterator BBI = BB->begin(), E = BB->end(); BBI != E; ) {
Instruction *I = BBI++;
-
+
+ if (LPI) // FIXME: New EH - This won't be NULL in the new EH.
+ if (LandingPadInst *L = dyn_cast<LandingPadInst>(I)) {
+ unsigned NumClauses = LPI->getNumClauses();
+ L->reserveClauses(NumClauses);
+ for (unsigned i = 0; i != NumClauses; ++i)
+ L->addClause(LPI->getClause(i));
+ }
+
// We only need to check for function calls: inlined invoke
// instructions require no special handling.
CallInst *CI = dyn_cast<CallInst>(I);
@@ -557,6 +648,10 @@ static void HandleInlinedInvoke(InvokeInst *II, BasicBlock *FirstNewBlock,
// there is now a new entry in them.
Invoke.addIncomingPHIValuesFor(BB);
}
+
+ if (ResumeInst *RI = dyn_cast<ResumeInst>(BB->getTerminator())) {
+ Invoke.forwardResume(RI);
+ }
}
// Now that everything is happy, we have one final detail. The PHI nodes in
@@ -636,7 +731,7 @@ static Value *HandleByValArgument(Value *Arg, Instruction *TheCall,
const Function *CalledFunc,
InlineFunctionInfo &IFI,
unsigned ByValAlignment) {
- const Type *AggTy = cast<PointerType>(Arg->getType())->getElementType();
+ Type *AggTy = cast<PointerType>(Arg->getType())->getElementType();
// If the called function is readonly, then it could not mutate the caller's
// copy of the byval'd memory. In this case, it is safe to elide the copy and
@@ -726,7 +821,7 @@ static bool isUsedByLifetimeMarker(Value *V) {
// hasLifetimeMarkers - Check whether the given alloca already has
// lifetime.start or lifetime.end intrinsics.
static bool hasLifetimeMarkers(AllocaInst *AI) {
- const Type *Int8PtrTy = Type::getInt8PtrTy(AI->getType()->getContext());
+ Type *Int8PtrTy = Type::getInt8PtrTy(AI->getType()->getContext());
if (AI->getType() == Int8PtrTy)
return isUsedByLifetimeMarker(AI);
@@ -770,8 +865,15 @@ static void fixupLineNumbers(Function *Fn, Function::iterator FI,
for (BasicBlock::iterator BI = FI->begin(), BE = FI->end();
BI != BE; ++BI) {
DebugLoc DL = BI->getDebugLoc();
- if (!DL.isUnknown())
+ if (!DL.isUnknown()) {
BI->setDebugLoc(updateInlinedAtInfo(DL, TheCallDL, BI->getContext()));
+ if (DbgValueInst *DVI = dyn_cast<DbgValueInst>(BI)) {
+ LLVMContext &Ctx = BI->getContext();
+ MDNode *InlinedAt = BI->getDebugLoc().getInlinedAt(Ctx);
+ DVI->setOperand(2, createInlinedVariable(DVI->getVariable(),
+ InlinedAt, Ctx));
+ }
+ }
}
}
}
@@ -822,6 +924,40 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI) {
return false;
}
+ // Find the personality function used by the landing pads of the caller. If it
+ // exists, then check to see that it matches the personality function used in
+ // the callee.
+ for (Function::const_iterator
+ I = Caller->begin(), E = Caller->end(); I != E; ++I)
+ if (const InvokeInst *II = dyn_cast<InvokeInst>(I->getTerminator())) {
+ const BasicBlock *BB = II->getUnwindDest();
+ // FIXME: This 'isa' here should go away once the new EH system is
+ // in place.
+ if (!isa<LandingPadInst>(BB->getFirstNonPHI()))
+ continue;
+ const LandingPadInst *LP = cast<LandingPadInst>(BB->getFirstNonPHI());
+ const Value *CallerPersFn = LP->getPersonalityFn();
+
+ // If the personality functions match, then we can perform the
+ // inlining. Otherwise, we can't inline.
+ // TODO: This isn't 100% true. Some personality functions are proper
+ // supersets of others and can be used in place of the others.
+ for (Function::const_iterator
+ I = CalledFunc->begin(), E = CalledFunc->end(); I != E; ++I)
+ if (const InvokeInst *II = dyn_cast<InvokeInst>(I->getTerminator())) {
+ const BasicBlock *BB = II->getUnwindDest();
+ // FIXME: This 'if/dyn_cast' here should become a normal 'cast' once
+ // the new EH system is in place.
+ if (const LandingPadInst *LP =
+ dyn_cast<LandingPadInst>(BB->getFirstNonPHI()))
+ if (CallerPersFn != LP->getPersonalityFn())
+ return false;
+ break;
+ }
+
+ break;
+ }
+
// Get an iterator to the last basic block in the function, which will have
// the new function inlined after it.
//
@@ -1090,7 +1226,7 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI) {
// Handle all of the return instructions that we just cloned in, and eliminate
// any users of the original call/invoke instruction.
- const Type *RTy = CalledFunc->getReturnType();
+ Type *RTy = CalledFunc->getReturnType();
PHINode *PHI = 0;
if (Returns.size() > 1) {
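The personality-function guard added above boils down to one rule: refuse to inline when the callee's landing pads name a different personality function than the caller's. A self-contained model of that decision (hypothetical types and names, not LLVM's API):

    #include <cstdio>

    struct Pad { const void *PersonalityFn; };

    // Inlining is allowed only when every callee landing pad uses the same
    // personality function as the caller (a caller with no landing pads
    // imposes no constraint).
    static bool canInline(const void *CallerPersFn, const Pad *CalleePads,
                          unsigned N) {
      if (!CallerPersFn)
        return true;
      for (unsigned i = 0; i != N; ++i)
        if (CalleePads[i].PersonalityFn != CallerPersFn)
          return false; // mismatched personalities block inlining
      return true;
    }

    int main() {
      int gxx = 0, sjlj = 0; // stand-ins for two personality symbols
      Pad Pads[] = { { &gxx }, { &gxx } };
      printf("%d\n", canInline(&gxx, Pads, 2));  // 1: personalities match
      printf("%d\n", canInline(&sjlj, Pads, 2)); // 0: mismatch
      return 0;
    }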
diff --git a/lib/Transforms/Utils/Local.cpp b/lib/Transforms/Utils/Local.cpp
index 0f6d9ae..7034feb 100644
--- a/lib/Transforms/Utils/Local.cpp
+++ b/lib/Transforms/Utils/Local.cpp
@@ -27,7 +27,6 @@
#include "llvm/Analysis/DebugInfo.h"
#include "llvm/Analysis/DIBuilder.h"
#include "llvm/Analysis/Dominators.h"
-#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/ProfileInfo.h"
#include "llvm/Analysis/ValueTracking.h"
@@ -227,13 +226,17 @@ bool llvm::ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions) {
bool llvm::isInstructionTriviallyDead(Instruction *I) {
if (!I->use_empty() || isa<TerminatorInst>(I)) return false;
+ // We don't want the landingpad instruction removed by anything this general.
+ if (isa<LandingPadInst>(I))
+ return false;
+
// We don't want debug info removed by anything this general, unless
// debug info is empty.
if (DbgDeclareInst *DDI = dyn_cast<DbgDeclareInst>(I)) {
- if (DDI->getAddress())
+ if (DDI->getAddress())
return false;
return true;
- }
+ }
if (DbgValueInst *DVI = dyn_cast<DbgValueInst>(I)) {
if (DVI->getValue())
return false;
@@ -244,10 +247,16 @@ bool llvm::isInstructionTriviallyDead(Instruction *I) {
// Special case intrinsics that "may have side effects" but can be deleted
// when dead.
- if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I))
+ if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
// Safe to delete llvm.stacksave if dead.
if (II->getIntrinsicID() == Intrinsic::stacksave)
return true;
+
+ // Lifetime intrinsics are dead when their right-hand operand is undef.
+ if (II->getIntrinsicID() == Intrinsic::lifetime_start ||
+ II->getIntrinsicID() == Intrinsic::lifetime_end)
+ return isa<UndefValue>(II->getArgOperand(1));
+ }
return false;
}
@@ -712,10 +721,14 @@ bool llvm::EliminateDuplicatePHINodes(BasicBlock *BB) {
/// their preferred alignment from the beginning.
///
static unsigned enforceKnownAlignment(Value *V, unsigned Align,
- unsigned PrefAlign) {
+ unsigned PrefAlign, const TargetData *TD) {
V = V->stripPointerCasts();
if (AllocaInst *AI = dyn_cast<AllocaInst>(V)) {
+ // If the preferred alignment is greater than the natural stack alignment
+ // then don't round up. This avoids dynamic stack realignment.
+ if (TD && TD->exceedsNaturalStackAlignment(PrefAlign))
+ return Align;
// If there is a requested alignment and if this is an alloca, round up.
if (AI->getAlignment() >= PrefAlign)
return AI->getAlignment();
@@ -766,7 +779,7 @@ unsigned llvm::getOrEnforceKnownAlignment(Value *V, unsigned PrefAlign,
Align = std::min(Align, +Value::MaximumAlignment);
if (PrefAlign > Align)
- Align = enforceKnownAlignment(V, Align, PrefAlign);
+ Align = enforceKnownAlignment(V, Align, PrefAlign, TD);
// We don't need to make any adjustment.
return Align;
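The TD parameter threaded into enforceKnownAlignment above exists for one comparison: never round an alloca up past the target's natural stack alignment, since that would force dynamic stack realignment. The arithmetic, as a standalone model (exceedsNaturalStackAlignment approximated by a plain compare):

    #include <algorithm>
    #include <cstdio>

    static unsigned enforceKnownAlignment(unsigned CurAlign, unsigned PrefAlign,
                                          unsigned NaturalStackAlign) {
      if (PrefAlign > NaturalStackAlign)
        return CurAlign; // would require dynamic realignment; leave it alone
      return std::max(CurAlign, PrefAlign); // otherwise round the alloca up
    }

    int main() {
      printf("%u\n", enforceKnownAlignment(4, 16, 16)); // 16: rounded up
      printf("%u\n", enforceKnownAlignment(4, 32, 16)); // 4: left as-is
      return 0;
    }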
diff --git a/lib/Transforms/Utils/LoopSimplify.cpp b/lib/Transforms/Utils/LoopSimplify.cpp
index e79fb5a..cbd54a8 100644
--- a/lib/Transforms/Utils/LoopSimplify.cpp
+++ b/lib/Transforms/Utils/LoopSimplify.cpp
@@ -213,7 +213,7 @@ ReprocessLoop:
// predecessors from outside of the loop, split the edge now.
SmallVector<BasicBlock*, 8> ExitBlocks;
L->getExitBlocks(ExitBlocks);
-
+
SmallSetVector<BasicBlock *, 8> ExitBlockSet(ExitBlocks.begin(),
ExitBlocks.end());
for (SmallSetVector<BasicBlock *, 8>::iterator I = ExitBlockSet.begin(),
@@ -325,6 +325,14 @@ ReprocessLoop:
DEBUG(dbgs() << "LoopSimplify: Eliminating exiting block "
<< ExitingBlock->getName() << "\n");
+ // If any reachable control flow within this loop has changed, notify
+ // ScalarEvolution. Currently assume the parent loop doesn't change
+ // (splitting edges doesn't count). If blocks, CFG edges, or other values
+ // in the parent loop change, then we need to call forgetLoop() on the
+ // parent instead.
+ if (SE)
+ SE->forgetLoop(L);
+
assert(pred_begin(ExitingBlock) == pred_end(ExitingBlock));
Changed = true;
LI->removeBlock(ExitingBlock);
@@ -402,13 +410,24 @@ BasicBlock *LoopSimplify::RewriteLoopExitBlock(Loop *L, BasicBlock *Exit) {
}
assert(!LoopBlocks.empty() && "No edges coming in from outside the loop?");
- BasicBlock *NewBB = SplitBlockPredecessors(Exit, &LoopBlocks[0],
- LoopBlocks.size(), ".loopexit",
- this);
+ BasicBlock *NewExitBB = 0;
+
+ if (Exit->isLandingPad()) {
+ SmallVector<BasicBlock*, 2> NewBBs;
+ SplitLandingPadPredecessors(Exit, ArrayRef<BasicBlock*>(&LoopBlocks[0],
+ LoopBlocks.size()),
+ ".loopexit", ".nonloopexit",
+ this, NewBBs);
+ NewExitBB = NewBBs[0];
+ } else {
+ NewExitBB = SplitBlockPredecessors(Exit, &LoopBlocks[0],
+ LoopBlocks.size(), ".loopexit",
+ this);
+ }
DEBUG(dbgs() << "LoopSimplify: Creating dedicated exit block "
- << NewBB->getName() << "\n");
- return NewBB;
+ << NewExitBB->getName() << "\n");
+ return NewExitBB;
}
/// AddBlockAndPredsToSet - Add the specified block, and all of its
@@ -467,23 +486,23 @@ void LoopSimplify::PlaceSplitBlockCarefully(BasicBlock *NewBB,
if (&*BBI == SplitPreds[i])
return;
}
-
+
// If it isn't already after an outside block, move it after one. This is
// always good as it makes the uncond branch from the outside block into a
// fall-through.
-
+
// Figure out *which* outside block to put this after. Prefer an outside
// block that neighbors a BB actually in the loop.
BasicBlock *FoundBB = 0;
for (unsigned i = 0, e = SplitPreds.size(); i != e; ++i) {
Function::iterator BBI = SplitPreds[i];
- if (++BBI != NewBB->getParent()->end() &&
+ if (++BBI != NewBB->getParent()->end() &&
L->contains(BBI)) {
FoundBB = SplitPreds[i];
break;
}
}
-
+
// If our heuristic for a *good* bb to place this after doesn't find
// anything, just pick something. It's likely better than leaving it within
// the loop.
@@ -544,7 +563,7 @@ Loop *LoopSimplify::SeparateNestedLoop(Loop *L, LPPassManager &LPM) {
// Make sure that NewBB is put someplace intelligent, which doesn't mess up
// code layout too horribly.
PlaceSplitBlockCarefully(NewBB, OuterLoopPreds, L);
-
+
// Create the new outer loop.
Loop *NewOuter = new Loop();
@@ -735,6 +754,7 @@ void LoopSimplify::verifyAnalysis() const {
}
assert(HasIndBrPred &&
"LoopSimplify has no excuse for missing loop header info!");
+ (void)HasIndBrPred;
}
// Indirectbr can interfere with exit block canonicalization.
@@ -742,12 +762,15 @@ void LoopSimplify::verifyAnalysis() const {
bool HasIndBrExiting = false;
SmallVector<BasicBlock*, 8> ExitingBlocks;
L->getExitingBlocks(ExitingBlocks);
- for (unsigned i = 0, e = ExitingBlocks.size(); i != e; ++i)
+ for (unsigned i = 0, e = ExitingBlocks.size(); i != e; ++i) {
if (isa<IndirectBrInst>((ExitingBlocks[i])->getTerminator())) {
HasIndBrExiting = true;
break;
}
+ }
+
assert(HasIndBrExiting &&
"LoopSimplify has no excuse for missing exit block info!");
+ (void)HasIndBrExiting;
}
}
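The (void)HasIndBrPred and (void)HasIndBrExiting lines added above are the standard idiom for variables that are only read inside assert(); a minimal illustration:

    #include <cassert>

    int main() {
      bool HasExit = true;
      assert(HasExit && "loop must have an exit");
      (void)HasExit; // still "used" when -DNDEBUG compiles the assert away,
                     // so no unused-variable warning in release builds
      return 0;
    }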
diff --git a/lib/Transforms/Utils/LoopUnroll.cpp b/lib/Transforms/Utils/LoopUnroll.cpp
index 6772511..62e4fa2 100644
--- a/lib/Transforms/Utils/LoopUnroll.cpp
+++ b/lib/Transforms/Utils/LoopUnroll.cpp
@@ -11,9 +11,6 @@
// actual pass or policy, but provides a single function to perform loop
// unrolling.
//
-// It works best when loops have been canonicalized by the -indvars pass,
-// allowing it to determine the trip counts of loops easily.
-//
// The process of unrolling can produce extraneous basic blocks linked with
// unconditional branches. This will be corrected in the future.
//
@@ -24,6 +21,7 @@
#include "llvm/BasicBlock.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/Analysis/LoopIterator.h"
#include "llvm/Analysis/LoopPass.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Support/Debug.h"
@@ -31,6 +29,7 @@
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Cloning.h"
#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/Transforms/Utils/SimplifyIndVar.h"
using namespace llvm;
// TODO: Should these be here or in LoopUnroll?
@@ -61,7 +60,8 @@ static inline void RemapInstruction(Instruction *I,
/// only has one predecessor, and that predecessor only has one successor.
/// The LoopInfo Analysis that is passed will be kept consistent.
/// Returns the new combined block.
-static BasicBlock *FoldBlockIntoPredecessor(BasicBlock *BB, LoopInfo* LI) {
+static BasicBlock *FoldBlockIntoPredecessor(BasicBlock *BB, LoopInfo* LI,
+ LPPassManager *LPM) {
// Merge basic blocks into their predecessor if there is only one distinct
// pred, and if there is only one distinct successor of the predecessor, and
// if there are no PHI nodes.
@@ -93,6 +93,12 @@ static BasicBlock *FoldBlockIntoPredecessor(BasicBlock *BB, LoopInfo* LI) {
std::string OldName = BB->getName();
// Erase basic block from the function...
+
+ // ScalarEvolution holds references to loop exit blocks.
+ if (ScalarEvolution *SE = LPM->getAnalysisIfAvailable<ScalarEvolution>()) {
+ if (Loop *L = LI->getLoopFor(BB))
+ SE->forgetLoop(L);
+ }
LI->removeBlock(BB);
BB->eraseFromParent();
@@ -109,12 +115,27 @@ static BasicBlock *FoldBlockIntoPredecessor(BasicBlock *BB, LoopInfo* LI) {
/// branch instruction. However, if the trip count (and multiple) are not known,
/// loop unrolling will mostly produce more code that is no faster.
///
+/// TripCount is generally defined as the number of times the loop header
+/// executes. UnrollLoop relaxes the definition to permit early exits: here
+/// TripCount is the iteration on which control exits LatchBlock if no early
+/// exits were taken. Note that UnrollLoop assumes that the loop counter test
+/// terminates LatchBlock in order to remove unnecessary instances of the
+/// test. In other words, control may exit the loop prior to TripCount
+/// iterations via an early branch, but control may not exit the loop from the
+/// LatchBlock's terminator prior to TripCount iterations.
+///
+/// Similarly, TripMultiple divides the number of times that the LatchBlock may
+/// execute without exiting the loop.
+///
/// The LoopInfo Analysis that is passed will be kept consistent.
///
/// If a LoopPassManager is passed in, and the loop is fully removed, it will be
/// removed from the LoopPassManager as well. LPM can also be NULL.
-bool llvm::UnrollLoop(Loop *L, unsigned Count,
- LoopInfo *LI, LPPassManager *LPM) {
+///
+/// This utility preserves LoopInfo. If DominatorTree or ScalarEvolution are
+/// available it must also preserve those analyses.
+bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount,
+ unsigned TripMultiple, LoopInfo *LI, LPPassManager *LPM) {
BasicBlock *Preheader = L->getLoopPreheader();
if (!Preheader) {
DEBUG(dbgs() << " Can't unroll; loop preheader-insertion failed.\n");
@@ -129,14 +150,14 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count,
BasicBlock *Header = L->getHeader();
BranchInst *BI = dyn_cast<BranchInst>(LatchBlock->getTerminator());
-
+
if (!BI || BI->isUnconditional()) {
// The loop-rotate pass can be helpful to avoid this in many cases.
DEBUG(dbgs() <<
" Can't unroll; loop not terminated by a conditional branch.\n");
return false;
}
-
+
if (Header->hasAddressTaken()) {
// The loop-rotate pass can be helpful to avoid this in many cases.
DEBUG(dbgs() <<
@@ -146,16 +167,10 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count,
// Notify ScalarEvolution that the loop will be substantially changed,
// if not outright eliminated.
- if (ScalarEvolution *SE = LPM->getAnalysisIfAvailable<ScalarEvolution>())
+ ScalarEvolution *SE = LPM->getAnalysisIfAvailable<ScalarEvolution>();
+ if (SE)
SE->forgetLoop(L);
- // Find trip count
- unsigned TripCount = L->getSmallConstantTripCount();
- // Find trip multiple if count is not available
- unsigned TripMultiple = 1;
- if (TripCount == 0)
- TripMultiple = L->getSmallConstantTripMultiple();
-
if (TripCount != 0)
DEBUG(dbgs() << " Trip Count = " << TripCount << "\n");
if (TripMultiple != 1)
@@ -208,12 +223,7 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count,
ValueToValueMapTy LastValueMap;
std::vector<PHINode*> OrigPHINode;
for (BasicBlock::iterator I = Header->begin(); isa<PHINode>(I); ++I) {
- PHINode *PN = cast<PHINode>(I);
- OrigPHINode.push_back(PN);
- if (Instruction *I =
- dyn_cast<Instruction>(PN->getIncomingValueForBlock(LatchBlock)))
- if (L->contains(I))
- LastValueMap[I] = I;
+ OrigPHINode.push_back(cast<PHINode>(I));
}
std::vector<BasicBlock*> Headers;
@@ -221,11 +231,20 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count,
Headers.push_back(Header);
Latches.push_back(LatchBlock);
+ // The current on-the-fly SSA update requires blocks to be processed in
+ // reverse postorder so that LastValueMap contains the correct value at each
+ // exit.
+ LoopBlocksDFS DFS(L);
+ DFS.perform(LI);
+
+ // Stash the DFS iterators before adding blocks to the loop.
+ LoopBlocksDFS::RPOIterator BlockBegin = DFS.beginRPO();
+ LoopBlocksDFS::RPOIterator BlockEnd = DFS.endRPO();
+
for (unsigned It = 1; It != Count; ++It) {
std::vector<BasicBlock*> NewBlocks;
-
- for (std::vector<BasicBlock*>::iterator BB = LoopBlocks.begin(),
- E = LoopBlocks.end(); BB != E; ++BB) {
+
+ for (LoopBlocksDFS::RPOIterator BB = BlockBegin; BB != BlockEnd; ++BB) {
ValueToValueMapTy VMap;
BasicBlock *New = CloneBasicBlock(*BB, VMap, "." + Twine(It));
Header->getParent()->getBasicBlockList().push_back(New);
@@ -251,75 +270,55 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count,
L->addBasicBlockToLoop(New, LI->getBase());
- // Add phi entries for newly created values to all exit blocks except
- // the successor of the latch block. The successor of the exit block will
- // be updated specially after unrolling all the way.
- if (*BB != LatchBlock)
- for (succ_iterator SI = succ_begin(*BB), SE = succ_end(*BB); SI != SE;
- ++SI)
- if (!L->contains(*SI))
- for (BasicBlock::iterator BBI = (*SI)->begin();
- PHINode *phi = dyn_cast<PHINode>(BBI); ++BBI) {
- Value *Incoming = phi->getIncomingValueForBlock(*BB);
- phi->addIncoming(Incoming, New);
- }
-
+ // Add phi entries for newly created values to all exit blocks.
+ for (succ_iterator SI = succ_begin(*BB), SE = succ_end(*BB);
+ SI != SE; ++SI) {
+ if (L->contains(*SI))
+ continue;
+ for (BasicBlock::iterator BBI = (*SI)->begin();
+ PHINode *phi = dyn_cast<PHINode>(BBI); ++BBI) {
+ Value *Incoming = phi->getIncomingValueForBlock(*BB);
+ ValueToValueMapTy::iterator It = LastValueMap.find(Incoming);
+ if (It != LastValueMap.end())
+ Incoming = It->second;
+ phi->addIncoming(Incoming, New);
+ }
+ }
// Keep track of new headers and latches as we create them, so that
// we can insert the proper branches later.
if (*BB == Header)
Headers.push_back(New);
- if (*BB == LatchBlock) {
+ if (*BB == LatchBlock)
Latches.push_back(New);
- // Also, clear out the new latch's back edge so that it doesn't look
- // like a new loop, so that it's amenable to being merged with adjacent
- // blocks later on.
- TerminatorInst *Term = New->getTerminator();
- assert(L->contains(Term->getSuccessor(!ContinueOnTrue)));
- assert(Term->getSuccessor(ContinueOnTrue) == LoopExit);
- Term->setSuccessor(!ContinueOnTrue, NULL);
- }
-
NewBlocks.push_back(New);
}
-
+
// Remap all instructions in the most recent iteration
for (unsigned i = 0; i < NewBlocks.size(); ++i)
for (BasicBlock::iterator I = NewBlocks[i]->begin(),
E = NewBlocks[i]->end(); I != E; ++I)
::RemapInstruction(I, LastValueMap);
}
-
- // The latch block exits the loop. If there are any PHI nodes in the
- // successor blocks, update them to use the appropriate values computed as the
- // last iteration of the loop.
- if (Count != 1) {
- BasicBlock *LastIterationBB = cast<BasicBlock>(LastValueMap[LatchBlock]);
- for (succ_iterator SI = succ_begin(LatchBlock), SE = succ_end(LatchBlock);
- SI != SE; ++SI) {
- for (BasicBlock::iterator BBI = (*SI)->begin();
- PHINode *PN = dyn_cast<PHINode>(BBI); ++BBI) {
- Value *InVal = PN->removeIncomingValue(LatchBlock, false);
- // If this value was defined in the loop, take the value defined by the
- // last iteration of the loop.
- if (Instruction *InValI = dyn_cast<Instruction>(InVal)) {
- if (L->contains(InValI))
- InVal = LastValueMap[InVal];
- }
- PN->addIncoming(InVal, LastIterationBB);
- }
- }
- }
- // Now, if we're doing complete unrolling, loop over the PHI nodes in the
- // original block, setting them to their incoming values.
- if (CompletelyUnroll) {
- BasicBlock *Preheader = L->getLoopPreheader();
- for (unsigned i = 0, e = OrigPHINode.size(); i != e; ++i) {
- PHINode *PN = OrigPHINode[i];
+ // Loop over the PHI nodes in the original block, setting incoming values.
+ for (unsigned i = 0, e = OrigPHINode.size(); i != e; ++i) {
+ PHINode *PN = OrigPHINode[i];
+ if (CompletelyUnroll) {
PN->replaceAllUsesWith(PN->getIncomingValueForBlock(Preheader));
Header->getInstList().erase(PN);
}
+ else if (Count > 1) {
+ Value *InVal = PN->removeIncomingValue(LatchBlock, false);
+ // If this value was defined in the loop, take the value defined by the
+ // last iteration of the loop.
+ if (Instruction *InValI = dyn_cast<Instruction>(InVal)) {
+ if (L->contains(InValI))
+ InVal = LastValueMap[InVal];
+ }
+ assert(Latches.back() == LastValueMap[LatchBlock] && "bad last latch");
+ PN->addIncoming(InVal, Latches.back());
+ }
}
// Now that all the basic blocks for the unrolled iterations are in place,
@@ -351,6 +350,19 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count,
// iteration.
Term->setSuccessor(!ContinueOnTrue, Dest);
} else {
+ // Remove phi operands at this loop exit
+ if (Dest != LoopExit) {
+ BasicBlock *BB = Latches[i];
+ for (succ_iterator SI = succ_begin(BB), SE = succ_end(BB);
+ SI != SE; ++SI) {
+ if (*SI == Headers[i])
+ continue;
+ for (BasicBlock::iterator BBI = (*SI)->begin();
+ PHINode *Phi = dyn_cast<PHINode>(BBI); ++BBI) {
+ Phi->removeIncomingValue(BB, false);
+ }
+ }
+ }
// Replace the conditional branch with an unconditional one.
BranchInst::Create(Dest, Term);
Term->eraseFromParent();
@@ -362,11 +374,29 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count,
BranchInst *Term = cast<BranchInst>(Latches[i]->getTerminator());
if (Term->isUnconditional()) {
BasicBlock *Dest = Term->getSuccessor(0);
- if (BasicBlock *Fold = FoldBlockIntoPredecessor(Dest, LI))
+ if (BasicBlock *Fold = FoldBlockIntoPredecessor(Dest, LI, LPM))
std::replace(Latches.begin(), Latches.end(), Dest, Fold);
}
}
-
+
+ // FIXME: Reconstruct dom info, because it is not preserved properly.
+ // Incrementally updating domtree after loop unrolling would be easy.
+ if (DominatorTree *DT = LPM->getAnalysisIfAvailable<DominatorTree>())
+ DT->runOnFunction(*L->getHeader()->getParent());
+
+ // Simplify any new induction variables in the partially unrolled loop.
+ if (SE && !CompletelyUnroll) {
+ SmallVector<WeakVH, 16> DeadInsts;
+ simplifyLoopIVs(L, SE, LPM, DeadInsts);
+
+ // Aggressively clean up dead instructions that simplifyLoopIVs already
+ // identified. Any remaining should be cleaned up below.
+ while (!DeadInsts.empty())
+ if (Instruction *Inst =
+ dyn_cast_or_null<Instruction>(&*DeadInsts.pop_back_val()))
+ RecursivelyDeleteTriviallyDeadInstructions(Inst);
+ }
+
// At this point, the code is well formed. We now do a quick sweep over the
// inserted code, doing constant propagation and dead code elimination as we
// go.
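The TripCount/TripMultiple contract documented in the UnrollLoop comment above can be summarized numerically. A toy model of the two decisions the pass derives from them (illustrative only, not LLVM code):

    #include <cstdio>

    int main() {
      unsigned TripCount = 12;   // latch exits on iteration 12, if known
      unsigned TripMultiple = 4; // latch executions are a known multiple of 4
      unsigned Count = 4;        // requested unroll factor

      // Unrolling by exactly TripCount removes the loop entirely.
      bool CompletelyUnroll = (Count == TripCount);
      // When the unroll factor divides TripMultiple, the exit can only occur
      // on an iteration congruent to 0 mod Count, so the intermediate copies
      // of the latch test are provably not taken and can fall through.
      bool InnerTestsRemovable = (TripMultiple % Count == 0);

      printf("complete=%d inner-tests-removable=%d\n",
             CompletelyUnroll, InnerTestsRemovable);
      return 0;
    }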
diff --git a/lib/Transforms/Utils/LowerExpectIntrinsic.cpp b/lib/Transforms/Utils/LowerExpectIntrinsic.cpp
index c1213fa..61ab3f6 100644
--- a/lib/Transforms/Utils/LowerExpectIntrinsic.cpp
+++ b/lib/Transforms/Utils/LowerExpectIntrinsic.cpp
@@ -58,7 +58,7 @@ bool LowerExpectIntrinsic::HandleSwitchExpect(SwitchInst *SI) {
return false;
LLVMContext &Context = CI->getContext();
- const Type *Int32Ty = Type::getInt32Ty(Context);
+ Type *Int32Ty = Type::getInt32Ty(Context);
unsigned caseNo = SI->findCaseValue(ExpectedValue);
std::vector<Value *> Vec;
@@ -105,7 +105,7 @@ bool LowerExpectIntrinsic::HandleIfExpect(BranchInst *BI) {
return false;
LLVMContext &Context = CI->getContext();
- const Type *Int32Ty = Type::getInt32Ty(Context);
+ Type *Int32Ty = Type::getInt32Ty(Context);
bool Likely = ExpectedValue->isOne();
// If expect value is equal to 1 it means that we are more likely to take
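For context, the llvm.expect intrinsic that this pass lowers typically enters the IR via __builtin_expect in C/C++; the pass rewrites it into branch-weight metadata on the resulting branch or switch. A minimal source-level example (compiles with GCC or Clang):

    #include <cstdio>

    static int process(int err) {
      if (__builtin_expect(err != 0, 0)) // annotate the error path as unlikely
        return -1;
      return 0;
    }

    int main() {
      printf("%d\n", process(0)); // 0: the expected, fast path
      return 0;
    }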
diff --git a/lib/Transforms/Utils/LowerInvoke.cpp b/lib/Transforms/Utils/LowerInvoke.cpp
index f77d19d..c96c8fc 100644
--- a/lib/Transforms/Utils/LowerInvoke.cpp
+++ b/lib/Transforms/Utils/LowerInvoke.cpp
@@ -120,18 +120,18 @@ FunctionPass *llvm::createLowerInvokePass(const TargetLowering *TLI,
// doInitialization - Make sure that there is a prototype for abort in the
// current module.
bool LowerInvoke::doInitialization(Module &M) {
- const Type *VoidPtrTy = Type::getInt8PtrTy(M.getContext());
+ Type *VoidPtrTy = Type::getInt8PtrTy(M.getContext());
if (useExpensiveEHSupport) {
// Insert a type for the linked list of jump buffers.
unsigned JBSize = TLI ? TLI->getJumpBufSize() : 0;
JBSize = JBSize ? JBSize : 200;
Type *JmpBufTy = ArrayType::get(VoidPtrTy, JBSize);
- JBLinkTy = StructType::createNamed(M.getContext(), "llvm.sjljeh.jmpbufty");
+ JBLinkTy = StructType::create(M.getContext(), "llvm.sjljeh.jmpbufty");
Type *Elts[] = { JmpBufTy, PointerType::getUnqual(JBLinkTy) };
JBLinkTy->setBody(Elts);
- const Type *PtrJBList = PointerType::getUnqual(JBLinkTy);
+ Type *PtrJBList = PointerType::getUnqual(JBLinkTy);
// Now that we've done that, insert the jmpbuf list head global, unless it
// already exists.
@@ -240,14 +240,14 @@ void LowerInvoke::rewriteExpensiveInvoke(InvokeInst *II, unsigned InvokeNo,
CallInst* StackSaveRet = CallInst::Create(StackSaveFn, "ssret", II);
new StoreInst(StackSaveRet, StackPtr, true, II); // volatile
- BasicBlock::iterator NI = II->getNormalDest()->getFirstNonPHI();
+ BasicBlock::iterator NI = II->getNormalDest()->getFirstInsertionPt();
// nonvolatile.
new StoreInst(Constant::getNullValue(Type::getInt32Ty(II->getContext())),
InvokeNum, false, NI);
- Instruction* StackPtrLoad = new LoadInst(StackPtr, "stackptr.restore", true,
- II->getUnwindDest()->getFirstNonPHI()
- );
+ Instruction* StackPtrLoad =
+ new LoadInst(StackPtr, "stackptr.restore", true,
+ II->getUnwindDest()->getFirstInsertionPt());
CallInst::Create(StackRestoreFn, StackPtrLoad, "")->insertAfter(StackPtrLoad);
// Add a switch case to our unwind block.
@@ -305,7 +305,7 @@ splitLiveRangesLiveAcrossInvokes(SmallVectorImpl<InvokeInst*> &Invokes) {
++AfterAllocaInsertPt;
for (Function::arg_iterator AI = F->arg_begin(), E = F->arg_end();
AI != E; ++AI) {
- const Type *Ty = AI->getType();
+ Type *Ty = AI->getType();
// Aggregate types can't be cast, but are legal argument types, so we have
// to handle them differently. We use an extract/insert pair as a
// lightweight method to achieve the same goal.
@@ -406,6 +406,7 @@ bool LowerInvoke::insertExpensiveEHSupport(Function &F) {
SmallVector<ReturnInst*,16> Returns;
SmallVector<UnwindInst*,16> Unwinds;
SmallVector<InvokeInst*,16> Invokes;
+ UnreachableInst* UnreachablePlaceholder = 0;
for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB)
if (ReturnInst *RI = dyn_cast<ReturnInst>(BB->getTerminator())) {
@@ -455,8 +456,7 @@ bool LowerInvoke::insertExpensiveEHSupport(Function &F) {
Value *Idx[] = { Constant::getNullValue(Type::getInt32Ty(F.getContext())),
ConstantInt::get(Type::getInt32Ty(F.getContext()), 1) };
- OldJmpBufPtr = GetElementPtrInst::Create(JmpBuf, &Idx[0], &Idx[2],
- "OldBuf",
+ OldJmpBufPtr = GetElementPtrInst::Create(JmpBuf, Idx, "OldBuf",
EntryBB->getTerminator());
// Copy the JBListHead to the alloca.
@@ -487,9 +487,10 @@ bool LowerInvoke::insertExpensiveEHSupport(Function &F) {
// Insert a load in the Catch block, and a switch on its value. By default,
// we go to a block that just does an unwind (which is the correct action
- // for a standard call).
+ // for a standard call). We insert an unreachable instruction here and
+ // modify the block to jump to the correct unwinding pad later.
BasicBlock *UnwindBB = BasicBlock::Create(F.getContext(), "unwindbb", &F);
- Unwinds.push_back(new UnwindInst(F.getContext(), UnwindBB));
+ UnreachablePlaceholder = new UnreachableInst(F.getContext(), UnwindBB);
Value *CatchLoad = new LoadInst(InvokeNum, "invoke.num", true, CatchBB);
SwitchInst *CatchSwitch =
@@ -502,8 +503,7 @@ bool LowerInvoke::insertExpensiveEHSupport(Function &F) {
"setjmp.cont");
Idx[1] = ConstantInt::get(Type::getInt32Ty(F.getContext()), 0);
- Value *JmpBufPtr = GetElementPtrInst::Create(JmpBuf, &Idx[0], &Idx[2],
- "TheJmpBuf",
+ Value *JmpBufPtr = GetElementPtrInst::Create(JmpBuf, Idx, "TheJmpBuf",
EntryBB->getTerminator());
JmpBufPtr = new BitCastInst(JmpBufPtr,
Type::getInt8PtrTy(F.getContext()),
@@ -557,8 +557,7 @@ bool LowerInvoke::insertExpensiveEHSupport(Function &F) {
// Get a pointer to the jmpbuf and longjmp.
Value *Idx[] = { Constant::getNullValue(Type::getInt32Ty(F.getContext())),
ConstantInt::get(Type::getInt32Ty(F.getContext()), 0) };
- Idx[0] = GetElementPtrInst::Create(BufPtr, &Idx[0], &Idx[2], "JmpBuf",
- UnwindBlock);
+ Idx[0] = GetElementPtrInst::Create(BufPtr, Idx, "JmpBuf", UnwindBlock);
Idx[0] = new BitCastInst(Idx[0],
Type::getInt8PtrTy(F.getContext()),
"tmp", UnwindBlock);
@@ -580,6 +579,12 @@ bool LowerInvoke::insertExpensiveEHSupport(Function &F) {
Unwinds[i]->eraseFromParent();
}
+ // Replace the inserted unreachable with a branch to the unwind handler.
+ if (UnreachablePlaceholder) {
+ BranchInst::Create(UnwindHandler, UnreachablePlaceholder);
+ UnreachablePlaceholder->eraseFromParent();
+ }
+
// Finally, for any returns from this function, if this function contains an
// invoke, restore the old jmpbuf pointer to its input value.
if (OldJmpBufPtr) {
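LowerInvoke's expensive mode, touched above, emulates unwinding with setjmp/longjmp-style control flow; the structure it emits corresponds roughly to this model:

    #include <csetjmp>
    #include <cstdio>

    static jmp_buf JmpBuf; // analogous to the pass's per-function jump buffer

    static void callee(bool Fail) {
      if (Fail)
        longjmp(JmpBuf, 1); // the "unwind" edge
      puts("normal return");
    }

    int main() {
      if (setjmp(JmpBuf) == 0)
        callee(true);            // normal destination follows the call
      else
        puts("unwind handler");  // unwind destination (the catch block)
      return 0;
    }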
diff --git a/lib/Transforms/Utils/LowerSwitch.cpp b/lib/Transforms/Utils/LowerSwitch.cpp
index ed733d3..686178c 100644
--- a/lib/Transforms/Utils/LowerSwitch.cpp
+++ b/lib/Transforms/Utils/LowerSwitch.cpp
@@ -277,11 +277,11 @@ void LowerSwitch::processSwitchInst(SwitchInst *SI) {
BasicBlock *CurBlock = SI->getParent();
BasicBlock *OrigBlock = CurBlock;
Function *F = CurBlock->getParent();
- Value *Val = SI->getOperand(0); // The value we are switching on...
+ Value *Val = SI->getCondition(); // The value we are switching on...
BasicBlock* Default = SI->getDefaultDest();
// If there is only the default destination, don't bother with the code below.
- if (SI->getNumOperands() == 2) {
+ if (SI->getNumCases() == 1) {
BranchInst::Create(SI->getDefaultDest(), CurBlock);
CurBlock->getInstList().erase(SI);
return;
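For background on the file being patched: LowerSwitch rewrites a switch over sorted case values into a tree of conditional branches. A toy model of that balanced lowering (plain C++, not the pass's real data structures):

    #include <cstdio>
    #include <vector>

    // Binary-split over sorted cases, mirroring the branch tree the pass
    // emits; returns the matching case index, or -1 for the default.
    static int dispatch(const std::vector<int> &Cases, int Val, int Lo, int Hi) {
      if (Lo > Hi)
        return -1;
      int Mid = (Lo + Hi) / 2;
      if (Val == Cases[Mid])
        return Mid;
      return Val < Cases[Mid] ? dispatch(Cases, Val, Lo, Mid - 1)
                              : dispatch(Cases, Val, Mid + 1, Hi);
    }

    int main() {
      std::vector<int> Cases;
      Cases.push_back(1);
      Cases.push_back(4);
      Cases.push_back(9);
      printf("%d\n", dispatch(Cases, 4, 0, 2)); // 1: hits a case
      printf("%d\n", dispatch(Cases, 5, 0, 2)); // -1: falls to the default
      return 0;
    }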
diff --git a/lib/Transforms/Utils/PromoteMemoryToRegister.cpp b/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
index e5a00f4..db3e942 100644
--- a/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
+++ b/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
@@ -86,11 +86,15 @@ bool llvm::isAllocaPromotable(const AllocaInst *AI) {
UI != UE; ++UI) { // Loop over all of the uses of the alloca
const User *U = *UI;
if (const LoadInst *LI = dyn_cast<LoadInst>(U)) {
+ // Note that atomic loads can be transformed; atomic semantics do
+ // not have any meaning for a local alloca.
if (LI->isVolatile())
return false;
} else if (const StoreInst *SI = dyn_cast<StoreInst>(U)) {
if (SI->getOperand(0) == AI)
return false; // Don't allow a store OF the AI, only INTO the AI.
+ // Note that atomic stores can be transformed; atomic semantics do
+ // not have any meaning for a local alloca.
if (SI->isVolatile())
return false;
} else if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(U)) {
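The comments added above clarify the promotability rule: atomic loads and stores of a local alloca are fine, volatile ones are not, and the slot's address must never escape. As a per-use predicate, the rule looks like this (a simplified model, not the real isAllocaPromotable, which also allows a few intrinsic users):

    #include <cstddef>
    #include <cstdio>
    #include <vector>

    struct Use {
      bool IsLoad, IsStore, StoresTheAddress, IsVolatile;
    };

    static bool isPromotable(const std::vector<Use> &Uses) {
      for (size_t i = 0; i != Uses.size(); ++i) {
        const Use &U = Uses[i];
        if (U.IsVolatile)
          return false; // volatile blocks promotion; atomicity alone doesn't
        if (U.IsStore && U.StoresTheAddress)
          return false; // storing the alloca's address lets it escape
        if (!U.IsLoad && !U.IsStore)
          return false; // any other user is treated conservatively
      }
      return true;
    }

    int main() {
      std::vector<Use> Uses;
      Use Load = { true, false, false, false };
      Use Store = { false, true, false, false };
      Uses.push_back(Load);
      Uses.push_back(Store);
      printf("%d\n", (int)isPromotable(Uses)); // 1: promotable
      return 0;
    }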
diff --git a/lib/Transforms/Utils/SSAUpdater.cpp b/lib/Transforms/Utils/SSAUpdater.cpp
index b47a7cc..fa8061c 100644
--- a/lib/Transforms/Utils/SSAUpdater.cpp
+++ b/lib/Transforms/Utils/SSAUpdater.cpp
@@ -16,6 +16,7 @@
#include "llvm/Instructions.h"
#include "llvm/IntrinsicInst.h"
#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/TinyPtrVector.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Support/AlignOf.h"
#include "llvm/Support/Allocator.h"
@@ -43,7 +44,7 @@ SSAUpdater::~SSAUpdater() {
/// Initialize - Reset this object to get ready for a new set of SSA
/// updates with type 'Ty'. PHI nodes get a name based on 'Name'.
-void SSAUpdater::Initialize(const Type *Ty, StringRef Name) {
+void SSAUpdater::Initialize(Type *Ty, StringRef Name) {
if (AV == 0)
AV = new AvailableValsTy();
else
@@ -378,8 +379,7 @@ run(const SmallVectorImpl<Instruction*> &Insts) const {
// First step: bucket up uses of the alloca by the block they occur in.
// This is important because we have to handle multiple defs/uses in a block
// ourselves: SSAUpdater is purely for cross-block references.
- // FIXME: Want a TinyVector<Instruction*> since there is often 0/1 element.
- DenseMap<BasicBlock*, std::vector<Instruction*> > UsesByBlock;
+ DenseMap<BasicBlock*, TinyPtrVector<Instruction*> > UsesByBlock;
for (unsigned i = 0, e = Insts.size(); i != e; ++i) {
Instruction *User = Insts[i];
@@ -395,7 +395,7 @@ run(const SmallVectorImpl<Instruction*> &Insts) const {
for (unsigned i = 0, e = Insts.size(); i != e; ++i) {
Instruction *User = Insts[i];
BasicBlock *BB = User->getParent();
- std::vector<Instruction*> &BlockUses = UsesByBlock[BB];
+ TinyPtrVector<Instruction*> &BlockUses = UsesByBlock[BB];
// If this block has already been processed, ignore this repeat use.
if (BlockUses.empty()) continue;
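TinyPtrVector, adopted above, addresses exactly the FIXME it replaces: most blocks hold zero or one use, so the container keeps a single inline element and only allocates when a second arrives. A sketch of the idea (simplified; LLVM's real class packs the discriminator into the pointer representation):

    #include <cstddef>
    #include <cstdio>
    #include <vector>

    template <typename T>
    class TinyVec {
      T Single;              // valid only when HasSingle is true
      bool HasSingle;
      std::vector<T> Many;   // engaged once a second element lands
    public:
      TinyVec() : Single(), HasSingle(false) {}
      void push_back(const T &V) {
        if (!HasSingle && Many.empty()) {
          Single = V;        // common case: no heap allocation at all
          HasSingle = true;
          return;
        }
        if (HasSingle) {     // spill the inline element first
          Many.push_back(Single);
          HasSingle = false;
        }
        Many.push_back(V);
      }
      size_t size() const { return HasSingle ? 1 : Many.size(); }
    };

    int main() {
      TinyVec<int> V;
      V.push_back(1);
      printf("%u\n", (unsigned)V.size()); // 1, still allocation-free
      V.push_back(2);
      printf("%u\n", (unsigned)V.size()); // 2, now backed by the vector
      return 0;
    }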
diff --git a/lib/Transforms/Utils/SimplifyCFG.cpp b/lib/Transforms/Utils/SimplifyCFG.cpp
index 9d9c324..b8c3ab4 100644
--- a/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -63,6 +63,7 @@ class SimplifyCFGOpt {
bool FoldValueComparisonIntoPredecessors(TerminatorInst *TI,
IRBuilder<> &Builder);
+ bool SimplifyResume(ResumeInst *RI, IRBuilder<> &Builder);
bool SimplifyReturn(ReturnInst *RI, IRBuilder<> &Builder);
bool SimplifyUnwind(UnwindInst *UI, IRBuilder<> &Builder);
bool SimplifyUnreachable(UnreachableInst *UI);
@@ -322,7 +323,7 @@ static ConstantInt *GetConstantInt(Value *V, const TargetData *TD) {
// This is some kind of pointer constant. Turn it into a pointer-sized
// ConstantInt if possible.
- const IntegerType *PtrTy = TD->getIntPtrType(V->getContext());
+ IntegerType *PtrTy = TD->getIntPtrType(V->getContext());
// Null pointer means 0, see SelectionDAGBuilder::getValue(const Value*).
if (isa<ConstantPointerNull>(V))
@@ -2138,6 +2139,52 @@ static bool SimplifyBranchOnICmpChain(BranchInst *BI, const TargetData *TD,
return true;
}
+bool SimplifyCFGOpt::SimplifyResume(ResumeInst *RI, IRBuilder<> &Builder) {
+ // If this is a trivial landing pad that just continues unwinding the caught
+ // exception then zap the landing pad, turning its invokes into calls.
+ BasicBlock *BB = RI->getParent();
+ LandingPadInst *LPInst = dyn_cast<LandingPadInst>(BB->getFirstNonPHI());
+ if (RI->getValue() != LPInst)
+ // Not a landing pad, or the resume is not unwinding the exception that
+ // caused control to branch here.
+ return false;
+
+ // Check that there are no other instructions except for debug intrinsics.
+ BasicBlock::iterator I = LPInst, E = RI;
+ while (++I != E)
+ if (!isa<DbgInfoIntrinsic>(I))
+ return false;
+
+ // Turn all invokes that unwind here into calls and delete the basic block.
+ for (pred_iterator PI = pred_begin(BB), PE = pred_end(BB); PI != PE;) {
+ InvokeInst *II = cast<InvokeInst>((*PI++)->getTerminator());
+ SmallVector<Value*, 8> Args(II->op_begin(), II->op_end() - 3);
+ // Insert a call instruction before the invoke.
+ CallInst *Call = CallInst::Create(II->getCalledValue(), Args, "", II);
+ Call->takeName(II);
+ Call->setCallingConv(II->getCallingConv());
+ Call->setAttributes(II->getAttributes());
+ Call->setDebugLoc(II->getDebugLoc());
+
+ // Anything that used the value produced by the invoke instruction now uses
+ // the value produced by the call instruction. Note that we do this even
+ // for void functions and calls with no uses so that the callgraph edge is
+ // updated.
+ II->replaceAllUsesWith(Call);
+ BB->removePredecessor(II->getParent());
+
+ // Insert a branch to the normal destination right before the invoke.
+ BranchInst::Create(II->getNormalDest(), II);
+
+ // Finally, delete the invoke instruction!
+ II->eraseFromParent();
+ }
+
+ // The landingpad is now unreachable. Zap it.
+ BB->eraseFromParent();
+ return true;
+}
+
bool SimplifyCFGOpt::SimplifyReturn(ReturnInst *RI, IRBuilder<> &Builder) {
BasicBlock *BB = RI->getParent();
if (!BB->getFirstNonPHIOrDbg()->isTerminator()) return false;
@@ -2244,18 +2291,34 @@ bool SimplifyCFGOpt::SimplifyUnreachable(UnreachableInst *UI) {
while (UI != BB->begin()) {
BasicBlock::iterator BBI = UI;
--BBI;
- // Do not delete instructions that can have side effects, like calls
- // (which may never return) and volatile loads and stores.
+ // Do not delete instructions that can have side effects which might cause
+ // the unreachable to not be reachable; specifically, calls and volatile
+ // operations may have this effect.
if (isa<CallInst>(BBI) && !isa<DbgInfoIntrinsic>(BBI)) break;
-
- if (StoreInst *SI = dyn_cast<StoreInst>(BBI))
- if (SI->isVolatile())
- break;
-
- if (LoadInst *LI = dyn_cast<LoadInst>(BBI))
- if (LI->isVolatile())
+
+ if (BBI->mayHaveSideEffects()) {
+ if (StoreInst *SI = dyn_cast<StoreInst>(BBI)) {
+ if (SI->isVolatile())
+ break;
+ } else if (LoadInst *LI = dyn_cast<LoadInst>(BBI)) {
+ if (LI->isVolatile())
+ break;
+ } else if (AtomicRMWInst *RMWI = dyn_cast<AtomicRMWInst>(BBI)) {
+ if (RMWI->isVolatile())
+ break;
+ } else if (AtomicCmpXchgInst *CXI = dyn_cast<AtomicCmpXchgInst>(BBI)) {
+ if (CXI->isVolatile())
+ break;
+ } else if (!isa<FenceInst>(BBI) && !isa<VAArgInst>(BBI) &&
+ !isa<LandingPadInst>(BBI)) {
break;
-
+ }
+ // Note that deleting LandingPads here is in fact okay, although it
+ // involves a bit of subtle reasoning. If this inst is a LandingPad,
+ // all the predecessors of this block will be the unwind edges of Invokes,
+ // and we can therefore guarantee this block will be erased.
+ }
+
// Delete this instruction (any uses are guaranteed to be dead)
if (!BBI->use_empty())
BBI->replaceAllUsesWith(UndefValue::get(BBI->getType()));
@@ -2707,6 +2770,71 @@ bool SimplifyCFGOpt::SimplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) {
return false;
}
+/// Check if passing a value to an instruction will cause undefined behavior.
+static bool passingValueIsAlwaysUndefined(Value *V, Instruction *I) {
+ Constant *C = dyn_cast<Constant>(V);
+ if (!C)
+ return false;
+
+ if (!I->hasOneUse()) // Only look at single-use instructions, for compile time
+ return false;
+
+ if (C->isNullValue()) {
+ Instruction *Use = I->use_back();
+
+ // Now make sure that there are no instructions in between that can alter
+ // control flow (e.g. calls)
+ for (BasicBlock::iterator i = ++BasicBlock::iterator(I); &*i != Use; ++i)
+ if (i == I->getParent()->end() || i->mayHaveSideEffects())
+ return false;
+
+ // Look through GEPs. A load from a GEP derived from NULL is still undefined
+ if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Use))
+ if (GEP->getPointerOperand() == I)
+ return passingValueIsAlwaysUndefined(V, GEP);
+
+ // Look through bitcasts.
+ if (BitCastInst *BC = dyn_cast<BitCastInst>(Use))
+ return passingValueIsAlwaysUndefined(V, BC);
+
+ // Load from null is undefined.
+ if (LoadInst *LI = dyn_cast<LoadInst>(Use))
+ return LI->getPointerAddressSpace() == 0;
+
+ // Store to null is undefined.
+ if (StoreInst *SI = dyn_cast<StoreInst>(Use))
+ return SI->getPointerAddressSpace() == 0 && SI->getPointerOperand() == I;
+ }
+ return false;
+}
+
+/// If BB has an incoming value that will always trigger undefined behavior
+/// (e.g. a null pointer dereference), remove the branch leading here.
+static bool removeUndefIntroducingPredecessor(BasicBlock *BB) {
+ for (BasicBlock::iterator i = BB->begin();
+ PHINode *PHI = dyn_cast<PHINode>(i); ++i)
+ for (unsigned i = 0, e = PHI->getNumIncomingValues(); i != e; ++i)
+ if (passingValueIsAlwaysUndefined(PHI->getIncomingValue(i), PHI)) {
+ TerminatorInst *T = PHI->getIncomingBlock(i)->getTerminator();
+ IRBuilder<> Builder(T);
+ if (BranchInst *BI = dyn_cast<BranchInst>(T)) {
+ BB->removePredecessor(PHI->getIncomingBlock(i));
+ // Turn unconditional branches into unreachables and remove the dead
+ // destination from conditional branches.
+ if (BI->isUnconditional())
+ Builder.CreateUnreachable();
+ else
+ Builder.CreateBr(BI->getSuccessor(0) == BB ? BI->getSuccessor(1) :
+ BI->getSuccessor(0));
+ BI->eraseFromParent();
+ return true;
+ }
+ // TODO: SwitchInst.
+ }
+
+ return false;
+}
+
bool SimplifyCFGOpt::run(BasicBlock *BB) {
bool Changed = false;
@@ -2730,6 +2858,9 @@ bool SimplifyCFGOpt::run(BasicBlock *BB) {
// Check for and eliminate duplicate PHI nodes in this block.
Changed |= EliminateDuplicatePHINodes(BB);
+ // Check for and remove branches that will always cause undefined behavior.
+ Changed |= removeUndefIntroducingPredecessor(BB);
+
// Merge basic blocks into their predecessor if there is only one distinct
// pred, and if there is only one distinct successor of the predecessor, and
// if there are no PHI nodes.
@@ -2752,6 +2883,8 @@ bool SimplifyCFGOpt::run(BasicBlock *BB) {
} else {
if (SimplifyCondBranch(BI, Builder)) return true;
}
+ } else if (ResumeInst *RI = dyn_cast<ResumeInst>(BB->getTerminator())) {
+ if (SimplifyResume(RI, Builder)) return true;
} else if (ReturnInst *RI = dyn_cast<ReturnInst>(BB->getTerminator())) {
if (SimplifyReturn(RI, Builder)) return true;
} else if (SwitchInst *SI = dyn_cast<SwitchInst>(BB->getTerminator())) {
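A source-level picture of what the new removeUndefIntroducingPredecessor targets (hypothetical example): after earlier simplification this function becomes a PHI with a null incoming value feeding a load, so the edge that supplies the null can only lead to undefined behavior and may be deleted outright.

    #include <cstdio>

    static int readThrough(bool c, int *p) {
      int *q = c ? p : 0; // becomes a PHI merging p and null in the IR
      return *q;          // the load makes the c == false edge undefined
    }

    int main() {
      int x = 42;
      printf("%d\n", readThrough(true, &x)); // only the defined edge is taken
      return 0;
    }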
diff --git a/lib/Transforms/Utils/SimplifyIndVar.cpp b/lib/Transforms/Utils/SimplifyIndVar.cpp
new file mode 100644
index 0000000..76289c0
--- /dev/null
+++ b/lib/Transforms/Utils/SimplifyIndVar.cpp
@@ -0,0 +1,432 @@
+//===-- SimplifyIndVar.cpp - Induction variable simplification ------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements induction variable simplification. It does
+// not define any actual pass or policy, but provides a single function to
+// simplify a loop's induction variables based on ScalarEvolution.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "indvars"
+
+#include "llvm/Instructions.h"
+#include "llvm/Analysis/Dominators.h"
+#include "llvm/Analysis/IVUsers.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/LoopPass.h"
+#include "llvm/Analysis/ScalarEvolutionExpressions.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Utils/SimplifyIndVar.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+
+using namespace llvm;
+
+STATISTIC(NumElimIdentity, "Number of IV identities eliminated");
+STATISTIC(NumElimOperand, "Number of IV operands folded into a use");
+STATISTIC(NumElimRem , "Number of IV remainder operations eliminated");
+STATISTIC(NumElimCmp , "Number of IV comparisons eliminated");
+
+namespace {
+ /// SimplifyIndvar - This is a utility for simplifying induction variables
+ /// based on ScalarEvolution. It is the primary instrument of the
+ /// IndvarSimplify pass, but it may also be directly invoked to clean up after
+ /// other loop passes that preserve SCEV.
+ class SimplifyIndvar {
+ Loop *L;
+ LoopInfo *LI;
+ DominatorTree *DT;
+ ScalarEvolution *SE;
+ IVUsers *IU; // NULL for DisableIVRewrite
+ const TargetData *TD; // May be NULL
+
+ SmallVectorImpl<WeakVH> &DeadInsts;
+
+ bool Changed;
+
+ public:
+ SimplifyIndvar(Loop *Loop, ScalarEvolution *SE, LPPassManager *LPM,
+ SmallVectorImpl<WeakVH> &Dead, IVUsers *IVU = NULL) :
+ L(Loop),
+ LI(LPM->getAnalysisIfAvailable<LoopInfo>()),
+ SE(SE),
+ IU(IVU),
+ TD(LPM->getAnalysisIfAvailable<TargetData>()),
+ DeadInsts(Dead),
+ Changed(false) {
+ assert(LI && "IV simplification requires LoopInfo");
+ }
+
+ bool hasChanged() const { return Changed; }
+
+ /// Iteratively perform simplification on a worklist of users of the
+ /// specified induction variable. This is the top-level driver that applies
+ /// all simplifications to users of an IV.
+ void simplifyUsers(PHINode *CurrIV, IVVisitor *V = NULL);
+
+ Value *foldIVUser(Instruction *UseInst, Instruction *IVOperand);
+
+ bool eliminateIVUser(Instruction *UseInst, Instruction *IVOperand);
+ void eliminateIVComparison(ICmpInst *ICmp, Value *IVOperand);
+ void eliminateIVRemainder(BinaryOperator *Rem, Value *IVOperand,
+ bool IsSigned);
+ };
+}
+
+/// foldIVUser - Fold an IV operand into its use. This removes increments of an
+/// aligned IV when used by an instruction that ignores the low bits.
+///
+/// IVOperand is guaranteed SCEVable, but UseInst may not be.
+///
+/// Return the operand of IVOperand for this induction variable if IVOperand can
+/// be folded (in case more folding opportunities have been exposed).
+/// Otherwise return null.
+Value *SimplifyIndvar::foldIVUser(Instruction *UseInst, Instruction *IVOperand) {
+ Value *IVSrc = 0;
+ unsigned OperIdx = 0;
+ const SCEV *FoldedExpr = 0;
+ switch (UseInst->getOpcode()) {
+ default:
+ return 0;
+ case Instruction::UDiv:
+ case Instruction::LShr:
+ // We're only interested in the case where we know something about
+ // the numerator and have a constant denominator.
+ if (IVOperand != UseInst->getOperand(OperIdx) ||
+ !isa<ConstantInt>(UseInst->getOperand(1)))
+ return 0;
+
+ // Attempt to fold a binary operator with constant operand.
+ // e.g. ((I + 1) >> 2) => I >> 2
+ if (IVOperand->getNumOperands() != 2 ||
+ !isa<ConstantInt>(IVOperand->getOperand(1)))
+ return 0;
+
+ IVSrc = IVOperand->getOperand(0);
+ // IVSrc must be the (SCEVable) IV, since the other operand is const.
+ assert(SE->isSCEVable(IVSrc->getType()) && "Expect SCEVable IV operand");
+
+ ConstantInt *D = cast<ConstantInt>(UseInst->getOperand(1));
+ if (UseInst->getOpcode() == Instruction::LShr) {
+ // Get a constant for the divisor. See createSCEV.
+ uint32_t BitWidth = cast<IntegerType>(UseInst->getType())->getBitWidth();
+ if (D->getValue().uge(BitWidth))
+ return 0;
+
+ D = ConstantInt::get(UseInst->getContext(),
+ APInt(BitWidth, 1).shl(D->getZExtValue()));
+ }
+ FoldedExpr = SE->getUDivExpr(SE->getSCEV(IVSrc), SE->getSCEV(D));
+ }
+ // We have something that might fold its operand. Compare SCEVs.
+ if (!SE->isSCEVable(UseInst->getType()))
+ return 0;
+
+ // Bypass the operand if SCEV can prove it has no effect.
+ if (SE->getSCEV(UseInst) != FoldedExpr)
+ return 0;
+
+ DEBUG(dbgs() << "INDVARS: Eliminated IV operand: " << *IVOperand
+ << " -> " << *UseInst << '\n');
+
+ UseInst->setOperand(OperIdx, IVSrc);
+ assert(SE->getSCEV(UseInst) == FoldedExpr && "bad SCEV with folded oper");
+
+ ++NumElimOperand;
+ Changed = true;
+ if (IVOperand->use_empty())
+ DeadInsts.push_back(IVOperand);
+ return IVSrc;
+}
+
+/// eliminateIVComparison - SimplifyIVUsers helper for eliminating useless
+/// comparisons against an induction variable.
+void SimplifyIndvar::eliminateIVComparison(ICmpInst *ICmp, Value *IVOperand) {
+ unsigned IVOperIdx = 0;
+ ICmpInst::Predicate Pred = ICmp->getPredicate();
+ if (IVOperand != ICmp->getOperand(0)) {
+ // Swapped
+ assert(IVOperand == ICmp->getOperand(1) && "Can't find IVOperand");
+ IVOperIdx = 1;
+ Pred = ICmpInst::getSwappedPredicate(Pred);
+ }
+
+ // Get the SCEVs for the ICmp operands.
+ const SCEV *S = SE->getSCEV(ICmp->getOperand(IVOperIdx));
+ const SCEV *X = SE->getSCEV(ICmp->getOperand(1 - IVOperIdx));
+
+ // Simplify unnecessary loops away.
+ const Loop *ICmpLoop = LI->getLoopFor(ICmp->getParent());
+ S = SE->getSCEVAtScope(S, ICmpLoop);
+ X = SE->getSCEVAtScope(X, ICmpLoop);
+
+ // If the condition is always true or always false, replace it with
+ // a constant value.
+ if (SE->isKnownPredicate(Pred, S, X))
+ ICmp->replaceAllUsesWith(ConstantInt::getTrue(ICmp->getContext()));
+ else if (SE->isKnownPredicate(ICmpInst::getInversePredicate(Pred), S, X))
+ ICmp->replaceAllUsesWith(ConstantInt::getFalse(ICmp->getContext()));
+ else
+ return;
+
+ DEBUG(dbgs() << "INDVARS: Eliminated comparison: " << *ICmp << '\n');
+ ++NumElimCmp;
+ Changed = true;
+ DeadInsts.push_back(ICmp);
+}
+
+/// eliminateIVRemainder - SimplifyIVUsers helper for eliminating useless
+/// remainder operations operating on an induction variable.
+void SimplifyIndvar::eliminateIVRemainder(BinaryOperator *Rem,
+ Value *IVOperand,
+ bool IsSigned) {
+ // We're only interested in the case where we know something about
+ // the numerator.
+ if (IVOperand != Rem->getOperand(0))
+ return;
+
+ // Get the SCEVs for the Rem operands.
+ const SCEV *S = SE->getSCEV(Rem->getOperand(0));
+ const SCEV *X = SE->getSCEV(Rem->getOperand(1));
+
+ // Simplify unnecessary loops away.
+ const Loop *ICmpLoop = LI->getLoopFor(Rem->getParent());
+ S = SE->getSCEVAtScope(S, ICmpLoop);
+ X = SE->getSCEVAtScope(X, ICmpLoop);
+
+ // i % n --> i if i is in [0,n).
+ if ((!IsSigned || SE->isKnownNonNegative(S)) &&
+ SE->isKnownPredicate(IsSigned ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT,
+ S, X))
+ Rem->replaceAllUsesWith(Rem->getOperand(0));
+ else {
+ // (i+1) % n --> (i+1)==n?0:(i+1) if i is in [0,n).
+ const SCEV *LessOne =
+ SE->getMinusSCEV(S, SE->getConstant(S->getType(), 1));
+ if (IsSigned && !SE->isKnownNonNegative(LessOne))
+ return;
+
+ if (!SE->isKnownPredicate(IsSigned ?
+ ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT,
+ LessOne, X))
+ return;
+
+ ICmpInst *ICmp = new ICmpInst(Rem, ICmpInst::ICMP_EQ,
+ Rem->getOperand(0), Rem->getOperand(1));
+ SelectInst *Sel =
+ SelectInst::Create(ICmp,
+ ConstantInt::get(Rem->getType(), 0),
+ Rem->getOperand(0), "tmp", Rem);
+ Rem->replaceAllUsesWith(Sel);
+ }
+
+ // Inform IVUsers about the new users.
+ if (IU) {
+ if (Instruction *I = dyn_cast<Instruction>(Rem->getOperand(0)))
+ IU->AddUsersIfInteresting(I);
+ }
+ DEBUG(dbgs() << "INDVARS: Simplified rem: " << *Rem << '\n');
+ ++NumElimRem;
+ Changed = true;
+ DeadInsts.push_back(Rem);
+}
+
+/// eliminateIVUser - Eliminate an operation that consumes a simple IV and has
+/// no observable side-effect given the range of IV values.
+/// IVOperand is guaranteed SCEVable, but UseInst may not be.
+bool SimplifyIndvar::eliminateIVUser(Instruction *UseInst,
+ Instruction *IVOperand) {
+ if (ICmpInst *ICmp = dyn_cast<ICmpInst>(UseInst)) {
+ eliminateIVComparison(ICmp, IVOperand);
+ return true;
+ }
+ if (BinaryOperator *Rem = dyn_cast<BinaryOperator>(UseInst)) {
+ bool IsSigned = Rem->getOpcode() == Instruction::SRem;
+ if (IsSigned || Rem->getOpcode() == Instruction::URem) {
+ eliminateIVRemainder(Rem, IVOperand, IsSigned);
+ return true;
+ }
+ }
+
+ // Eliminate any operation that SCEV can prove is an identity function.
+ if (!SE->isSCEVable(UseInst->getType()) ||
+ (UseInst->getType() != IVOperand->getType()) ||
+ (SE->getSCEV(UseInst) != SE->getSCEV(IVOperand)))
+ return false;
+
+ DEBUG(dbgs() << "INDVARS: Eliminated identity: " << *UseInst << '\n');
+
+ UseInst->replaceAllUsesWith(IVOperand);
+ ++NumElimIdentity;
+ Changed = true;
+ DeadInsts.push_back(UseInst);
+ return true;
+}
+
+/// pushIVUsers - Add all uses of Def to the current IV's worklist.
+///
+static void pushIVUsers(
+ Instruction *Def,
+ SmallPtrSet<Instruction*,16> &Simplified,
+ SmallVectorImpl< std::pair<Instruction*,Instruction*> > &SimpleIVUsers) {
+
+ for (Value::use_iterator UI = Def->use_begin(), E = Def->use_end();
+ UI != E; ++UI) {
+ Instruction *User = cast<Instruction>(*UI);
+
+ // Avoid infinite or exponential worklist processing.
+ // Also ensure unique worklist users.
+ // If Def is a LoopPhi, it may not be in the Simplified set, so check for
+ // self edges first.
+ if (User != Def && Simplified.insert(User))
+ SimpleIVUsers.push_back(std::make_pair(User, Def));
+ }
+}
+
+/// isSimpleIVUser - Return true if this instruction generates a simple SCEV
+/// expression in terms of an IV of the given loop.
+///
+/// This is similar to IVUsers' isInteresting() but processes each instruction
+/// non-recursively when the operand is already known to be a simple IV user.
+///
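+/// Example: with %i = {0,+,1}<%loop>, "%x = shl i32 %i, 2" has the affine
+/// SCEV {0,+,4}<%loop> and is a simple IV user; a value that SCEV cannot
+/// model as a recurrence of this loop is not.
+///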
+static bool isSimpleIVUser(Instruction *I, const Loop *L, ScalarEvolution *SE) {
+ if (!SE->isSCEVable(I->getType()))
+ return false;
+
+ // Get the symbolic expression for this instruction.
+ const SCEV *S = SE->getSCEV(I);
+
+ // Only consider affine recurrences.
+ const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S);
+ if (AR && AR->getLoop() == L)
+ return true;
+
+ return false;
+}
+
+/// simplifyUsers - Iteratively perform simplification on a worklist of users
+/// of the specified induction variable. Each successive simplification may push
+/// more users which may themselves be candidates for simplification.
+///
+/// This algorithm does not require IVUsers analysis. Instead, it simplifies
+/// instructions in-place during analysis. Rather than rewriting induction
+/// variables bottom-up from their users, it transforms a chain of IVUsers
+/// top-down, updating the IR only when it encounters a clear optimization
+/// opportunity.
+///
+/// Once DisableIVRewrite is the default, LSR will be the only client of
+/// IVUsers.
+///
+void SimplifyIndvar::simplifyUsers(PHINode *CurrIV, IVVisitor *V) {
+ if (!SE->isSCEVable(CurrIV->getType()))
+ return;
+
+ // Instructions processed by SimplifyIndvar for CurrIV.
+ SmallPtrSet<Instruction*,16> Simplified;
+
+ // Use-def pairs of IV users waiting to be processed for CurrIV.
+ SmallVector<std::pair<Instruction*, Instruction*>, 8> SimpleIVUsers;
+
+ // Push users of the current LoopPhi. In rare cases, pushIVUsers may be
+ // called multiple times for the same LoopPhi. This is the proper thing to
+ // do for loop header phis that use each other.
+ pushIVUsers(CurrIV, Simplified, SimpleIVUsers);
+
+ while (!SimpleIVUsers.empty()) {
+ std::pair<Instruction*, Instruction*> UseOper =
+ SimpleIVUsers.pop_back_val();
+ // Bypass back edges to avoid extra work.
+ if (UseOper.first == CurrIV) continue;
+
+ Instruction *IVOperand = UseOper.second;
+ for (unsigned N = 0; IVOperand; ++N) {
+ assert(N <= Simplified.size() && "runaway iteration");
+
+ Value *NewOper = foldIVUser(UseOper.first, IVOperand);
+ if (!NewOper)
+ break; // done folding
+ IVOperand = dyn_cast<Instruction>(NewOper);
+ }
+ if (!IVOperand)
+ continue;
+
+ if (eliminateIVUser(UseOper.first, IVOperand)) {
+ pushIVUsers(IVOperand, Simplified, SimpleIVUsers);
+ continue;
+ }
+ CastInst *Cast = dyn_cast<CastInst>(UseOper.first);
+ if (V && Cast) {
+ V->visitCast(Cast);
+ continue;
+ }
+ if (isSimpleIVUser(UseOper.first, L, SE)) {
+ pushIVUsers(UseOper.first, Simplified, SimpleIVUsers);
+ }
+ }
+}
+
+namespace llvm {
+
+/// simplifyUsersOfIV - Simplify instructions that use this induction variable
+/// by using ScalarEvolution to analyze the IV's recurrence.
+bool simplifyUsersOfIV(PHINode *CurrIV, ScalarEvolution *SE, LPPassManager *LPM,
+ SmallVectorImpl<WeakVH> &Dead, IVVisitor *V) {
+ LoopInfo *LI = &LPM->getAnalysis<LoopInfo>();
+ SimplifyIndvar SIV(LI->getLoopFor(CurrIV->getParent()), SE, LPM, Dead);
+ SIV.simplifyUsers(CurrIV, V);
+ return SIV.hasChanged();
+}
+
+/// simplifyLoopIVs - Simplify users of induction variables within this
+/// loop. This does not actually change or add IVs.
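+///
+/// Typical use (sketch, assuming a loop pass with ScalarEvolution and an
+/// LPPassManager available):
+///   SmallVector<WeakVH, 16> DeadInsts;
+///   Changed |= simplifyLoopIVs(L, SE, &LPM, DeadInsts);
+/// followed by deletion of the instructions accumulated in DeadInsts.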
+bool simplifyLoopIVs(Loop *L, ScalarEvolution *SE, LPPassManager *LPM,
+ SmallVectorImpl<WeakVH> &Dead) {
+ bool Changed = false;
+ for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I) {
+ Changed |= simplifyUsersOfIV(cast<PHINode>(I), SE, LPM, Dead);
+ }
+ return Changed;
+}
+
+/// simplifyIVUsers - Perform simplification on instructions recorded by the
+/// IVUsers pass.
+///
+/// This is the old approach to IV simplification to be replaced by
+/// simplifyLoopIVs.
+bool simplifyIVUsers(IVUsers *IU, ScalarEvolution *SE, LPPassManager *LPM,
+ SmallVectorImpl<WeakVH> &Dead) {
+ SimplifyIndvar SIV(IU->getLoop(), SE, LPM, Dead);
+
+ // Each round of simplification involves a round of eliminating operations
+ // followed by a round of widening IVs. A single IVUsers worklist is used
+ // across all rounds. The inner loop advances the user. If widening exposes
+ // more uses, then another pass through the outer loop is triggered.
+ for (IVUsers::iterator I = IU->begin(); I != IU->end(); ++I) {
+ Instruction *UseInst = I->getUser();
+ Value *IVOperand = I->getOperandValToReplace();
+
+ if (ICmpInst *ICmp = dyn_cast<ICmpInst>(UseInst)) {
+ SIV.eliminateIVComparison(ICmp, IVOperand);
+ continue;
+ }
+ if (BinaryOperator *Rem = dyn_cast<BinaryOperator>(UseInst)) {
+ bool IsSigned = Rem->getOpcode() == Instruction::SRem;
+ if (IsSigned || Rem->getOpcode() == Instruction::URem) {
+ SIV.eliminateIVRemainder(Rem, IVOperand, IsSigned);
+ continue;
+ }
+ }
+ }
+ return SIV.hasChanged();
+}
+
+} // namespace llvm
diff --git a/lib/Transforms/Utils/ValueMapper.cpp b/lib/Transforms/Utils/ValueMapper.cpp
index 973b105..fc2538d 100644
--- a/lib/Transforms/Utils/ValueMapper.cpp
+++ b/lib/Transforms/Utils/ValueMapper.cpp
@@ -183,10 +183,9 @@ void llvm::RemapInstruction(Instruction *I, ValueToValueMapTy &VMap,
}
}
- // Remap attached metadata. Don't bother remapping DebugLoc, it can never
- // have mappings to do.
+ // Remap attached metadata.
SmallVector<std::pair<unsigned, MDNode *>, 4> MDs;
- I->getAllMetadataOtherThanDebugLoc(MDs);
+ I->getAllMetadata(MDs);
for (SmallVectorImpl<std::pair<unsigned, MDNode *> >::iterator
MI = MDs.begin(), ME = MDs.end(); MI != ME; ++MI) {
MDNode *Old = MI->second;
diff --git a/lib/VMCore/AsmWriter.cpp b/lib/VMCore/AsmWriter.cpp
index 94794c3..18308f2 100644
--- a/lib/VMCore/AsmWriter.cpp
+++ b/lib/VMCore/AsmWriter.cpp
@@ -58,7 +58,7 @@ static const Module *getModuleFromVal(const Value *V) {
const Function *M = I->getParent() ? I->getParent()->getParent() : 0;
return M ? M->getParent() : 0;
}
-
+
if (const GlobalValue *GV = dyn_cast<GlobalValue>(V))
return GV->getParent();
return 0;
@@ -142,18 +142,18 @@ public:
/// NamedTypes - The named types that are used by the current module.
std::vector<StructType*> NamedTypes;
-
+
/// NumberedTypes - The numbered types, along with their value.
DenseMap<StructType*, unsigned> NumberedTypes;
-
+
TypePrinting() {}
~TypePrinting() {}
-
+
void incorporateTypes(const Module &M);
-
+
void print(Type *Ty, raw_ostream &OS);
-
+
void printStructBody(StructType *Ty, raw_ostream &OS);
};
} // end anonymous namespace.
@@ -161,25 +161,25 @@ public:
void TypePrinting::incorporateTypes(const Module &M) {
M.findUsedStructTypes(NamedTypes);
-
+
// The list of struct types we got back includes all the struct types; split
// the unnamed ones out to a numbering and remove the anonymous structs.
unsigned NextNumber = 0;
-
+
std::vector<StructType*>::iterator NextToUse = NamedTypes.begin(), I, E;
for (I = NamedTypes.begin(), E = NamedTypes.end(); I != E; ++I) {
StructType *STy = *I;
-
+
// Ignore anonymous types.
- if (STy->isAnonymous())
+ if (STy->isLiteral())
continue;
-
+
if (STy->getName().empty())
NumberedTypes[STy] = NextNumber++;
else
*NextToUse++ = STy;
}
-
+
NamedTypes.erase(NextToUse, NamedTypes.end());
}
@@ -220,13 +220,13 @@ void TypePrinting::print(Type *Ty, raw_ostream &OS) {
}
case Type::StructTyID: {
StructType *STy = cast<StructType>(Ty);
-
- if (STy->isAnonymous())
+
+ if (STy->isLiteral())
return printStructBody(STy, OS);
if (!STy->getName().empty())
return PrintLLVMName(OS, STy->getName(), LocalPrefix);
-
+
DenseMap<StructType*, unsigned>::iterator I = NumberedTypes.find(STy);
if (I != NumberedTypes.end())
OS << '%' << I->second;
@@ -267,10 +267,10 @@ void TypePrinting::printStructBody(StructType *STy, raw_ostream &OS) {
OS << "opaque";
return;
}
-
+
if (STy->isPacked())
OS << '<';
-
+
if (STy->getNumElements() == 0) {
OS << "{}";
} else {
@@ -281,7 +281,7 @@ void TypePrinting::printStructBody(StructType *STy, raw_ostream &OS) {
OS << ", ";
print(*I, OS);
}
-
+
OS << " }";
}
if (STy->isPacked())
@@ -386,7 +386,8 @@ static SlotTracker *createSlotTracker(const Value *V) {
return new SlotTracker(FA->getParent());
if (const Instruction *I = dyn_cast<Instruction>(V))
- return new SlotTracker(I->getParent()->getParent());
+ if (I->getParent())
+ return new SlotTracker(I->getParent()->getParent());
if (const BasicBlock *BB = dyn_cast<BasicBlock>(V))
return new SlotTracker(BB->getParent());
@@ -419,7 +420,7 @@ static SlotTracker *createSlotTracker(const Value *V) {
// Module level constructor. Causes the contents of the Module (sans functions)
// to be added to the slot table.
SlotTracker::SlotTracker(const Module *M)
- : TheModule(M), TheFunction(0), FunctionProcessed(false),
+ : TheModule(M), TheFunction(0), FunctionProcessed(false),
mNext(0), fNext(0), mdnNext(0) {
}
@@ -490,12 +491,12 @@ void SlotTracker::processFunction() {
E = TheFunction->end(); BB != E; ++BB) {
if (!BB->hasName())
CreateFunctionSlot(BB);
-
+
for (BasicBlock::const_iterator I = BB->begin(), E = BB->end(); I != E;
++I) {
if (!I->getType()->isVoidTy() && !I->hasName())
CreateFunctionSlot(I);
-
+
// Intrinsics can directly use metadata. We allow direct calls to any
// llvm.foo function here, because the target may not be linked into the
// optimizer.
@@ -658,6 +659,23 @@ static const char *getPredicateText(unsigned predicate) {
return pred;
}
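+// writeAtomicRMWOperation prints the binop keyword between the "atomicrmw"
+// opcode and its operands, producing e.g. (hypothetical IR):
+//   %old = atomicrmw add i32* %ptr, i32 1 seq_cst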
+static void writeAtomicRMWOperation(raw_ostream &Out,
+ AtomicRMWInst::BinOp Op) {
+ switch (Op) {
+ default: Out << " <unknown operation " << Op << ">"; break;
+ case AtomicRMWInst::Xchg: Out << " xchg"; break;
+ case AtomicRMWInst::Add: Out << " add"; break;
+ case AtomicRMWInst::Sub: Out << " sub"; break;
+ case AtomicRMWInst::And: Out << " and"; break;
+ case AtomicRMWInst::Nand: Out << " nand"; break;
+ case AtomicRMWInst::Or: Out << " or"; break;
+ case AtomicRMWInst::Xor: Out << " xor"; break;
+ case AtomicRMWInst::Max: Out << " max"; break;
+ case AtomicRMWInst::Min: Out << " min"; break;
+ case AtomicRMWInst::UMax: Out << " umax"; break;
+ case AtomicRMWInst::UMin: Out << " umin"; break;
+ }
+}
static void WriteOptimizationInfo(raw_ostream &Out, const User *U) {
if (const OverflowingBinaryOperator *OBO =
@@ -792,7 +810,7 @@ static void WriteConstantInternal(raw_ostream &Out, const Constant *CV,
Out << "zeroinitializer";
return;
}
-
+
if (const BlockAddress *BA = dyn_cast<BlockAddress>(CV)) {
Out << "blockaddress(";
WriteAsOperandInternal(Out, BA->getFunction(), &TypePrinter, Machine,
@@ -939,13 +957,13 @@ static void WriteMDNodeBodyInternal(raw_ostream &Out, const MDNode *Node,
else {
TypePrinter->print(V->getType(), Out);
Out << ' ';
- WriteAsOperandInternal(Out, Node->getOperand(mi),
+ WriteAsOperandInternal(Out, Node->getOperand(mi),
TypePrinter, Machine, Context);
}
if (mi + 1 != me)
Out << ", ";
}
-
+
Out << "}";
}
@@ -990,7 +1008,7 @@ static void WriteAsOperandInternal(raw_ostream &Out, const Value *V,
WriteMDNodeBodyInternal(Out, N, TypePrinter, Machine, Context);
return;
}
-
+
if (!Machine) {
if (N->isFunctionLocal())
Machine = new SlotTracker(N->getFunction());
@@ -1020,26 +1038,35 @@ static void WriteAsOperandInternal(raw_ostream &Out, const Value *V,
char Prefix = '%';
int Slot;
+ // If we have a SlotTracker, use it.
if (Machine) {
if (const GlobalValue *GV = dyn_cast<GlobalValue>(V)) {
Slot = Machine->getGlobalSlot(GV);
Prefix = '@';
} else {
Slot = Machine->getLocalSlot(V);
+
+ // If the local lookup failed, then we may be referring to a value from a
+ // different function. Translate it, as this can happen when taking the
+ // address of a block.
+ if (Slot == -1)
+ if ((Machine = createSlotTracker(V))) {
+ Slot = Machine->getLocalSlot(V);
+ delete Machine;
+ }
}
- } else {
- Machine = createSlotTracker(V);
- if (Machine) {
- if (const GlobalValue *GV = dyn_cast<GlobalValue>(V)) {
- Slot = Machine->getGlobalSlot(GV);
- Prefix = '@';
- } else {
- Slot = Machine->getLocalSlot(V);
- }
- delete Machine;
+ } else if ((Machine = createSlotTracker(V))) {
+ // Otherwise, create one to get the # and then destroy it.
+ if (const GlobalValue *GV = dyn_cast<GlobalValue>(V)) {
+ Slot = Machine->getGlobalSlot(GV);
+ Prefix = '@';
} else {
- Slot = -1;
+ Slot = Machine->getLocalSlot(V);
}
+ delete Machine;
+ Machine = 0;
+ } else {
+ Slot = -1;
}
if (Slot != -1)
@@ -1081,7 +1108,7 @@ class AssemblyWriter {
const Module *TheModule;
TypePrinting TypePrinter;
AssemblyAnnotationWriter *AnnotationWriter;
-
+
public:
inline AssemblyWriter(formatted_raw_ostream &o, SlotTracker &Mac,
const Module *M,
@@ -1093,11 +1120,12 @@ public:
void printMDNodeBody(const MDNode *MD);
void printNamedMDNode(const NamedMDNode *NMD);
-
+
void printModule(const Module *M);
void writeOperand(const Value *Op, bool PrintType);
void writeParamOperand(const Value *Operand, Attributes Attrs);
+ void writeAtomic(AtomicOrdering Ordering, SynchronizationScope SynchScope);
void writeAllMDNodes();
@@ -1128,6 +1156,28 @@ void AssemblyWriter::writeOperand(const Value *Operand, bool PrintType) {
WriteAsOperandInternal(Out, Operand, &TypePrinter, &Machine, TheModule);
}
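+// writeAtomic appends the optional scope and ordering suffixes, yielding
+// forms such as (hypothetical IR):
+//   load atomic i32* %p singlethread monotonic, align 4
+//   fence seq_cst
+// CrossThread is the default scope and prints no keyword.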
+void AssemblyWriter::writeAtomic(AtomicOrdering Ordering,
+ SynchronizationScope SynchScope) {
+ if (Ordering == NotAtomic)
+ return;
+
+ switch (SynchScope) {
+ default: Out << " <bad scope " << int(SynchScope) << ">"; break;
+ case SingleThread: Out << " singlethread"; break;
+ case CrossThread: break;
+ }
+
+ switch (Ordering) {
+ default: Out << " <bad ordering " << int(Ordering) << ">"; break;
+ case Unordered: Out << " unordered"; break;
+ case Monotonic: Out << " monotonic"; break;
+ case Acquire: Out << " acquire"; break;
+ case Release: Out << " release"; break;
+ case AcquireRelease: Out << " acq_rel"; break;
+ case SequentiallyConsistent: Out << " seq_cst"; break;
+ }
+}
+
void AssemblyWriter::writeParamOperand(const Value *Operand,
Attributes Attrs) {
if (Operand == 0) {
@@ -1216,7 +1266,7 @@ void AssemblyWriter::printModule(const Module *M) {
// Output named metadata.
if (!M->named_metadata_empty()) Out << '\n';
-
+
for (Module::const_named_metadata_iterator I = M->named_metadata_begin(),
E = M->named_metadata_end(); I != E; ++I)
printNamedMDNode(I);
@@ -1357,26 +1407,8 @@ void AssemblyWriter::printAlias(const GlobalAlias *GA) {
if (Aliasee == 0) {
TypePrinter.print(GA->getType(), Out);
Out << " <<NULL ALIASEE>>";
- } else if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(Aliasee)) {
- TypePrinter.print(GV->getType(), Out);
- Out << ' ';
- PrintLLVMName(Out, GV);
- } else if (const Function *F = dyn_cast<Function>(Aliasee)) {
- TypePrinter.print(F->getFunctionType(), Out);
- Out << "* ";
-
- WriteAsOperandInternal(Out, F, &TypePrinter, &Machine, F->getParent());
- } else if (const GlobalAlias *GA = dyn_cast<GlobalAlias>(Aliasee)) {
- TypePrinter.print(GA->getType(), Out);
- Out << ' ';
- PrintLLVMName(Out, GA);
} else {
- const ConstantExpr *CE = cast<ConstantExpr>(Aliasee);
- // The only valid GEP is an all zero GEP.
- assert((CE->getOpcode() == Instruction::BitCast ||
- CE->getOpcode() == Instruction::GetElementPtr) &&
- "Unsupported aliasee");
- writeOperand(CE, false);
+ writeOperand(Aliasee, !isa<ConstantExpr>(Aliasee));
}
printInfoComment(*GA);
@@ -1387,29 +1419,29 @@ void AssemblyWriter::printTypeIdentities() {
if (TypePrinter.NumberedTypes.empty() &&
TypePrinter.NamedTypes.empty())
return;
-
+
Out << '\n';
-
+
// We know the number assigned to each type and that the assignment is
// dense. Convert the map to an index table.
std::vector<StructType*> NumberedTypes(TypePrinter.NumberedTypes.size());
- for (DenseMap<StructType*, unsigned>::iterator I =
+ for (DenseMap<StructType*, unsigned>::iterator I =
TypePrinter.NumberedTypes.begin(), E = TypePrinter.NumberedTypes.end();
I != E; ++I) {
assert(I->second < NumberedTypes.size() && "Didn't get a dense numbering?");
NumberedTypes[I->second] = I->first;
}
-
+
// Emit all numbered types.
for (unsigned i = 0, e = NumberedTypes.size(); i != e; ++i) {
Out << '%' << i << " = type ";
-
+
// Make sure we print out at least one level of the type structure, so
// that we do not get %2 = type %2
TypePrinter.printStructBody(NumberedTypes[i], Out);
Out << '\n';
}
-
+
for (unsigned i = 0, e = TypePrinter.NamedTypes.size(); i != e; ++i) {
PrintLLVMName(Out, TypePrinter.NamedTypes[i]->getName(), LocalPrefix);
Out << " = type ";
@@ -1457,7 +1489,7 @@ void AssemblyWriter::printFunction(const Function *F) {
default: Out << "cc" << F->getCallingConv() << " "; break;
}
- const FunctionType *FT = F->getFunctionType();
+ FunctionType *FT = F->getFunctionType();
const AttrListPtr &Attrs = F->getAttributes();
Attributes RetAttrs = Attrs.getRetAttributes();
if (RetAttrs != Attribute::None)
@@ -1628,18 +1660,24 @@ void AssemblyWriter::printInstruction(const Instruction &I) {
Out << '%' << SlotNum << " = ";
}
- // If this is a volatile load or store, print out the volatile marker.
- if ((isa<LoadInst>(I) && cast<LoadInst>(I).isVolatile()) ||
- (isa<StoreInst>(I) && cast<StoreInst>(I).isVolatile())) {
- Out << "volatile ";
- } else if (isa<CallInst>(I) && cast<CallInst>(I).isTailCall()) {
- // If this is a call, check if it's a tail call.
+ if (isa<CallInst>(I) && cast<CallInst>(I).isTailCall())
Out << "tail ";
- }
// Print out the opcode...
Out << I.getOpcodeName();
+ // If this is an atomic load or store, print out the atomic marker.
+ if ((isa<LoadInst>(I) && cast<LoadInst>(I).isAtomic()) ||
+ (isa<StoreInst>(I) && cast<StoreInst>(I).isAtomic()))
+ Out << " atomic";
+
+ // If this is a volatile operation, print out the volatile marker.
+ if ((isa<LoadInst>(I) && cast<LoadInst>(I).isVolatile()) ||
+ (isa<StoreInst>(I) && cast<StoreInst>(I).isVolatile()) ||
+ (isa<AtomicCmpXchgInst>(I) && cast<AtomicCmpXchgInst>(I).isVolatile()) ||
+ (isa<AtomicRMWInst>(I) && cast<AtomicRMWInst>(I).isVolatile()))
+ Out << " volatile";
+
// Print out optimization information.
WriteOptimizationInfo(Out, &I);
@@ -1647,6 +1685,10 @@ void AssemblyWriter::printInstruction(const Instruction &I) {
if (const CmpInst *CI = dyn_cast<CmpInst>(&I))
Out << ' ' << getPredicateText(CI->getPredicate());
+ // Print out the atomicrmw operation
+ if (const AtomicRMWInst *RMWI = dyn_cast<AtomicRMWInst>(&I))
+ writeAtomicRMWOperation(Out, RMWI->getOperation());
+
// Print out the type of the operands...
const Value *Operand = I.getNumOperands() ? I.getOperand(0) : 0;
@@ -1661,18 +1703,20 @@ void AssemblyWriter::printInstruction(const Instruction &I) {
writeOperand(BI.getSuccessor(1), true);
} else if (isa<SwitchInst>(I)) {
+ SwitchInst& SI(cast<SwitchInst>(I));
// Special case switch instruction to get formatting nice and correct.
Out << ' ';
- writeOperand(Operand , true);
+ writeOperand(SI.getCondition(), true);
Out << ", ";
- writeOperand(I.getOperand(1), true);
+ writeOperand(SI.getDefaultDest(), true);
Out << " [";
-
- for (unsigned op = 2, Eop = I.getNumOperands(); op < Eop; op += 2) {
+ // Skip the first item since that's the default case.
+ unsigned NumCases = SI.getNumCases();
+ for (unsigned i = 1; i < NumCases; ++i) {
Out << "\n ";
- writeOperand(I.getOperand(op ), true);
+ writeOperand(SI.getCaseValue(i), true);
Out << ", ";
- writeOperand(I.getOperand(op+1), true);
+ writeOperand(SI.getSuccessor(i), true);
}
Out << "\n ]";
} else if (isa<IndirectBrInst>(I)) {
@@ -1680,7 +1724,7 @@ void AssemblyWriter::printInstruction(const Instruction &I) {
Out << ' ';
writeOperand(Operand, true);
Out << ", [";
-
+
for (unsigned i = 1, e = I.getNumOperands(); i != e; ++i) {
if (i != 1)
Out << ", ";
@@ -1709,6 +1753,24 @@ void AssemblyWriter::printInstruction(const Instruction &I) {
writeOperand(I.getOperand(1), true);
for (const unsigned *i = IVI->idx_begin(), *e = IVI->idx_end(); i != e; ++i)
Out << ", " << *i;
+ } else if (const LandingPadInst *LPI = dyn_cast<LandingPadInst>(&I)) {
+ Out << ' ';
+ TypePrinter.print(I.getType(), Out);
+ Out << " personality ";
+ writeOperand(I.getOperand(0), true); Out << '\n';
+
+ if (LPI->isCleanup())
+ Out << " cleanup";
+
+ for (unsigned i = 0, e = LPI->getNumClauses(); i != e; ++i) {
+ if (i != 0 || LPI->isCleanup()) Out << "\n";
+ if (LPI->isCatch(i))
+ Out << " catch ";
+ else
+ Out << " filter ";
+
+ writeOperand(LPI->getClause(i), true);
+ }
} else if (isa<ReturnInst>(I) && !Operand) {
Out << " void";
} else if (const CallInst *CI = dyn_cast<CallInst>(&I)) {
@@ -1878,11 +1940,23 @@ void AssemblyWriter::printInstruction(const Instruction &I) {
}
}
- // Print post operand alignment for load/store.
- if (isa<LoadInst>(I) && cast<LoadInst>(I).getAlignment()) {
- Out << ", align " << cast<LoadInst>(I).getAlignment();
- } else if (isa<StoreInst>(I) && cast<StoreInst>(I).getAlignment()) {
- Out << ", align " << cast<StoreInst>(I).getAlignment();
+ // Print atomic ordering/alignment for memory operations
+ if (const LoadInst *LI = dyn_cast<LoadInst>(&I)) {
+ if (LI->isAtomic())
+ writeAtomic(LI->getOrdering(), LI->getSynchScope());
+ if (LI->getAlignment())
+ Out << ", align " << LI->getAlignment();
+ } else if (const StoreInst *SI = dyn_cast<StoreInst>(&I)) {
+ if (SI->isAtomic())
+ writeAtomic(SI->getOrdering(), SI->getSynchScope());
+ if (SI->getAlignment())
+ Out << ", align " << SI->getAlignment();
+ } else if (const AtomicCmpXchgInst *CXI = dyn_cast<AtomicCmpXchgInst>(&I)) {
+ writeAtomic(CXI->getOrdering(), CXI->getSynchScope());
+ } else if (const AtomicRMWInst *RMWI = dyn_cast<AtomicRMWInst>(&I)) {
+ writeAtomic(RMWI->getOrdering(), RMWI->getSynchScope());
+ } else if (const FenceInst *FI = dyn_cast<FenceInst>(&I)) {
+ writeAtomic(FI->getOrdering(), FI->getSynchScope());
}
// Print Metadata info.
@@ -1916,7 +1990,7 @@ static void WriteMDNodeComment(const MDNode *Node,
APInt Tag = Val & ~APInt(Val.getBitWidth(), LLVMDebugVersionMask);
if (Val.ult(LLVMDebugVersion))
return;
-
+
Out.PadToColumn(50);
if (Tag == dwarf::DW_TAG_user_base)
Out << "; [ DW_TAG_user_base ]";
@@ -1932,7 +2006,7 @@ void AssemblyWriter::writeAllMDNodes() {
for (SlotTracker::mdn_iterator I = Machine.mdn_begin(), E = Machine.mdn_end();
I != E; ++I)
Nodes[I->second] = cast<MDNode>(I->first);
-
+
for (unsigned i = 0, e = Nodes.size(); i != e; ++i) {
Out << '!' << i << " = metadata ";
printMDNodeBody(Nodes[i]);
@@ -1970,10 +2044,10 @@ void Type::print(raw_ostream &OS) const {
}
TypePrinting TP;
TP.print(const_cast<Type*>(this), OS);
-
+
// If the type is a named struct type, print the body as well.
if (StructType *STy = dyn_cast<StructType>(const_cast<Type*>(this)))
- if (!STy->isAnonymous()) {
+ if (!STy->isLiteral()) {
OS << " = type ";
TP.printStructBody(STy, OS);
}
diff --git a/lib/VMCore/Attributes.cpp b/lib/VMCore/Attributes.cpp
index bf6efa1..485be75 100644
--- a/lib/VMCore/Attributes.cpp
+++ b/lib/VMCore/Attributes.cpp
@@ -38,6 +38,8 @@ std::string Attribute::getAsString(Attributes Attrs) {
Result += "nounwind ";
if (Attrs & Attribute::UWTable)
Result += "uwtable ";
+ if (Attrs & Attribute::ReturnsTwice)
+ Result += "returns_twice ";
if (Attrs & Attribute::InReg)
Result += "inreg ";
if (Attrs & Attribute::NoAlias)
@@ -72,8 +74,6 @@ std::string Attribute::getAsString(Attributes Attrs) {
Result += "noimplicitfloat ";
if (Attrs & Attribute::Naked)
Result += "naked ";
- if (Attrs & Attribute::Hotpatch)
- Result += "hotpatch ";
if (Attrs & Attribute::NonLazyBind)
Result += "nonlazybind ";
if (Attrs & Attribute::StackAlignment) {
@@ -92,7 +92,7 @@ std::string Attribute::getAsString(Attributes Attrs) {
return Result;
}
-Attributes Attribute::typeIncompatible(const Type *Ty) {
+Attributes Attribute::typeIncompatible(Type *Ty) {
Attributes Incompatible = None;
if (!Ty->isIntegerTy())
diff --git a/lib/VMCore/AutoUpgrade.cpp b/lib/VMCore/AutoUpgrade.cpp
index 9e93ff3..b849d3e 100644
--- a/lib/VMCore/AutoUpgrade.cpp
+++ b/lib/VMCore/AutoUpgrade.cpp
@@ -14,11 +14,15 @@
#include "llvm/AutoUpgrade.h"
#include "llvm/Constants.h"
#include "llvm/Function.h"
+#include "llvm/Instruction.h"
#include "llvm/LLVMContext.h"
#include "llvm/Module.h"
#include "llvm/IntrinsicInst.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Support/CallSite.h"
+#include "llvm/Support/CFG.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/IRBuilder.h"
#include <cstring>
@@ -34,11 +38,48 @@ static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) {
return false;
Name = Name.substr(5); // Strip off "llvm."
- const FunctionType *FTy = F->getFunctionType();
+ FunctionType *FTy = F->getFunctionType();
Module *M = F->getParent();
switch (Name[0]) {
default: break;
+ case 'a':
+ if (Name.startswith("atomic.cmp.swap") ||
+ Name.startswith("atomic.swap") ||
+ Name.startswith("atomic.load.add") ||
+ Name.startswith("atomic.load.sub") ||
+ Name.startswith("atomic.load.and") ||
+ Name.startswith("atomic.load.nand") ||
+ Name.startswith("atomic.load.or") ||
+ Name.startswith("atomic.load.xor") ||
+ Name.startswith("atomic.load.max") ||
+ Name.startswith("atomic.load.min") ||
+ Name.startswith("atomic.load.umax") ||
+ Name.startswith("atomic.load.umin"))
+ return true;
+ case 'i':
+ // This upgrades the old llvm.init.trampoline to the new
+ // llvm.init.trampoline and llvm.adjust.trampoline pair.
+ if (Name == "init.trampoline") {
+ // The new llvm.init.trampoline returns nothing.
+ if (FTy->getReturnType()->isVoidTy())
+ break;
+
+ assert(FTy->getNumParams() == 3 && "old init.trampoline takes 3 args!");
+
+ // Change the name of the old intrinsic so that we can play with its type.
+ std::string NameTmp = F->getName();
+ F->setName("");
+ NewFn = cast<Function>(M->getOrInsertFunction(
+ NameTmp,
+ Type::getVoidTy(M->getContext()),
+ FTy->getParamType(0), FTy->getParamType(1),
+ FTy->getParamType(2), (Type *)0));
+ return true;
+ }
+ case 'm':
+ if (Name == "memory.barrier")
+ return true;
case 'p':
// This upgrades the llvm.prefetch intrinsic to accept one more parameter,
// which is an instruction / data cache identifier. The old version only
@@ -139,8 +180,8 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
F->getName() == "llvm.x86.sse2.loadu.dq" ||
F->getName() == "llvm.x86.sse2.loadu.pd") {
// Convert to a native, unaligned load.
- const Type *VecTy = CI->getType();
- const Type *IntTy = IntegerType::get(C, 128);
+ Type *VecTy = CI->getType();
+ Type *IntTy = IntegerType::get(C, 128);
IRBuilder<> Builder(C);
Builder.SetInsertPoint(CI->getParent(), CI);
@@ -182,6 +223,80 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
// Remove intrinsic.
CI->eraseFromParent();
+ } else if (F->getName().startswith("llvm.atomic.cmp.swap")) {
+ IRBuilder<> Builder(C);
+ Builder.SetInsertPoint(CI->getParent(), CI);
+ Value *Val = Builder.CreateAtomicCmpXchg(CI->getArgOperand(0),
+ CI->getArgOperand(1),
+ CI->getArgOperand(2),
+ Monotonic);
+
+ // Replace intrinsic.
+ Val->takeName(CI);
+ if (!CI->use_empty())
+ CI->replaceAllUsesWith(Val);
+ CI->eraseFromParent();
+ } else if (F->getName().startswith("llvm.atomic")) {
+ IRBuilder<> Builder(C);
+ Builder.SetInsertPoint(CI->getParent(), CI);
+
+ AtomicRMWInst::BinOp Op;
+ if (F->getName().startswith("llvm.atomic.swap"))
+ Op = AtomicRMWInst::Xchg;
+ else if (F->getName().startswith("llvm.atomic.load.add"))
+ Op = AtomicRMWInst::Add;
+ else if (F->getName().startswith("llvm.atomic.load.sub"))
+ Op = AtomicRMWInst::Sub;
+ else if (F->getName().startswith("llvm.atomic.load.and"))
+ Op = AtomicRMWInst::And;
+ else if (F->getName().startswith("llvm.atomic.load.nand"))
+ Op = AtomicRMWInst::Nand;
+ else if (F->getName().startswith("llvm.atomic.load.or"))
+ Op = AtomicRMWInst::Or;
+ else if (F->getName().startswith("llvm.atomic.load.xor"))
+ Op = AtomicRMWInst::Xor;
+ else if (F->getName().startswith("llvm.atomic.load.max"))
+ Op = AtomicRMWInst::Max;
+ else if (F->getName().startswith("llvm.atomic.load.min"))
+ Op = AtomicRMWInst::Min;
+ else if (F->getName().startswith("llvm.atomic.load.umax"))
+ Op = AtomicRMWInst::UMax;
+ else if (F->getName().startswith("llvm.atomic.load.umin"))
+ Op = AtomicRMWInst::UMin;
+ else
+ llvm_unreachable("Unknown atomic");
+
+ Value *Val = Builder.CreateAtomicRMW(Op, CI->getArgOperand(0),
+ CI->getArgOperand(1),
+ Monotonic);
+
+ // Replace intrinsic.
+ Val->takeName(CI);
+ if (!CI->use_empty())
+ CI->replaceAllUsesWith(Val);
+ CI->eraseFromParent();
+ } else if (F->getName() == "llvm.memory.barrier") {
+ IRBuilder<> Builder(C);
+ Builder.SetInsertPoint(CI->getParent(), CI);
+
+ // Note that this conversion ignores the "device" bit; it was not really
+ // well-defined, and got abused because nobody paid enough attention to
+ // get it right. In practice, this probably doesn't matter; application
+ // code generally doesn't need anything stronger than
+ // SequentiallyConsistent (and realistically, SequentiallyConsistent
+ // is lowered to a strong enough barrier for almost anything).
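+ //
+ // As a concrete illustration (hypothetical IR), the common pre-3.0 call
+ //   call void @llvm.memory.barrier(i1 true, i1 true, i1 true, i1 true,
+ //                                  i1 false)
+ // becomes "fence seq_cst" under the mapping below.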
+
+ if (cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue())
+ Builder.CreateFence(SequentiallyConsistent);
+ else if (!cast<ConstantInt>(CI->getArgOperand(0))->getZExtValue())
+ Builder.CreateFence(Release);
+ else if (!cast<ConstantInt>(CI->getArgOperand(3))->getZExtValue())
+ Builder.CreateFence(Acquire);
+ else
+ Builder.CreateFence(AcquireRelease);
+
+ // Remove intrinsic.
+ CI->eraseFromParent();
} else {
llvm_unreachable("Unknown function for CallInst upgrade.");
}
@@ -192,7 +307,7 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
case Intrinsic::prefetch: {
IRBuilder<> Builder(C);
Builder.SetInsertPoint(CI->getParent(), CI);
- const llvm::Type *I32Ty = llvm::Type::getInt32Ty(CI->getContext());
+ llvm::Type *I32Ty = llvm::Type::getInt32Ty(CI->getContext());
// Add the extra "data cache" argument
Value *Operands[4] = { CI->getArgOperand(0), CI->getArgOperand(1),
@@ -212,6 +327,32 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
CI->eraseFromParent();
break;
}
+ case Intrinsic::init_trampoline: {
+
+ // Transform
+ // %tramp = call i8* llvm.init.trampoline (i8* x, i8* y, i8* z)
+ // to
+ // call void llvm.init.trampoline (i8* %x, i8* %y, i8* %z)
+ // %tramp = call i8* llvm.adjust.trampoline (i8* %x)
+
+ Function *AdjustTrampolineFn =
+ cast<Function>(Intrinsic::getDeclaration(F->getParent(),
+ Intrinsic::adjust_trampoline));
+
+ IRBuilder<> Builder(C);
+ Builder.SetInsertPoint(CI);
+
+ Builder.CreateCall3(NewFn, CI->getArgOperand(0), CI->getArgOperand(1),
+ CI->getArgOperand(2));
+
+ CallInst *AdjustCall = Builder.CreateCall(AdjustTrampolineFn,
+ CI->getArgOperand(0),
+ CI->getName());
+ if (!CI->use_empty())
+ CI->replaceAllUsesWith(AdjustCall);
+ CI->eraseFromParent();
+ break;
+ }
}
}
@@ -279,3 +420,249 @@ void llvm::CheckDebugInfoIntrinsics(Module *M) {
}
}
}
+
+/// FindExnAndSelIntrinsics - Find the eh_exception and eh_selector intrinsic
+/// calls reachable from the unwind basic block.
+static void FindExnAndSelIntrinsics(BasicBlock *BB, CallInst *&Exn,
+ CallInst *&Sel,
+ SmallPtrSet<BasicBlock*, 8> &Visited) {
+ if (!Visited.insert(BB)) return;
+
+ for (BasicBlock::iterator
+ I = BB->begin(), E = BB->end(); I != E; ++I) {
+ if (CallInst *CI = dyn_cast<CallInst>(I)) {
+ switch (CI->getCalledFunction()->getIntrinsicID()) {
+ default: break;
+ case Intrinsic::eh_exception:
+ assert(!Exn && "Found more than one eh.exception call!");
+ Exn = CI;
+ break;
+ case Intrinsic::eh_selector:
+ assert(!Sel && "Found more than one eh.selector call!");
+ Sel = CI;
+ break;
+ }
+
+ if (Exn && Sel) return;
+ }
+ }
+
+ if (Exn && Sel) return;
+
+ for (succ_iterator I = succ_begin(BB), E = succ_end(BB); I != E; ++I) {
+ FindExnAndSelIntrinsics(*I, Exn, Sel, Visited);
+ if (Exn && Sel) return;
+ }
+}
+
+/// TransferClausesToLandingPadInst - Transfer the exception handling clauses
+/// from the eh_selector call to the new landingpad instruction.
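+///
+/// Example (hypothetical IR): a selector call such as
+///   call i32 (i8*, i8*, ...)* @llvm.eh.selector(i8* %exn, i8* %personality,
+///                                               i8* %typeinfo, i32 0)
+/// yields a landingpad with a "catch i8* %typeinfo" clause plus "cleanup"
+/// for the trailing zero filter length.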
+static void TransferClausesToLandingPadInst(LandingPadInst *LPI,
+ CallInst *EHSel) {
+ LLVMContext &Context = LPI->getContext();
+ unsigned N = EHSel->getNumArgOperands();
+
+ for (unsigned i = N - 1; i > 1; --i) {
+ if (const ConstantInt *CI = dyn_cast<ConstantInt>(EHSel->getArgOperand(i))){
+ unsigned FilterLength = CI->getZExtValue();
+ unsigned FirstCatch = i + FilterLength + !FilterLength;
+ assert(FirstCatch <= N && "Invalid filter length");
+
+ if (FirstCatch < N)
+ for (unsigned j = FirstCatch; j < N; ++j) {
+ Value *Val = EHSel->getArgOperand(j);
+ if (!Val->hasName() || Val->getName() != "llvm.eh.catch.all.value") {
+ LPI->addClause(EHSel->getArgOperand(j));
+ } else {
+ GlobalVariable *GV = cast<GlobalVariable>(Val);
+ LPI->addClause(GV->getInitializer());
+ }
+ }
+
+ if (!FilterLength) {
+ // Cleanup.
+ LPI->setCleanup(true);
+ } else {
+ // Filter.
+ SmallVector<Constant *, 4> TyInfo;
+ TyInfo.reserve(FilterLength - 1);
+ for (unsigned j = i + 1; j < FirstCatch; ++j)
+ TyInfo.push_back(cast<Constant>(EHSel->getArgOperand(j)));
+ ArrayType *AType =
+ ArrayType::get(!TyInfo.empty() ? TyInfo[0]->getType() :
+ PointerType::getUnqual(Type::getInt8Ty(Context)),
+ TyInfo.size());
+ LPI->addClause(ConstantArray::get(AType, TyInfo));
+ }
+
+ N = i;
+ }
+ }
+
+ if (N > 2)
+ for (unsigned j = 2; j < N; ++j) {
+ Value *Val = EHSel->getArgOperand(j);
+ if (!Val->hasName() || Val->getName() != "llvm.eh.catch.all.value") {
+ LPI->addClause(EHSel->getArgOperand(j));
+ } else {
+ GlobalVariable *GV = cast<GlobalVariable>(Val);
+ LPI->addClause(GV->getInitializer());
+ }
+ }
+}
+
+/// This function upgrades the old pre-3.0 exception handling system to the new
+/// one. N.B. This will be removed in 3.1.
+void llvm::UpgradeExceptionHandling(Module *M) {
+ Function *EHException = M->getFunction("llvm.eh.exception");
+ Function *EHSelector = M->getFunction("llvm.eh.selector");
+ if (!EHException || !EHSelector)
+ return;
+
+ LLVMContext &Context = M->getContext();
+ Type *ExnTy = PointerType::getUnqual(Type::getInt8Ty(Context));
+ Type *SelTy = Type::getInt32Ty(Context);
+ Type *LPadSlotTy = StructType::get(ExnTy, SelTy, NULL);
+
+ // This map links the invoke instruction with the eh.exception and eh.selector
+ // calls associated with it.
+ DenseMap<InvokeInst*, std::pair<Value*, Value*> > InvokeToIntrinsicsMap;
+ for (Module::iterator
+ I = M->begin(), E = M->end(); I != E; ++I) {
+ Function &F = *I;
+
+ for (Function::iterator
+ II = F.begin(), IE = F.end(); II != IE; ++II) {
+ BasicBlock *BB = &*II;
+ InvokeInst *Inst = dyn_cast<InvokeInst>(BB->getTerminator());
+ if (!Inst) continue;
+ BasicBlock *UnwindDest = Inst->getUnwindDest();
+ if (UnwindDest->isLandingPad()) continue; // Already converted.
+
+ SmallPtrSet<BasicBlock*, 8> Visited;
+ CallInst *Exn = 0;
+ CallInst *Sel = 0;
+ FindExnAndSelIntrinsics(UnwindDest, Exn, Sel, Visited);
+ assert(Exn && Sel && "Cannot find eh.exception and eh.selector calls!");
+ InvokeToIntrinsicsMap[Inst] = std::make_pair(Exn, Sel);
+ }
+ }
+
+ // This map stores the slots where the exception object and selector value are
+ // stored within a function.
+ DenseMap<Function*, std::pair<Value*, Value*> > FnToLPadSlotMap;
+ SmallPtrSet<Instruction*, 32> DeadInsts;
+ for (DenseMap<InvokeInst*, std::pair<Value*, Value*> >::iterator
+ I = InvokeToIntrinsicsMap.begin(), E = InvokeToIntrinsicsMap.end();
+ I != E; ++I) {
+ InvokeInst *Invoke = I->first;
+ BasicBlock *UnwindDest = Invoke->getUnwindDest();
+ Function *F = UnwindDest->getParent();
+ std::pair<Value*, Value*> EHIntrinsics = I->second;
+ CallInst *Exn = cast<CallInst>(EHIntrinsics.first);
+ CallInst *Sel = cast<CallInst>(EHIntrinsics.second);
+
+ // Store the exception object and selector value in the entry block.
+ Value *ExnSlot = 0;
+ Value *SelSlot = 0;
+ if (!FnToLPadSlotMap[F].first) {
+ BasicBlock *Entry = &F->front();
+ ExnSlot = new AllocaInst(ExnTy, "exn", Entry->getTerminator());
+ SelSlot = new AllocaInst(SelTy, "sel", Entry->getTerminator());
+ FnToLPadSlotMap[F] = std::make_pair(ExnSlot, SelSlot);
+ } else {
+ ExnSlot = FnToLPadSlotMap[F].first;
+ SelSlot = FnToLPadSlotMap[F].second;
+ }
+
+ if (!UnwindDest->getSinglePredecessor()) {
+ // The unwind destination doesn't have a single predecessor. Create an
+ // unwind destination which has only one predecessor.
+ BasicBlock *NewBB = BasicBlock::Create(Context, "new.lpad",
+ UnwindDest->getParent());
+ BranchInst::Create(UnwindDest, NewBB);
+ Invoke->setUnwindDest(NewBB);
+
+ // Fix up any PHIs in the original unwind destination block.
+ for (BasicBlock::iterator
+ II = UnwindDest->begin(); isa<PHINode>(II); ++II) {
+ PHINode *PN = cast<PHINode>(II);
+ int Idx = PN->getBasicBlockIndex(Invoke->getParent());
+ if (Idx == -1) continue;
+ PN->setIncomingBlock(Idx, NewBB);
+ }
+
+ UnwindDest = NewBB;
+ }
+
+ IRBuilder<> Builder(Context);
+ Builder.SetInsertPoint(UnwindDest, UnwindDest->getFirstInsertionPt());
+
+ Value *PersFn = Sel->getArgOperand(1);
+ LandingPadInst *LPI = Builder.CreateLandingPad(LPadSlotTy, PersFn, 0);
+ Value *LPExn = Builder.CreateExtractValue(LPI, 0);
+ Value *LPSel = Builder.CreateExtractValue(LPI, 1);
+ Builder.CreateStore(LPExn, ExnSlot);
+ Builder.CreateStore(LPSel, SelSlot);
+
+ TransferClausesToLandingPadInst(LPI, Sel);
+
+ DeadInsts.insert(Exn);
+ DeadInsts.insert(Sel);
+ }
+
+ // Replace the old intrinsic calls with the values from the landingpad
+ // instruction(s). These values were stored in allocas for us to use here.
+ for (DenseMap<InvokeInst*, std::pair<Value*, Value*> >::iterator
+ I = InvokeToIntrinsicsMap.begin(), E = InvokeToIntrinsicsMap.end();
+ I != E; ++I) {
+ std::pair<Value*, Value*> EHIntrinsics = I->second;
+ CallInst *Exn = cast<CallInst>(EHIntrinsics.first);
+ CallInst *Sel = cast<CallInst>(EHIntrinsics.second);
+ BasicBlock *Parent = Exn->getParent();
+
+ std::pair<Value*,Value*> ExnSelSlots = FnToLPadSlotMap[Parent->getParent()];
+
+ IRBuilder<> Builder(Context);
+ Builder.SetInsertPoint(Parent, Exn);
+ LoadInst *LPExn = Builder.CreateLoad(ExnSelSlots.first, "exn.load");
+ LoadInst *LPSel = Builder.CreateLoad(ExnSelSlots.second, "sel.load");
+
+ Exn->replaceAllUsesWith(LPExn);
+ Sel->replaceAllUsesWith(LPSel);
+ }
+
+ // Remove the dead instructions.
+ for (SmallPtrSet<Instruction*, 32>::iterator
+ I = DeadInsts.begin(), E = DeadInsts.end(); I != E; ++I) {
+ Instruction *Inst = *I;
+ Inst->eraseFromParent();
+ }
+
+ // Replace calls to "llvm.eh.resume" with the 'resume' instruction. Load the
+ // exception and selector values from the stored place.
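+ // For illustration (hypothetical IR):
+ //   call void @llvm.eh.resume(i8* %exn, i32 %sel)
+ // becomes
+ //   %lpad.val0 = insertvalue { i8*, i32 } undef, i8* %exn, 0
+ //   %lpad.val = insertvalue { i8*, i32 } %lpad.val0, i32 %sel, 1
+ //   resume { i8*, i32 } %lpad.val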
+ Function *EHResume = M->getFunction("llvm.eh.resume");
+ if (!EHResume) return;
+
+ while (!EHResume->use_empty()) {
+ CallInst *Resume = cast<CallInst>(EHResume->use_back());
+ BasicBlock *BB = Resume->getParent();
+
+ IRBuilder<> Builder(Context);
+ Builder.SetInsertPoint(BB, Resume);
+
+ Value *LPadVal =
+ Builder.CreateInsertValue(UndefValue::get(LPadSlotTy),
+ Resume->getArgOperand(0), 0, "lpad.val");
+ LPadVal = Builder.CreateInsertValue(LPadVal, Resume->getArgOperand(1),
+ 1, "lpad.val");
+ Builder.CreateResume(LPadVal);
+
+ // Remove all instructions after the 'resume'.
+ BasicBlock::iterator I = Resume;
+ while (I != BB->end()) {
+ Instruction *Inst = &*I++;
+ Inst->eraseFromParent();
+ }
+ }
+}
diff --git a/lib/VMCore/BasicBlock.cpp b/lib/VMCore/BasicBlock.cpp
index 70265c8..d0aa275 100644
--- a/lib/VMCore/BasicBlock.cpp
+++ b/lib/VMCore/BasicBlock.cpp
@@ -53,7 +53,7 @@ BasicBlock::BasicBlock(LLVMContext &C, const Twine &Name, Function *NewParent,
} else if (NewParent) {
NewParent->getBasicBlockList().push_back(this);
}
-
+
setName(Name);
}
@@ -76,7 +76,7 @@ BasicBlock::~BasicBlock() {
BA->destroyConstant();
}
}
-
+
assert(getParent() == 0 && "BasicBlock still linked into the program!");
dropAllReferences();
InstList.clear();
@@ -167,6 +167,12 @@ Instruction* BasicBlock::getFirstNonPHIOrDbgOrLifetime() {
return &*i;
}
+BasicBlock::iterator BasicBlock::getFirstInsertionPt() {
+ iterator InsertPt = getFirstNonPHI();
+ if (isa<LandingPadInst>(InsertPt)) ++InsertPt;
+ return InsertPt;
+}
+
void BasicBlock::dropAllReferences() {
for(iterator I = begin(), E = end(); I != E; ++I)
I->dropAllReferences();
@@ -184,8 +190,8 @@ BasicBlock *BasicBlock::getSinglePredecessor() {
/// getUniquePredecessor - If this basic block has a unique predecessor block,
/// return the block, otherwise return a null pointer.
-/// Note that unique predecessor doesn't mean single edge, there can be
-/// multiple edges from the unique predecessor to this block (for example
+/// Note that unique predecessor doesn't mean single edge; there can be
+/// multiple edges from the unique predecessor to this block (for example
/// a switch statement with multiple cases having the same destination).
BasicBlock *BasicBlock::getUniquePredecessor() {
pred_iterator PI = pred_begin(this), E = pred_end(this);
@@ -336,11 +342,27 @@ void BasicBlock::replaceSuccessorsPhiUsesWith(BasicBlock *New) {
return;
for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i) {
BasicBlock *Succ = TI->getSuccessor(i);
- for (iterator II = Succ->begin(); PHINode *PN = dyn_cast<PHINode>(II);
- ++II) {
+ // N.B. Succ might not be a complete BasicBlock, so don't assume
+ // that it ends with a non-phi instruction.
+ for (iterator II = Succ->begin(), IE = Succ->end(); II != IE; ++II) {
+ PHINode *PN = dyn_cast<PHINode>(II);
+ if (!PN)
+ break;
int i;
while ((i = PN->getBasicBlockIndex(this)) >= 0)
PN->setIncomingBlock(i, New);
}
}
}
+
+/// isLandingPad - Return true if this basic block is a landing pad. I.e., it's
+/// the destination of the 'unwind' edge of an invoke instruction.
+bool BasicBlock::isLandingPad() const {
+ return isa<LandingPadInst>(getFirstNonPHI());
+}
+
+/// getLandingPadInst() - Return the landingpad instruction associated with
+/// the landing pad.
+LandingPadInst *BasicBlock::getLandingPadInst() {
+ return dyn_cast<LandingPadInst>(getFirstNonPHI());
+}
diff --git a/lib/VMCore/CMakeLists.txt b/lib/VMCore/CMakeLists.txt
index f60dd06..0404297 100644
--- a/lib/VMCore/CMakeLists.txt
+++ b/lib/VMCore/CMakeLists.txt
@@ -8,10 +8,11 @@ add_llvm_library(LLVMCore
ConstantFold.cpp
Constants.cpp
Core.cpp
- DebugLoc.cpp
DebugInfoProbe.cpp
+ DebugLoc.cpp
Dominators.cpp
Function.cpp
+ GCOV.cpp
GVMaterializer.cpp
Globals.cpp
IRBuilder.cpp
@@ -36,3 +37,5 @@ add_llvm_library(LLVMCore
ValueTypes.cpp
Verifier.cpp
)
+
+add_llvm_library_dependencies(LLVMCore LLVMSupport)
diff --git a/lib/VMCore/ConstantFold.cpp b/lib/VMCore/ConstantFold.cpp
index 323e2a2..30bae71 100644
--- a/lib/VMCore/ConstantFold.cpp
+++ b/lib/VMCore/ConstantFold.cpp
@@ -42,7 +42,7 @@ using namespace llvm;
/// specified vector type. At this point, we know that the elements of the
/// input vector constant are all simple integer or FP values.
static Constant *BitCastConstantVector(ConstantVector *CV,
- const VectorType *DstTy) {
+ VectorType *DstTy) {
if (CV->isAllOnesValue()) return Constant::getAllOnesValue(DstTy);
if (CV->isNullValue()) return Constant::getNullValue(DstTy);
@@ -63,7 +63,7 @@ static Constant *BitCastConstantVector(ConstantVector *CV,
// Bitcast each element now.
std::vector<Constant*> Result;
- const Type *DstEltTy = DstTy->getElementType();
+ Type *DstEltTy = DstTy->getElementType();
for (unsigned i = 0; i != NumElts; ++i)
Result.push_back(ConstantExpr::getBitCast(CV->getOperand(i),
DstEltTy));
@@ -78,15 +78,15 @@ static unsigned
foldConstantCastPair(
unsigned opc, ///< opcode of the second cast constant expression
ConstantExpr *Op, ///< the first cast constant expression
- const Type *DstTy ///< desintation type of the first cast
+ Type *DstTy ///< destination type of the first cast
) {
assert(Op && Op->isCast() && "Can't fold cast of cast without a cast!");
assert(DstTy && DstTy->isFirstClassType() && "Invalid cast destination type");
assert(CastInst::isCast(opc) && "Invalid cast opcode");
// The the types and opcodes for the two Cast constant expressions
- const Type *SrcTy = Op->getOperand(0)->getType();
- const Type *MidTy = Op->getType();
+ Type *SrcTy = Op->getOperand(0)->getType();
+ Type *MidTy = Op->getType();
Instruction::CastOps firstOp = Instruction::CastOps(Op->getOpcode());
Instruction::CastOps secondOp = Instruction::CastOps(opc);
@@ -95,27 +95,27 @@ foldConstantCastPair(
Type::getInt64Ty(DstTy->getContext()));
}
-static Constant *FoldBitCast(Constant *V, const Type *DestTy) {
- const Type *SrcTy = V->getType();
+static Constant *FoldBitCast(Constant *V, Type *DestTy) {
+ Type *SrcTy = V->getType();
if (SrcTy == DestTy)
return V; // no-op cast
// Check to see if we are casting a pointer to an aggregate to a pointer to
// the first element. If so, return the appropriate GEP instruction.
- if (const PointerType *PTy = dyn_cast<PointerType>(V->getType()))
- if (const PointerType *DPTy = dyn_cast<PointerType>(DestTy))
+ if (PointerType *PTy = dyn_cast<PointerType>(V->getType()))
+ if (PointerType *DPTy = dyn_cast<PointerType>(DestTy))
if (PTy->getAddressSpace() == DPTy->getAddressSpace()) {
SmallVector<Value*, 8> IdxList;
Value *Zero =
Constant::getNullValue(Type::getInt32Ty(DPTy->getContext()));
IdxList.push_back(Zero);
- const Type *ElTy = PTy->getElementType();
+ Type *ElTy = PTy->getElementType();
while (ElTy != DPTy->getElementType()) {
- if (const StructType *STy = dyn_cast<StructType>(ElTy)) {
+ if (StructType *STy = dyn_cast<StructType>(ElTy)) {
if (STy->getNumElements() == 0) break;
ElTy = STy->getElementType(0);
IdxList.push_back(Zero);
- } else if (const SequentialType *STy =
+ } else if (SequentialType *STy =
dyn_cast<SequentialType>(ElTy)) {
if (ElTy->isPointerTy()) break; // Can't index into pointers!
ElTy = STy->getElementType();
@@ -127,14 +127,13 @@ static Constant *FoldBitCast(Constant *V, const Type *DestTy) {
if (ElTy == DPTy->getElementType())
// This GEP is inbounds because all indices are zero.
- return ConstantExpr::getInBoundsGetElementPtr(V, &IdxList[0],
- IdxList.size());
+ return ConstantExpr::getInBoundsGetElementPtr(V, IdxList);
}
// Handle casts from one vector constant to another. We know that the src
// and dest type have the same size (otherwise its an illegal cast).
- if (const VectorType *DestPTy = dyn_cast<VectorType>(DestTy)) {
- if (const VectorType *SrcTy = dyn_cast<VectorType>(V->getType())) {
+ if (VectorType *DestPTy = dyn_cast<VectorType>(DestTy)) {
+ if (VectorType *SrcTy = dyn_cast<VectorType>(V->getType())) {
assert(DestPTy->getBitWidth() == SrcTy->getBitWidth() &&
"Not cast between same sized vectors!");
SrcTy = NULL;
@@ -332,15 +331,15 @@ static Constant *ExtractConstantBytes(Constant *C, unsigned ByteStart,
/// return null if no factoring was possible, to avoid endlessly
/// bouncing an unfoldable expression back into the top-level folder.
///
-static Constant *getFoldedSizeOf(const Type *Ty, const Type *DestTy,
+static Constant *getFoldedSizeOf(Type *Ty, Type *DestTy,
bool Folded) {
- if (const ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
+ if (ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
Constant *N = ConstantInt::get(DestTy, ATy->getNumElements());
Constant *E = getFoldedSizeOf(ATy->getElementType(), DestTy, true);
return ConstantExpr::getNUWMul(E, N);
}
- if (const StructType *STy = dyn_cast<StructType>(Ty))
+ if (StructType *STy = dyn_cast<StructType>(Ty))
if (!STy->isPacked()) {
unsigned NumElems = STy->getNumElements();
// An empty struct has size zero.
@@ -364,7 +363,7 @@ static Constant *getFoldedSizeOf(const Type *Ty, const Type *DestTy,
// Pointer size doesn't depend on the pointee type, so canonicalize them
// to an arbitrary pointee.
- if (const PointerType *PTy = dyn_cast<PointerType>(Ty))
+ if (PointerType *PTy = dyn_cast<PointerType>(Ty))
if (!PTy->getElementType()->isIntegerTy(1))
return
getFoldedSizeOf(PointerType::get(IntegerType::get(PTy->getContext(), 1),
@@ -389,11 +388,11 @@ static Constant *getFoldedSizeOf(const Type *Ty, const Type *DestTy,
/// return null if no factoring was possible, to avoid endlessly
/// bouncing an unfoldable expression back into the top-level folder.
///
-static Constant *getFoldedAlignOf(const Type *Ty, const Type *DestTy,
+static Constant *getFoldedAlignOf(Type *Ty, Type *DestTy,
bool Folded) {
// The alignment of an array is equal to the alignment of the
// array element. Note that this is not always true for vectors.
- if (const ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
+ if (ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
Constant *C = ConstantExpr::getAlignOf(ATy->getElementType());
C = ConstantExpr::getCast(CastInst::getCastOpcode(C, false,
DestTy,
@@ -402,7 +401,7 @@ static Constant *getFoldedAlignOf(const Type *Ty, const Type *DestTy,
return C;
}
- if (const StructType *STy = dyn_cast<StructType>(Ty)) {
+ if (StructType *STy = dyn_cast<StructType>(Ty)) {
// Packed structs always have an alignment of 1.
if (STy->isPacked())
return ConstantInt::get(DestTy, 1);
@@ -429,7 +428,7 @@ static Constant *getFoldedAlignOf(const Type *Ty, const Type *DestTy,
// Pointer alignment doesn't depend on the pointee type, so canonicalize them
// to an arbitrary pointee.
- if (const PointerType *PTy = dyn_cast<PointerType>(Ty))
+ if (PointerType *PTy = dyn_cast<PointerType>(Ty))
if (!PTy->getElementType()->isIntegerTy(1))
return
getFoldedAlignOf(PointerType::get(IntegerType::get(PTy->getContext(),
@@ -455,10 +454,10 @@ static Constant *getFoldedAlignOf(const Type *Ty, const Type *DestTy,
/// return null if no factoring was possible, to avoid endlessly
/// bouncing an unfoldable expression back into the top-level folder.
///
-static Constant *getFoldedOffsetOf(const Type *Ty, Constant *FieldNo,
- const Type *DestTy,
+static Constant *getFoldedOffsetOf(Type *Ty, Constant *FieldNo,
+ Type *DestTy,
bool Folded) {
- if (const ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
+ if (ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
Constant *N = ConstantExpr::getCast(CastInst::getCastOpcode(FieldNo, false,
DestTy, false),
FieldNo, DestTy);
@@ -466,7 +465,7 @@ static Constant *getFoldedOffsetOf(const Type *Ty, Constant *FieldNo,
return ConstantExpr::getNUWMul(E, N);
}
- if (const StructType *STy = dyn_cast<StructType>(Ty))
+ if (StructType *STy = dyn_cast<StructType>(Ty))
if (!STy->isPacked()) {
unsigned NumElems = STy->getNumElements();
// An empty struct has no members.
@@ -506,7 +505,7 @@ static Constant *getFoldedOffsetOf(const Type *Ty, Constant *FieldNo,
}
Constant *llvm::ConstantFoldCastInstruction(unsigned opc, Constant *V,
- const Type *DestTy) {
+ Type *DestTy) {
if (isa<UndefValue>(V)) {
// zext(undef) = 0, because the top bits will be zero.
// sext(undef) = 0, because the top bits will all be the same.
@@ -554,8 +553,8 @@ Constant *llvm::ConstantFoldCastInstruction(unsigned opc, Constant *V,
cast<VectorType>(DestTy)->getNumElements() ==
CV->getType()->getNumElements()) {
std::vector<Constant*> res;
- const VectorType *DestVecTy = cast<VectorType>(DestTy);
- const Type *DstEltTy = DestVecTy->getElementType();
+ VectorType *DestVecTy = cast<VectorType>(DestTy);
+ Type *DstEltTy = DestVecTy->getElementType();
for (unsigned i = 0, e = CV->getType()->getNumElements(); i != e; ++i)
res.push_back(ConstantExpr::getCast(opc,
CV->getOperand(i), DstEltTy));
@@ -590,7 +589,7 @@ Constant *llvm::ConstantFoldCastInstruction(unsigned opc, Constant *V,
uint32_t DestBitWidth = cast<IntegerType>(DestTy)->getBitWidth();
(void) V.convertToInteger(x, DestBitWidth, opc==Instruction::FPToSI,
APFloat::rmTowardZero, &ignored);
- APInt Val(DestBitWidth, 2, x);
+ APInt Val(DestBitWidth, x);
return ConstantInt::get(FPC->getContext(), Val);
}
return 0; // Can't fold.
@@ -608,7 +607,7 @@ Constant *llvm::ConstantFoldCastInstruction(unsigned opc, Constant *V,
if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V))
if (CE->getOpcode() == Instruction::GetElementPtr &&
CE->getOperand(0)->isNullValue()) {
- const Type *Ty =
+ Type *Ty =
cast<PointerType>(CE->getOperand(0)->getType())->getElementType();
if (CE->getNumOperands() == 2) {
// Handle a sizeof-like expression.
@@ -623,7 +622,7 @@ Constant *llvm::ConstantFoldCastInstruction(unsigned opc, Constant *V,
} else if (CE->getNumOperands() == 3 &&
CE->getOperand(1)->isNullValue()) {
// Handle an alignof-like expression.
- if (const StructType *STy = dyn_cast<StructType>(Ty))
+ if (StructType *STy = dyn_cast<StructType>(Ty))
if (!STy->isPacked()) {
ConstantInt *CI = cast<ConstantInt>(CE->getOperand(2));
if (CI->isOne() &&
@@ -701,7 +700,7 @@ Constant *llvm::ConstantFoldSelectInstruction(Constant *Cond,
if (CondV->isAllOnesValue()) return V1;
- const VectorType *VTy = cast<VectorType>(V1->getType());
+ VectorType *VTy = cast<VectorType>(V1->getType());
ConstantVector *CP1 = dyn_cast<ConstantVector>(V1);
ConstantVector *CP2 = dyn_cast<ConstantVector>(V2);
@@ -709,7 +708,7 @@ Constant *llvm::ConstantFoldSelectInstruction(Constant *Cond,
(CP2 || isa<ConstantAggregateZero>(V2))) {
// Find the element type of the returned vector
- const Type *EltTy = VTy->getElementType();
+ Type *EltTy = VTy->getElementType();
unsigned NumElem = VTy->getNumElements();
std::vector<Constant*> Res(NumElem);
@@ -762,10 +761,14 @@ Constant *llvm::ConstantFoldExtractElementInstruction(Constant *Val,
if (ConstantVector *CVal = dyn_cast<ConstantVector>(Val)) {
if (ConstantInt *CIdx = dyn_cast<ConstantInt>(Idx)) {
+ uint64_t Index = CIdx->getZExtValue();
+ if (Index >= CVal->getNumOperands())
+ // ee({w,x,y,z}, wrong_value) -> undef
+ return UndefValue::get(cast<VectorType>(Val->getType())->getElementType());
return CVal->getOperand(CIdx->getZExtValue());
} else if (isa<UndefValue>(Idx)) {
- // ee({w,x,y,z}, undef) -> w (an arbitrary value).
- return CVal->getOperand(0);
+ // ee({w,x,y,z}, undef) -> undef
+ return UndefValue::get(cast<VectorType>(Val->getType())->getElementType());
}
}
return 0;
@@ -834,7 +837,7 @@ static Constant *GetVectorElement(Constant *C, unsigned EltNo) {
if (ConstantVector *CV = dyn_cast<ConstantVector>(C))
return CV->getOperand(EltNo);
- const Type *EltTy = cast<VectorType>(C->getType())->getElementType();
+ Type *EltTy = cast<VectorType>(C->getType())->getElementType();
if (isa<ConstantAggregateZero>(C))
return Constant::getNullValue(EltTy);
if (isa<UndefValue>(C))
@@ -850,7 +853,7 @@ Constant *llvm::ConstantFoldShuffleVectorInstruction(Constant *V1,
unsigned MaskNumElts = cast<VectorType>(Mask->getType())->getNumElements();
unsigned SrcNumElts = cast<VectorType>(V1->getType())->getNumElements();
- const Type *EltTy = cast<VectorType>(V1->getType())->getElementType();
+ Type *EltTy = cast<VectorType>(V1->getType())->getElementType();
// Loop over the shuffle mask, evaluating each element.
SmallVector<Constant*, 32> Result;
@@ -922,16 +925,16 @@ Constant *llvm::ConstantFoldInsertValueInstruction(Constant *Agg,
// Otherwise break the aggregate undef into multiple undefs and do
// the insertion.
- const CompositeType *AggTy = cast<CompositeType>(Agg->getType());
+ CompositeType *AggTy = cast<CompositeType>(Agg->getType());
unsigned numOps;
- if (const ArrayType *AR = dyn_cast<ArrayType>(AggTy))
+ if (ArrayType *AR = dyn_cast<ArrayType>(AggTy))
numOps = AR->getNumElements();
else
numOps = cast<StructType>(AggTy)->getNumElements();
std::vector<Constant*> Ops(numOps);
for (unsigned i = 0; i < numOps; ++i) {
- const Type *MemberTy = AggTy->getTypeAtIndex(i);
+ Type *MemberTy = AggTy->getTypeAtIndex(i);
Constant *Op =
(Idxs[0] == i) ?
ConstantFoldInsertValueInstruction(UndefValue::get(MemberTy),
@@ -940,7 +943,7 @@ Constant *llvm::ConstantFoldInsertValueInstruction(Constant *Agg,
Ops[i] = Op;
}
- if (const StructType* ST = dyn_cast<StructType>(AggTy))
+ if (StructType* ST = dyn_cast<StructType>(AggTy))
return ConstantStruct::get(ST, Ops);
return ConstantArray::get(cast<ArrayType>(AggTy), Ops);
}
@@ -953,16 +956,16 @@ Constant *llvm::ConstantFoldInsertValueInstruction(Constant *Agg,
// Otherwise break the aggregate zero into multiple zeros and do
// the insertion.
- const CompositeType *AggTy = cast<CompositeType>(Agg->getType());
+ CompositeType *AggTy = cast<CompositeType>(Agg->getType());
unsigned numOps;
- if (const ArrayType *AR = dyn_cast<ArrayType>(AggTy))
+ if (ArrayType *AR = dyn_cast<ArrayType>(AggTy))
numOps = AR->getNumElements();
else
numOps = cast<StructType>(AggTy)->getNumElements();
std::vector<Constant*> Ops(numOps);
for (unsigned i = 0; i < numOps; ++i) {
- const Type *MemberTy = AggTy->getTypeAtIndex(i);
+ Type *MemberTy = AggTy->getTypeAtIndex(i);
Constant *Op =
(Idxs[0] == i) ?
ConstantFoldInsertValueInstruction(Constant::getNullValue(MemberTy),
@@ -971,7 +974,7 @@ Constant *llvm::ConstantFoldInsertValueInstruction(Constant *Agg,
Ops[i] = Op;
}
- if (const StructType *ST = dyn_cast<StructType>(AggTy))
+ if (StructType *ST = dyn_cast<StructType>(AggTy))
return ConstantStruct::get(ST, Ops);
return ConstantArray::get(cast<ArrayType>(AggTy), Ops);
}
@@ -986,7 +989,7 @@ Constant *llvm::ConstantFoldInsertValueInstruction(Constant *Agg,
Ops[i] = Op;
}
- if (const StructType* ST = dyn_cast<StructType>(Agg->getType()))
+ if (StructType* ST = dyn_cast<StructType>(Agg->getType()))
return ConstantStruct::get(ST, Ops);
return ConstantArray::get(cast<ArrayType>(Agg->getType()), Ops);
}
@@ -1265,13 +1268,13 @@ Constant *llvm::ConstantFoldBinaryInstruction(unsigned Opcode,
return ConstantFP::get(C1->getContext(), C3V);
}
}
- } else if (const VectorType *VTy = dyn_cast<VectorType>(C1->getType())) {
+ } else if (VectorType *VTy = dyn_cast<VectorType>(C1->getType())) {
ConstantVector *CP1 = dyn_cast<ConstantVector>(C1);
ConstantVector *CP2 = dyn_cast<ConstantVector>(C2);
if ((CP1 != NULL || isa<ConstantAggregateZero>(C1)) &&
(CP2 != NULL || isa<ConstantAggregateZero>(C2))) {
std::vector<Constant*> Res;
- const Type* EltTy = VTy->getElementType();
+ Type* EltTy = VTy->getElementType();
Constant *C1 = 0;
Constant *C2 = 0;
switch (Opcode) {
@@ -1461,8 +1464,8 @@ Constant *llvm::ConstantFoldBinaryInstruction(unsigned Opcode,
/// isMaybeZeroSizedType - This type is possibly zero-sized if it's an array
/// or structure of zero-sized types. The only leaf zero-sized type is an
/// empty structure; an opaque struct might be zero-sized, hence "maybe".
-static bool isMaybeZeroSizedType(const Type *Ty) {
- if (const StructType *STy = dyn_cast<StructType>(Ty)) {
+static bool isMaybeZeroSizedType(Type *Ty) {
+ if (StructType *STy = dyn_cast<StructType>(Ty)) {
if (STy->isOpaque()) return true; // Can't say.
// If all of elements have zero size, this does too.
@@ -1470,7 +1473,7 @@ static bool isMaybeZeroSizedType(const Type *Ty) {
if (!isMaybeZeroSizedType(STy->getElementType(i))) return false;
return true;
- } else if (const ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
+ } else if (ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
return isMaybeZeroSizedType(ATy->getElementType());
}
return false;
@@ -1483,7 +1486,7 @@ static bool isMaybeZeroSizedType(const Type *Ty) {
/// first is less than the second, return -1; if the second is less than the
/// first, return 1. If the constants are not integral, return -2.
///
-static int IdxCompare(Constant *C1, Constant *C2, const Type *ElTy) {
+static int IdxCompare(Constant *C1, Constant *C2, Type *ElTy) {
if (C1 == C2) return 0;
// Ok, we found a different index. If they are not ConstantInt, we can't do
@@ -1832,8 +1835,8 @@ static ICmpInst::Predicate evaluateICmpRelation(Constant *V1, Constant *V2,
Constant *llvm::ConstantFoldCompareInstruction(unsigned short pred,
Constant *C1, Constant *C2) {
- const Type *ResultTy;
- if (const VectorType *VT = dyn_cast<VectorType>(C1->getType()))
+ Type *ResultTy;
+ if (VectorType *VT = dyn_cast<VectorType>(C1->getType()))
ResultTy = VectorType::get(Type::getInt1Ty(C1->getContext()),
VT->getNumElements());
else
@@ -2146,9 +2149,9 @@ Constant *llvm::ConstantFoldCompareInstruction(unsigned short pred,
/// isInBoundsIndices - Test whether the given sequence of *normalized* indices
/// is "inbounds".
template<typename IndexTy>
-static bool isInBoundsIndices(IndexTy const *Idxs, size_t NumIdx) {
+static bool isInBoundsIndices(ArrayRef<IndexTy> Idxs) {
// No indices means nothing that could be out of bounds.
- if (NumIdx == 0) return true;
+ if (Idxs.empty()) return true;
// If the first index is zero, it's in bounds.
if (cast<Constant>(Idxs[0])->isNullValue()) return true;
@@ -2157,7 +2160,7 @@ static bool isInBoundsIndices(IndexTy const *Idxs, size_t NumIdx) {
// by the one-past-the-end rule.
if (!cast<ConstantInt>(Idxs[0])->isOne())
return false;
- for (unsigned i = 1, e = NumIdx; i != e; ++i)
+ for (unsigned i = 1, e = Idxs.size(); i != e; ++i)
if (!cast<Constant>(Idxs[i])->isNullValue())
return false;
return true;
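
These hunks are part of a broader migration from pointer-plus-length parameters to ArrayRef, which carries both in one value. A minimal sketch of the conversions an ArrayRef parameter accepts; the helper below is hypothetical, not part of LLVM:

    #include "llvm/ADT/ArrayRef.h"
    #include "llvm/ADT/SmallVector.h"
    using namespace llvm;

    // Hypothetical helper, mirroring the new isInBoundsIndices-style signature.
    static unsigned countOnes(ArrayRef<int> Vals) {
      unsigned N = 0;
      for (unsigned i = 0, e = Vals.size(); i != e; ++i)
        if (Vals[i] == 1) ++N;
      return N;
    }

    static void arrayRefDemo() {
      int A[3] = { 1, 0, 1 };
      SmallVector<int, 4> V(A, A + 3);
      countOnes(A);                  // a C array converts implicitly
      countOnes(V);                  // so does a SmallVector
      countOnes(makeArrayRef(A, 2)); // or wrap an explicit pointer+length
    }
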
@@ -2166,31 +2169,29 @@ static bool isInBoundsIndices(IndexTy const *Idxs, size_t NumIdx) {
template<typename IndexTy>
static Constant *ConstantFoldGetElementPtrImpl(Constant *C,
bool inBounds,
- IndexTy const *Idxs,
- unsigned NumIdx) {
- if (NumIdx == 0) return C;
+ ArrayRef<IndexTy> Idxs) {
+ if (Idxs.empty()) return C;
Constant *Idx0 = cast<Constant>(Idxs[0]);
- if ((NumIdx == 1 && Idx0->isNullValue()))
+ if ((Idxs.size() == 1 && Idx0->isNullValue()))
return C;
if (isa<UndefValue>(C)) {
- const PointerType *Ptr = cast<PointerType>(C->getType());
- const Type *Ty = GetElementPtrInst::getIndexedType(Ptr, Idxs, Idxs+NumIdx);
+ PointerType *Ptr = cast<PointerType>(C->getType());
+ Type *Ty = GetElementPtrInst::getIndexedType(Ptr, Idxs);
assert(Ty != 0 && "Invalid indices for GEP!");
return UndefValue::get(PointerType::get(Ty, Ptr->getAddressSpace()));
}
if (C->isNullValue()) {
bool isNull = true;
- for (unsigned i = 0, e = NumIdx; i != e; ++i)
+ for (unsigned i = 0, e = Idxs.size(); i != e; ++i)
if (!cast<Constant>(Idxs[i])->isNullValue()) {
isNull = false;
break;
}
if (isNull) {
- const PointerType *Ptr = cast<PointerType>(C->getType());
- const Type *Ty = GetElementPtrInst::getIndexedType(Ptr, Idxs,
- Idxs+NumIdx);
+ PointerType *Ptr = cast<PointerType>(C->getType());
+ Type *Ty = GetElementPtrInst::getIndexedType(Ptr, Idxs);
assert(Ty != 0 && "Invalid indices for GEP!");
return ConstantPointerNull::get(PointerType::get(Ty,
Ptr->getAddressSpace()));
@@ -2203,14 +2204,14 @@ static Constant *ConstantFoldGetElementPtrImpl(Constant *C,
// getelementptr instructions into a single instruction.
//
if (CE->getOpcode() == Instruction::GetElementPtr) {
- const Type *LastTy = 0;
+ Type *LastTy = 0;
for (gep_type_iterator I = gep_type_begin(CE), E = gep_type_end(CE);
I != E; ++I)
LastTy = *I;
if ((LastTy && LastTy->isArrayTy()) || Idx0->isNullValue()) {
SmallVector<Value*, 16> NewIndices;
- NewIndices.reserve(NumIdx + CE->getNumOperands());
+ NewIndices.reserve(Idxs.size() + CE->getNumOperands());
for (unsigned i = 1, e = CE->getNumOperands()-1; i != e; ++i)
NewIndices.push_back(CE->getOperand(i));
@@ -2219,9 +2220,9 @@ static Constant *ConstantFoldGetElementPtrImpl(Constant *C,
Constant *Combined = CE->getOperand(CE->getNumOperands()-1);
// Otherwise it must be an array.
if (!Idx0->isNullValue()) {
- const Type *IdxTy = Combined->getType();
+ Type *IdxTy = Combined->getType();
if (IdxTy != Idx0->getType()) {
- const Type *Int64Ty = Type::getInt64Ty(IdxTy->getContext());
+ Type *Int64Ty = Type::getInt64Ty(IdxTy->getContext());
Constant *C1 = ConstantExpr::getSExtOrBitCast(Idx0, Int64Ty);
Constant *C2 = ConstantExpr::getSExtOrBitCast(Combined, Int64Ty);
Combined = ConstantExpr::get(Instruction::Add, C1, C2);
@@ -2232,14 +2233,11 @@ static Constant *ConstantFoldGetElementPtrImpl(Constant *C,
}
NewIndices.push_back(Combined);
- NewIndices.append(Idxs+1, Idxs+NumIdx);
- return (inBounds && cast<GEPOperator>(CE)->isInBounds()) ?
- ConstantExpr::getInBoundsGetElementPtr(CE->getOperand(0),
- &NewIndices[0],
- NewIndices.size()) :
- ConstantExpr::getGetElementPtr(CE->getOperand(0),
- &NewIndices[0],
- NewIndices.size());
+ NewIndices.append(Idxs.begin() + 1, Idxs.end());
+ return
+ ConstantExpr::getGetElementPtr(CE->getOperand(0), NewIndices,
+ inBounds &&
+ cast<GEPOperator>(CE)->isInBounds());
}
}
@@ -2248,18 +2246,16 @@ static Constant *ConstantFoldGetElementPtrImpl(Constant *C,
// i64 0, i64 0)
// To: i32* getelementptr ([3 x i32]* %X, i64 0, i64 0)
//
- if (CE->isCast() && NumIdx > 1 && Idx0->isNullValue()) {
- if (const PointerType *SPT =
+ if (CE->isCast() && Idxs.size() > 1 && Idx0->isNullValue()) {
+ if (PointerType *SPT =
dyn_cast<PointerType>(CE->getOperand(0)->getType()))
- if (const ArrayType *SAT = dyn_cast<ArrayType>(SPT->getElementType()))
- if (const ArrayType *CAT =
+ if (ArrayType *SAT = dyn_cast<ArrayType>(SPT->getElementType()))
+ if (ArrayType *CAT =
dyn_cast<ArrayType>(cast<PointerType>(C->getType())->getElementType()))
if (CAT->getElementType() == SAT->getElementType())
- return inBounds ?
- ConstantExpr::getInBoundsGetElementPtr(
- (Constant*)CE->getOperand(0), Idxs, NumIdx) :
- ConstantExpr::getGetElementPtr(
- (Constant*)CE->getOperand(0), Idxs, NumIdx);
+ return
+ ConstantExpr::getGetElementPtr((Constant*)CE->getOperand(0),
+ Idxs, inBounds);
}
}
@@ -2268,19 +2264,19 @@ static Constant *ConstantFoldGetElementPtrImpl(Constant *C,
// out into preceding dimensions.
bool Unknown = false;
SmallVector<Constant *, 8> NewIdxs;
- const Type *Ty = C->getType();
- const Type *Prev = 0;
- for (unsigned i = 0; i != NumIdx;
+ Type *Ty = C->getType();
+ Type *Prev = 0;
+ for (unsigned i = 0, e = Idxs.size(); i != e;
Prev = Ty, Ty = cast<CompositeType>(Ty)->getTypeAtIndex(Idxs[i]), ++i) {
if (ConstantInt *CI = dyn_cast<ConstantInt>(Idxs[i])) {
- if (const ArrayType *ATy = dyn_cast<ArrayType>(Ty))
+ if (ArrayType *ATy = dyn_cast<ArrayType>(Ty))
if (ATy->getNumElements() <= INT64_MAX &&
ATy->getNumElements() != 0 &&
CI->getSExtValue() >= (int64_t)ATy->getNumElements()) {
if (isa<SequentialType>(Prev)) {
// It's out of range, but we can factor it into the prior
// dimension.
- NewIdxs.resize(NumIdx);
+ NewIdxs.resize(Idxs.size());
ConstantInt *Factor = ConstantInt::get(CI->getType(),
ATy->getNumElements());
NewIdxs[i] = ConstantExpr::getSRem(CI, Factor);
@@ -2312,33 +2308,28 @@ static Constant *ConstantFoldGetElementPtrImpl(Constant *C,
// If we did any factoring, start over with the adjusted indices.
if (!NewIdxs.empty()) {
- for (unsigned i = 0; i != NumIdx; ++i)
+ for (unsigned i = 0, e = Idxs.size(); i != e; ++i)
if (!NewIdxs[i]) NewIdxs[i] = cast<Constant>(Idxs[i]);
- return inBounds ?
- ConstantExpr::getInBoundsGetElementPtr(C, NewIdxs.data(),
- NewIdxs.size()) :
- ConstantExpr::getGetElementPtr(C, NewIdxs.data(), NewIdxs.size());
+ return ConstantExpr::getGetElementPtr(C, NewIdxs, inBounds);
}
// If all indices are known integers and normalized, we can do a simple
// check for the "inbounds" property.
if (!Unknown && !inBounds &&
- isa<GlobalVariable>(C) && isInBoundsIndices(Idxs, NumIdx))
- return ConstantExpr::getInBoundsGetElementPtr(C, Idxs, NumIdx);
+ isa<GlobalVariable>(C) && isInBoundsIndices(Idxs))
+ return ConstantExpr::getInBoundsGetElementPtr(C, Idxs);
return 0;
}
Constant *llvm::ConstantFoldGetElementPtr(Constant *C,
bool inBounds,
- Constant* const *Idxs,
- unsigned NumIdx) {
- return ConstantFoldGetElementPtrImpl(C, inBounds, Idxs, NumIdx);
+ ArrayRef<Constant *> Idxs) {
+ return ConstantFoldGetElementPtrImpl(C, inBounds, Idxs);
}
Constant *llvm::ConstantFoldGetElementPtr(Constant *C,
bool inBounds,
- Value* const *Idxs,
- unsigned NumIdx) {
- return ConstantFoldGetElementPtrImpl(C, inBounds, Idxs, NumIdx);
+ ArrayRef<Value *> Idxs) {
+ return ConstantFoldGetElementPtrImpl(C, inBounds, Idxs);
}
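
A hedged sketch of the index normalization implemented above: an out-of-range constant index is factored into the preceding array dimension. It assumes the LLVM 3.0 API; the global and the index values are illustrative:

    #include "llvm/Constants.h"
    #include "llvm/DerivedTypes.h"
    #include "llvm/GlobalVariable.h"
    #include "llvm/Module.h"
    using namespace llvm;

    static Constant *gepDemo(Module &M) {
      LLVMContext &Ctx = M.getContext();
      Type *I32 = Type::getInt32Ty(Ctx);
      Type *I64 = Type::getInt64Ty(Ctx);
      Type *Arr = ArrayType::get(ArrayType::get(I32, 3), 2); // [2 x [3 x i32]]
      GlobalVariable *G = new GlobalVariable(M, Arr, /*isConstant=*/false,
                                             GlobalValue::ExternalLinkage,
                                             /*Initializer=*/0, "g");
      Constant *Idxs[] = { ConstantInt::get(I64, 0), ConstantInt::get(I64, 0),
                           ConstantInt::get(I64, 4) };
      // Index 4 overruns the inner [3 x i32]; the folder factors it into the
      // outer dimension, yielding gep @g, 0, 1, 1.
      return ConstantExpr::getGetElementPtr(G, Idxs);
    }
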
diff --git a/lib/VMCore/ConstantFold.h b/lib/VMCore/ConstantFold.h
index 653a1c3..e12f27a 100644
--- a/lib/VMCore/ConstantFold.h
+++ b/lib/VMCore/ConstantFold.h
@@ -30,7 +30,7 @@ namespace llvm {
Constant *ConstantFoldCastInstruction(
unsigned opcode, ///< The opcode of the cast
Constant *V, ///< The source constant
- const Type *DestTy ///< The destination type
+ Type *DestTy ///< The destination type
);
Constant *ConstantFoldSelectInstruction(Constant *Cond,
Constant *V1, Constant *V2);
@@ -48,9 +48,9 @@ namespace llvm {
Constant *ConstantFoldCompareInstruction(unsigned short predicate,
Constant *C1, Constant *C2);
Constant *ConstantFoldGetElementPtr(Constant *C, bool inBounds,
- Constant* const *Idxs, unsigned NumIdx);
+ ArrayRef<Constant *> Idxs);
Constant *ConstantFoldGetElementPtr(Constant *C, bool inBounds,
- Value* const *Idxs, unsigned NumIdx);
+ ArrayRef<Value *> Idxs);
} // End llvm namespace
#endif
diff --git a/lib/VMCore/Constants.cpp b/lib/VMCore/Constants.cpp
index 316c884..a84a046 100644
--- a/lib/VMCore/Constants.cpp
+++ b/lib/VMCore/Constants.cpp
@@ -62,8 +62,23 @@ bool Constant::isNullValue() const {
return isa<ConstantAggregateZero>(this) || isa<ConstantPointerNull>(this);
}
+bool Constant::isAllOnesValue() const {
+ // Check for -1 integers
+ if (const ConstantInt *CI = dyn_cast<ConstantInt>(this))
+ return CI->isMinusOne();
+
+ // Check for FP which are bitcasted from -1 integers
+ if (const ConstantFP *CFP = dyn_cast<ConstantFP>(this))
+ return CFP->getValueAPF().bitcastToAPInt().isAllOnesValue();
+
+ // Check for constant vectors
+ if (const ConstantVector *CV = dyn_cast<ConstantVector>(this))
+ return CV->isAllOnesValue();
+
+ return false;
+}
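
A usage sketch for the new Constant::isAllOnesValue(), which covers -1 integers, FP values whose bit pattern is all ones, and splat vectors; LLVM 3.0 headers assumed, values illustrative:

    #include "llvm/Constants.h"
    #include "llvm/ADT/SmallVector.h"
    using namespace llvm;

    static void allOnesDemo(LLVMContext &Ctx) {
      Type *I8 = Type::getInt8Ty(Ctx);
      Constant *MinusOne = ConstantInt::get(I8, -1, /*isSigned=*/true);
      MinusOne->isAllOnesValue();            // true: i8 -1 is all ones

      SmallVector<Constant *, 4> Splat(4, MinusOne);
      Constant *Vec = ConstantVector::get(Splat);
      Vec->isAllOnesValue();                 // true: <4 x i8> splat of -1
    }
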
// Constructor to create a '0' constant of arbitrary type...
-Constant *Constant::getNullValue(const Type *Ty) {
+Constant *Constant::getNullValue(Type *Ty) {
switch (Ty->getTypeID()) {
case Type::IntegerTyID:
return ConstantInt::get(Ty, 0);
@@ -90,30 +105,30 @@ Constant *Constant::getNullValue(const Type *Ty) {
return ConstantAggregateZero::get(Ty);
default:
// Function, Label, or Opaque type?
- assert(!"Cannot create a null constant of that type!");
+ assert(0 && "Cannot create a null constant of that type!");
return 0;
}
}
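
For reference, a short sketch of what getNullValue hands back per type kind (the types are illustrative):

    #include "llvm/Constants.h"
    #include "llvm/DerivedTypes.h"
    using namespace llvm;

    static void nullDemo(LLVMContext &Ctx) {
      Type *I32 = Type::getInt32Ty(Ctx);
      Constant::getNullValue(I32);                         // i32 0
      Constant::getNullValue(Type::getDoubleTy(Ctx));      // double 0.0
      Constant::getNullValue(PointerType::getUnqual(I32)); // i32* null
      Constant::getNullValue(VectorType::get(I32, 4));     // zeroinitializer
    }
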
-Constant *Constant::getIntegerValue(const Type *Ty, const APInt &V) {
- const Type *ScalarTy = Ty->getScalarType();
+Constant *Constant::getIntegerValue(Type *Ty, const APInt &V) {
+ Type *ScalarTy = Ty->getScalarType();
// Create the base integer constant.
Constant *C = ConstantInt::get(Ty->getContext(), V);
// Convert an integer to a pointer, if necessary.
- if (const PointerType *PTy = dyn_cast<PointerType>(ScalarTy))
+ if (PointerType *PTy = dyn_cast<PointerType>(ScalarTy))
C = ConstantExpr::getIntToPtr(C, PTy);
// Broadcast a scalar to a vector, if necessary.
- if (const VectorType *VTy = dyn_cast<VectorType>(Ty))
+ if (VectorType *VTy = dyn_cast<VectorType>(Ty))
C = ConstantVector::get(std::vector<Constant *>(VTy->getNumElements(), C));
return C;
}
-Constant *Constant::getAllOnesValue(const Type *Ty) {
- if (const IntegerType *ITy = dyn_cast<IntegerType>(Ty))
+Constant *Constant::getAllOnesValue(Type *Ty) {
+ if (IntegerType *ITy = dyn_cast<IntegerType>(Ty))
return ConstantInt::get(Ty->getContext(),
APInt::getAllOnesValue(ITy->getBitWidth()));
@@ -124,9 +139,9 @@ Constant *Constant::getAllOnesValue(const Type *Ty) {
}
SmallVector<Constant*, 16> Elts;
- const VectorType *VTy = cast<VectorType>(Ty);
+ VectorType *VTy = cast<VectorType>(Ty);
Elts.resize(VTy->getNumElements(), getAllOnesValue(VTy->getElementType()));
- assert(Elts[0] && "Not a vector integer type!");
+ assert(Elts[0] && "Invalid AllOnes value!");
return cast<ConstantVector>(ConstantVector::get(Elts));
}
@@ -269,7 +284,7 @@ void Constant::getVectorElements(SmallVectorImpl<Constant*> &Elts) const {
return;
}
- const VectorType *VT = cast<VectorType>(getType());
+ VectorType *VT = cast<VectorType>(getType());
if (isa<ConstantAggregateZero>(this)) {
Elts.assign(VT->getNumElements(),
Constant::getNullValue(VT->getElementType()));
@@ -343,7 +358,7 @@ void Constant::removeDeadConstantUsers() const {
// ConstantInt
//===----------------------------------------------------------------------===//
-ConstantInt::ConstantInt(const IntegerType *Ty, const APInt& V)
+ConstantInt::ConstantInt(IntegerType *Ty, const APInt& V)
: Constant(Ty, ConstantIntVal, 0, 0), Val(V) {
assert(V.getBitWidth() == Ty->getBitWidth() && "Invalid constant for type");
}
@@ -362,8 +377,8 @@ ConstantInt *ConstantInt::getFalse(LLVMContext &Context) {
return pImpl->TheFalseVal;
}
-Constant *ConstantInt::getTrue(const Type *Ty) {
- const VectorType *VTy = dyn_cast<VectorType>(Ty);
+Constant *ConstantInt::getTrue(Type *Ty) {
+ VectorType *VTy = dyn_cast<VectorType>(Ty);
if (!VTy) {
assert(Ty->isIntegerTy(1) && "True must be i1 or vector of i1.");
return ConstantInt::getTrue(Ty->getContext());
@@ -375,8 +390,8 @@ Constant *ConstantInt::getTrue(const Type *Ty) {
return ConstantVector::get(Splat);
}
-Constant *ConstantInt::getFalse(const Type *Ty) {
- const VectorType *VTy = dyn_cast<VectorType>(Ty);
+Constant *ConstantInt::getFalse(Type *Ty) {
+ VectorType *VTy = dyn_cast<VectorType>(Ty);
if (!VTy) {
assert(Ty->isIntegerTy(1) && "False must be i1 or vector of i1.");
return ConstantInt::getFalse(Ty->getContext());
@@ -396,7 +411,7 @@ Constant *ConstantInt::getFalse(const Type *Ty) {
// invariant which generates an assertion.
ConstantInt *ConstantInt::get(LLVMContext &Context, const APInt &V) {
// Get the corresponding integer type for the bit width of the value.
- const IntegerType *ITy = IntegerType::get(Context, V.getBitWidth());
+ IntegerType *ITy = IntegerType::get(Context, V.getBitWidth());
// get an existing value or the insertion position
DenseMapAPIntKeyInfo::KeyTy Key(V, ITy);
ConstantInt *&Slot = Context.pImpl->IntConstants[Key];
@@ -404,44 +419,44 @@ ConstantInt *ConstantInt::get(LLVMContext &Context, const APInt &V) {
return Slot;
}
-Constant *ConstantInt::get(const Type *Ty, uint64_t V, bool isSigned) {
+Constant *ConstantInt::get(Type *Ty, uint64_t V, bool isSigned) {
Constant *C = get(cast<IntegerType>(Ty->getScalarType()), V, isSigned);
// For vectors, broadcast the value.
- if (const VectorType *VTy = dyn_cast<VectorType>(Ty))
+ if (VectorType *VTy = dyn_cast<VectorType>(Ty))
return ConstantVector::get(SmallVector<Constant*,
16>(VTy->getNumElements(), C));
return C;
}
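
A sketch of the broadcast behavior preserved here: ConstantInt::get splats a scalar across vector types (type and value illustrative):

    #include "llvm/Constants.h"
    #include "llvm/DerivedTypes.h"
    using namespace llvm;

    static Constant *splatDemo(LLVMContext &Ctx) {
      Type *V4I16 = VectorType::get(Type::getInt16Ty(Ctx), 4);
      return ConstantInt::get(V4I16, 7);  // <4 x i16> <7, 7, 7, 7>
    }
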
-ConstantInt* ConstantInt::get(const IntegerType* Ty, uint64_t V,
+ConstantInt* ConstantInt::get(IntegerType* Ty, uint64_t V,
bool isSigned) {
return get(Ty->getContext(), APInt(Ty->getBitWidth(), V, isSigned));
}
-ConstantInt* ConstantInt::getSigned(const IntegerType* Ty, int64_t V) {
+ConstantInt* ConstantInt::getSigned(IntegerType* Ty, int64_t V) {
return get(Ty, V, true);
}
-Constant *ConstantInt::getSigned(const Type *Ty, int64_t V) {
+Constant *ConstantInt::getSigned(Type *Ty, int64_t V) {
return get(Ty, V, true);
}
-Constant *ConstantInt::get(const Type* Ty, const APInt& V) {
+Constant *ConstantInt::get(Type* Ty, const APInt& V) {
ConstantInt *C = get(Ty->getContext(), V);
assert(C->getType() == Ty->getScalarType() &&
"ConstantInt type doesn't match the type implied by its value!");
// For vectors, broadcast the value.
- if (const VectorType *VTy = dyn_cast<VectorType>(Ty))
+ if (VectorType *VTy = dyn_cast<VectorType>(Ty))
return ConstantVector::get(
SmallVector<Constant *, 16>(VTy->getNumElements(), C));
return C;
}
-ConstantInt* ConstantInt::get(const IntegerType* Ty, StringRef Str,
+ConstantInt* ConstantInt::get(IntegerType* Ty, StringRef Str,
uint8_t radix) {
return get(Ty->getContext(), APInt(Ty->getBitWidth(), Str, radix));
}
@@ -450,7 +465,7 @@ ConstantInt* ConstantInt::get(const IntegerType* Ty, StringRef Str,
// ConstantFP
//===----------------------------------------------------------------------===//
-static const fltSemantics *TypeToFloatSemantics(const Type *Ty) {
+static const fltSemantics *TypeToFloatSemantics(Type *Ty) {
if (Ty->isFloatTy())
return &APFloat::IEEEsingle;
if (Ty->isDoubleTy())
@@ -467,7 +482,7 @@ static const fltSemantics *TypeToFloatSemantics(const Type *Ty) {
/// get() - This returns a constant fp for the specified value in the
/// specified type. This should only be used for simple constant values like
/// 2.0/1.0 etc, that are known-valid both as double and as the target format.
-Constant *ConstantFP::get(const Type* Ty, double V) {
+Constant *ConstantFP::get(Type* Ty, double V) {
LLVMContext &Context = Ty->getContext();
APFloat FV(V);
@@ -477,7 +492,7 @@ Constant *ConstantFP::get(const Type* Ty, double V) {
Constant *C = get(Context, FV);
// For vectors, broadcast the value.
- if (const VectorType *VTy = dyn_cast<VectorType>(Ty))
+ if (VectorType *VTy = dyn_cast<VectorType>(Ty))
return ConstantVector::get(
SmallVector<Constant *, 16>(VTy->getNumElements(), C));
@@ -485,14 +500,14 @@ Constant *ConstantFP::get(const Type* Ty, double V) {
}
-Constant *ConstantFP::get(const Type* Ty, StringRef Str) {
+Constant *ConstantFP::get(Type* Ty, StringRef Str) {
LLVMContext &Context = Ty->getContext();
APFloat FV(*TypeToFloatSemantics(Ty->getScalarType()), Str);
Constant *C = get(Context, FV);
// For vectors, broadcast the value.
- if (const VectorType *VTy = dyn_cast<VectorType>(Ty))
+ if (VectorType *VTy = dyn_cast<VectorType>(Ty))
return ConstantVector::get(
SmallVector<Constant *, 16>(VTy->getNumElements(), C));
@@ -500,7 +515,7 @@ Constant *ConstantFP::get(const Type* Ty, StringRef Str) {
}
-ConstantFP* ConstantFP::getNegativeZero(const Type* Ty) {
+ConstantFP* ConstantFP::getNegativeZero(Type* Ty) {
LLVMContext &Context = Ty->getContext();
APFloat apf = cast<ConstantFP>(Constant::getNullValue(Ty))->getValueAPF();
apf.changeSign();
@@ -508,8 +523,8 @@ ConstantFP* ConstantFP::getNegativeZero(const Type* Ty) {
}
-Constant *ConstantFP::getZeroValueForNegation(const Type* Ty) {
- if (const VectorType *PTy = dyn_cast<VectorType>(Ty))
+Constant *ConstantFP::getZeroValueForNegation(Type* Ty) {
+ if (VectorType *PTy = dyn_cast<VectorType>(Ty))
if (PTy->getElementType()->isFloatingPointTy()) {
SmallVector<Constant*, 16> zeros(PTy->getNumElements(),
getNegativeZero(PTy->getElementType()));
@@ -532,7 +547,7 @@ ConstantFP* ConstantFP::get(LLVMContext &Context, const APFloat& V) {
ConstantFP *&Slot = pImpl->FPConstants[Key];
if (!Slot) {
- const Type *Ty;
+ Type *Ty;
if (&V.getSemantics() == &APFloat::IEEEsingle)
Ty = Type::getFloatTy(Context);
else if (&V.getSemantics() == &APFloat::IEEEdouble)
@@ -552,13 +567,13 @@ ConstantFP* ConstantFP::get(LLVMContext &Context, const APFloat& V) {
return Slot;
}
-ConstantFP *ConstantFP::getInfinity(const Type *Ty, bool Negative) {
+ConstantFP *ConstantFP::getInfinity(Type *Ty, bool Negative) {
const fltSemantics &Semantics = *TypeToFloatSemantics(Ty);
return ConstantFP::get(Ty->getContext(),
APFloat::getInf(Semantics, Negative));
}
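
A quick usage sketch of the ConstantFP helpers touched above (LLVM 3.0 API assumed):

    #include "llvm/Constants.h"
    #include "llvm/DerivedTypes.h"
    using namespace llvm;

    static void fpDemo(LLVMContext &Ctx) {
      ConstantFP::get(Type::getFloatTy(Ctx), 2.0);          // float 2.0
      ConstantFP::getNegativeZero(Type::getDoubleTy(Ctx));  // double -0.0
      ConstantFP::getInfinity(Type::getFloatTy(Ctx), true); // float -inf
    }
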
-ConstantFP::ConstantFP(const Type *Ty, const APFloat& V)
+ConstantFP::ConstantFP(Type *Ty, const APFloat& V)
: Constant(Ty, ConstantFPVal, 0, 0), Val(V) {
assert(&V.getSemantics() == TypeToFloatSemantics(Ty) &&
"FP type Mismatch");
@@ -573,24 +588,19 @@ bool ConstantFP::isExactlyValue(const APFloat &V) const {
//===----------------------------------------------------------------------===//
-ConstantArray::ConstantArray(const ArrayType *T,
- const std::vector<Constant*> &V)
+ConstantArray::ConstantArray(ArrayType *T, ArrayRef<Constant *> V)
: Constant(T, ConstantArrayVal,
OperandTraits<ConstantArray>::op_end(this) - V.size(),
V.size()) {
assert(V.size() == T->getNumElements() &&
"Invalid initializer vector for constant array");
- Use *OL = OperandList;
- for (std::vector<Constant*>::const_iterator I = V.begin(), E = V.end();
- I != E; ++I, ++OL) {
- Constant *C = *I;
- assert(C->getType() == T->getElementType() &&
+ for (unsigned i = 0, e = V.size(); i != e; ++i)
+ assert(V[i]->getType() == T->getElementType() &&
"Initializer for array element doesn't match array element type!");
- *OL = C;
- }
+ std::copy(V.begin(), V.end(), op_begin());
}
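
Building a constant array through the ArrayRef-based API the rewritten constructor supports; a sketch with illustrative element values:

    #include "llvm/Constants.h"
    #include "llvm/DerivedTypes.h"
    using namespace llvm;

    static Constant *arrayDemo(LLVMContext &Ctx) {
      Type *I32 = Type::getInt32Ty(Ctx);
      ArrayType *ATy = ArrayType::get(I32, 3);
      Constant *Vals[] = { ConstantInt::get(I32, 1), ConstantInt::get(I32, 2),
                           ConstantInt::get(I32, 3) };
      return ConstantArray::get(ATy, Vals); // [3 x i32] [i32 1, i32 2, i32 3]
    }
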
-Constant *ConstantArray::get(const ArrayType *Ty, ArrayRef<Constant*> V) {
+Constant *ConstantArray::get(ArrayType *Ty, ArrayRef<Constant*> V) {
for (unsigned i = 0, e = V.size(); i != e; ++i) {
assert(V[i]->getType() == Ty->getElementType() &&
"Wrong type in array element initializer");
@@ -653,25 +663,20 @@ StructType *ConstantStruct::getTypeForElements(ArrayRef<Constant*> V,
}
-ConstantStruct::ConstantStruct(const StructType *T,
- const std::vector<Constant*> &V)
+ConstantStruct::ConstantStruct(StructType *T, ArrayRef<Constant *> V)
: Constant(T, ConstantStructVal,
OperandTraits<ConstantStruct>::op_end(this) - V.size(),
V.size()) {
- assert((T->isOpaque() || V.size() == T->getNumElements()) &&
+ assert(V.size() == T->getNumElements() &&
"Invalid initializer vector for constant structure");
- Use *OL = OperandList;
- for (std::vector<Constant*>::const_iterator I = V.begin(), E = V.end();
- I != E; ++I, ++OL) {
- Constant *C = *I;
- assert((T->isOpaque() || C->getType() == T->getElementType(I-V.begin())) &&
+ for (unsigned i = 0, e = V.size(); i != e; ++i)
+ assert((T->isOpaque() || V[i]->getType() == T->getElementType(i)) &&
"Initializer for struct element doesn't match struct element type!");
- *OL = C;
- }
+ std::copy(V.begin(), V.end(), op_begin());
}
// ConstantStruct accessors.
-Constant *ConstantStruct::get(const StructType *ST, ArrayRef<Constant*> V) {
+Constant *ConstantStruct::get(StructType *ST, ArrayRef<Constant*> V) {
// Create a ConstantAggregateZero value if all elements are zeros.
for (unsigned i = 0, e = V.size(); i != e; ++i)
if (!V[i]->isNullValue())
@@ -682,7 +687,7 @@ Constant *ConstantStruct::get(const StructType *ST, ArrayRef<Constant*> V) {
return ConstantAggregateZero::get(ST);
}
-Constant* ConstantStruct::get(const StructType *T, ...) {
+Constant *ConstantStruct::get(StructType *T, ...) {
va_list ap;
SmallVector<Constant*, 8> Values;
va_start(ap, T);
@@ -692,25 +697,20 @@ Constant* ConstantStruct::get(const StructType *T, ...) {
return get(T, Values);
}
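
The varargs overload above reads constants until a null terminator. A sketch, with an illustrative struct shape:

    #include "llvm/Constants.h"
    #include "llvm/DerivedTypes.h"
    using namespace llvm;

    static Constant *structDemo(LLVMContext &Ctx) {
      Type *I32 = Type::getInt32Ty(Ctx);
      Type *I8  = Type::getInt8Ty(Ctx);
      StructType *ST = StructType::get(I32, I8, NULL);  // {i32, i8}
      return ConstantStruct::get(ST, ConstantInt::get(I32, 1),
                                     ConstantInt::get(I8, 2), NULL);
    }
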
-ConstantVector::ConstantVector(const VectorType *T,
- const std::vector<Constant*> &V)
+ConstantVector::ConstantVector(VectorType *T, ArrayRef<Constant *> V)
: Constant(T, ConstantVectorVal,
OperandTraits<ConstantVector>::op_end(this) - V.size(),
V.size()) {
- Use *OL = OperandList;
- for (std::vector<Constant*>::const_iterator I = V.begin(), E = V.end();
- I != E; ++I, ++OL) {
- Constant *C = *I;
- assert(C->getType() == T->getElementType() &&
+ for (size_t i = 0, e = V.size(); i != e; i++)
+ assert(V[i]->getType() == T->getElementType() &&
"Initializer for vector element doesn't match vector element type!");
- *OL = C;
- }
+ std::copy(V.begin(), V.end(), op_begin());
}
// ConstantVector accessors.
Constant *ConstantVector::get(ArrayRef<Constant*> V) {
assert(!V.empty() && "Vectors can't be empty");
- const VectorType *T = VectorType::get(V.front()->getType(), V.size());
+ VectorType *T = VectorType::get(V.front()->getType(), V.size());
LLVMContextImpl *pImpl = T->getContext().pImpl;
// If this is an all-undef or all-zero vector, return a
@@ -761,7 +761,7 @@ bool ConstantExpr::isGEPWithNoNotionalOverIndexing() const {
for (; GEPI != E; ++GEPI, ++OI) {
ConstantInt *CI = dyn_cast<ConstantInt>(*OI);
if (!CI) return false;
- if (const ArrayType *ATy = dyn_cast<ArrayType>(*GEPI))
+ if (ArrayType *ATy = dyn_cast<ArrayType>(*GEPI))
if (CI->getValue().getActiveBits() > 64 ||
CI->getZExtValue() >= ATy->getNumElements())
return false;
@@ -839,13 +839,13 @@ ConstantExpr::getWithOperandReplaced(unsigned OpNo, Constant *Op) const {
for (unsigned i = 1, e = getNumOperands(); i != e; ++i)
Ops[i-1] = getOperand(i);
if (OpNo == 0)
- return cast<GEPOperator>(this)->isInBounds() ?
- ConstantExpr::getInBoundsGetElementPtr(Op, &Ops[0], Ops.size()) :
- ConstantExpr::getGetElementPtr(Op, &Ops[0], Ops.size());
+ return
+ ConstantExpr::getGetElementPtr(Op, Ops,
+ cast<GEPOperator>(this)->isInBounds());
Ops[OpNo-1] = Op;
- return cast<GEPOperator>(this)->isInBounds() ?
- ConstantExpr::getInBoundsGetElementPtr(getOperand(0), &Ops[0],Ops.size()):
- ConstantExpr::getGetElementPtr(getOperand(0), &Ops[0], Ops.size());
+ return
+ ConstantExpr::getGetElementPtr(getOperand(0), Ops,
+ cast<GEPOperator>(this)->isInBounds());
}
default:
assert(getNumOperands() == 2 && "Must be binary operator?");
@@ -859,7 +859,7 @@ ConstantExpr::getWithOperandReplaced(unsigned OpNo, Constant *Op) const {
/// operands replaced with the specified values. The specified array must
/// have the same number of operands as our current one.
Constant *ConstantExpr::
-getWithOperands(ArrayRef<Constant*> Ops, const Type *Ty) const {
+getWithOperands(ArrayRef<Constant*> Ops, Type *Ty) const {
assert(Ops.size() == getNumOperands() && "Operand count mismatch!");
bool AnyChange = Ty != getType();
for (unsigned i = 0; i != Ops.size(); ++i)
@@ -891,9 +891,9 @@ getWithOperands(ArrayRef<Constant*> Ops, const Type *Ty) const {
case Instruction::ShuffleVector:
return ConstantExpr::getShuffleVector(Ops[0], Ops[1], Ops[2]);
case Instruction::GetElementPtr:
- return cast<GEPOperator>(this)->isInBounds() ?
- ConstantExpr::getInBoundsGetElementPtr(Ops[0], &Ops[1], Ops.size()-1) :
- ConstantExpr::getGetElementPtr(Ops[0], &Ops[1], Ops.size()-1);
+ return
+ ConstantExpr::getGetElementPtr(Ops[0], Ops.slice(1),
+ cast<GEPOperator>(this)->isInBounds());
case Instruction::ICmp:
case Instruction::FCmp:
return ConstantExpr::getCompare(getPredicate(), Ops[0], Ops[1]);
@@ -907,7 +907,7 @@ getWithOperands(ArrayRef<Constant*> Ops, const Type *Ty) const {
//===----------------------------------------------------------------------===//
// isValueValidForType implementations
-bool ConstantInt::isValueValidForType(const Type *Ty, uint64_t Val) {
+bool ConstantInt::isValueValidForType(Type *Ty, uint64_t Val) {
unsigned NumBits = cast<IntegerType>(Ty)->getBitWidth(); // assert okay
if (Ty == Type::getInt1Ty(Ty->getContext()))
return Val == 0 || Val == 1;
@@ -917,7 +917,7 @@ bool ConstantInt::isValueValidForType(const Type *Ty, uint64_t Val) {
return Val <= Max;
}
-bool ConstantInt::isValueValidForType(const Type *Ty, int64_t Val) {
+bool ConstantInt::isValueValidForType(Type *Ty, int64_t Val) {
unsigned NumBits = cast<IntegerType>(Ty)->getBitWidth(); // assert okay
if (Ty == Type::getInt1Ty(Ty->getContext()))
return Val == 0 || Val == 1 || Val == -1;
@@ -928,7 +928,7 @@ bool ConstantInt::isValueValidForType(const Type *Ty, int64_t Val) {
return (Val >= Min && Val <= Max);
}
-bool ConstantFP::isValueValidForType(const Type *Ty, const APFloat& Val) {
+bool ConstantFP::isValueValidForType(Type *Ty, const APFloat& Val) {
// convert modifies in place, so make a copy.
APFloat Val2 = APFloat(Val);
bool losesInfo;
@@ -968,7 +968,7 @@ bool ConstantFP::isValueValidForType(const Type *Ty, const APFloat& Val) {
//===----------------------------------------------------------------------===//
// Factory Function Implementation
-ConstantAggregateZero* ConstantAggregateZero::get(const Type* Ty) {
+ConstantAggregateZero* ConstantAggregateZero::get(Type* Ty) {
assert((Ty->isStructTy() || Ty->isArrayTy() || Ty->isVectorTy()) &&
"Cannot create an aggregate zero of non-aggregate type!");
@@ -1079,13 +1079,16 @@ bool ConstantVector::isAllOnesValue() const {
// Check out first element.
const Constant *Elt = getOperand(0);
const ConstantInt *CI = dyn_cast<ConstantInt>(Elt);
- if (!CI || !CI->isAllOnesValue()) return false;
+ const ConstantFP *CF = dyn_cast<ConstantFP>(Elt);
+
// Then make sure all remaining elements point to the same value.
for (unsigned I = 1, E = getNumOperands(); I < E; ++I)
if (getOperand(I) != Elt)
return false;
- return true;
+ // First value is all-ones.
+ return (CI && CI->isAllOnesValue()) ||
+ (CF && CF->isAllOnesValue());
}
/// getSplatValue - If this is a splat constant, where all of the
@@ -1103,7 +1106,7 @@ Constant *ConstantVector::getSplatValue() const {
//---- ConstantPointerNull::get() implementation.
//
-ConstantPointerNull *ConstantPointerNull::get(const PointerType *Ty) {
+ConstantPointerNull *ConstantPointerNull::get(PointerType *Ty) {
return Ty->getContext().pImpl->NullPtrConstants.getOrCreate(Ty, 0);
}
@@ -1118,7 +1121,7 @@ void ConstantPointerNull::destroyConstant() {
//---- UndefValue::get() implementation.
//
-UndefValue *UndefValue::get(const Type *Ty) {
+UndefValue *UndefValue::get(Type *Ty) {
return Ty->getContext().pImpl->UndefValueConstants.getOrCreate(Ty, 0);
}
@@ -1209,7 +1212,7 @@ void BlockAddress::replaceUsesOfWithOnConstant(Value *From, Value *To, Use *U) {
/// This is a utility function to handle folding of casts and lookup of the
/// cast in the ExprConstants map. It is used by the various get* methods below.
static inline Constant *getFoldedCast(
- Instruction::CastOps opc, Constant *C, const Type *Ty) {
+ Instruction::CastOps opc, Constant *C, Type *Ty) {
assert(Ty->isFirstClassType() && "Cannot cast to an aggregate type!");
// Fold a few common cases
if (Constant *FC = ConstantFoldCastInstruction(opc, C, Ty))
@@ -1224,7 +1227,7 @@ static inline Constant *getFoldedCast(
return pImpl->ExprConstants.getOrCreate(Ty, Key);
}
-Constant *ConstantExpr::getCast(unsigned oc, Constant *C, const Type *Ty) {
+Constant *ConstantExpr::getCast(unsigned oc, Constant *C, Type *Ty) {
Instruction::CastOps opc = Instruction::CastOps(oc);
assert(Instruction::isCast(opc) && "opcode out of range");
assert(C && Ty && "Null arguments to getCast");
@@ -1250,25 +1253,25 @@ Constant *ConstantExpr::getCast(unsigned oc, Constant *C, const Type *Ty) {
return 0;
}
-Constant *ConstantExpr::getZExtOrBitCast(Constant *C, const Type *Ty) {
+Constant *ConstantExpr::getZExtOrBitCast(Constant *C, Type *Ty) {
if (C->getType()->getScalarSizeInBits() == Ty->getScalarSizeInBits())
return getBitCast(C, Ty);
return getZExt(C, Ty);
}
-Constant *ConstantExpr::getSExtOrBitCast(Constant *C, const Type *Ty) {
+Constant *ConstantExpr::getSExtOrBitCast(Constant *C, Type *Ty) {
if (C->getType()->getScalarSizeInBits() == Ty->getScalarSizeInBits())
return getBitCast(C, Ty);
return getSExt(C, Ty);
}
-Constant *ConstantExpr::getTruncOrBitCast(Constant *C, const Type *Ty) {
+Constant *ConstantExpr::getTruncOrBitCast(Constant *C, Type *Ty) {
if (C->getType()->getScalarSizeInBits() == Ty->getScalarSizeInBits())
return getBitCast(C, Ty);
return getTrunc(C, Ty);
}
-Constant *ConstantExpr::getPointerCast(Constant *S, const Type *Ty) {
+Constant *ConstantExpr::getPointerCast(Constant *S, Type *Ty) {
assert(S->getType()->isPointerTy() && "Invalid cast");
assert((Ty->isIntegerTy() || Ty->isPointerTy()) && "Invalid cast");
@@ -1277,7 +1280,7 @@ Constant *ConstantExpr::getPointerCast(Constant *S, const Type *Ty) {
return getBitCast(S, Ty);
}
-Constant *ConstantExpr::getIntegerCast(Constant *C, const Type *Ty,
+Constant *ConstantExpr::getIntegerCast(Constant *C, Type *Ty,
bool isSigned) {
assert(C->getType()->isIntOrIntVectorTy() &&
Ty->isIntOrIntVectorTy() && "Invalid cast");
@@ -1290,7 +1293,7 @@ Constant *ConstantExpr::getIntegerCast(Constant *C, const Type *Ty,
return getCast(opcode, C, Ty);
}
-Constant *ConstantExpr::getFPCast(Constant *C, const Type *Ty) {
+Constant *ConstantExpr::getFPCast(Constant *C, Type *Ty) {
assert(C->getType()->isFPOrFPVectorTy() && Ty->isFPOrFPVectorTy() &&
"Invalid cast");
unsigned SrcBits = C->getType()->getScalarSizeInBits();
@@ -1302,7 +1305,7 @@ Constant *ConstantExpr::getFPCast(Constant *C, const Type *Ty) {
return getCast(opcode, C, Ty);
}
-Constant *ConstantExpr::getTrunc(Constant *C, const Type *Ty) {
+Constant *ConstantExpr::getTrunc(Constant *C, Type *Ty) {
#ifndef NDEBUG
bool fromVec = C->getType()->getTypeID() == Type::VectorTyID;
bool toVec = Ty->getTypeID() == Type::VectorTyID;
@@ -1316,7 +1319,7 @@ Constant *ConstantExpr::getTrunc(Constant *C, const Type *Ty) {
return getFoldedCast(Instruction::Trunc, C, Ty);
}
-Constant *ConstantExpr::getSExt(Constant *C, const Type *Ty) {
+Constant *ConstantExpr::getSExt(Constant *C, Type *Ty) {
#ifndef NDEBUG
bool fromVec = C->getType()->getTypeID() == Type::VectorTyID;
bool toVec = Ty->getTypeID() == Type::VectorTyID;
@@ -1330,7 +1333,7 @@ Constant *ConstantExpr::getSExt(Constant *C, const Type *Ty) {
return getFoldedCast(Instruction::SExt, C, Ty);
}
-Constant *ConstantExpr::getZExt(Constant *C, const Type *Ty) {
+Constant *ConstantExpr::getZExt(Constant *C, Type *Ty) {
#ifndef NDEBUG
bool fromVec = C->getType()->getTypeID() == Type::VectorTyID;
bool toVec = Ty->getTypeID() == Type::VectorTyID;
@@ -1344,7 +1347,7 @@ Constant *ConstantExpr::getZExt(Constant *C, const Type *Ty) {
return getFoldedCast(Instruction::ZExt, C, Ty);
}
-Constant *ConstantExpr::getFPTrunc(Constant *C, const Type *Ty) {
+Constant *ConstantExpr::getFPTrunc(Constant *C, Type *Ty) {
#ifndef NDEBUG
bool fromVec = C->getType()->getTypeID() == Type::VectorTyID;
bool toVec = Ty->getTypeID() == Type::VectorTyID;
@@ -1356,7 +1359,7 @@ Constant *ConstantExpr::getFPTrunc(Constant *C, const Type *Ty) {
return getFoldedCast(Instruction::FPTrunc, C, Ty);
}
-Constant *ConstantExpr::getFPExtend(Constant *C, const Type *Ty) {
+Constant *ConstantExpr::getFPExtend(Constant *C, Type *Ty) {
#ifndef NDEBUG
bool fromVec = C->getType()->getTypeID() == Type::VectorTyID;
bool toVec = Ty->getTypeID() == Type::VectorTyID;
@@ -1368,7 +1371,7 @@ Constant *ConstantExpr::getFPExtend(Constant *C, const Type *Ty) {
return getFoldedCast(Instruction::FPExt, C, Ty);
}
-Constant *ConstantExpr::getUIToFP(Constant *C, const Type *Ty) {
+Constant *ConstantExpr::getUIToFP(Constant *C, Type *Ty) {
#ifndef NDEBUG
bool fromVec = C->getType()->getTypeID() == Type::VectorTyID;
bool toVec = Ty->getTypeID() == Type::VectorTyID;
@@ -1379,7 +1382,7 @@ Constant *ConstantExpr::getUIToFP(Constant *C, const Type *Ty) {
return getFoldedCast(Instruction::UIToFP, C, Ty);
}
-Constant *ConstantExpr::getSIToFP(Constant *C, const Type *Ty) {
+Constant *ConstantExpr::getSIToFP(Constant *C, Type *Ty) {
#ifndef NDEBUG
bool fromVec = C->getType()->getTypeID() == Type::VectorTyID;
bool toVec = Ty->getTypeID() == Type::VectorTyID;
@@ -1390,7 +1393,7 @@ Constant *ConstantExpr::getSIToFP(Constant *C, const Type *Ty) {
return getFoldedCast(Instruction::SIToFP, C, Ty);
}
-Constant *ConstantExpr::getFPToUI(Constant *C, const Type *Ty) {
+Constant *ConstantExpr::getFPToUI(Constant *C, Type *Ty) {
#ifndef NDEBUG
bool fromVec = C->getType()->getTypeID() == Type::VectorTyID;
bool toVec = Ty->getTypeID() == Type::VectorTyID;
@@ -1401,7 +1404,7 @@ Constant *ConstantExpr::getFPToUI(Constant *C, const Type *Ty) {
return getFoldedCast(Instruction::FPToUI, C, Ty);
}
-Constant *ConstantExpr::getFPToSI(Constant *C, const Type *Ty) {
+Constant *ConstantExpr::getFPToSI(Constant *C, Type *Ty) {
#ifndef NDEBUG
bool fromVec = C->getType()->getTypeID() == Type::VectorTyID;
bool toVec = Ty->getTypeID() == Type::VectorTyID;
@@ -1412,19 +1415,19 @@ Constant *ConstantExpr::getFPToSI(Constant *C, const Type *Ty) {
return getFoldedCast(Instruction::FPToSI, C, Ty);
}
-Constant *ConstantExpr::getPtrToInt(Constant *C, const Type *DstTy) {
+Constant *ConstantExpr::getPtrToInt(Constant *C, Type *DstTy) {
assert(C->getType()->isPointerTy() && "PtrToInt source must be pointer");
assert(DstTy->isIntegerTy() && "PtrToInt destination must be integral");
return getFoldedCast(Instruction::PtrToInt, C, DstTy);
}
-Constant *ConstantExpr::getIntToPtr(Constant *C, const Type *DstTy) {
+Constant *ConstantExpr::getIntToPtr(Constant *C, Type *DstTy) {
assert(C->getType()->isIntegerTy() && "IntToPtr source must be integral");
assert(DstTy->isPointerTy() && "IntToPtr destination must be a pointer");
return getFoldedCast(Instruction::IntToPtr, C, DstTy);
}
-Constant *ConstantExpr::getBitCast(Constant *C, const Type *DstTy) {
+Constant *ConstantExpr::getBitCast(Constant *C, Type *DstTy) {
assert(CastInst::castIsValid(Instruction::BitCast, C, DstTy) &&
"Invalid constantexpr bitcast!");
@@ -1513,36 +1516,36 @@ Constant *ConstantExpr::get(unsigned Opcode, Constant *C1, Constant *C2,
return pImpl->ExprConstants.getOrCreate(C1->getType(), Key);
}
-Constant *ConstantExpr::getSizeOf(const Type* Ty) {
+Constant *ConstantExpr::getSizeOf(Type* Ty) {
// sizeof is implemented as: (i64) gep (Ty*)null, 1
// Note that a non-inbounds gep is used, as null isn't within any object.
Constant *GEPIdx = ConstantInt::get(Type::getInt32Ty(Ty->getContext()), 1);
Constant *GEP = getGetElementPtr(
- Constant::getNullValue(PointerType::getUnqual(Ty)), &GEPIdx, 1);
+ Constant::getNullValue(PointerType::getUnqual(Ty)), GEPIdx);
return getPtrToInt(GEP,
Type::getInt64Ty(Ty->getContext()));
}
-Constant *ConstantExpr::getAlignOf(const Type* Ty) {
+Constant *ConstantExpr::getAlignOf(Type* Ty) {
// alignof is implemented as: (i64) gep ({i1,Ty}*)null, 0, 1
// Note that a non-inbounds gep is used, as null isn't within any object.
- const Type *AligningTy =
+ Type *AligningTy =
StructType::get(Type::getInt1Ty(Ty->getContext()), Ty, NULL);
Constant *NullPtr = Constant::getNullValue(AligningTy->getPointerTo());
Constant *Zero = ConstantInt::get(Type::getInt64Ty(Ty->getContext()), 0);
Constant *One = ConstantInt::get(Type::getInt32Ty(Ty->getContext()), 1);
Constant *Indices[2] = { Zero, One };
- Constant *GEP = getGetElementPtr(NullPtr, Indices, 2);
+ Constant *GEP = getGetElementPtr(NullPtr, Indices);
return getPtrToInt(GEP,
Type::getInt64Ty(Ty->getContext()));
}
-Constant *ConstantExpr::getOffsetOf(const StructType* STy, unsigned FieldNo) {
+Constant *ConstantExpr::getOffsetOf(StructType* STy, unsigned FieldNo) {
return getOffsetOf(STy, ConstantInt::get(Type::getInt32Ty(STy->getContext()),
FieldNo));
}
-Constant *ConstantExpr::getOffsetOf(const Type* Ty, Constant *FieldNo) {
+Constant *ConstantExpr::getOffsetOf(Type* Ty, Constant *FieldNo) {
// offsetof is implemented as: (i64) gep (Ty*)null, 0, FieldNo
// Note that a non-inbounds gep is used, as null isn't within any object.
Constant *GEPIdx[] = {
@@ -1550,7 +1553,7 @@ Constant *ConstantExpr::getOffsetOf(const Type* Ty, Constant *FieldNo) {
FieldNo
};
Constant *GEP = getGetElementPtr(
- Constant::getNullValue(PointerType::getUnqual(Ty)), GEPIdx, 2);
+ Constant::getNullValue(PointerType::getUnqual(Ty)), GEPIdx);
return getPtrToInt(GEP,
Type::getInt64Ty(Ty->getContext()));
}
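
These three helpers encode sizeof/alignof/offsetof as GEPs off a null pointer, so the constants stay target-independent until a TargetData-aware fold resolves them. A usage sketch (the struct type is illustrative):

    #include "llvm/Constants.h"
    #include "llvm/DerivedTypes.h"
    using namespace llvm;

    static void layoutDemo(StructType *STy) {
      Constant *Size  = ConstantExpr::getSizeOf(STy);      // i64
      Constant *Align = ConstantExpr::getAlignOf(STy);     // i64
      Constant *Off   = ConstantExpr::getOffsetOf(STy, 1); // i64, field #1
      (void)Size; (void)Align; (void)Off;
    }
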
@@ -1592,14 +1595,13 @@ Constant *ConstantExpr::getSelect(Constant *C, Constant *V1, Constant *V2) {
return pImpl->ExprConstants.getOrCreate(V1->getType(), Key);
}
-Constant *ConstantExpr::getGetElementPtr(Constant *C, Value* const *Idxs,
- unsigned NumIdx, bool InBounds) {
- if (Constant *FC = ConstantFoldGetElementPtr(C, InBounds, Idxs, NumIdx))
+Constant *ConstantExpr::getGetElementPtr(Constant *C, ArrayRef<Value *> Idxs,
+ bool InBounds) {
+ if (Constant *FC = ConstantFoldGetElementPtr(C, InBounds, Idxs))
return FC; // Fold a few common cases.
// Get the result type of the getelementptr!
- const Type *Ty =
- GetElementPtrInst::getIndexedType(C->getType(), Idxs, Idxs+NumIdx);
+ Type *Ty = GetElementPtrInst::getIndexedType(C->getType(), Idxs);
assert(Ty && "GEP indices invalid!");
unsigned AS = cast<PointerType>(C->getType())->getAddressSpace();
Type *ReqTy = Ty->getPointerTo(AS);
@@ -1608,9 +1610,9 @@ Constant *ConstantExpr::getGetElementPtr(Constant *C, Value* const *Idxs,
"Non-pointer type for constant GetElementPtr expression");
// Look up the constant in the table first to ensure uniqueness
std::vector<Constant*> ArgVec;
- ArgVec.reserve(NumIdx+1);
+ ArgVec.reserve(1 + Idxs.size());
ArgVec.push_back(C);
- for (unsigned i = 0; i != NumIdx; ++i)
+ for (unsigned i = 0, e = Idxs.size(); i != e; ++i)
ArgVec.push_back(cast<Constant>(Idxs[i]));
const ExprMapKeyType Key(Instruction::GetElementPtr, ArgVec, 0,
InBounds ? GEPOperator::IsInBounds : 0);
@@ -1635,8 +1637,8 @@ ConstantExpr::getICmp(unsigned short pred, Constant *LHS, Constant *RHS) {
// Get the key type with both the opcode and predicate
const ExprMapKeyType Key(Instruction::ICmp, ArgVec, pred);
- const Type *ResultTy = Type::getInt1Ty(LHS->getContext());
- if (const VectorType *VT = dyn_cast<VectorType>(LHS->getType()))
+ Type *ResultTy = Type::getInt1Ty(LHS->getContext());
+ if (VectorType *VT = dyn_cast<VectorType>(LHS->getType()))
ResultTy = VectorType::get(ResultTy, VT->getNumElements());
LLVMContextImpl *pImpl = LHS->getType()->getContext().pImpl;
@@ -1658,8 +1660,8 @@ ConstantExpr::getFCmp(unsigned short pred, Constant *LHS, Constant *RHS) {
// Get the key type with both the opcode and predicate
const ExprMapKeyType Key(Instruction::FCmp, ArgVec, pred);
- const Type *ResultTy = Type::getInt1Ty(LHS->getContext());
- if (const VectorType *VT = dyn_cast<VectorType>(LHS->getType()))
+ Type *ResultTy = Type::getInt1Ty(LHS->getContext());
+ if (VectorType *VT = dyn_cast<VectorType>(LHS->getType()))
ResultTy = VectorType::get(ResultTy, VT->getNumElements());
LLVMContextImpl *pImpl = LHS->getType()->getContext().pImpl;
@@ -1715,8 +1717,8 @@ Constant *ConstantExpr::getShuffleVector(Constant *V1, Constant *V2,
return FC; // Fold a few common cases.
unsigned NElts = cast<VectorType>(Mask->getType())->getNumElements();
- const Type *EltTy = cast<VectorType>(V1->getType())->getElementType();
- const Type *ShufTy = VectorType::get(EltTy, NElts);
+ Type *EltTy = cast<VectorType>(V1->getType())->getElementType();
+ Type *ShufTy = VectorType::get(EltTy, NElts);
// Look up the constant in the table first to ensure uniqueness
std::vector<Constant*> ArgVec(1, V1);
@@ -1745,7 +1747,7 @@ Constant *ConstantExpr::getExtractValue(Constant *Agg,
assert(Agg->getType()->isFirstClassType() &&
"Tried to create extractelement operation on non-first-class type!");
- const Type *ReqTy = ExtractValueInst::getIndexedType(Agg->getType(), Idxs);
+ Type *ReqTy = ExtractValueInst::getIndexedType(Agg->getType(), Idxs);
(void)ReqTy;
assert(ReqTy && "extractvalue indices invalid!");
@@ -1878,7 +1880,7 @@ const char *ConstantExpr::getOpcodeName() const {
GetElementPtrConstantExpr::
GetElementPtrConstantExpr(Constant *C, const std::vector<Constant*> &IdxList,
- const Type *DestTy)
+ Type *DestTy)
: ConstantExpr(DestTy, Instruction::GetElementPtr,
OperandTraits<GetElementPtrConstantExpr>::op_end(this)
- (IdxList.size()+1), IdxList.size()+1) {
@@ -2091,8 +2093,7 @@ void ConstantExpr::replaceUsesOfWithOnConstant(Value *From, Value *ToV,
if (Val == From) Val = To;
Indices.push_back(Val);
}
- Replacement = ConstantExpr::getGetElementPtr(Pointer,
- &Indices[0], Indices.size(),
+ Replacement = ConstantExpr::getGetElementPtr(Pointer, Indices,
cast<GEPOperator>(this)->isInBounds());
} else if (getOpcode() == Instruction::ExtractValue) {
Constant *Agg = getOperand(0);
diff --git a/lib/VMCore/ConstantsContext.h b/lib/VMCore/ConstantsContext.h
index bd134d9..1077004 100644
--- a/lib/VMCore/ConstantsContext.h
+++ b/lib/VMCore/ConstantsContext.h
@@ -36,7 +36,7 @@ public:
void *operator new(size_t s) {
return User::operator new(s, 1);
}
- UnaryConstantExpr(unsigned Opcode, Constant *C, const Type *Ty)
+ UnaryConstantExpr(unsigned Opcode, Constant *C, Type *Ty)
: ConstantExpr(Ty, Opcode, &Op<0>(), 1) {
Op<0>() = C;
}
@@ -159,7 +159,7 @@ public:
}
ExtractValueConstantExpr(Constant *Agg,
const SmallVector<unsigned, 4> &IdxList,
- const Type *DestTy)
+ Type *DestTy)
: ConstantExpr(DestTy, Instruction::ExtractValue, &Op<0>(), 1),
Indices(IdxList) {
Op<0>() = Agg;
@@ -184,7 +184,7 @@ public:
}
InsertValueConstantExpr(Constant *Agg, Constant *Val,
const SmallVector<unsigned, 4> &IdxList,
- const Type *DestTy)
+ Type *DestTy)
: ConstantExpr(DestTy, Instruction::InsertValue, &Op<0>(), 2),
Indices(IdxList) {
Op<0>() = Agg;
@@ -203,11 +203,11 @@ public:
/// used behind the scenes to implement getelementptr constant exprs.
class GetElementPtrConstantExpr : public ConstantExpr {
GetElementPtrConstantExpr(Constant *C, const std::vector<Constant*> &IdxList,
- const Type *DestTy);
+ Type *DestTy);
public:
static GetElementPtrConstantExpr *Create(Constant *C,
const std::vector<Constant*>&IdxList,
- const Type *DestTy,
+ Type *DestTy,
unsigned Flags) {
GetElementPtrConstantExpr *Result =
new(IdxList.size() + 1) GetElementPtrConstantExpr(C, IdxList, DestTy);
@@ -228,7 +228,7 @@ struct CompareConstantExpr : public ConstantExpr {
return User::operator new(s, 2);
}
unsigned short predicate;
- CompareConstantExpr(const Type *ty, Instruction::OtherOps opc,
+ CompareConstantExpr(Type *ty, Instruction::OtherOps opc,
unsigned short pred, Constant* LHS, Constant* RHS)
: ConstantExpr(ty, opc, &Op<0>(), 2), predicate(pred) {
Op<0>() = LHS;
@@ -392,7 +392,7 @@ struct ConstantTraits<Constant *> {
template<class ConstantClass, class TypeClass, class ValType>
struct ConstantCreator {
- static ConstantClass *create(const TypeClass *Ty, const ValType &V) {
+ static ConstantClass *create(TypeClass *Ty, const ValType &V) {
return new(ConstantTraits<ValType>::uses(V)) ConstantClass(Ty, V);
}
};
@@ -407,7 +407,7 @@ struct ConstantKeyData {
template<>
struct ConstantCreator<ConstantExpr, Type, ExprMapKeyType> {
- static ConstantExpr *create(const Type *Ty, const ExprMapKeyType &V,
+ static ConstantExpr *create(Type *Ty, const ExprMapKeyType &V,
unsigned short pred = 0) {
if (Instruction::isCast(V.opcode))
return new UnaryConstantExpr(V.opcode, V.operands[0], Ty);
@@ -470,7 +470,7 @@ struct ConstantKeyData<ConstantExpr> {
// ConstantAggregateZero does not take extra "value" argument...
template<class ValType>
struct ConstantCreator<ConstantAggregateZero, Type, ValType> {
- static ConstantAggregateZero *create(const Type *Ty, const ValType &V){
+ static ConstantAggregateZero *create(Type *Ty, const ValType &V){
return new ConstantAggregateZero(Ty);
}
};
@@ -522,7 +522,7 @@ struct ConstantKeyData<ConstantStruct> {
// ConstantPointerNull does not take extra "value" argument...
template<class ValType>
struct ConstantCreator<ConstantPointerNull, PointerType, ValType> {
- static ConstantPointerNull *create(const PointerType *Ty, const ValType &V){
+ static ConstantPointerNull *create(PointerType *Ty, const ValType &V){
return new ConstantPointerNull(Ty);
}
};
@@ -538,7 +538,7 @@ struct ConstantKeyData<ConstantPointerNull> {
// UndefValue does not take extra "value" argument...
template<class ValType>
struct ConstantCreator<UndefValue, Type, ValType> {
- static UndefValue *create(const Type *Ty, const ValType &V) {
+ static UndefValue *create(Type *Ty, const ValType &V) {
return new UndefValue(Ty);
}
};
@@ -553,7 +553,7 @@ struct ConstantKeyData<UndefValue> {
template<>
struct ConstantCreator<InlineAsm, PointerType, InlineAsmKeyType> {
- static InlineAsm *create(const PointerType *Ty, const InlineAsmKeyType &Key) {
+ static InlineAsm *create(PointerType *Ty, const InlineAsmKeyType &Key) {
return new InlineAsm(Ty, Key.asm_string, Key.constraints,
Key.has_side_effects, Key.is_align_stack);
}
@@ -572,7 +572,7 @@ template<class ValType, class ValRefType, class TypeClass, class ConstantClass,
bool HasLargeKey = false /*true for arrays and structs*/ >
class ConstantUniqueMap {
public:
- typedef std::pair<const TypeClass*, ValType> MapKey;
+ typedef std::pair<TypeClass*, ValType> MapKey;
typedef std::map<MapKey, ConstantClass *> MapTy;
typedef std::map<ConstantClass *, typename MapTy::iterator> InverseMapTy;
private:
@@ -623,7 +623,7 @@ private:
}
typename MapTy::iterator I =
- Map.find(MapKey(static_cast<const TypeClass*>(CP->getType()),
+ Map.find(MapKey(static_cast<TypeClass*>(CP->getType()),
ConstantKeyData<ConstantClass>::getValType(CP)));
if (I == Map.end() || I->second != CP) {
// FIXME: This should not use a linear scan. If this gets to be a
@@ -634,7 +634,7 @@ private:
return I;
}
- ConstantClass *Create(const TypeClass *Ty, ValRefType V,
+ ConstantClass *Create(TypeClass *Ty, ValRefType V,
typename MapTy::iterator I) {
ConstantClass* Result =
ConstantCreator<ConstantClass,TypeClass,ValType>::create(Ty, V);
@@ -651,7 +651,7 @@ public:
/// getOrCreate - Return the specified constant from the map, creating it if
/// necessary.
- ConstantClass *getOrCreate(const TypeClass *Ty, ValRefType V) {
+ ConstantClass *getOrCreate(TypeClass *Ty, ValRefType V) {
MapKey Lookup(Ty, V);
ConstantClass* Result = 0;
diff --git a/lib/VMCore/Core.cpp b/lib/VMCore/Core.cpp
index 2a816e1..a505e4b 100644
--- a/lib/VMCore/Core.cpp
+++ b/lib/VMCore/Core.cpp
@@ -167,6 +167,11 @@ LLVMTypeKind LLVMGetTypeKind(LLVMTypeRef Ty) {
}
}
+LLVMBool LLVMTypeIsSized(LLVMTypeRef Ty)
+{
+ return unwrap(Ty)->isSized();
+}
+
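
A usage sketch for the new predicate, guarding a size query from the C API (the type handle is illustrative):

    #include "llvm-c/Core.h"

    void sizeCheckDemo(LLVMTypeRef Ty) {
      LLVMValueRef Size = 0;
      if (LLVMTypeIsSized(Ty))
        Size = LLVMSizeOf(Ty);   /* safe only for sized types */
      (void)Size;
    }
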
LLVMContextRef LLVMGetTypeContext(LLVMTypeRef Ty) {
return wrap(&unwrap(Ty)->getContext());
}
@@ -299,7 +304,15 @@ LLVMTypeRef LLVMStructType(LLVMTypeRef *ElementTypes,
LLVMTypeRef LLVMStructCreateNamed(LLVMContextRef C, const char *Name)
{
- return wrap(StructType::createNamed(*unwrap(C), Name));
+ return wrap(StructType::create(*unwrap(C), Name));
+}
+
+const char *LLVMGetStructName(LLVMTypeRef Ty)
+{
+ StructType *Type = unwrap<StructType>(Ty);
+ if (!Type->hasName())
+ return 0;
+ return Type->getName().data();
}
void LLVMStructSetBody(LLVMTypeRef StructTy, LLVMTypeRef *ElementTypes,
@@ -448,7 +461,10 @@ LLVMValueRef LLVMGetUsedValue(LLVMUseRef U) {
/*--.. Operations on Users .................................................--*/
LLVMValueRef LLVMGetOperand(LLVMValueRef Val, unsigned Index) {
- return wrap(unwrap<User>(Val)->getOperand(Index));
+ Value *V = unwrap(Val);
+ if (MDNode *MD = dyn_cast<MDNode>(V))
+ return wrap(MD->getOperand(Index));
+ return wrap(cast<User>(V)->getOperand(Index));
}
void LLVMSetOperand(LLVMValueRef Val, unsigned Index, LLVMValueRef Op) {
@@ -456,7 +472,10 @@ void LLVMSetOperand(LLVMValueRef Val, unsigned Index, LLVMValueRef Op) {
}
int LLVMGetNumOperands(LLVMValueRef Val) {
- return unwrap<User>(Val)->getNumOperands();
+ Value *V = unwrap(Val);
+ if (MDNode *MD = dyn_cast<MDNode>(V))
+ return MD->getNumOperands();
+ return cast<User>(V)->getNumOperands();
}
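
A sketch of the effect of the two changes above: operand access through the C API now works on metadata nodes as well as Users (the operand values are illustrative):

    #include "llvm-c/Core.h"

    void mdOperandDemo(LLVMValueRef A, LLVMValueRef B) {
      LLVMValueRef Vals[2];
      LLVMValueRef MD;
      Vals[0] = A; Vals[1] = B;
      MD = LLVMMDNode(Vals, 2);
      LLVMGetNumOperands(MD);  /* 2: previously this required a User */
      LLVMGetOperand(MD, 0);   /* A */
    }
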
/*--.. Operations on constants of any type .................................--*/
@@ -506,13 +525,39 @@ LLVMValueRef LLVMMDString(const char *Str, unsigned SLen) {
LLVMValueRef LLVMMDNodeInContext(LLVMContextRef C, LLVMValueRef *Vals,
unsigned Count) {
return wrap(MDNode::get(*unwrap(C),
- ArrayRef<Value*>(unwrap<Value>(Vals, Count), Count)));
+ makeArrayRef(unwrap<Value>(Vals, Count), Count)));
}
LLVMValueRef LLVMMDNode(LLVMValueRef *Vals, unsigned Count) {
return LLVMMDNodeInContext(LLVMGetGlobalContext(), Vals, Count);
}
+const char *LLVMGetMDString(LLVMValueRef V, unsigned* Len) {
+ if (const MDString *S = dyn_cast<MDString>(unwrap(V))) {
+ *Len = S->getString().size();
+ return S->getString().data();
+ }
+ *Len = 0;
+ return 0;
+}
+
+unsigned LLVMGetNamedMetadataNumOperands(LLVMModuleRef M, const char* name)
+{
+ if (NamedMDNode *N = unwrap(M)->getNamedMetadata(name)) {
+ return N->getNumOperands();
+ }
+ return 0;
+}
+
+void LLVMGetNamedMetadataOperands(LLVMModuleRef M, const char* name, LLVMValueRef *Dest)
+{
+ NamedMDNode *N = unwrap(M)->getNamedMetadata(name);
+ if (!N)
+ return;
+ for (unsigned i=0;i<N->getNumOperands();i++)
+ Dest[i] = wrap(N->getOperand(i));
+}
+
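
Enumerating a named metadata node with the two new entry points; a sketch in which the node name "my.md" is illustrative:

    #include "llvm-c/Core.h"
    #include <stdlib.h>

    void namedMDDemo(LLVMModuleRef M) {
      unsigned N = LLVMGetNamedMetadataNumOperands(M, "my.md");
      LLVMValueRef *Ops = (LLVMValueRef *)malloc(N * sizeof(LLVMValueRef));
      LLVMGetNamedMetadataOperands(M, "my.md", Ops);
      /* ... inspect Ops[0..N-1] ... */
      free(Ops);
    }
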
/*--.. Operations on scalar constants ......................................--*/
LLVMValueRef LLVMConstInt(LLVMTypeRef IntTy, unsigned long long N,
@@ -525,7 +570,8 @@ LLVMValueRef LLVMConstIntOfArbitraryPrecision(LLVMTypeRef IntTy,
const uint64_t Words[]) {
IntegerType *Ty = unwrap<IntegerType>(IntTy);
return wrap(ConstantInt::get(Ty->getContext(),
- APInt(Ty->getBitWidth(), NumWords, Words)));
+ APInt(Ty->getBitWidth(),
+ makeArrayRef(Words, NumWords))));
}
LLVMValueRef LLVMConstIntOfString(LLVMTypeRef IntTy, const char Str[],
@@ -575,8 +621,7 @@ LLVMValueRef LLVMConstStructInContext(LLVMContextRef C,
LLVMValueRef *ConstantVals,
unsigned Count, LLVMBool Packed) {
Constant **Elements = unwrap<Constant>(ConstantVals, Count);
- return wrap(ConstantStruct::getAnon(*unwrap(C),
- ArrayRef<Constant*>(Elements, Count),
+ return wrap(ConstantStruct::getAnon(*unwrap(C), makeArrayRef(Elements, Count),
Packed != 0));
}
@@ -600,19 +645,44 @@ LLVMValueRef LLVMConstNamedStruct(LLVMTypeRef StructTy,
LLVMValueRef *ConstantVals,
unsigned Count) {
Constant **Elements = unwrap<Constant>(ConstantVals, Count);
- const StructType *Ty = cast<StructType>(unwrap(StructTy));
+ StructType *Ty = cast<StructType>(unwrap(StructTy));
- return wrap(ConstantStruct::get(Ty, ArrayRef<Constant*>(Elements, Count)));
+ return wrap(ConstantStruct::get(Ty, makeArrayRef(Elements, Count)));
}
LLVMValueRef LLVMConstVector(LLVMValueRef *ScalarConstantVals, unsigned Size) {
- return wrap(ConstantVector::get(ArrayRef<Constant*>(
+ return wrap(ConstantVector::get(makeArrayRef(
unwrap<Constant>(ScalarConstantVals, Size), Size)));
}
+
+/*--.. Opcode mapping .....................................................--*/
+
+static LLVMOpcode map_to_llvmopcode(int opcode)
+{
+ switch (opcode) {
+ default:
+ assert(0 && "Unhandled Opcode.");
+#define HANDLE_INST(num, opc, clas) case num: return LLVM##opc;
+#include "llvm/Instruction.def"
+#undef HANDLE_INST
+ }
+}
+
+static int map_from_llvmopcode(LLVMOpcode code)
+{
+ switch (code) {
+ default:
+ assert(0 && "Unhandled Opcode.");
+#define HANDLE_INST(num, opc, clas) case LLVM##opc: return num;
+#include "llvm/Instruction.def"
+#undef HANDLE_INST
+ }
+}
+
/*--.. Constant expressions ................................................--*/
LLVMOpcode LLVMGetConstOpcode(LLVMValueRef ConstantVal) {
- return (LLVMOpcode)unwrap<ConstantExpr>(ConstantVal)->getOpcode();
+ return map_to_llvmopcode(unwrap<ConstantExpr>(ConstantVal)->getOpcode());
}
LLVMValueRef LLVMAlignOf(LLVMTypeRef Ty) {
@@ -792,18 +862,19 @@ LLVMValueRef LLVMConstAShr(LLVMValueRef LHSConstant, LLVMValueRef RHSConstant) {
LLVMValueRef LLVMConstGEP(LLVMValueRef ConstantVal,
LLVMValueRef *ConstantIndices, unsigned NumIndices) {
+ ArrayRef<Constant *> IdxList(unwrap<Constant>(ConstantIndices, NumIndices),
+ NumIndices);
return wrap(ConstantExpr::getGetElementPtr(unwrap<Constant>(ConstantVal),
- unwrap<Constant>(ConstantIndices,
- NumIndices),
- NumIndices));
+ IdxList));
}
LLVMValueRef LLVMConstInBoundsGEP(LLVMValueRef ConstantVal,
LLVMValueRef *ConstantIndices,
unsigned NumIndices) {
Constant* Val = unwrap<Constant>(ConstantVal);
- Constant** Idxs = unwrap<Constant>(ConstantIndices, NumIndices);
- return wrap(ConstantExpr::getInBoundsGetElementPtr(Val, Idxs, NumIndices));
+ ArrayRef<Constant *> IdxList(unwrap<Constant>(ConstantIndices, NumIndices),
+ NumIndices);
+ return wrap(ConstantExpr::getInBoundsGetElementPtr(Val, IdxList));
}
LLVMValueRef LLVMConstTrunc(LLVMValueRef ConstantVal, LLVMTypeRef ToType) {
@@ -934,8 +1005,7 @@ LLVMValueRef LLVMConstShuffleVector(LLVMValueRef VectorAConstant,
LLVMValueRef LLVMConstExtractValue(LLVMValueRef AggConstant, unsigned *IdxList,
unsigned NumIdx) {
return wrap(ConstantExpr::getExtractValue(unwrap<Constant>(AggConstant),
- ArrayRef<unsigned>(IdxList,
- NumIdx)));
+ makeArrayRef(IdxList, NumIdx)));
}
LLVMValueRef LLVMConstInsertValue(LLVMValueRef AggConstant,
@@ -943,8 +1013,7 @@ LLVMValueRef LLVMConstInsertValue(LLVMValueRef AggConstant,
unsigned *IdxList, unsigned NumIdx) {
return wrap(ConstantExpr::getInsertValue(unwrap<Constant>(AggConstant),
unwrap<Constant>(ElementValueConstant),
- ArrayRef<unsigned>(IdxList,
- NumIdx)));
+ makeArrayRef(IdxList, NumIdx)));
}
LLVMValueRef LLVMConstInlineAsm(LLVMTypeRef Ty, const char *AsmString,
@@ -1383,6 +1452,10 @@ LLVMValueRef LLVMGetBasicBlockParent(LLVMBasicBlockRef BB) {
return wrap(unwrap(BB)->getParent());
}
+LLVMValueRef LLVMGetBasicBlockTerminator(LLVMBasicBlockRef BB) {
+ return wrap(unwrap(BB)->getTerminator());
+}
+
unsigned LLVMCountBasicBlocks(LLVMValueRef FnRef) {
return unwrap<Function>(FnRef)->size();
}
@@ -1455,6 +1528,10 @@ void LLVMDeleteBasicBlock(LLVMBasicBlockRef BBRef) {
unwrap(BBRef)->eraseFromParent();
}
+void LLVMRemoveBasicBlockFromParent(LLVMBasicBlockRef BBRef) {
+ unwrap(BBRef)->removeFromParent();
+}
+
void LLVMMoveBasicBlockBefore(LLVMBasicBlockRef BB, LLVMBasicBlockRef MovePos) {
unwrap(BB)->moveBefore(unwrap(MovePos));
}
@@ -1501,6 +1578,25 @@ LLVMValueRef LLVMGetPreviousInstruction(LLVMValueRef Inst) {
return wrap(--I);
}
+void LLVMInstructionEraseFromParent(LLVMValueRef Inst) {
+ unwrap<Instruction>(Inst)->eraseFromParent();
+}
+
+LLVMIntPredicate LLVMGetICmpPredicate(LLVMValueRef Inst) {
+ if (ICmpInst *I = dyn_cast<ICmpInst>(unwrap(Inst)))
+ return (LLVMIntPredicate)I->getPredicate();
+ if (ConstantExpr *CE = dyn_cast<ConstantExpr>(unwrap(Inst)))
+ if (CE->getOpcode() == Instruction::ICmp)
+ return (LLVMIntPredicate)CE->getPredicate();
+ return (LLVMIntPredicate)0;
+}
+
+LLVMOpcode LLVMGetInstructionOpcode(LLVMValueRef Inst) {
+ if (Instruction *C = dyn_cast<Instruction>(unwrap(Inst)))
+ return map_to_llvmopcode(C->getOpcode());
+ return (LLVMOpcode)0;
+}
+
/*--.. Call and invoke instructions ........................................--*/
unsigned LLVMGetInstructionCallConv(LLVMValueRef Instr) {
@@ -1554,6 +1650,12 @@ void LLVMSetTailCall(LLVMValueRef Call, LLVMBool isTailCall) {
unwrap<CallInst>(Call)->setTailCall(isTailCall);
}
+/*--.. Operations on switch instructions (only) ............................--*/
+
+LLVMBasicBlockRef LLVMGetSwitchDefaultDest(LLVMValueRef Switch) {
+ return wrap(unwrap<SwitchInst>(Switch)->getDefaultDest());
+}
+
/*--.. Operations on phi nodes .............................................--*/
void LLVMAddIncoming(LLVMValueRef PhiNode, LLVMValueRef *IncomingValues,
@@ -1680,12 +1782,20 @@ LLVMValueRef LLVMBuildInvoke(LLVMBuilderRef B, LLVMValueRef Fn,
LLVMBasicBlockRef Then, LLVMBasicBlockRef Catch,
const char *Name) {
return wrap(unwrap(B)->CreateInvoke(unwrap(Fn), unwrap(Then), unwrap(Catch),
- ArrayRef<Value *>(unwrap(Args), NumArgs),
+ makeArrayRef(unwrap(Args), NumArgs),
Name));
}
-LLVMValueRef LLVMBuildUnwind(LLVMBuilderRef B) {
- return wrap(unwrap(B)->CreateUnwind());
+LLVMValueRef LLVMBuildLandingPad(LLVMBuilderRef B, LLVMTypeRef Ty,
+ LLVMValueRef PersFn, unsigned NumClauses,
+ const char *Name) {
+ return wrap(unwrap(B)->CreateLandingPad(unwrap(Ty),
+ cast<Function>(unwrap(PersFn)),
+ NumClauses, Name));
+}
+
+LLVMValueRef LLVMBuildResume(LLVMBuilderRef B, LLVMValueRef Exn) {
+ return wrap(unwrap(B)->CreateResume(unwrap(Exn)));
}
LLVMValueRef LLVMBuildUnreachable(LLVMBuilderRef B) {
@@ -1701,6 +1811,15 @@ void LLVMAddDestination(LLVMValueRef IndirectBr, LLVMBasicBlockRef Dest) {
unwrap<IndirectBrInst>(IndirectBr)->addDestination(unwrap(Dest));
}
+void LLVMAddClause(LLVMValueRef LandingPad, LLVMValueRef ClauseVal) {
+ unwrap<LandingPadInst>(LandingPad)->
+ addClause(cast<Constant>(unwrap(ClauseVal)));
+}
+
+void LLVMSetCleanup(LLVMValueRef LandingPad, LLVMBool Val) {
+ unwrap<LandingPadInst>(LandingPad)->setCleanup(Val);
+}
+
/*--.. Arithmetic ..........................................................--*/
LLVMValueRef LLVMBuildAdd(LLVMBuilderRef B, LLVMValueRef LHS, LLVMValueRef RHS,
@@ -1831,7 +1950,7 @@ LLVMValueRef LLVMBuildXor(LLVMBuilderRef B, LLVMValueRef LHS, LLVMValueRef RHS,
LLVMValueRef LLVMBuildBinOp(LLVMBuilderRef B, LLVMOpcode Op,
LLVMValueRef LHS, LLVMValueRef RHS,
const char *Name) {
- return wrap(unwrap(B)->CreateBinOp(Instruction::BinaryOps(Op), unwrap(LHS),
+ return wrap(unwrap(B)->CreateBinOp(Instruction::BinaryOps(map_from_llvmopcode(Op)), unwrap(LHS),
unwrap(RHS), Name));
}
@@ -1861,7 +1980,7 @@ LLVMValueRef LLVMBuildNot(LLVMBuilderRef B, LLVMValueRef V, const char *Name) {
LLVMValueRef LLVMBuildMalloc(LLVMBuilderRef B, LLVMTypeRef Ty,
const char *Name) {
- const Type* ITy = Type::getInt32Ty(unwrap(B)->GetInsertBlock()->getContext());
+ Type* ITy = Type::getInt32Ty(unwrap(B)->GetInsertBlock()->getContext());
Constant* AllocSize = ConstantExpr::getSizeOf(unwrap(Ty));
AllocSize = ConstantExpr::getTruncOrBitCast(AllocSize, ITy);
Instruction* Malloc = CallInst::CreateMalloc(unwrap(B)->GetInsertBlock(),
@@ -1872,7 +1991,7 @@ LLVMValueRef LLVMBuildMalloc(LLVMBuilderRef B, LLVMTypeRef Ty,
LLVMValueRef LLVMBuildArrayMalloc(LLVMBuilderRef B, LLVMTypeRef Ty,
LLVMValueRef Val, const char *Name) {
- const Type* ITy = Type::getInt32Ty(unwrap(B)->GetInsertBlock()->getContext());
+ Type* ITy = Type::getInt32Ty(unwrap(B)->GetInsertBlock()->getContext());
Constant* AllocSize = ConstantExpr::getSizeOf(unwrap(Ty));
AllocSize = ConstantExpr::getTruncOrBitCast(AllocSize, ITy);
Instruction* Malloc = CallInst::CreateMalloc(unwrap(B)->GetInsertBlock(),
@@ -1910,15 +2029,15 @@ LLVMValueRef LLVMBuildStore(LLVMBuilderRef B, LLVMValueRef Val,
LLVMValueRef LLVMBuildGEP(LLVMBuilderRef B, LLVMValueRef Pointer,
LLVMValueRef *Indices, unsigned NumIndices,
const char *Name) {
- return wrap(unwrap(B)->CreateGEP(unwrap(Pointer), unwrap(Indices),
- unwrap(Indices) + NumIndices, Name));
+ ArrayRef<Value *> IdxList(unwrap(Indices), NumIndices);
+ return wrap(unwrap(B)->CreateGEP(unwrap(Pointer), IdxList, Name));
}
LLVMValueRef LLVMBuildInBoundsGEP(LLVMBuilderRef B, LLVMValueRef Pointer,
LLVMValueRef *Indices, unsigned NumIndices,
const char *Name) {
- return wrap(unwrap(B)->CreateInBoundsGEP(unwrap(Pointer), unwrap(Indices),
- unwrap(Indices) + NumIndices, Name));
+ ArrayRef<Value *> IdxList(unwrap(Indices), NumIndices);
+ return wrap(unwrap(B)->CreateInBoundsGEP(unwrap(Pointer), IdxList, Name));
}
LLVMValueRef LLVMBuildStructGEP(LLVMBuilderRef B, LLVMValueRef Pointer,
@@ -2018,7 +2137,7 @@ LLVMValueRef LLVMBuildTruncOrBitCast(LLVMBuilderRef B, LLVMValueRef Val,
LLVMValueRef LLVMBuildCast(LLVMBuilderRef B, LLVMOpcode Op, LLVMValueRef Val,
LLVMTypeRef DestTy, const char *Name) {
- return wrap(unwrap(B)->CreateCast(Instruction::CastOps(Op), unwrap(Val),
+ return wrap(unwrap(B)->CreateCast(Instruction::CastOps(map_from_llvmopcode(Op)), unwrap(Val),
unwrap(DestTy), Name));
}
@@ -2064,7 +2183,7 @@ LLVMValueRef LLVMBuildCall(LLVMBuilderRef B, LLVMValueRef Fn,
LLVMValueRef *Args, unsigned NumArgs,
const char *Name) {
return wrap(unwrap(B)->CreateCall(unwrap(Fn),
- ArrayRef<Value *>(unwrap(Args), NumArgs),
+ makeArrayRef(unwrap(Args), NumArgs),
Name));
}
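The Core.cpp hunks above grow the C bindings in two directions: metadata becomes inspectable (LLVMGetMDString, LLVMGetNamedMetadataNumOperands/Operands, and MDNode-aware LLVMGetOperand/LLVMGetNumOperands), and opcodes are translated through map_to_llvmopcode/map_from_llvmopcode so the C enum no longer has to mirror Instruction.def numbering. A minimal sketch of a client of the new metadata entry points — dumpNamedMD is a hypothetical helper, the module contents are assumed, and error handling is trimmed:

    #include "llvm-c/Core.h"
    #include <stdio.h>
    #include <stdlib.h>

    /* Print any MDString found as operand 0 of each node in a named MD list. */
    static void dumpNamedMD(LLVMModuleRef M, const char *MDName) {
      unsigned N = LLVMGetNamedMetadataNumOperands(M, MDName);
      LLVMValueRef *Nodes = (LLVMValueRef *)malloc(N * sizeof(LLVMValueRef));
      LLVMGetNamedMetadataOperands(M, MDName, Nodes);
      for (unsigned i = 0; i < N; ++i) {
        if (LLVMGetNumOperands(Nodes[i]) < 1)
          continue;
        unsigned Len;
        const char *S = LLVMGetMDString(LLVMGetOperand(Nodes[i], 0), &Len);
        if (S)
          printf("%.*s\n", (int)Len, S);
      }
      free(Nodes);
    }

Note that LLVMGetOperand works on the MDNode operands here only because the patched version dispatches on MDNode before falling back to User.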
diff --git a/lib/VMCore/DebugLoc.cpp b/lib/VMCore/DebugLoc.cpp
index 4ff6b2c..328244f 100644
--- a/lib/VMCore/DebugLoc.cpp
+++ b/lib/VMCore/DebugLoc.cpp
@@ -104,7 +104,7 @@ MDNode *DebugLoc::getAsMDNode(const LLVMContext &Ctx) const {
assert(Scope && "If scope is null, this should be isUnknown()");
LLVMContext &Ctx2 = Scope->getContext();
- const Type *Int32 = Type::getInt32Ty(Ctx2);
+ Type *Int32 = Type::getInt32Ty(Ctx2);
Value *Elts[] = {
ConstantInt::get(Int32, getLine()), ConstantInt::get(Int32, getCol()),
Scope, IA
@@ -240,7 +240,7 @@ int LLVMContextImpl::getOrAddScopeInlinedAtIdxEntry(MDNode *Scope, MDNode *IA,
/// deleted - The MDNode this is pointing to got deleted, so this pointer needs
/// to drop to null and we need remove our entry from the DenseMap.
void DebugRecVH::deleted() {
- // If this is a non-canonical reference, just drop the value to null, we know
+ // If this is a non-canonical reference, just drop the value to null, we know
// it doesn't have a map entry.
if (Idx == 0) {
setValPtr(0);
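The one-character change in DebugLoc.cpp (dropping const from Type*) recurs through every file below; it is fallout from the release_30 type-system rewrite, in which named struct types are created empty and mutated in place (StructType::create plus setBody, per the Core.cpp hunk above), so Type can no longer be passed around as const. A short sketch of the pattern that forced the change, assuming 3.0-era headers:

    #include "llvm/DerivedTypes.h"
    #include "llvm/LLVMContext.h"
    using namespace llvm;

    static StructType *makeListNode(LLVMContext &C) {
      // Named structs are created opaque, then filled in; this mutation is
      // why const Type* parameters were dropped across the API in 3.0.
      StructType *Node = StructType::create(C, "list.node");
      Type *Fields[] = { Type::getInt32Ty(C), PointerType::getUnqual(Node) };
      Node->setBody(Fields, /*isPacked=*/false);
      return Node;
    }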
diff --git a/lib/VMCore/Function.cpp b/lib/VMCore/Function.cpp
index 6536bcd..1215e6a 100644
--- a/lib/VMCore/Function.cpp
+++ b/lib/VMCore/Function.cpp
@@ -17,6 +17,7 @@
#include "llvm/LLVMContext.h"
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/Support/CallSite.h"
+#include "llvm/Support/InstIterator.h"
#include "llvm/Support/LeakDetector.h"
#include "llvm/Support/ManagedStatic.h"
#include "llvm/Support/StringPool.h"
@@ -38,7 +39,7 @@ template class llvm::SymbolTableListTraits<BasicBlock, Function>;
// Argument Implementation
//===----------------------------------------------------------------------===//
-Argument::Argument(const Type *Ty, const Twine &Name, Function *Par)
+Argument::Argument(Type *Ty, const Twine &Name, Function *Par)
: Value(Ty, Value::ArgumentVal) {
Parent = 0;
@@ -158,7 +159,7 @@ void Function::eraseFromParent() {
// Function Implementation
//===----------------------------------------------------------------------===//
-Function::Function(const FunctionType *Ty, LinkageTypes Linkage,
+Function::Function(FunctionType *Ty, LinkageTypes Linkage,
const Twine &name, Module *ParentModule)
: GlobalValue(PointerType::getUnqual(Ty),
Value::FunctionVal, 0, 0, Linkage, name) {
@@ -195,7 +196,7 @@ Function::~Function() {
void Function::BuildLazyArguments() const {
// Create the arguments vector, all arguments start out unnamed.
- const FunctionType *FT = getFunctionType();
+ FunctionType *FT = getFunctionType();
for (unsigned i = 0, e = FT->getNumParams(); i != e; ++i) {
assert(!FT->getParamType(i)->isVoidTy() &&
"Cannot have void typed arguments!");
@@ -345,7 +346,7 @@ std::string Intrinsic::getName(ID id, ArrayRef<Type*> Tys) {
return Table[id];
std::string Result(Table[id]);
for (unsigned i = 0; i < Tys.size(); ++i) {
- if (const PointerType* PTyp = dyn_cast<PointerType>(Tys[i])) {
+ if (PointerType* PTyp = dyn_cast<PointerType>(Tys[i])) {
Result += ".p" + llvm::utostr(PTyp->getAddressSpace()) +
EVT::getEVT(PTyp->getElementType()).getEVTString();
}
@@ -355,9 +356,9 @@ std::string Intrinsic::getName(ID id, ArrayRef<Type*> Tys) {
return Result;
}
-const FunctionType *Intrinsic::getType(LLVMContext &Context,
+FunctionType *Intrinsic::getType(LLVMContext &Context,
ID id, ArrayRef<Type*> Tys) {
- const Type *ResultTy = NULL;
+ Type *ResultTy = NULL;
std::vector<Type*> ArgTys;
bool IsVarArg = false;
@@ -416,8 +417,7 @@ bool Function::hasAddressTaken(const User* *PutOffender) const {
/// FIXME: Remove after <rdar://problem/8031714> is fixed.
/// FIXME: Is the above FIXME valid?
bool Function::callsFunctionThatReturnsTwice() const {
- const Module *M = this->getParent();
- static const char *ReturnsTwiceFns[] = {
+ static const char *const ReturnsTwiceFns[] = {
"_setjmp",
"setjmp",
"sigsetjmp",
@@ -428,16 +428,25 @@ bool Function::callsFunctionThatReturnsTwice() const {
"getcontext"
};
- for (unsigned I = 0; I < array_lengthof(ReturnsTwiceFns); ++I)
- if (const Function *Callee = M->getFunction(ReturnsTwiceFns[I])) {
- if (!Callee->use_empty())
- for (Value::const_use_iterator
- I = Callee->use_begin(), E = Callee->use_end();
- I != E; ++I)
- if (const CallInst *CI = dyn_cast<CallInst>(*I))
- if (CI->getParent()->getParent() == this)
- return true;
+ for (const_inst_iterator I = inst_begin(this), E = inst_end(this); I != E;
+ ++I) {
+ const CallInst* callInst = dyn_cast<CallInst>(&*I);
+ if (!callInst)
+ continue;
+ if (callInst->canReturnTwice())
+ return true;
+
+ // check for known function names.
+ // FIXME: move this to clang.
+ Function *F = callInst->getCalledFunction();
+ if (!F)
+ continue;
+ StringRef Name = F->getName();
+ for (unsigned J = 0, e = array_lengthof(ReturnsTwiceFns); J != e; ++J) {
+ if (Name == ReturnsTwiceFns[J])
+ return true;
}
+ }
return false;
}
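callsFunctionThatReturnsTwice no longer walks the use lists of a fixed name table from the module side; it scans the function's own instructions, trusting the canReturnTwice() bit on each call first and falling back to the setjmp-style name list (which the FIXME wants moved into clang). Roughly how a caller might drive the query — a hypothetical module walk, not code from this import:

    #include "llvm/Module.h"
    #include "llvm/Support/raw_ostream.h"
    using namespace llvm;

    // Flag definitions that call setjmp-style functions, e.g. before running
    // an optimization that is unsafe across returns-twice calls.
    static void reportReturnsTwiceCallers(Module &M) {
      for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F)
        if (!F->isDeclaration() && F->callsFunctionThatReturnsTwice())
          errs() << F->getName() << " calls a returns-twice function\n";
    }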
diff --git a/lib/VMCore/GCOV.cpp b/lib/VMCore/GCOV.cpp
new file mode 100644
index 0000000..fc7f96f
--- /dev/null
+++ b/lib/VMCore/GCOV.cpp
@@ -0,0 +1,281 @@
+//===- GCOV.cpp - LLVM coverage tool --------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// GCOV implements the interface to read and write coverage files that use
+// 'gcov' format.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Support/GCOV.h"
+#include "llvm/ADT/OwningPtr.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/Support/MemoryObject.h"
+#include "llvm/Support/system_error.h"
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+// GCOVFile implementation.
+
+/// ~GCOVFile - Delete GCOVFile and its content.
+GCOVFile::~GCOVFile() {
+ DeleteContainerPointers(Functions);
+}
+
+/// isGCDAFile - Return true if Format identifies a .gcda file.
+static bool isGCDAFile(GCOVFormat Format) {
+ return Format == GCDA_402 || Format == GCDA_404;
+}
+
+/// isGCNOFile - Return true if Format identifies a .gcno file.
+static bool isGCNOFile(GCOVFormat Format) {
+ return Format == GCNO_402 || Format == GCNO_404;
+}
+
+/// read - Read GCOV buffer.
+bool GCOVFile::read(GCOVBuffer &Buffer) {
+ GCOVFormat Format = Buffer.readGCOVFormat();
+ if (Format == InvalidGCOV)
+ return false;
+
+ unsigned i = 0;
+ while (1) {
+ GCOVFunction *GFun = NULL;
+ if (isGCDAFile(Format)) {
+ // Use existing function while reading .gcda file.
+ assert (i < Functions.size() && ".gcda data does not match .gcno data");
+ GFun = Functions[i];
+ } else if (isGCNOFile(Format)){
+ GFun = new GCOVFunction();
+ Functions.push_back(GFun);
+ }
+ if (!GFun || !GFun->read(Buffer, Format))
+ break;
+ ++i;
+ }
+ return true;
+}
+
+/// dump - Dump GCOVFile content on standard out for debugging purposes.
+void GCOVFile::dump() {
+ for (SmallVector<GCOVFunction *, 16>::iterator I = Functions.begin(),
+ E = Functions.end(); I != E; ++I)
+ (*I)->dump();
+}
+
+/// collectLineCounts - Collect line counts. This must be used after
+/// reading .gcno and .gcda files.
+void GCOVFile::collectLineCounts(FileInfo &FI) {
+ for (SmallVector<GCOVFunction *, 16>::iterator I = Functions.begin(),
+ E = Functions.end(); I != E; ++I)
+ (*I)->collectLineCounts(FI);
+ FI.print();
+}
+
+//===----------------------------------------------------------------------===//
+// GCOVFunction implementation.
+
+/// ~GCOVFunction - Delete GCOVFunction and its content.
+GCOVFunction::~GCOVFunction() {
+ DeleteContainerPointers(Blocks);
+}
+
+/// read - Read a function from the buffer. Return false if the buffer cursor
+/// does not point to a function tag.
+bool GCOVFunction::read(GCOVBuffer &Buff, GCOVFormat Format) {
+ if (!Buff.readFunctionTag())
+ return false;
+
+ Buff.readInt(); // Function header length
+ Ident = Buff.readInt();
+ Buff.readInt(); // Checksum #1
+ if (Format != GCNO_402)
+ Buff.readInt(); // Checksum #2
+
+ Name = Buff.readString();
+ if (Format == GCNO_402 || Format == GCNO_404)
+ Filename = Buff.readString();
+
+ if (Format == GCDA_402 || Format == GCDA_404) {
+ Buff.readArcTag();
+ uint32_t Count = Buff.readInt() / 2;
+ for (unsigned i = 0, e = Count; i != e; ++i) {
+ Blocks[i]->addCount(Buff.readInt64());
+ }
+ return true;
+ }
+
+ LineNumber = Buff.readInt();
+
+ // read blocks.
+ assert (Buff.readBlockTag() && "Block Tag not found!");
+ uint32_t BlockCount = Buff.readInt();
+ for (uint32_t i = 0, e = BlockCount; i != e; ++i) {
+ Buff.readInt(); // Block flags;
+ Blocks.push_back(new GCOVBlock(i));
+ }
+
+ // read edges.
+ while (Buff.readEdgeTag()) {
+ uint32_t EdgeCount = (Buff.readInt() - 1) / 2;
+ uint32_t BlockNo = Buff.readInt();
+ assert (BlockNo < BlockCount && "Unexpected Block number!");
+ for (uint32_t i = 0, e = EdgeCount; i != e; ++i) {
+ Blocks[BlockNo]->addEdge(Buff.readInt());
+ Buff.readInt(); // Edge flag
+ }
+ }
+
+ // read line table.
+ while (Buff.readLineTag()) {
+ uint32_t LineTableLength = Buff.readInt();
+ uint32_t Size = Buff.getCursor() + LineTableLength*4;
+ uint32_t BlockNo = Buff.readInt();
+ assert (BlockNo < BlockCount && "Unexpected Block number!");
+ GCOVBlock *Block = Blocks[BlockNo];
+ Buff.readInt(); // flag
+ while (Buff.getCursor() != (Size - 4)) {
+ StringRef Filename = Buff.readString();
+ if (Buff.getCursor() == (Size - 4)) break;
+ while (uint32_t L = Buff.readInt())
+ Block->addLine(Filename, L);
+ }
+ Buff.readInt(); // flag
+ }
+ return true;
+}
+
+/// dump - Dump GCOVFunction content on standard out for debugging purposes.
+void GCOVFunction::dump() {
+ outs() << "===== " << Name << " @ " << Filename << ":" << LineNumber << "\n";
+ for (SmallVector<GCOVBlock *, 16>::iterator I = Blocks.begin(),
+ E = Blocks.end(); I != E; ++I)
+ (*I)->dump();
+}
+
+/// collectLineCounts - Collect line counts. This must be used after
+/// reading .gcno and .gcda files.
+void GCOVFunction::collectLineCounts(FileInfo &FI) {
+ for (SmallVector<GCOVBlock *, 16>::iterator I = Blocks.begin(),
+ E = Blocks.end(); I != E; ++I)
+ (*I)->collectLineCounts(FI);
+}
+
+//===----------------------------------------------------------------------===//
+// GCOVBlock implementation.
+
+/// ~GCOVBlock - Delete GCOVBlock and its content.
+GCOVBlock::~GCOVBlock() {
+ Edges.clear();
+ DeleteContainerSeconds(Lines);
+}
+
+void GCOVBlock::addLine(StringRef Filename, uint32_t LineNo) {
+ GCOVLines *&LinesForFile = Lines[Filename];
+ if (!LinesForFile)
+ LinesForFile = new GCOVLines();
+ LinesForFile->add(LineNo);
+}
+
+/// collectLineCounts - Collect line counts. This must be used after
+/// reading .gcno and .gcda files.
+void GCOVBlock::collectLineCounts(FileInfo &FI) {
+ for (StringMap<GCOVLines *>::iterator I = Lines.begin(),
+ E = Lines.end(); I != E; ++I)
+ I->second->collectLineCounts(FI, I->first(), Counter);
+}
+
+/// dump - Dump GCOVBlock content on standard out for debugging purposes.
+void GCOVBlock::dump() {
+ outs() << "Block : " << Number << " Counter : " << Counter << "\n";
+ if (!Edges.empty()) {
+ outs() << "\tEdges : ";
+ for (SmallVector<uint32_t, 16>::iterator I = Edges.begin(), E = Edges.end();
+ I != E; ++I)
+ outs() << (*I) << ",";
+ outs() << "\n";
+ }
+ if (!Lines.empty()) {
+ outs() << "\tLines : ";
+ for (StringMap<GCOVLines *>::iterator LI = Lines.begin(),
+ LE = Lines.end(); LI != LE; ++LI) {
+ outs() << LI->first() << " -> ";
+ LI->second->dump();
+ outs() << "\n";
+ }
+ }
+}
+
+//===----------------------------------------------------------------------===//
+// GCOVLines implementation.
+
+/// collectLineCounts - Collect line counts. This must be used after
+/// reading .gcno and .gcda files.
+void GCOVLines::collectLineCounts(FileInfo &FI, StringRef Filename,
+ uint32_t Count) {
+ for (SmallVector<uint32_t, 16>::iterator I = Lines.begin(),
+ E = Lines.end(); I != E; ++I)
+ FI.addLineCount(Filename, *I, Count);
+}
+
+/// dump - Dump GCOVLines content on standard out for debugging purposes.
+void GCOVLines::dump() {
+ for (SmallVector<uint32_t, 16>::iterator I = Lines.begin(),
+ E = Lines.end(); I != E; ++I)
+ outs() << (*I) << ",";
+}
+
+//===----------------------------------------------------------------------===//
+// FileInfo implementation.
+
+/// addLineCount - Add line count for the given line number in a file.
+void FileInfo::addLineCount(StringRef Filename, uint32_t Line, uint32_t Count) {
+ if (LineInfo.find(Filename) == LineInfo.end()) {
+ OwningPtr<MemoryBuffer> Buff;
+ if (error_code ec = MemoryBuffer::getFileOrSTDIN(Filename, Buff)) {
+ errs() << Filename << ": " << ec.message() << "\n";
+ return;
+ }
+ StringRef AllLines = Buff.take()->getBuffer();
+ LineCounts L(AllLines.count('\n')+2);
+ L[Line-1] = Count;
+ LineInfo[Filename] = L;
+ return;
+ }
+ LineCounts &L = LineInfo[Filename];
+ L[Line-1] = Count;
+}
+
+/// print - Print source files with collected line count information.
+void FileInfo::print() {
+ for (StringMap<LineCounts>::iterator I = LineInfo.begin(), E = LineInfo.end();
+ I != E; ++I) {
+ StringRef Filename = I->first();
+ outs() << Filename << "\n";
+ LineCounts &L = LineInfo[Filename];
+ OwningPtr<MemoryBuffer> Buff;
+ if (error_code ec = MemoryBuffer::getFileOrSTDIN(Filename, Buff)) {
+ errs() << Filename << ": " << ec.message() << "\n";
+ return;
+ }
+ StringRef AllLines = Buff.take()->getBuffer();
+ for (unsigned i = 0, e = L.size(); i != e; ++i) {
+ if (L[i])
+ outs() << L[i] << ":\t";
+ else
+ outs() << " :\t";
+ std::pair<StringRef, StringRef> P = AllLines.split('\n');
+ if (AllLines != P.first)
+ outs() << P.first;
+ outs() << "\n";
+ AllLines = P.second;
+ }
+ }
+}
+
+
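GCOV.cpp is entirely new: GCOVFile/GCOVFunction/GCOVBlock/GCOVLines parse the .gcno structure first, then overlay .gcda arc counters onto the same block list (note the assert tying the two files together in GCOVFile::read). A rough driver sketch, assuming the GCOVBuffer(MemoryBuffer*) constructor and FileInfo type from the matching llvm/Support/GCOV.h header; a real tool would report errors rather than returning false:

    #include "llvm/Support/GCOV.h"
    #include "llvm/Support/MemoryBuffer.h"
    #include "llvm/ADT/OwningPtr.h"
    #include "llvm/Support/system_error.h"
    using namespace llvm;

    static bool readCoverage(StringRef GCNOPath, StringRef GCDAPath) {
      GCOVFile GF;
      OwningPtr<MemoryBuffer> GCNOBuff;
      if (MemoryBuffer::getFileOrSTDIN(GCNOPath, GCNOBuff))
        return false;                      // could not read .gcno
      GCOVBuffer GCNOBuf(GCNOBuff.take());
      if (!GF.read(GCNOBuf))               // builds functions and blocks
        return false;
      OwningPtr<MemoryBuffer> GCDABuff;
      if (MemoryBuffer::getFileOrSTDIN(GCDAPath, GCDABuff))
        return false;                      // could not read .gcda
      GCOVBuffer GCDABuf(GCDABuff.take());
      if (!GF.read(GCDABuf))               // overlays arc counts
        return false;
      FileInfo FI;
      GF.collectLineCounts(FI);            // prints annotated source
      return true;
    }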
diff --git a/lib/VMCore/Globals.cpp b/lib/VMCore/Globals.cpp
index db008e0..4254fb2 100644
--- a/lib/VMCore/Globals.cpp
+++ b/lib/VMCore/Globals.cpp
@@ -80,7 +80,7 @@ bool GlobalValue::isDeclaration() const {
// GlobalVariable Implementation
//===----------------------------------------------------------------------===//
-GlobalVariable::GlobalVariable(const Type *Ty, bool constant, LinkageTypes Link,
+GlobalVariable::GlobalVariable(Type *Ty, bool constant, LinkageTypes Link,
Constant *InitVal, const Twine &Name,
bool ThreadLocal, unsigned AddressSpace)
: GlobalValue(PointerType::get(Ty, AddressSpace),
@@ -97,7 +97,7 @@ GlobalVariable::GlobalVariable(const Type *Ty, bool constant, LinkageTypes Link,
LeakDetector::addGarbageObject(this);
}
-GlobalVariable::GlobalVariable(Module &M, const Type *Ty, bool constant,
+GlobalVariable::GlobalVariable(Module &M, Type *Ty, bool constant,
LinkageTypes Link, Constant *InitVal,
const Twine &Name,
GlobalVariable *Before, bool ThreadLocal,
@@ -186,7 +186,7 @@ void GlobalVariable::copyAttributesFrom(const GlobalValue *Src) {
// GlobalAlias Implementation
//===----------------------------------------------------------------------===//
-GlobalAlias::GlobalAlias(const Type *Ty, LinkageTypes Link,
+GlobalAlias::GlobalAlias(Type *Ty, LinkageTypes Link,
const Twine &Name, Constant* aliasee,
Module *ParentModule)
: GlobalValue(Ty, Value::GlobalAliasVal, &Op<0>(), 1, Link, Name) {
@@ -235,7 +235,7 @@ const GlobalValue *GlobalAlias::getAliasedGlobal() const {
CE->getOpcode() == Instruction::GetElementPtr) &&
"Unsupported aliasee");
- return dyn_cast<GlobalValue>(CE->getOperand(0));
+ return cast<GlobalValue>(CE->getOperand(0));
}
const GlobalValue *GlobalAlias::resolveAliasedGlobal(bool stopOnWeak) const {
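The getAliasedGlobal change from dyn_cast to cast is small but deliberate: the assert directly above it guarantees the stripped aliasee operand is a GlobalValue, and cast<> enforces that invariant (aborting in asserts builds) instead of handing callers a null they never checked for. As a generic illustration — a hypothetical helper, not code from this import:

    #include "llvm/Constants.h"
    #include "llvm/GlobalValue.h"
    using namespace llvm;

    static GlobalValue *aliaseeOf(ConstantExpr *CE) {
      // cast<> documents (and, with assertions on, checks) the invariant;
      // dyn_cast<> would silently return null if it were ever violated.
      return cast<GlobalValue>(CE->getOperand(0));
    }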
diff --git a/lib/VMCore/IRBuilder.cpp b/lib/VMCore/IRBuilder.cpp
index ffe961f..5114e2d 100644
--- a/lib/VMCore/IRBuilder.cpp
+++ b/lib/VMCore/IRBuilder.cpp
@@ -40,7 +40,7 @@ Type *IRBuilderBase::getCurrentFunctionReturnType() const {
}
Value *IRBuilderBase::getCastedInt8PtrValue(Value *Ptr) {
- const PointerType *PT = cast<PointerType>(Ptr->getType());
+ PointerType *PT = cast<PointerType>(Ptr->getType());
if (PT->getElementType()->isIntegerTy(8))
return Ptr;
diff --git a/lib/VMCore/InlineAsm.cpp b/lib/VMCore/InlineAsm.cpp
index 4a03b39..736e370 100644
--- a/lib/VMCore/InlineAsm.cpp
+++ b/lib/VMCore/InlineAsm.cpp
@@ -25,7 +25,7 @@ InlineAsm::~InlineAsm() {
}
-InlineAsm *InlineAsm::get(const FunctionType *Ty, StringRef AsmString,
+InlineAsm *InlineAsm::get(FunctionType *Ty, StringRef AsmString,
StringRef Constraints, bool hasSideEffects,
bool isAlignStack) {
InlineAsmKeyType Key(AsmString, Constraints, hasSideEffects, isAlignStack);
@@ -33,7 +33,7 @@ InlineAsm *InlineAsm::get(const FunctionType *Ty, StringRef AsmString,
return pImpl->InlineAsms.getOrCreate(PointerType::getUnqual(Ty), Key);
}
-InlineAsm::InlineAsm(const PointerType *Ty, const std::string &asmString,
+InlineAsm::InlineAsm(PointerType *Ty, const std::string &asmString,
const std::string &constraints, bool hasSideEffects,
bool isAlignStack)
: Value(Ty, Value::InlineAsmVal),
@@ -242,7 +242,7 @@ InlineAsm::ParseConstraints(StringRef Constraints) {
/// Verify - Verify that the specified constraint string is reasonable for the
/// specified function type, and otherwise validate the constraint string.
-bool InlineAsm::Verify(const FunctionType *Ty, StringRef ConstStr) {
+bool InlineAsm::Verify(FunctionType *Ty, StringRef ConstStr) {
if (Ty->isVarArg()) return false;
ConstraintInfoVector Constraints = ParseConstraints(ConstStr);
@@ -282,7 +282,7 @@ bool InlineAsm::Verify(const FunctionType *Ty, StringRef ConstStr) {
if (Ty->getReturnType()->isStructTy()) return false;
break;
default:
- const StructType *STy = dyn_cast<StructType>(Ty->getReturnType());
+ StructType *STy = dyn_cast<StructType>(Ty->getReturnType());
if (STy == 0 || STy->getNumElements() != NumOutputs)
return false;
break;
diff --git a/lib/VMCore/Instruction.cpp b/lib/VMCore/Instruction.cpp
index 02c0757..73191c1 100644
--- a/lib/VMCore/Instruction.cpp
+++ b/lib/VMCore/Instruction.cpp
@@ -20,7 +20,7 @@
#include "llvm/Support/LeakDetector.h"
using namespace llvm;
-Instruction::Instruction(const Type *ty, unsigned it, Use *Ops, unsigned NumOps,
+Instruction::Instruction(Type *ty, unsigned it, Use *Ops, unsigned NumOps,
Instruction *InsertBefore)
: User(ty, Value::InstructionVal + it, Ops, NumOps), Parent(0) {
// Make sure that we get added to a basicblock
@@ -34,7 +34,7 @@ Instruction::Instruction(const Type *ty, unsigned it, Use *Ops, unsigned NumOps,
}
}
-Instruction::Instruction(const Type *ty, unsigned it, Use *Ops, unsigned NumOps,
+Instruction::Instruction(Type *ty, unsigned it, Use *Ops, unsigned NumOps,
BasicBlock *InsertAtEnd)
: User(ty, Value::InstructionVal + it, Ops, NumOps), Parent(0) {
// Make sure that we get added to a basicblock
@@ -101,6 +101,7 @@ const char *Instruction::getOpcodeName(unsigned OpCode) {
case Switch: return "switch";
case IndirectBr: return "indirectbr";
case Invoke: return "invoke";
+ case Resume: return "resume";
case Unwind: return "unwind";
case Unreachable: return "unreachable";
@@ -127,6 +128,9 @@ const char *Instruction::getOpcodeName(unsigned OpCode) {
case Alloca: return "alloca";
case Load: return "load";
case Store: return "store";
+ case AtomicCmpXchg: return "cmpxchg";
+ case AtomicRMW: return "atomicrmw";
+ case Fence: return "fence";
case GetElementPtr: return "getelementptr";
// Convert instructions...
@@ -158,6 +162,7 @@ const char *Instruction::getOpcodeName(unsigned OpCode) {
case ShuffleVector: return "shufflevector";
case ExtractValue: return "extractvalue";
case InsertValue: return "insertvalue";
+ case LandingPad: return "landingpad";
default: return "<Invalid operator> ";
}
@@ -191,10 +196,14 @@ bool Instruction::isIdenticalToWhenDefined(const Instruction *I) const {
// Check special state that is a part of some instructions.
if (const LoadInst *LI = dyn_cast<LoadInst>(this))
return LI->isVolatile() == cast<LoadInst>(I)->isVolatile() &&
- LI->getAlignment() == cast<LoadInst>(I)->getAlignment();
+ LI->getAlignment() == cast<LoadInst>(I)->getAlignment() &&
+ LI->getOrdering() == cast<LoadInst>(I)->getOrdering() &&
+ LI->getSynchScope() == cast<LoadInst>(I)->getSynchScope();
if (const StoreInst *SI = dyn_cast<StoreInst>(this))
return SI->isVolatile() == cast<StoreInst>(I)->isVolatile() &&
- SI->getAlignment() == cast<StoreInst>(I)->getAlignment();
+ SI->getAlignment() == cast<StoreInst>(I)->getAlignment() &&
+ SI->getOrdering() == cast<StoreInst>(I)->getOrdering() &&
+ SI->getSynchScope() == cast<StoreInst>(I)->getSynchScope();
if (const CmpInst *CI = dyn_cast<CmpInst>(this))
return CI->getPredicate() == cast<CmpInst>(I)->getPredicate();
if (const CallInst *CI = dyn_cast<CallInst>(this))
@@ -208,6 +217,18 @@ bool Instruction::isIdenticalToWhenDefined(const Instruction *I) const {
return IVI->getIndices() == cast<InsertValueInst>(I)->getIndices();
if (const ExtractValueInst *EVI = dyn_cast<ExtractValueInst>(this))
return EVI->getIndices() == cast<ExtractValueInst>(I)->getIndices();
+ if (const FenceInst *FI = dyn_cast<FenceInst>(this))
+ return FI->getOrdering() == cast<FenceInst>(I)->getOrdering() &&
+ FI->getSynchScope() == cast<FenceInst>(I)->getSynchScope();
+ if (const AtomicCmpXchgInst *CXI = dyn_cast<AtomicCmpXchgInst>(this))
+ return CXI->isVolatile() == cast<AtomicCmpXchgInst>(I)->isVolatile() &&
+ CXI->getOrdering() == cast<AtomicCmpXchgInst>(I)->getOrdering() &&
+ CXI->getSynchScope() == cast<AtomicCmpXchgInst>(I)->getSynchScope();
+ if (const AtomicRMWInst *RMWI = dyn_cast<AtomicRMWInst>(this))
+ return RMWI->getOperation() == cast<AtomicRMWInst>(I)->getOperation() &&
+ RMWI->isVolatile() == cast<AtomicRMWInst>(I)->isVolatile() &&
+ RMWI->getOrdering() == cast<AtomicRMWInst>(I)->getOrdering() &&
+ RMWI->getSynchScope() == cast<AtomicRMWInst>(I)->getSynchScope();
return true;
}
@@ -230,10 +251,14 @@ bool Instruction::isSameOperationAs(const Instruction *I) const {
// Check special state that is a part of some instructions.
if (const LoadInst *LI = dyn_cast<LoadInst>(this))
return LI->isVolatile() == cast<LoadInst>(I)->isVolatile() &&
- LI->getAlignment() == cast<LoadInst>(I)->getAlignment();
+ LI->getAlignment() == cast<LoadInst>(I)->getAlignment() &&
+ LI->getOrdering() == cast<LoadInst>(I)->getOrdering() &&
+ LI->getSynchScope() == cast<LoadInst>(I)->getSynchScope();
if (const StoreInst *SI = dyn_cast<StoreInst>(this))
return SI->isVolatile() == cast<StoreInst>(I)->isVolatile() &&
- SI->getAlignment() == cast<StoreInst>(I)->getAlignment();
+ SI->getAlignment() == cast<StoreInst>(I)->getAlignment() &&
+ SI->getOrdering() == cast<StoreInst>(I)->getOrdering() &&
+ SI->getSynchScope() == cast<StoreInst>(I)->getSynchScope();
if (const CmpInst *CI = dyn_cast<CmpInst>(this))
return CI->getPredicate() == cast<CmpInst>(I)->getPredicate();
if (const CallInst *CI = dyn_cast<CallInst>(this))
@@ -248,6 +273,18 @@ bool Instruction::isSameOperationAs(const Instruction *I) const {
return IVI->getIndices() == cast<InsertValueInst>(I)->getIndices();
if (const ExtractValueInst *EVI = dyn_cast<ExtractValueInst>(this))
return EVI->getIndices() == cast<ExtractValueInst>(I)->getIndices();
+ if (const FenceInst *FI = dyn_cast<FenceInst>(this))
+ return FI->getOrdering() == cast<FenceInst>(I)->getOrdering() &&
+ FI->getSynchScope() == cast<FenceInst>(I)->getSynchScope();
+ if (const AtomicCmpXchgInst *CXI = dyn_cast<AtomicCmpXchgInst>(this))
+ return CXI->isVolatile() == cast<AtomicCmpXchgInst>(I)->isVolatile() &&
+ CXI->getOrdering() == cast<AtomicCmpXchgInst>(I)->getOrdering() &&
+ CXI->getSynchScope() == cast<AtomicCmpXchgInst>(I)->getSynchScope();
+ if (const AtomicRMWInst *RMWI = dyn_cast<AtomicRMWInst>(this))
+ return RMWI->getOperation() == cast<AtomicRMWInst>(I)->getOperation() &&
+ RMWI->isVolatile() == cast<AtomicRMWInst>(I)->isVolatile() &&
+ RMWI->getOrdering() == cast<AtomicRMWInst>(I)->getOrdering() &&
+ RMWI->getSynchScope() == cast<AtomicRMWInst>(I)->getSynchScope();
return true;
}
@@ -280,13 +317,16 @@ bool Instruction::mayReadFromMemory() const {
default: return false;
case Instruction::VAArg:
case Instruction::Load:
+ case Instruction::Fence: // FIXME: refine definition of mayReadFromMemory
+ case Instruction::AtomicCmpXchg:
+ case Instruction::AtomicRMW:
return true;
case Instruction::Call:
return !cast<CallInst>(this)->doesNotAccessMemory();
case Instruction::Invoke:
return !cast<InvokeInst>(this)->doesNotAccessMemory();
case Instruction::Store:
- return cast<StoreInst>(this)->isVolatile();
+ return !cast<StoreInst>(this)->isUnordered();
}
}
@@ -295,15 +335,18 @@ bool Instruction::mayReadFromMemory() const {
bool Instruction::mayWriteToMemory() const {
switch (getOpcode()) {
default: return false;
+ case Instruction::Fence: // FIXME: refine definition of mayWriteToMemory
case Instruction::Store:
case Instruction::VAArg:
+ case Instruction::AtomicCmpXchg:
+ case Instruction::AtomicRMW:
return true;
case Instruction::Call:
return !cast<CallInst>(this)->onlyReadsMemory();
case Instruction::Invoke:
return !cast<InvokeInst>(this)->onlyReadsMemory();
case Instruction::Load:
- return cast<LoadInst>(this)->isVolatile();
+ return !cast<LoadInst>(this)->isUnordered();
}
}
@@ -312,7 +355,7 @@ bool Instruction::mayWriteToMemory() const {
bool Instruction::mayThrow() const {
if (const CallInst *CI = dyn_cast<CallInst>(this))
return !CI->doesNotThrow();
- return false;
+ return isa<ResumeInst>(this);
}
/// isAssociative - Return true if the instruction is associative:
@@ -372,7 +415,7 @@ bool Instruction::isSafeToSpeculativelyExecute() const {
}
case Load: {
const LoadInst *LI = cast<LoadInst>(this);
- if (LI->isVolatile())
+ if (!LI->isUnordered())
return false;
return LI->getPointerOperand()->isDereferenceablePointer();
}
@@ -392,6 +435,11 @@ bool Instruction::isSafeToSpeculativelyExecute() const {
case Switch:
case Unwind:
case Unreachable:
+ case Fence:
+ case LandingPad:
+ case AtomicRMW:
+ case AtomicCmpXchg:
+ case Resume:
return false; // Misc instructions which have effects
}
}
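The Instruction.cpp predicates now key on isUnordered() rather than isVolatile(): an atomic load or store (even monotonic and non-volatile) must count as reading or writing memory, mayThrow now covers ResumeInst, and the new atomic and landingpad opcodes are excluded from speculation. A hedged sketch of why the distinction matters for a hoisting-style check — a hypothetical helper mirroring the Load case of isSafeToSpeculativelyExecute:

    #include "llvm/Instructions.h"
    using namespace llvm;

    static bool safeToHoistLoad(const LoadInst *LI) {
      // isUnordered() means not volatile and no ordering stronger than
      // Unordered; testing isVolatile() alone would wrongly admit
      // monotonic atomic loads.
      if (!LI->isUnordered())
        return false;
      return LI->getPointerOperand()->isDereferenceablePointer();
    }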
diff --git a/lib/VMCore/Instructions.cpp b/lib/VMCore/Instructions.cpp
index 9baad09..b3a7205 100644
--- a/lib/VMCore/Instructions.cpp
+++ b/lib/VMCore/Instructions.cpp
@@ -62,11 +62,11 @@ const char *SelectInst::areInvalidOperands(Value *Op0, Value *Op1, Value *Op2) {
if (Op1->getType() != Op2->getType())
return "both values to select must have same type";
- if (const VectorType *VT = dyn_cast<VectorType>(Op0->getType())) {
+ if (VectorType *VT = dyn_cast<VectorType>(Op0->getType())) {
// Vector select.
if (VT->getElementType() != Type::getInt1Ty(Op0->getContext()))
return "vector select condition element type must be i1";
- const VectorType *ET = dyn_cast<VectorType>(Op1->getType());
+ VectorType *ET = dyn_cast<VectorType>(Op1->getType());
if (ET == 0)
return "selected values for vector select must be vectors";
if (ET->getNumElements() != VT->getNumElements())
@@ -166,6 +166,88 @@ Value *PHINode::hasConstantValue() const {
return ConstantValue;
}
+//===----------------------------------------------------------------------===//
+// LandingPadInst Implementation
+//===----------------------------------------------------------------------===//
+
+LandingPadInst::LandingPadInst(Type *RetTy, Value *PersonalityFn,
+ unsigned NumReservedValues, const Twine &NameStr,
+ Instruction *InsertBefore)
+ : Instruction(RetTy, Instruction::LandingPad, 0, 0, InsertBefore) {
+ init(PersonalityFn, 1 + NumReservedValues, NameStr);
+}
+
+LandingPadInst::LandingPadInst(Type *RetTy, Value *PersonalityFn,
+ unsigned NumReservedValues, const Twine &NameStr,
+ BasicBlock *InsertAtEnd)
+ : Instruction(RetTy, Instruction::LandingPad, 0, 0, InsertAtEnd) {
+ init(PersonalityFn, 1 + NumReservedValues, NameStr);
+}
+
+LandingPadInst::LandingPadInst(const LandingPadInst &LP)
+ : Instruction(LP.getType(), Instruction::LandingPad,
+ allocHungoffUses(LP.getNumOperands()), LP.getNumOperands()),
+ ReservedSpace(LP.getNumOperands()) {
+ Use *OL = OperandList, *InOL = LP.OperandList;
+ for (unsigned I = 0, E = ReservedSpace; I != E; ++I)
+ OL[I] = InOL[I];
+
+ setCleanup(LP.isCleanup());
+}
+
+LandingPadInst::~LandingPadInst() {
+ dropHungoffUses();
+}
+
+LandingPadInst *LandingPadInst::Create(Type *RetTy, Value *PersonalityFn,
+ unsigned NumReservedClauses,
+ const Twine &NameStr,
+ Instruction *InsertBefore) {
+ return new LandingPadInst(RetTy, PersonalityFn, NumReservedClauses, NameStr,
+ InsertBefore);
+}
+
+LandingPadInst *LandingPadInst::Create(Type *RetTy, Value *PersonalityFn,
+ unsigned NumReservedClauses,
+ const Twine &NameStr,
+ BasicBlock *InsertAtEnd) {
+ return new LandingPadInst(RetTy, PersonalityFn, NumReservedClauses, NameStr,
+ InsertAtEnd);
+}
+
+void LandingPadInst::init(Value *PersFn, unsigned NumReservedValues,
+ const Twine &NameStr) {
+ ReservedSpace = NumReservedValues;
+ NumOperands = 1;
+ OperandList = allocHungoffUses(ReservedSpace);
+ OperandList[0] = PersFn;
+ setName(NameStr);
+ setCleanup(false);
+}
+
+/// growOperands - This grows the operand list in response to a push_back
+/// style of operation. It grows the number of ops by a factor of two.
+void LandingPadInst::growOperands(unsigned Size) {
+ unsigned e = getNumOperands();
+ if (ReservedSpace >= e + Size) return;
+ ReservedSpace = (e + Size / 2) * 2;
+
+ Use *NewOps = allocHungoffUses(ReservedSpace);
+ Use *OldOps = OperandList;
+ for (unsigned i = 0; i != e; ++i)
+ NewOps[i] = OldOps[i];
+
+ OperandList = NewOps;
+ Use::zap(OldOps, OldOps + e, true);
+}
+
+void LandingPadInst::addClause(Value *Val) {
+ unsigned OpNo = getNumOperands();
+ growOperands(1);
+ assert(OpNo < ReservedSpace && "Growing didn't work!");
+ ++NumOperands;
+ OperandList[OpNo] = Val;
+}
//===----------------------------------------------------------------------===//
// CallInst Implementation
@@ -179,7 +261,7 @@ void CallInst::init(Value *Func, ArrayRef<Value *> Args, const Twine &NameStr) {
Op<-1>() = Func;
#ifndef NDEBUG
- const FunctionType *FTy =
+ FunctionType *FTy =
cast<FunctionType>(cast<PointerType>(Func->getType())->getElementType());
assert((Args.size() == FTy->getNumParams() ||
@@ -201,7 +283,7 @@ void CallInst::init(Value *Func, const Twine &NameStr) {
Op<-1>() = Func;
#ifndef NDEBUG
- const FunctionType *FTy =
+ FunctionType *FTy =
cast<FunctionType>(cast<PointerType>(Func->getType())->getElementType());
assert(FTy->getNumParams() == 0 && "Calling a function with bad signature");
@@ -269,8 +351,8 @@ static bool IsConstantOne(Value *val) {
}
static Instruction *createMalloc(Instruction *InsertBefore,
- BasicBlock *InsertAtEnd, const Type *IntPtrTy,
- const Type *AllocTy, Value *AllocSize,
+ BasicBlock *InsertAtEnd, Type *IntPtrTy,
+ Type *AllocTy, Value *AllocSize,
Value *ArraySize, Function *MallocF,
const Twine &Name) {
assert(((!InsertBefore && InsertAtEnd) || (InsertBefore && !InsertAtEnd)) &&
@@ -319,7 +401,7 @@ static Instruction *createMalloc(Instruction *InsertBefore,
if (!MallocFunc)
// prototype malloc as "void *malloc(size_t)"
MallocFunc = M->getOrInsertFunction("malloc", BPTy, IntPtrTy, NULL);
- const PointerType *AllocPtrType = PointerType::getUnqual(AllocTy);
+ PointerType *AllocPtrType = PointerType::getUnqual(AllocTy);
CallInst *MCall = NULL;
Instruction *Result = NULL;
if (InsertBefore) {
@@ -354,7 +436,7 @@ static Instruction *createMalloc(Instruction *InsertBefore,
/// 2. Call malloc with that argument.
/// 3. Bitcast the result of the malloc call to the specified type.
Instruction *CallInst::CreateMalloc(Instruction *InsertBefore,
- const Type *IntPtrTy, const Type *AllocTy,
+ Type *IntPtrTy, Type *AllocTy,
Value *AllocSize, Value *ArraySize,
Function * MallocF,
const Twine &Name) {
@@ -371,7 +453,7 @@ Instruction *CallInst::CreateMalloc(Instruction *InsertBefore,
/// Note: This function does not add the bitcast to the basic block, that is the
/// responsibility of the caller.
Instruction *CallInst::CreateMalloc(BasicBlock *InsertAtEnd,
- const Type *IntPtrTy, const Type *AllocTy,
+ Type *IntPtrTy, Type *AllocTy,
Value *AllocSize, Value *ArraySize,
Function *MallocF, const Twine &Name) {
return createMalloc(NULL, InsertAtEnd, IntPtrTy, AllocTy, AllocSize,
@@ -388,8 +470,8 @@ static Instruction* createFree(Value* Source, Instruction *InsertBefore,
BasicBlock* BB = InsertBefore ? InsertBefore->getParent() : InsertAtEnd;
Module* M = BB->getParent()->getParent();
- const Type *VoidTy = Type::getVoidTy(M->getContext());
- const Type *IntPtrTy = Type::getInt8PtrTy(M->getContext());
+ Type *VoidTy = Type::getVoidTy(M->getContext());
+ Type *IntPtrTy = Type::getInt8PtrTy(M->getContext());
// prototype free as "void free(void*)"
Value *FreeFunc = M->getOrInsertFunction("free", VoidTy, IntPtrTy, NULL);
CallInst* Result = NULL;
@@ -436,7 +518,7 @@ void InvokeInst::init(Value *Fn, BasicBlock *IfNormal, BasicBlock *IfException,
Op<-1>() = IfException;
#ifndef NDEBUG
- const FunctionType *FTy =
+ FunctionType *FTy =
cast<FunctionType>(cast<PointerType>(Fn->getType())->getElementType());
assert(((Args.size() == FTy->getNumParams()) ||
@@ -494,6 +576,9 @@ void InvokeInst::removeAttribute(unsigned i, Attributes attr) {
setAttributes(PAL);
}
+LandingPadInst *InvokeInst::getLandingPadInst() const {
+ return cast<LandingPadInst>(getUnwindDest()->getFirstNonPHI());
+}
//===----------------------------------------------------------------------===//
// ReturnInst Implementation
@@ -574,6 +659,41 @@ BasicBlock *UnwindInst::getSuccessorV(unsigned idx) const {
}
//===----------------------------------------------------------------------===//
+// ResumeInst Implementation
+//===----------------------------------------------------------------------===//
+
+ResumeInst::ResumeInst(const ResumeInst &RI)
+ : TerminatorInst(Type::getVoidTy(RI.getContext()), Instruction::Resume,
+ OperandTraits<ResumeInst>::op_begin(this), 1) {
+ Op<0>() = RI.Op<0>();
+}
+
+ResumeInst::ResumeInst(Value *Exn, Instruction *InsertBefore)
+ : TerminatorInst(Type::getVoidTy(Exn->getContext()), Instruction::Resume,
+ OperandTraits<ResumeInst>::op_begin(this), 1, InsertBefore) {
+ Op<0>() = Exn;
+}
+
+ResumeInst::ResumeInst(Value *Exn, BasicBlock *InsertAtEnd)
+ : TerminatorInst(Type::getVoidTy(Exn->getContext()), Instruction::Resume,
+ OperandTraits<ResumeInst>::op_begin(this), 1, InsertAtEnd) {
+ Op<0>() = Exn;
+}
+
+unsigned ResumeInst::getNumSuccessorsV() const {
+ return getNumSuccessors();
+}
+
+void ResumeInst::setSuccessorV(unsigned idx, BasicBlock *NewSucc) {
+ llvm_unreachable("ResumeInst has no successors!");
+}
+
+BasicBlock *ResumeInst::getSuccessorV(unsigned idx) const {
+ llvm_unreachable("ResumeInst has no successors!");
+ return 0;
+}
+
+//===----------------------------------------------------------------------===//
// UnreachableInst Implementation
//===----------------------------------------------------------------------===//
@@ -665,6 +785,27 @@ BranchInst::BranchInst(const BranchInst &BI) :
SubclassOptionalData = BI.SubclassOptionalData;
}
+void BranchInst::swapSuccessors() {
+ assert(isConditional() &&
+ "Cannot swap successors of an unconditional branch");
+ Op<-1>().swap(Op<-2>());
+
+ // Update profile metadata if present and it matches our structural
+ // expectations.
+ MDNode *ProfileData = getMetadata(LLVMContext::MD_prof);
+ if (!ProfileData || ProfileData->getNumOperands() != 3)
+ return;
+
+ // The first operand is the name. Fetch them backwards and build a new one.
+ Value *Ops[] = {
+ ProfileData->getOperand(0),
+ ProfileData->getOperand(2),
+ ProfileData->getOperand(1)
+ };
+ setMetadata(LLVMContext::MD_prof,
+ MDNode::get(ProfileData->getContext(), Ops));
+}
+
BasicBlock *BranchInst::getSuccessorV(unsigned idx) const {
return getSuccessor(idx);
}
@@ -692,7 +833,7 @@ static Value *getAISize(LLVMContext &Context, Value *Amt) {
return Amt;
}
-AllocaInst::AllocaInst(const Type *Ty, Value *ArraySize,
+AllocaInst::AllocaInst(Type *Ty, Value *ArraySize,
const Twine &Name, Instruction *InsertBefore)
: UnaryInstruction(PointerType::getUnqual(Ty), Alloca,
getAISize(Ty->getContext(), ArraySize), InsertBefore) {
@@ -701,7 +842,7 @@ AllocaInst::AllocaInst(const Type *Ty, Value *ArraySize,
setName(Name);
}
-AllocaInst::AllocaInst(const Type *Ty, Value *ArraySize,
+AllocaInst::AllocaInst(Type *Ty, Value *ArraySize,
const Twine &Name, BasicBlock *InsertAtEnd)
: UnaryInstruction(PointerType::getUnqual(Ty), Alloca,
getAISize(Ty->getContext(), ArraySize), InsertAtEnd) {
@@ -710,7 +851,7 @@ AllocaInst::AllocaInst(const Type *Ty, Value *ArraySize,
setName(Name);
}
-AllocaInst::AllocaInst(const Type *Ty, const Twine &Name,
+AllocaInst::AllocaInst(Type *Ty, const Twine &Name,
Instruction *InsertBefore)
: UnaryInstruction(PointerType::getUnqual(Ty), Alloca,
getAISize(Ty->getContext(), 0), InsertBefore) {
@@ -719,7 +860,7 @@ AllocaInst::AllocaInst(const Type *Ty, const Twine &Name,
setName(Name);
}
-AllocaInst::AllocaInst(const Type *Ty, const Twine &Name,
+AllocaInst::AllocaInst(Type *Ty, const Twine &Name,
BasicBlock *InsertAtEnd)
: UnaryInstruction(PointerType::getUnqual(Ty), Alloca,
getAISize(Ty->getContext(), 0), InsertAtEnd) {
@@ -728,7 +869,7 @@ AllocaInst::AllocaInst(const Type *Ty, const Twine &Name,
setName(Name);
}
-AllocaInst::AllocaInst(const Type *Ty, Value *ArraySize, unsigned Align,
+AllocaInst::AllocaInst(Type *Ty, Value *ArraySize, unsigned Align,
const Twine &Name, Instruction *InsertBefore)
: UnaryInstruction(PointerType::getUnqual(Ty), Alloca,
getAISize(Ty->getContext(), ArraySize), InsertBefore) {
@@ -737,7 +878,7 @@ AllocaInst::AllocaInst(const Type *Ty, Value *ArraySize, unsigned Align,
setName(Name);
}
-AllocaInst::AllocaInst(const Type *Ty, Value *ArraySize, unsigned Align,
+AllocaInst::AllocaInst(Type *Ty, Value *ArraySize, unsigned Align,
const Twine &Name, BasicBlock *InsertAtEnd)
: UnaryInstruction(PointerType::getUnqual(Ty), Alloca,
getAISize(Ty->getContext(), ArraySize), InsertAtEnd) {
@@ -787,6 +928,8 @@ bool AllocaInst::isStaticAlloca() const {
void LoadInst::AssertOK() {
assert(getOperand(0)->getType()->isPointerTy() &&
"Ptr must have pointer type.");
+ assert(!(isAtomic() && getAlignment() == 0) &&
+ "Alignment required for atomic load");
}
LoadInst::LoadInst(Value *Ptr, const Twine &Name, Instruction *InsertBef)
@@ -794,6 +937,7 @@ LoadInst::LoadInst(Value *Ptr, const Twine &Name, Instruction *InsertBef)
Load, Ptr, InsertBef) {
setVolatile(false);
setAlignment(0);
+ setAtomic(NotAtomic);
AssertOK();
setName(Name);
}
@@ -803,6 +947,7 @@ LoadInst::LoadInst(Value *Ptr, const Twine &Name, BasicBlock *InsertAE)
Load, Ptr, InsertAE) {
setVolatile(false);
setAlignment(0);
+ setAtomic(NotAtomic);
AssertOK();
setName(Name);
}
@@ -813,6 +958,18 @@ LoadInst::LoadInst(Value *Ptr, const Twine &Name, bool isVolatile,
Load, Ptr, InsertBef) {
setVolatile(isVolatile);
setAlignment(0);
+ setAtomic(NotAtomic);
+ AssertOK();
+ setName(Name);
+}
+
+LoadInst::LoadInst(Value *Ptr, const Twine &Name, bool isVolatile,
+ BasicBlock *InsertAE)
+ : UnaryInstruction(cast<PointerType>(Ptr->getType())->getElementType(),
+ Load, Ptr, InsertAE) {
+ setVolatile(isVolatile);
+ setAlignment(0);
+ setAtomic(NotAtomic);
AssertOK();
setName(Name);
}
@@ -823,6 +980,7 @@ LoadInst::LoadInst(Value *Ptr, const Twine &Name, bool isVolatile,
Load, Ptr, InsertBef) {
setVolatile(isVolatile);
setAlignment(Align);
+ setAtomic(NotAtomic);
AssertOK();
setName(Name);
}
@@ -833,27 +991,43 @@ LoadInst::LoadInst(Value *Ptr, const Twine &Name, bool isVolatile,
Load, Ptr, InsertAE) {
setVolatile(isVolatile);
setAlignment(Align);
+ setAtomic(NotAtomic);
AssertOK();
setName(Name);
}
-LoadInst::LoadInst(Value *Ptr, const Twine &Name, bool isVolatile,
+LoadInst::LoadInst(Value *Ptr, const Twine &Name, bool isVolatile,
+ unsigned Align, AtomicOrdering Order,
+ SynchronizationScope SynchScope,
+ Instruction *InsertBef)
+ : UnaryInstruction(cast<PointerType>(Ptr->getType())->getElementType(),
+ Load, Ptr, InsertBef) {
+ setVolatile(isVolatile);
+ setAlignment(Align);
+ setAtomic(Order, SynchScope);
+ AssertOK();
+ setName(Name);
+}
+
+LoadInst::LoadInst(Value *Ptr, const Twine &Name, bool isVolatile,
+ unsigned Align, AtomicOrdering Order,
+ SynchronizationScope SynchScope,
BasicBlock *InsertAE)
: UnaryInstruction(cast<PointerType>(Ptr->getType())->getElementType(),
Load, Ptr, InsertAE) {
setVolatile(isVolatile);
- setAlignment(0);
+ setAlignment(Align);
+ setAtomic(Order, SynchScope);
AssertOK();
setName(Name);
}
-
-
LoadInst::LoadInst(Value *Ptr, const char *Name, Instruction *InsertBef)
: UnaryInstruction(cast<PointerType>(Ptr->getType())->getElementType(),
Load, Ptr, InsertBef) {
setVolatile(false);
setAlignment(0);
+ setAtomic(NotAtomic);
AssertOK();
if (Name && Name[0]) setName(Name);
}
@@ -863,6 +1037,7 @@ LoadInst::LoadInst(Value *Ptr, const char *Name, BasicBlock *InsertAE)
Load, Ptr, InsertAE) {
setVolatile(false);
setAlignment(0);
+ setAtomic(NotAtomic);
AssertOK();
if (Name && Name[0]) setName(Name);
}
@@ -873,6 +1048,7 @@ LoadInst::LoadInst(Value *Ptr, const char *Name, bool isVolatile,
Load, Ptr, InsertBef) {
setVolatile(isVolatile);
setAlignment(0);
+ setAtomic(NotAtomic);
AssertOK();
if (Name && Name[0]) setName(Name);
}
@@ -883,6 +1059,7 @@ LoadInst::LoadInst(Value *Ptr, const char *Name, bool isVolatile,
Load, Ptr, InsertAE) {
setVolatile(isVolatile);
setAlignment(0);
+ setAtomic(NotAtomic);
AssertOK();
if (Name && Name[0]) setName(Name);
}
@@ -891,7 +1068,7 @@ void LoadInst::setAlignment(unsigned Align) {
assert((Align & (Align-1)) == 0 && "Alignment is not a power of 2!");
assert(Align <= MaximumAlignment &&
"Alignment is greater than MaximumAlignment!");
- setInstructionSubclassData((getSubclassDataFromInstruction() & 1) |
+ setInstructionSubclassData((getSubclassDataFromInstruction() & ~(31 << 1)) |
((Log2_32(Align)+1)<<1));
assert(getAlignment() == Align && "Alignment representation error!");
}
@@ -907,6 +1084,8 @@ void StoreInst::AssertOK() {
assert(getOperand(0)->getType() ==
cast<PointerType>(getOperand(1)->getType())->getElementType()
&& "Ptr must be a pointer to Val type!");
+ assert(!(isAtomic() && getAlignment() == 0) &&
+ "Alignment required for atomic load");
}
@@ -919,6 +1098,7 @@ StoreInst::StoreInst(Value *val, Value *addr, Instruction *InsertBefore)
Op<1>() = addr;
setVolatile(false);
setAlignment(0);
+ setAtomic(NotAtomic);
AssertOK();
}
@@ -931,6 +1111,7 @@ StoreInst::StoreInst(Value *val, Value *addr, BasicBlock *InsertAtEnd)
Op<1>() = addr;
setVolatile(false);
setAlignment(0);
+ setAtomic(NotAtomic);
AssertOK();
}
@@ -944,6 +1125,7 @@ StoreInst::StoreInst(Value *val, Value *addr, bool isVolatile,
Op<1>() = addr;
setVolatile(isVolatile);
setAlignment(0);
+ setAtomic(NotAtomic);
AssertOK();
}
@@ -957,6 +1139,37 @@ StoreInst::StoreInst(Value *val, Value *addr, bool isVolatile,
Op<1>() = addr;
setVolatile(isVolatile);
setAlignment(Align);
+ setAtomic(NotAtomic);
+ AssertOK();
+}
+
+StoreInst::StoreInst(Value *val, Value *addr, bool isVolatile,
+ unsigned Align, AtomicOrdering Order,
+ SynchronizationScope SynchScope,
+ Instruction *InsertBefore)
+ : Instruction(Type::getVoidTy(val->getContext()), Store,
+ OperandTraits<StoreInst>::op_begin(this),
+ OperandTraits<StoreInst>::operands(this),
+ InsertBefore) {
+ Op<0>() = val;
+ Op<1>() = addr;
+ setVolatile(isVolatile);
+ setAlignment(Align);
+ setAtomic(Order, SynchScope);
+ AssertOK();
+}
+
+StoreInst::StoreInst(Value *val, Value *addr, bool isVolatile,
+ BasicBlock *InsertAtEnd)
+ : Instruction(Type::getVoidTy(val->getContext()), Store,
+ OperandTraits<StoreInst>::op_begin(this),
+ OperandTraits<StoreInst>::operands(this),
+ InsertAtEnd) {
+ Op<0>() = val;
+ Op<1>() = addr;
+ setVolatile(isVolatile);
+ setAlignment(0);
+ setAtomic(NotAtomic);
AssertOK();
}
@@ -970,10 +1183,13 @@ StoreInst::StoreInst(Value *val, Value *addr, bool isVolatile,
Op<1>() = addr;
setVolatile(isVolatile);
setAlignment(Align);
+ setAtomic(NotAtomic);
AssertOK();
}
StoreInst::StoreInst(Value *val, Value *addr, bool isVolatile,
+ unsigned Align, AtomicOrdering Order,
+ SynchronizationScope SynchScope,
BasicBlock *InsertAtEnd)
: Instruction(Type::getVoidTy(val->getContext()), Store,
OperandTraits<StoreInst>::op_begin(this),
@@ -982,7 +1198,8 @@ StoreInst::StoreInst(Value *val, Value *addr, bool isVolatile,
Op<0>() = val;
Op<1>() = addr;
setVolatile(isVolatile);
- setAlignment(0);
+ setAlignment(Align);
+ setAtomic(Order, SynchScope);
AssertOK();
}
@@ -990,37 +1207,135 @@ void StoreInst::setAlignment(unsigned Align) {
assert((Align & (Align-1)) == 0 && "Alignment is not a power of 2!");
assert(Align <= MaximumAlignment &&
"Alignment is greater than MaximumAlignment!");
- setInstructionSubclassData((getSubclassDataFromInstruction() & 1) |
+ setInstructionSubclassData((getSubclassDataFromInstruction() & ~(31 << 1)) |
((Log2_32(Align)+1) << 1));
assert(getAlignment() == Align && "Alignment representation error!");
}
//===----------------------------------------------------------------------===//
-// GetElementPtrInst Implementation
+// AtomicCmpXchgInst Implementation
//===----------------------------------------------------------------------===//
-static unsigned retrieveAddrSpace(const Value *Val) {
- return cast<PointerType>(Val->getType())->getAddressSpace();
+void AtomicCmpXchgInst::Init(Value *Ptr, Value *Cmp, Value *NewVal,
+ AtomicOrdering Ordering,
+ SynchronizationScope SynchScope) {
+ Op<0>() = Ptr;
+ Op<1>() = Cmp;
+ Op<2>() = NewVal;
+ setOrdering(Ordering);
+ setSynchScope(SynchScope);
+
+ assert(getOperand(0) && getOperand(1) && getOperand(2) &&
+ "All operands must be non-null!");
+ assert(getOperand(0)->getType()->isPointerTy() &&
+ "Ptr must have pointer type!");
+ assert(getOperand(1)->getType() ==
+ cast<PointerType>(getOperand(0)->getType())->getElementType()
+ && "Ptr must be a pointer to Cmp type!");
+ assert(getOperand(2)->getType() ==
+ cast<PointerType>(getOperand(0)->getType())->getElementType()
+ && "Ptr must be a pointer to NewVal type!");
+ assert(Ordering != NotAtomic &&
+ "AtomicCmpXchg instructions must be atomic!");
+}
+
+AtomicCmpXchgInst::AtomicCmpXchgInst(Value *Ptr, Value *Cmp, Value *NewVal,
+ AtomicOrdering Ordering,
+ SynchronizationScope SynchScope,
+ Instruction *InsertBefore)
+ : Instruction(Cmp->getType(), AtomicCmpXchg,
+ OperandTraits<AtomicCmpXchgInst>::op_begin(this),
+ OperandTraits<AtomicCmpXchgInst>::operands(this),
+ InsertBefore) {
+ Init(Ptr, Cmp, NewVal, Ordering, SynchScope);
}
-void GetElementPtrInst::init(Value *Ptr, Value* const *Idx, unsigned NumIdx,
- const Twine &Name) {
- assert(NumOperands == 1+NumIdx && "NumOperands not initialized?");
- Use *OL = OperandList;
- OL[0] = Ptr;
+AtomicCmpXchgInst::AtomicCmpXchgInst(Value *Ptr, Value *Cmp, Value *NewVal,
+ AtomicOrdering Ordering,
+ SynchronizationScope SynchScope,
+ BasicBlock *InsertAtEnd)
+ : Instruction(Cmp->getType(), AtomicCmpXchg,
+ OperandTraits<AtomicCmpXchgInst>::op_begin(this),
+ OperandTraits<AtomicCmpXchgInst>::operands(this),
+ InsertAtEnd) {
+ Init(Ptr, Cmp, NewVal, Ordering, SynchScope);
+}
+
+//===----------------------------------------------------------------------===//
+// AtomicRMWInst Implementation
+//===----------------------------------------------------------------------===//
- for (unsigned i = 0; i != NumIdx; ++i)
- OL[i+1] = Idx[i];
+void AtomicRMWInst::Init(BinOp Operation, Value *Ptr, Value *Val,
+ AtomicOrdering Ordering,
+ SynchronizationScope SynchScope) {
+ Op<0>() = Ptr;
+ Op<1>() = Val;
+ setOperation(Operation);
+ setOrdering(Ordering);
+ setSynchScope(SynchScope);
- setName(Name);
+ assert(getOperand(0) && getOperand(1) &&
+ "All operands must be non-null!");
+ assert(getOperand(0)->getType()->isPointerTy() &&
+ "Ptr must have pointer type!");
+ assert(getOperand(1)->getType() ==
+ cast<PointerType>(getOperand(0)->getType())->getElementType()
+ && "Ptr must be a pointer to Val type!");
+ assert(Ordering != NotAtomic &&
+ "AtomicRMW instructions must be atomic!");
}
-void GetElementPtrInst::init(Value *Ptr, Value *Idx, const Twine &Name) {
- assert(NumOperands == 2 && "NumOperands not initialized?");
- Use *OL = OperandList;
- OL[0] = Ptr;
- OL[1] = Idx;
+AtomicRMWInst::AtomicRMWInst(BinOp Operation, Value *Ptr, Value *Val,
+ AtomicOrdering Ordering,
+ SynchronizationScope SynchScope,
+ Instruction *InsertBefore)
+ : Instruction(Val->getType(), AtomicRMW,
+ OperandTraits<AtomicRMWInst>::op_begin(this),
+ OperandTraits<AtomicRMWInst>::operands(this),
+ InsertBefore) {
+ Init(Operation, Ptr, Val, Ordering, SynchScope);
+}
+
+AtomicRMWInst::AtomicRMWInst(BinOp Operation, Value *Ptr, Value *Val,
+ AtomicOrdering Ordering,
+ SynchronizationScope SynchScope,
+ BasicBlock *InsertAtEnd)
+ : Instruction(Val->getType(), AtomicRMW,
+ OperandTraits<AtomicRMWInst>::op_begin(this),
+ OperandTraits<AtomicRMWInst>::operands(this),
+ InsertAtEnd) {
+ Init(Operation, Ptr, Val, Ordering, SynchScope);
+}
+
+//===----------------------------------------------------------------------===//
+// FenceInst Implementation
+//===----------------------------------------------------------------------===//
+
+FenceInst::FenceInst(LLVMContext &C, AtomicOrdering Ordering,
+ SynchronizationScope SynchScope,
+ Instruction *InsertBefore)
+ : Instruction(Type::getVoidTy(C), Fence, 0, 0, InsertBefore) {
+ setOrdering(Ordering);
+ setSynchScope(SynchScope);
+}
+FenceInst::FenceInst(LLVMContext &C, AtomicOrdering Ordering,
+ SynchronizationScope SynchScope,
+ BasicBlock *InsertAtEnd)
+ : Instruction(Type::getVoidTy(C), Fence, 0, 0, InsertAtEnd) {
+ setOrdering(Ordering);
+ setSynchScope(SynchScope);
+}
+
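// A minimal sketch of driving the constructors added above, against the
// 3.0-era headers; BB, Ptr, Val and Expected are hypothetical (Ptr an i32*,
// the others i32 values). Each instruction is appended to BB by the
// InsertAtEnd form of its constructor.
#include "llvm/Instructions.h"
using namespace llvm;

void emitAtomicOps(BasicBlock *BB, Value *Ptr, Value *Val, Value *Expected) {
  // atomicrmw add i32* %Ptr, i32 %Val seq_cst
  new AtomicRMWInst(AtomicRMWInst::Add, Ptr, Val,
                    SequentiallyConsistent, CrossThread, BB);
  // cmpxchg i32* %Ptr, i32 %Expected, i32 %Val seq_cst
  new AtomicCmpXchgInst(Ptr, Expected, Val,
                        SequentiallyConsistent, CrossThread, BB);
  // fence seq_cst
  new FenceInst(BB->getContext(), SequentiallyConsistent, CrossThread, BB);
}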
+//===----------------------------------------------------------------------===//
+// GetElementPtrInst Implementation
+//===----------------------------------------------------------------------===//
+
+void GetElementPtrInst::init(Value *Ptr, ArrayRef<Value *> IdxList,
+ const Twine &Name) {
+ assert(NumOperands == 1 + IdxList.size() && "NumOperands not initialized?");
+ OperandList[0] = Ptr;
+ std::copy(IdxList.begin(), IdxList.end(), op_begin() + 1);
setName(Name);
}
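// Caller-side view of the ArrayRef migration, assuming the matching
// GetElementPtrInst::Create overload from the 3.0 headers; GEPBase, Idx0,
// Idx1 and InsertBefore are hypothetical.
#include "llvm/Instructions.h"
using namespace llvm;

Instruction *buildGEP(Value *GEPBase, Value *Idx0, Value *Idx1,
                      Instruction *InsertBefore) {
  Value *Idxs[] = { Idx0, Idx1 };  // converts implicitly to ArrayRef<Value*>
  // Pre-3.0 spelling was Create(GEPBase, Idxs, Idxs + 2, "gep", InsertBefore).
  return GetElementPtrInst::Create(GEPBase, Idxs, "gep", InsertBefore);
}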
@@ -1029,34 +1344,10 @@ GetElementPtrInst::GetElementPtrInst(const GetElementPtrInst &GEPI)
OperandTraits<GetElementPtrInst>::op_end(this)
- GEPI.getNumOperands(),
GEPI.getNumOperands()) {
- Use *OL = OperandList;
- Use *GEPIOL = GEPI.OperandList;
- for (unsigned i = 0, E = NumOperands; i != E; ++i)
- OL[i] = GEPIOL[i];
+ std::copy(GEPI.op_begin(), GEPI.op_end(), op_begin());
SubclassOptionalData = GEPI.SubclassOptionalData;
}
-GetElementPtrInst::GetElementPtrInst(Value *Ptr, Value *Idx,
- const Twine &Name, Instruction *InBe)
- : Instruction(PointerType::get(
- checkGEPType(getIndexedType(Ptr->getType(),Idx)), retrieveAddrSpace(Ptr)),
- GetElementPtr,
- OperandTraits<GetElementPtrInst>::op_end(this) - 2,
- 2, InBe) {
- init(Ptr, Idx, Name);
-}
-
-GetElementPtrInst::GetElementPtrInst(Value *Ptr, Value *Idx,
- const Twine &Name, BasicBlock *IAE)
- : Instruction(PointerType::get(
- checkGEPType(getIndexedType(Ptr->getType(),Idx)),
- retrieveAddrSpace(Ptr)),
- GetElementPtr,
- OperandTraits<GetElementPtrInst>::op_end(this) - 2,
- 2, IAE) {
- init(Ptr, Idx, Name);
-}
-
/// getIndexedType - Returns the type of the element that would be accessed with
/// a gep instruction with the specified parameters.
///
@@ -1067,14 +1358,13 @@ GetElementPtrInst::GetElementPtrInst(Value *Ptr, Value *Idx,
/// pointer type.
///
template <typename IndexTy>
-static Type *getIndexedTypeInternal(const Type *Ptr, IndexTy const *Idxs,
- unsigned NumIdx) {
- const PointerType *PTy = dyn_cast<PointerType>(Ptr);
+static Type *getIndexedTypeInternal(Type *Ptr, ArrayRef<IndexTy> IdxList) {
+ PointerType *PTy = dyn_cast<PointerType>(Ptr);
if (!PTy) return 0; // Type isn't a pointer type!
Type *Agg = PTy->getElementType();
// Handle the special case of the empty set index set, which is always valid.
- if (NumIdx == 0)
+ if (IdxList.empty())
return Agg;
// If there is at least one index, the top level type must be sized, otherwise
@@ -1083,44 +1373,29 @@ static Type *getIndexedTypeInternal(const Type *Ptr, IndexTy const *Idxs,
return 0;
unsigned CurIdx = 1;
- for (; CurIdx != NumIdx; ++CurIdx) {
+ for (; CurIdx != IdxList.size(); ++CurIdx) {
CompositeType *CT = dyn_cast<CompositeType>(Agg);
if (!CT || CT->isPointerTy()) return 0;
- IndexTy Index = Idxs[CurIdx];
+ IndexTy Index = IdxList[CurIdx];
if (!CT->indexValid(Index)) return 0;
Agg = CT->getTypeAtIndex(Index);
}
- return CurIdx == NumIdx ? Agg : 0;
+ return CurIdx == IdxList.size() ? Agg : 0;
}
-Type *GetElementPtrInst::getIndexedType(const Type *Ptr, Value* const *Idxs,
- unsigned NumIdx) {
- return getIndexedTypeInternal(Ptr, Idxs, NumIdx);
+Type *GetElementPtrInst::getIndexedType(Type *Ptr, ArrayRef<Value *> IdxList) {
+ return getIndexedTypeInternal(Ptr, IdxList);
}
-Type *GetElementPtrInst::getIndexedType(const Type *Ptr,
- Constant* const *Idxs,
- unsigned NumIdx) {
- return getIndexedTypeInternal(Ptr, Idxs, NumIdx);
+Type *GetElementPtrInst::getIndexedType(Type *Ptr,
+ ArrayRef<Constant *> IdxList) {
+ return getIndexedTypeInternal(Ptr, IdxList);
}
-Type *GetElementPtrInst::getIndexedType(const Type *Ptr,
- uint64_t const *Idxs,
- unsigned NumIdx) {
- return getIndexedTypeInternal(Ptr, Idxs, NumIdx);
+Type *GetElementPtrInst::getIndexedType(Type *Ptr, ArrayRef<uint64_t> IdxList) {
+ return getIndexedTypeInternal(Ptr, IdxList);
}
-Type *GetElementPtrInst::getIndexedType(const Type *Ptr, Value *Idx) {
- const PointerType *PTy = dyn_cast<PointerType>(Ptr);
- if (!PTy) return 0; // Type isn't a pointer type!
-
- // Check the pointer index.
- if (!PTy->indexValid(Idx)) return 0;
-
- return PTy->getElementType();
-}
-
-
/// hasAllZeroIndices - Return true if all of the indices of this GEP are
/// zeros. If so, the result pointer and the first operand have the same
/// value, just potentially different types.
@@ -1286,13 +1561,13 @@ bool ShuffleVectorInst::isValidOperands(const Value *V1, const Value *V2,
if (!V1->getType()->isVectorTy() || V1->getType() != V2->getType())
return false;
- const VectorType *MaskTy = dyn_cast<VectorType>(Mask->getType());
+ VectorType *MaskTy = dyn_cast<VectorType>(Mask->getType());
if (MaskTy == 0 || !MaskTy->getElementType()->isIntegerTy(32))
return false;
// Check to see if Mask is valid.
if (const ConstantVector *MV = dyn_cast<ConstantVector>(Mask)) {
- const VectorType *VTy = cast<VectorType>(V1->getType());
+ VectorType *VTy = cast<VectorType>(V1->getType());
for (unsigned i = 0, e = MV->getNumOperands(); i != e; ++i) {
if (ConstantInt* CI = dyn_cast<ConstantInt>(MV->getOperand(i))) {
if (CI->uge(VTy->getNumElements()*2))
@@ -1382,7 +1657,7 @@ ExtractValueInst::ExtractValueInst(const ExtractValueInst &EVI)
// A null type is returned if the indices are invalid for the specified
// aggregate type.
//
-Type *ExtractValueInst::getIndexedType(const Type *Agg,
+Type *ExtractValueInst::getIndexedType(Type *Agg,
ArrayRef<unsigned> Idxs) {
for (unsigned CurIdx = 0; CurIdx != Idxs.size(); ++CurIdx) {
unsigned Index = Idxs[CurIdx];
@@ -1392,10 +1667,10 @@ Type *ExtractValueInst::getIndexedType(const Type *Agg,
// insertvalue we need to check array indexing manually.
// Since the only other types we can index into are struct types it's just
// as easy to check those manually as well.
- if (const ArrayType *AT = dyn_cast<ArrayType>(Agg)) {
+ if (ArrayType *AT = dyn_cast<ArrayType>(Agg)) {
if (Index >= AT->getNumElements())
return 0;
- } else if (const StructType *ST = dyn_cast<StructType>(Agg)) {
+ } else if (StructType *ST = dyn_cast<StructType>(Agg)) {
if (Index >= ST->getNumElements())
return 0;
} else {
@@ -1413,7 +1688,7 @@ Type *ExtractValueInst::getIndexedType(const Type *Agg,
//===----------------------------------------------------------------------===//
BinaryOperator::BinaryOperator(BinaryOps iType, Value *S1, Value *S2,
- const Type *Ty, const Twine &Name,
+ Type *Ty, const Twine &Name,
Instruction *InsertBefore)
: Instruction(Ty, iType,
OperandTraits<BinaryOperator>::op_begin(this),
@@ -1426,7 +1701,7 @@ BinaryOperator::BinaryOperator(BinaryOps iType, Value *S1, Value *S2,
}
BinaryOperator::BinaryOperator(BinaryOps iType, Value *S1, Value *S2,
- const Type *Ty, const Twine &Name,
+ Type *Ty, const Twine &Name,
BasicBlock *InsertAtEnd)
: Instruction(Ty, iType,
OperandTraits<BinaryOperator>::op_begin(this),
@@ -1589,7 +1864,7 @@ BinaryOperator *BinaryOperator::CreateFNeg(Value *Op, const Twine &Name,
BinaryOperator *BinaryOperator::CreateNot(Value *Op, const Twine &Name,
Instruction *InsertBefore) {
Constant *C;
- if (const VectorType *PTy = dyn_cast<VectorType>(Op->getType())) {
+ if (VectorType *PTy = dyn_cast<VectorType>(Op->getType())) {
C = Constant::getAllOnesValue(PTy->getElementType());
C = ConstantVector::get(
std::vector<Constant*>(PTy->getNumElements(), C));
@@ -1604,7 +1879,7 @@ BinaryOperator *BinaryOperator::CreateNot(Value *Op, const Twine &Name,
BinaryOperator *BinaryOperator::CreateNot(Value *Op, const Twine &Name,
BasicBlock *InsertAtEnd) {
Constant *AllOnes;
- if (const VectorType *PTy = dyn_cast<VectorType>(Op->getType())) {
+ if (VectorType *PTy = dyn_cast<VectorType>(Op->getType())) {
// Create a vector of all ones values.
Constant *Elt = Constant::getAllOnesValue(PTy->getElementType());
AllOnes = ConstantVector::get(
@@ -1743,8 +2018,8 @@ bool CastInst::isLosslessCast() const {
return false;
// Identity cast is always lossless
- const Type* SrcTy = getOperand(0)->getType();
- const Type* DstTy = getType();
+ Type* SrcTy = getOperand(0)->getType();
+ Type* DstTy = getType();
if (SrcTy == DstTy)
return true;
@@ -1763,12 +2038,12 @@ bool CastInst::isLosslessCast() const {
/// # ptrtoint i32* %x to i32 ; on 32-bit platforms only
/// @brief Determine if the described cast is a no-op.
bool CastInst::isNoopCast(Instruction::CastOps Opcode,
- const Type *SrcTy,
- const Type *DestTy,
- const Type *IntPtrTy) {
+ Type *SrcTy,
+ Type *DestTy,
+ Type *IntPtrTy) {
switch (Opcode) {
default:
- assert(!"Invalid CastOp");
+ assert(0 && "Invalid CastOp");
case Instruction::Trunc:
case Instruction::ZExt:
case Instruction::SExt:
@@ -1791,7 +2066,7 @@ bool CastInst::isNoopCast(Instruction::CastOps Opcode,
}
/// @brief Determine if a cast is a no-op.
-bool CastInst::isNoopCast(const Type *IntPtrTy) const {
+bool CastInst::isNoopCast(Type *IntPtrTy) const {
return isNoopCast(getOpcode(), getOperand(0)->getType(), getType(), IntPtrTy);
}
@@ -1805,8 +2080,7 @@ bool CastInst::isNoopCast(const Type *IntPtrTy) const {
/// If no such cast is permitted, the function returns 0.
unsigned CastInst::isEliminableCastPair(
Instruction::CastOps firstOp, Instruction::CastOps secondOp,
- const Type *SrcTy, const Type *MidTy, const Type *DstTy, const Type *IntPtrTy)
-{
+ Type *SrcTy, Type *MidTy, Type *DstTy, Type *IntPtrTy) {
// Define the 144 possibilities for these two cast instructions. The values
// in this matrix determine what to do in a given situation and select the
// case in the switch below. The rows correspond to firstOp, the columns
@@ -1859,12 +2133,16 @@ unsigned CastInst::isEliminableCastPair(
};
// If either of the casts is a bitcast from scalar to vector, disallow the
- // merging.
- if ((firstOp == Instruction::BitCast &&
- isa<VectorType>(SrcTy) != isa<VectorType>(MidTy)) ||
- (secondOp == Instruction::BitCast &&
- isa<VectorType>(MidTy) != isa<VectorType>(DstTy)))
- return 0; // Disallowed
+ // merging. However, a bitcast of A->B->A is allowed.
+ bool isFirstBitcast = (firstOp == Instruction::BitCast);
+ bool isSecondBitcast = (secondOp == Instruction::BitCast);
+ bool chainedBitcast = (SrcTy == DstTy && isFirstBitcast && isSecondBitcast);
+
+ // Check if any of the bitcasts convert scalars<->vectors.
+ if ((isFirstBitcast && isa<VectorType>(SrcTy) != isa<VectorType>(MidTy)) ||
+ (isSecondBitcast && isa<VectorType>(MidTy) != isa<VectorType>(DstTy)))
+ // Unless we are bitcasting to the original type, disallow optimizations.
+ if (!chainedBitcast) return 0;
int ElimCase = CastResults[firstOp-Instruction::CastOpsBegin]
[secondOp-Instruction::CastOpsBegin];
@@ -1958,16 +2236,16 @@ unsigned CastInst::isEliminableCastPair(
case 99:
// cast combination can't happen (error in input). This is for all cases
// where the MidTy is not the same for the two cast instructions.
- assert(!"Invalid Cast Combination");
+ assert(0 && "Invalid Cast Combination");
return 0;
default:
- assert(!"Error in CastResults table!!!");
+ assert(0 && "Error in CastResults table!!!");
return 0;
}
return 0;
}
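// The chained-bitcast rule above in isolation, assuming both casts are
// bitcasts: a scalar<->vector pair now folds when it returns to the original
// type. The isVec* flags are hypothetical stand-ins for isa<VectorType>(...).
static bool allowBitcastPair(bool isVecSrc, bool isVecMid, bool isVecDst,
                             bool srcEqualsDst) {
  bool crossesKind = (isVecSrc != isVecMid) || (isVecMid != isVecDst);
  // e.g. i64 -> <2 x i32> -> i64 folds; i64 -> <2 x i32> -> <4 x i16> still
  // does not.
  return !crossesKind || srcEqualsDst;
}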
-CastInst *CastInst::Create(Instruction::CastOps op, Value *S, const Type *Ty,
+CastInst *CastInst::Create(Instruction::CastOps op, Value *S, Type *Ty,
const Twine &Name, Instruction *InsertBefore) {
assert(castIsValid(op, S, Ty) && "Invalid cast!");
// Construct and return the appropriate CastInst subclass
@@ -1985,12 +2263,12 @@ CastInst *CastInst::Create(Instruction::CastOps op, Value *S, const Type *Ty,
case IntToPtr: return new IntToPtrInst (S, Ty, Name, InsertBefore);
case BitCast: return new BitCastInst (S, Ty, Name, InsertBefore);
default:
- assert(!"Invalid opcode provided");
+ assert(0 && "Invalid opcode provided");
}
return 0;
}
-CastInst *CastInst::Create(Instruction::CastOps op, Value *S, const Type *Ty,
+CastInst *CastInst::Create(Instruction::CastOps op, Value *S, Type *Ty,
const Twine &Name, BasicBlock *InsertAtEnd) {
assert(castIsValid(op, S, Ty) && "Invalid cast!");
// Construct and return the appropriate CastInst subclass
@@ -2008,12 +2286,12 @@ CastInst *CastInst::Create(Instruction::CastOps op, Value *S, const Type *Ty,
case IntToPtr: return new IntToPtrInst (S, Ty, Name, InsertAtEnd);
case BitCast: return new BitCastInst (S, Ty, Name, InsertAtEnd);
default:
- assert(!"Invalid opcode provided");
+ assert(0 && "Invalid opcode provided");
}
return 0;
}
-CastInst *CastInst::CreateZExtOrBitCast(Value *S, const Type *Ty,
+CastInst *CastInst::CreateZExtOrBitCast(Value *S, Type *Ty,
const Twine &Name,
Instruction *InsertBefore) {
if (S->getType()->getScalarSizeInBits() == Ty->getScalarSizeInBits())
@@ -2021,7 +2299,7 @@ CastInst *CastInst::CreateZExtOrBitCast(Value *S, const Type *Ty,
return Create(Instruction::ZExt, S, Ty, Name, InsertBefore);
}
-CastInst *CastInst::CreateZExtOrBitCast(Value *S, const Type *Ty,
+CastInst *CastInst::CreateZExtOrBitCast(Value *S, Type *Ty,
const Twine &Name,
BasicBlock *InsertAtEnd) {
if (S->getType()->getScalarSizeInBits() == Ty->getScalarSizeInBits())
@@ -2029,7 +2307,7 @@ CastInst *CastInst::CreateZExtOrBitCast(Value *S, const Type *Ty,
return Create(Instruction::ZExt, S, Ty, Name, InsertAtEnd);
}
-CastInst *CastInst::CreateSExtOrBitCast(Value *S, const Type *Ty,
+CastInst *CastInst::CreateSExtOrBitCast(Value *S, Type *Ty,
const Twine &Name,
Instruction *InsertBefore) {
if (S->getType()->getScalarSizeInBits() == Ty->getScalarSizeInBits())
@@ -2037,7 +2315,7 @@ CastInst *CastInst::CreateSExtOrBitCast(Value *S, const Type *Ty,
return Create(Instruction::SExt, S, Ty, Name, InsertBefore);
}
-CastInst *CastInst::CreateSExtOrBitCast(Value *S, const Type *Ty,
+CastInst *CastInst::CreateSExtOrBitCast(Value *S, Type *Ty,
const Twine &Name,
BasicBlock *InsertAtEnd) {
if (S->getType()->getScalarSizeInBits() == Ty->getScalarSizeInBits())
@@ -2045,7 +2323,7 @@ CastInst *CastInst::CreateSExtOrBitCast(Value *S, const Type *Ty,
return Create(Instruction::SExt, S, Ty, Name, InsertAtEnd);
}
-CastInst *CastInst::CreateTruncOrBitCast(Value *S, const Type *Ty,
+CastInst *CastInst::CreateTruncOrBitCast(Value *S, Type *Ty,
const Twine &Name,
Instruction *InsertBefore) {
if (S->getType()->getScalarSizeInBits() == Ty->getScalarSizeInBits())
@@ -2053,7 +2331,7 @@ CastInst *CastInst::CreateTruncOrBitCast(Value *S, const Type *Ty,
return Create(Instruction::Trunc, S, Ty, Name, InsertBefore);
}
-CastInst *CastInst::CreateTruncOrBitCast(Value *S, const Type *Ty,
+CastInst *CastInst::CreateTruncOrBitCast(Value *S, Type *Ty,
const Twine &Name,
BasicBlock *InsertAtEnd) {
if (S->getType()->getScalarSizeInBits() == Ty->getScalarSizeInBits())
@@ -2061,7 +2339,7 @@ CastInst *CastInst::CreateTruncOrBitCast(Value *S, const Type *Ty,
return Create(Instruction::Trunc, S, Ty, Name, InsertAtEnd);
}
-CastInst *CastInst::CreatePointerCast(Value *S, const Type *Ty,
+CastInst *CastInst::CreatePointerCast(Value *S, Type *Ty,
const Twine &Name,
BasicBlock *InsertAtEnd) {
assert(S->getType()->isPointerTy() && "Invalid cast");
@@ -2074,7 +2352,7 @@ CastInst *CastInst::CreatePointerCast(Value *S, const Type *Ty,
}
/// @brief Create a BitCast or a PtrToInt cast instruction
-CastInst *CastInst::CreatePointerCast(Value *S, const Type *Ty,
+CastInst *CastInst::CreatePointerCast(Value *S, Type *Ty,
const Twine &Name,
Instruction *InsertBefore) {
assert(S->getType()->isPointerTy() && "Invalid cast");
@@ -2086,7 +2364,7 @@ CastInst *CastInst::CreatePointerCast(Value *S, const Type *Ty,
return Create(Instruction::BitCast, S, Ty, Name, InsertBefore);
}
-CastInst *CastInst::CreateIntegerCast(Value *C, const Type *Ty,
+CastInst *CastInst::CreateIntegerCast(Value *C, Type *Ty,
bool isSigned, const Twine &Name,
Instruction *InsertBefore) {
assert(C->getType()->isIntOrIntVectorTy() && Ty->isIntOrIntVectorTy() &&
@@ -2100,7 +2378,7 @@ CastInst *CastInst::CreateIntegerCast(Value *C, const Type *Ty,
return Create(opcode, C, Ty, Name, InsertBefore);
}
-CastInst *CastInst::CreateIntegerCast(Value *C, const Type *Ty,
+CastInst *CastInst::CreateIntegerCast(Value *C, Type *Ty,
bool isSigned, const Twine &Name,
BasicBlock *InsertAtEnd) {
assert(C->getType()->isIntOrIntVectorTy() && Ty->isIntOrIntVectorTy() &&
@@ -2114,7 +2392,7 @@ CastInst *CastInst::CreateIntegerCast(Value *C, const Type *Ty,
return Create(opcode, C, Ty, Name, InsertAtEnd);
}
-CastInst *CastInst::CreateFPCast(Value *C, const Type *Ty,
+CastInst *CastInst::CreateFPCast(Value *C, Type *Ty,
const Twine &Name,
Instruction *InsertBefore) {
assert(C->getType()->isFPOrFPVectorTy() && Ty->isFPOrFPVectorTy() &&
@@ -2127,7 +2405,7 @@ CastInst *CastInst::CreateFPCast(Value *C, const Type *Ty,
return Create(opcode, C, Ty, Name, InsertBefore);
}
-CastInst *CastInst::CreateFPCast(Value *C, const Type *Ty,
+CastInst *CastInst::CreateFPCast(Value *C, Type *Ty,
const Twine &Name,
BasicBlock *InsertAtEnd) {
assert(C->getType()->isFPOrFPVectorTy() && Ty->isFPOrFPVectorTy() &&
@@ -2142,15 +2420,15 @@ CastInst *CastInst::CreateFPCast(Value *C, const Type *Ty,
// Check whether it is valid to call getCastOpcode for these types.
// This routine must be kept in sync with getCastOpcode.
-bool CastInst::isCastable(const Type *SrcTy, const Type *DestTy) {
+bool CastInst::isCastable(Type *SrcTy, Type *DestTy) {
if (!SrcTy->isFirstClassType() || !DestTy->isFirstClassType())
return false;
if (SrcTy == DestTy)
return true;
- if (const VectorType *SrcVecTy = dyn_cast<VectorType>(SrcTy))
- if (const VectorType *DestVecTy = dyn_cast<VectorType>(DestTy))
+ if (VectorType *SrcVecTy = dyn_cast<VectorType>(SrcTy))
+ if (VectorType *DestVecTy = dyn_cast<VectorType>(DestTy))
if (SrcVecTy->getNumElements() == DestVecTy->getNumElements()) {
// An element by element cast. Valid if casting the elements is valid.
SrcTy = SrcVecTy->getElementType();
@@ -2212,8 +2490,8 @@ bool CastInst::isCastable(const Type *SrcTy, const Type *DestTy) {
// This routine must be kept in sync with isCastable.
Instruction::CastOps
CastInst::getCastOpcode(
- const Value *Src, bool SrcIsSigned, const Type *DestTy, bool DestIsSigned) {
- const Type *SrcTy = Src->getType();
+ const Value *Src, bool SrcIsSigned, Type *DestTy, bool DestIsSigned) {
+ Type *SrcTy = Src->getType();
assert(SrcTy->isFirstClassType() && DestTy->isFirstClassType() &&
"Only first class types are castable!");
@@ -2221,8 +2499,8 @@ CastInst::getCastOpcode(
if (SrcTy == DestTy)
return BitCast;
- if (const VectorType *SrcVecTy = dyn_cast<VectorType>(SrcTy))
- if (const VectorType *DestVecTy = dyn_cast<VectorType>(DestTy))
+ if (VectorType *SrcVecTy = dyn_cast<VectorType>(SrcTy))
+ if (VectorType *DestVecTy = dyn_cast<VectorType>(DestTy))
if (SrcVecTy->getNumElements() == DestVecTy->getNumElements()) {
// An element by element cast. Find the appropriate opcode based on the
// element types.
@@ -2292,17 +2570,17 @@ CastInst::getCastOpcode(
} else if (SrcTy->isIntegerTy()) {
return IntToPtr; // int -> ptr
} else {
- assert(!"Casting pointer to other than pointer or int");
+ assert(0 && "Casting pointer to other than pointer or int");
}
} else if (DestTy->isX86_MMXTy()) {
if (SrcTy->isVectorTy()) {
assert(DestBits == SrcBits && "Casting vector of wrong width to X86_MMX");
return BitCast; // 64-bit vector to MMX
} else {
- assert(!"Illegal cast to X86_MMX");
+ assert(0 && "Illegal cast to X86_MMX");
}
} else {
- assert(!"Casting to type that is not first-class");
+ assert(0 && "Casting to type that is not first-class");
}
// If we fall through to here we probably hit an assertion cast above
@@ -2320,10 +2598,10 @@ CastInst::getCastOpcode(
/// it in one place and to eliminate the redundant code for getting the sizes
/// of the types involved.
bool
-CastInst::castIsValid(Instruction::CastOps op, Value *S, const Type *DstTy) {
+CastInst::castIsValid(Instruction::CastOps op, Value *S, Type *DstTy) {
// Check for type sanity on the arguments
- const Type *SrcTy = S->getType();
+ Type *SrcTy = S->getType();
if (!SrcTy->isFirstClassType() || !DstTy->isFirstClassType() ||
SrcTy->isAggregateType() || DstTy->isAggregateType())
return false;
@@ -2384,144 +2662,144 @@ CastInst::castIsValid(Instruction::CastOps op, Value *S, const Type *DstTy) {
}
TruncInst::TruncInst(
- Value *S, const Type *Ty, const Twine &Name, Instruction *InsertBefore
+ Value *S, Type *Ty, const Twine &Name, Instruction *InsertBefore
) : CastInst(Ty, Trunc, S, Name, InsertBefore) {
assert(castIsValid(getOpcode(), S, Ty) && "Illegal Trunc");
}
TruncInst::TruncInst(
- Value *S, const Type *Ty, const Twine &Name, BasicBlock *InsertAtEnd
+ Value *S, Type *Ty, const Twine &Name, BasicBlock *InsertAtEnd
) : CastInst(Ty, Trunc, S, Name, InsertAtEnd) {
assert(castIsValid(getOpcode(), S, Ty) && "Illegal Trunc");
}
ZExtInst::ZExtInst(
- Value *S, const Type *Ty, const Twine &Name, Instruction *InsertBefore
+ Value *S, Type *Ty, const Twine &Name, Instruction *InsertBefore
) : CastInst(Ty, ZExt, S, Name, InsertBefore) {
assert(castIsValid(getOpcode(), S, Ty) && "Illegal ZExt");
}
ZExtInst::ZExtInst(
- Value *S, const Type *Ty, const Twine &Name, BasicBlock *InsertAtEnd
+ Value *S, Type *Ty, const Twine &Name, BasicBlock *InsertAtEnd
) : CastInst(Ty, ZExt, S, Name, InsertAtEnd) {
assert(castIsValid(getOpcode(), S, Ty) && "Illegal ZExt");
}
SExtInst::SExtInst(
- Value *S, const Type *Ty, const Twine &Name, Instruction *InsertBefore
+ Value *S, Type *Ty, const Twine &Name, Instruction *InsertBefore
) : CastInst(Ty, SExt, S, Name, InsertBefore) {
assert(castIsValid(getOpcode(), S, Ty) && "Illegal SExt");
}
SExtInst::SExtInst(
- Value *S, const Type *Ty, const Twine &Name, BasicBlock *InsertAtEnd
+ Value *S, Type *Ty, const Twine &Name, BasicBlock *InsertAtEnd
) : CastInst(Ty, SExt, S, Name, InsertAtEnd) {
assert(castIsValid(getOpcode(), S, Ty) && "Illegal SExt");
}
FPTruncInst::FPTruncInst(
- Value *S, const Type *Ty, const Twine &Name, Instruction *InsertBefore
+ Value *S, Type *Ty, const Twine &Name, Instruction *InsertBefore
) : CastInst(Ty, FPTrunc, S, Name, InsertBefore) {
assert(castIsValid(getOpcode(), S, Ty) && "Illegal FPTrunc");
}
FPTruncInst::FPTruncInst(
- Value *S, const Type *Ty, const Twine &Name, BasicBlock *InsertAtEnd
+ Value *S, Type *Ty, const Twine &Name, BasicBlock *InsertAtEnd
) : CastInst(Ty, FPTrunc, S, Name, InsertAtEnd) {
assert(castIsValid(getOpcode(), S, Ty) && "Illegal FPTrunc");
}
FPExtInst::FPExtInst(
- Value *S, const Type *Ty, const Twine &Name, Instruction *InsertBefore
+ Value *S, Type *Ty, const Twine &Name, Instruction *InsertBefore
) : CastInst(Ty, FPExt, S, Name, InsertBefore) {
assert(castIsValid(getOpcode(), S, Ty) && "Illegal FPExt");
}
FPExtInst::FPExtInst(
- Value *S, const Type *Ty, const Twine &Name, BasicBlock *InsertAtEnd
+ Value *S, Type *Ty, const Twine &Name, BasicBlock *InsertAtEnd
) : CastInst(Ty, FPExt, S, Name, InsertAtEnd) {
assert(castIsValid(getOpcode(), S, Ty) && "Illegal FPExt");
}
UIToFPInst::UIToFPInst(
- Value *S, const Type *Ty, const Twine &Name, Instruction *InsertBefore
+ Value *S, Type *Ty, const Twine &Name, Instruction *InsertBefore
) : CastInst(Ty, UIToFP, S, Name, InsertBefore) {
assert(castIsValid(getOpcode(), S, Ty) && "Illegal UIToFP");
}
UIToFPInst::UIToFPInst(
- Value *S, const Type *Ty, const Twine &Name, BasicBlock *InsertAtEnd
+ Value *S, Type *Ty, const Twine &Name, BasicBlock *InsertAtEnd
) : CastInst(Ty, UIToFP, S, Name, InsertAtEnd) {
assert(castIsValid(getOpcode(), S, Ty) && "Illegal UIToFP");
}
SIToFPInst::SIToFPInst(
- Value *S, const Type *Ty, const Twine &Name, Instruction *InsertBefore
+ Value *S, Type *Ty, const Twine &Name, Instruction *InsertBefore
) : CastInst(Ty, SIToFP, S, Name, InsertBefore) {
assert(castIsValid(getOpcode(), S, Ty) && "Illegal SIToFP");
}
SIToFPInst::SIToFPInst(
- Value *S, const Type *Ty, const Twine &Name, BasicBlock *InsertAtEnd
+ Value *S, Type *Ty, const Twine &Name, BasicBlock *InsertAtEnd
) : CastInst(Ty, SIToFP, S, Name, InsertAtEnd) {
assert(castIsValid(getOpcode(), S, Ty) && "Illegal SIToFP");
}
FPToUIInst::FPToUIInst(
- Value *S, const Type *Ty, const Twine &Name, Instruction *InsertBefore
+ Value *S, Type *Ty, const Twine &Name, Instruction *InsertBefore
) : CastInst(Ty, FPToUI, S, Name, InsertBefore) {
assert(castIsValid(getOpcode(), S, Ty) && "Illegal FPToUI");
}
FPToUIInst::FPToUIInst(
- Value *S, const Type *Ty, const Twine &Name, BasicBlock *InsertAtEnd
+ Value *S, Type *Ty, const Twine &Name, BasicBlock *InsertAtEnd
) : CastInst(Ty, FPToUI, S, Name, InsertAtEnd) {
assert(castIsValid(getOpcode(), S, Ty) && "Illegal FPToUI");
}
FPToSIInst::FPToSIInst(
- Value *S, const Type *Ty, const Twine &Name, Instruction *InsertBefore
+ Value *S, Type *Ty, const Twine &Name, Instruction *InsertBefore
) : CastInst(Ty, FPToSI, S, Name, InsertBefore) {
assert(castIsValid(getOpcode(), S, Ty) && "Illegal FPToSI");
}
FPToSIInst::FPToSIInst(
- Value *S, const Type *Ty, const Twine &Name, BasicBlock *InsertAtEnd
+ Value *S, Type *Ty, const Twine &Name, BasicBlock *InsertAtEnd
) : CastInst(Ty, FPToSI, S, Name, InsertAtEnd) {
assert(castIsValid(getOpcode(), S, Ty) && "Illegal FPToSI");
}
PtrToIntInst::PtrToIntInst(
- Value *S, const Type *Ty, const Twine &Name, Instruction *InsertBefore
+ Value *S, Type *Ty, const Twine &Name, Instruction *InsertBefore
) : CastInst(Ty, PtrToInt, S, Name, InsertBefore) {
assert(castIsValid(getOpcode(), S, Ty) && "Illegal PtrToInt");
}
PtrToIntInst::PtrToIntInst(
- Value *S, const Type *Ty, const Twine &Name, BasicBlock *InsertAtEnd
+ Value *S, Type *Ty, const Twine &Name, BasicBlock *InsertAtEnd
) : CastInst(Ty, PtrToInt, S, Name, InsertAtEnd) {
assert(castIsValid(getOpcode(), S, Ty) && "Illegal PtrToInt");
}
IntToPtrInst::IntToPtrInst(
- Value *S, const Type *Ty, const Twine &Name, Instruction *InsertBefore
+ Value *S, Type *Ty, const Twine &Name, Instruction *InsertBefore
) : CastInst(Ty, IntToPtr, S, Name, InsertBefore) {
assert(castIsValid(getOpcode(), S, Ty) && "Illegal IntToPtr");
}
IntToPtrInst::IntToPtrInst(
- Value *S, const Type *Ty, const Twine &Name, BasicBlock *InsertAtEnd
+ Value *S, Type *Ty, const Twine &Name, BasicBlock *InsertAtEnd
) : CastInst(Ty, IntToPtr, S, Name, InsertAtEnd) {
assert(castIsValid(getOpcode(), S, Ty) && "Illegal IntToPtr");
}
BitCastInst::BitCastInst(
- Value *S, const Type *Ty, const Twine &Name, Instruction *InsertBefore
+ Value *S, Type *Ty, const Twine &Name, Instruction *InsertBefore
) : CastInst(Ty, BitCast, S, Name, InsertBefore) {
assert(castIsValid(getOpcode(), S, Ty) && "Illegal BitCast");
}
BitCastInst::BitCastInst(
- Value *S, const Type *Ty, const Twine &Name, BasicBlock *InsertAtEnd
+ Value *S, Type *Ty, const Twine &Name, BasicBlock *InsertAtEnd
) : CastInst(Ty, BitCast, S, Name, InsertAtEnd) {
assert(castIsValid(getOpcode(), S, Ty) && "Illegal BitCast");
}
@@ -2532,7 +2810,7 @@ BitCastInst::BitCastInst(
void CmpInst::Anchor() const {}
-CmpInst::CmpInst(const Type *ty, OtherOps op, unsigned short predicate,
+CmpInst::CmpInst(Type *ty, OtherOps op, unsigned short predicate,
Value *LHS, Value *RHS, const Twine &Name,
Instruction *InsertBefore)
: Instruction(ty, op,
@@ -2545,7 +2823,7 @@ CmpInst::CmpInst(const Type *ty, OtherOps op, unsigned short predicate,
setName(Name);
}
-CmpInst::CmpInst(const Type *ty, OtherOps op, unsigned short predicate,
+CmpInst::CmpInst(Type *ty, OtherOps op, unsigned short predicate,
Value *LHS, Value *RHS, const Twine &Name,
BasicBlock *InsertAtEnd)
: Instruction(ty, op,
@@ -2612,7 +2890,7 @@ bool CmpInst::isEquality() const {
CmpInst::Predicate CmpInst::getInversePredicate(Predicate pred) {
switch (pred) {
- default: assert(!"Unknown cmp predicate!");
+ default: assert(0 && "Unknown cmp predicate!");
case ICMP_EQ: return ICMP_NE;
case ICMP_NE: return ICMP_EQ;
case ICMP_UGT: return ICMP_ULE;
@@ -2645,7 +2923,7 @@ CmpInst::Predicate CmpInst::getInversePredicate(Predicate pred) {
ICmpInst::Predicate ICmpInst::getSignedPredicate(Predicate pred) {
switch (pred) {
- default: assert(! "Unknown icmp predicate!");
+ default: assert(0 && "Unknown icmp predicate!");
case ICMP_EQ: case ICMP_NE:
case ICMP_SGT: case ICMP_SLT: case ICMP_SGE: case ICMP_SLE:
return pred;
@@ -2658,7 +2936,7 @@ ICmpInst::Predicate ICmpInst::getSignedPredicate(Predicate pred) {
ICmpInst::Predicate ICmpInst::getUnsignedPredicate(Predicate pred) {
switch (pred) {
- default: assert(! "Unknown icmp predicate!");
+ default: assert(0 && "Unknown icmp predicate!");
case ICMP_EQ: case ICMP_NE:
case ICMP_UGT: case ICMP_ULT: case ICMP_UGE: case ICMP_ULE:
return pred;
@@ -2734,7 +3012,7 @@ ICmpInst::makeConstantRange(Predicate pred, const APInt &C) {
CmpInst::Predicate CmpInst::getSwappedPredicate(Predicate pred) {
switch (pred) {
- default: assert(!"Unknown cmp predicate!");
+ default: assert(0 && "Unknown cmp predicate!");
case ICMP_EQ: case ICMP_NE:
return pred;
case ICMP_SGT: return ICMP_SLT;
@@ -3065,14 +3343,34 @@ AllocaInst *AllocaInst::clone_impl() const {
}
LoadInst *LoadInst::clone_impl() const {
- return new LoadInst(getOperand(0),
- Twine(), isVolatile(),
- getAlignment());
+ return new LoadInst(getOperand(0), Twine(), isVolatile(),
+ getAlignment(), getOrdering(), getSynchScope());
}
StoreInst *StoreInst::clone_impl() const {
- return new StoreInst(getOperand(0), getOperand(1),
- isVolatile(), getAlignment());
+ return new StoreInst(getOperand(0), getOperand(1), isVolatile(),
+ getAlignment(), getOrdering(), getSynchScope());
+}
+
+AtomicCmpXchgInst *AtomicCmpXchgInst::clone_impl() const {
+ AtomicCmpXchgInst *Result =
+ new AtomicCmpXchgInst(getOperand(0), getOperand(1), getOperand(2),
+ getOrdering(), getSynchScope());
+ Result->setVolatile(isVolatile());
+ return Result;
+}
+
+AtomicRMWInst *AtomicRMWInst::clone_impl() const {
+ AtomicRMWInst *Result =
+ new AtomicRMWInst(getOperation(), getOperand(0), getOperand(1),
+ getOrdering(), getSynchScope());
+ Result->setVolatile(isVolatile());
+ return Result;
+}
+
+FenceInst *FenceInst::clone_impl() const {
+ return new FenceInst(getContext(), getOrdering(), getSynchScope());
}
TruncInst *TruncInst::clone_impl() const {
@@ -3155,6 +3453,10 @@ PHINode *PHINode::clone_impl() const {
return new PHINode(*this);
}
+LandingPadInst *LandingPadInst::clone_impl() const {
+ return new LandingPadInst(*this);
+}
+
ReturnInst *ReturnInst::clone_impl() const {
return new(getNumOperands()) ReturnInst(*this);
}
@@ -3176,6 +3478,10 @@ InvokeInst *InvokeInst::clone_impl() const {
return new(getNumOperands()) InvokeInst(*this);
}
+ResumeInst *ResumeInst::clone_impl() const {
+ return new(1) ResumeInst(*this);
+}
+
UnwindInst *UnwindInst::clone_impl() const {
LLVMContext &Context = getContext();
return new UnwindInst(Context);
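// Property the updated clone_impl bodies preserve, sketched with the 3.0 API;
// SI is a hypothetical atomic store, and the clone is unlinked, so it is
// deleted manually here.
#include "llvm/Instructions.h"
#include "llvm/Support/Casting.h"
#include <cassert>
using namespace llvm;

void checkCloneKeepsAtomicInfo(StoreInst *SI) {
  StoreInst *Copy = cast<StoreInst>(SI->clone());
  assert(Copy->getOrdering()   == SI->getOrdering());
  assert(Copy->getSynchScope() == SI->getSynchScope());
  assert(Copy->getAlignment()  == SI->getAlignment());
  delete Copy;
}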
diff --git a/lib/VMCore/LLVMContextImpl.h b/lib/VMCore/LLVMContextImpl.h
index 06a6f2a..a3f68fe 100644
--- a/lib/VMCore/LLVMContextImpl.h
+++ b/lib/VMCore/LLVMContextImpl.h
@@ -42,8 +42,8 @@ class Value;
struct DenseMapAPIntKeyInfo {
struct KeyTy {
APInt val;
- const Type* type;
- KeyTy(const APInt& V, const Type* Ty) : val(V), type(Ty) {}
+ Type* type;
+ KeyTy(const APInt& V, Type* Ty) : val(V), type(Ty) {}
KeyTy(const KeyTy& that) : val(that.val), type(that.type) {}
bool operator==(const KeyTy& that) const {
return type == that.type && this->val == that.val;
diff --git a/lib/VMCore/Makefile b/lib/VMCore/Makefile
index 03a4fc7..2b9b0f2 100644
--- a/lib/VMCore/Makefile
+++ b/lib/VMCore/Makefile
@@ -20,9 +20,9 @@ GENFILE:=$(PROJ_OBJ_ROOT)/include/llvm/Intrinsics.gen
INTRINSICTD := $(PROJ_SRC_ROOT)/include/llvm/Intrinsics.td
INTRINSICTDS := $(wildcard $(PROJ_SRC_ROOT)/include/llvm/Intrinsics*.td)
-$(ObjDir)/Intrinsics.gen.tmp: $(ObjDir)/.dir $(INTRINSICTDS) $(TBLGEN)
+$(ObjDir)/Intrinsics.gen.tmp: $(ObjDir)/.dir $(INTRINSICTDS) $(LLVM_TBLGEN)
$(Echo) Building Intrinsics.gen.tmp from Intrinsics.td
- $(Verb) $(TableGen) $(call SYSPATH, $(INTRINSICTD)) -o $(call SYSPATH, $@) -gen-intrinsic
+ $(Verb) $(LLVMTableGen) $(call SYSPATH, $(INTRINSICTD)) -o $(call SYSPATH, $@) -gen-intrinsic
$(GENFILE): $(ObjDir)/Intrinsics.gen.tmp
$(Verb) $(CMP) -s $@ $< || ( $(CP) $< $@ && \
diff --git a/lib/VMCore/Module.cpp b/lib/VMCore/Module.cpp
index be2fcb8..c29029b 100644
--- a/lib/VMCore/Module.cpp
+++ b/lib/VMCore/Module.cpp
@@ -32,25 +32,10 @@ using namespace llvm;
// Methods to implement the globals and functions lists.
//
-GlobalVariable *ilist_traits<GlobalVariable>::createSentinel() {
- GlobalVariable *Ret = new GlobalVariable(Type::getInt32Ty(getGlobalContext()),
- false, GlobalValue::ExternalLinkage);
- // This should not be garbage monitored.
- LeakDetector::removeGarbageObject(Ret);
- return Ret;
-}
-GlobalAlias *ilist_traits<GlobalAlias>::createSentinel() {
- GlobalAlias *Ret = new GlobalAlias(Type::getInt32Ty(getGlobalContext()),
- GlobalValue::ExternalLinkage);
- // This should not be garbage monitored.
- LeakDetector::removeGarbageObject(Ret);
- return Ret;
-}
-
// Explicit instantiations of SymbolTableListTraits since some of the methods
// are not in the public header file.
-template class llvm::SymbolTableListTraits<GlobalVariable, Module>;
template class llvm::SymbolTableListTraits<Function, Module>;
+template class llvm::SymbolTableListTraits<GlobalVariable, Module>;
template class llvm::SymbolTableListTraits<GlobalAlias, Module>;
//===----------------------------------------------------------------------===//
@@ -82,8 +67,10 @@ Module::Endianness Module::getEndianness() const {
Module::Endianness ret = AnyEndianness;
while (!temp.empty()) {
- StringRef token = DataLayout;
- tie(token, temp) = getToken(temp, "-");
+ std::pair<StringRef, StringRef> P = getToken(temp, "-");
+
+ StringRef token = P.first;
+ temp = P.second;
if (token[0] == 'e') {
ret = LittleEndian;
@@ -95,15 +82,16 @@ Module::Endianness Module::getEndianness() const {
return ret;
}
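// The tie()-free token loop above, reduced to a standalone shape; the layout
// string is a hypothetical example, and getToken is the StringExtras helper
// used in the code above (returning a (token, rest) pair).
#include "llvm/ADT/StringExtras.h"
using namespace llvm;

bool isLittleEndianLayout(StringRef Layout) {  // e.g. "e-p:64:64-i64:64"
  while (!Layout.empty()) {
    std::pair<StringRef, StringRef> P = getToken(Layout, "-");
    if (!P.first.empty() && P.first[0] == 'e')
      return true;
    Layout = P.second;
  }
  return false;
}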
-/// Target Pointer Size information...
+/// Target Pointer Size information.
Module::PointerSize Module::getPointerSize() const {
StringRef temp = DataLayout;
Module::PointerSize ret = AnyPointerSize;
while (!temp.empty()) {
- StringRef token, signalToken;
- tie(token, temp) = getToken(temp, "-");
- tie(signalToken, token) = getToken(token, ":");
+ std::pair<StringRef, StringRef> TmpP = getToken(temp, "-");
+ temp = TmpP.second;
+ TmpP = getToken(TmpP.first, ":");
+ StringRef token = TmpP.second, signalToken = TmpP.first;
if (signalToken[0] == 'p') {
int size = 0;
@@ -149,7 +137,7 @@ void Module::getMDKindNames(SmallVectorImpl<StringRef> &Result) const {
// the symbol table directly for this common task.
//
Constant *Module::getOrInsertFunction(StringRef Name,
- const FunctionType *Ty,
+ FunctionType *Ty,
AttrListPtr AttributeList) {
// See if we have a definition for the specified function already.
GlobalValue *F = getNamedValue(Name);
@@ -182,7 +170,7 @@ Constant *Module::getOrInsertFunction(StringRef Name,
}
Constant *Module::getOrInsertTargetIntrinsic(StringRef Name,
- const FunctionType *Ty,
+ FunctionType *Ty,
AttrListPtr AttributeList) {
// See if we have a definition for the specified function already.
GlobalValue *F = getNamedValue(Name);
@@ -199,7 +187,7 @@ Constant *Module::getOrInsertTargetIntrinsic(StringRef Name,
}
Constant *Module::getOrInsertFunction(StringRef Name,
- const FunctionType *Ty) {
+ FunctionType *Ty) {
AttrListPtr AttributeList = AttrListPtr::get((AttributeWithIndex *)0, 0);
return getOrInsertFunction(Name, Ty, AttributeList);
}
@@ -211,7 +199,7 @@ Constant *Module::getOrInsertFunction(StringRef Name,
//
Constant *Module::getOrInsertFunction(StringRef Name,
AttrListPtr AttributeList,
- const Type *RetTy, ...) {
+ Type *RetTy, ...) {
va_list Args;
va_start(Args, RetTy);
@@ -229,7 +217,7 @@ Constant *Module::getOrInsertFunction(StringRef Name,
}
Constant *Module::getOrInsertFunction(StringRef Name,
- const Type *RetTy, ...) {
+ Type *RetTy, ...) {
va_list Args;
va_start(Args, RetTy);
@@ -279,7 +267,7 @@ GlobalVariable *Module::getGlobalVariable(StringRef Name,
/// with a constantexpr cast to the right type.
/// 3. Finally, if the existing global is the correct declaration, return the
/// existing global.
-Constant *Module::getOrInsertGlobal(StringRef Name, const Type *Ty) {
+Constant *Module::getOrInsertGlobal(StringRef Name, Type *Ty) {
// See if we have a definition for the specified global already.
GlobalVariable *GV = dyn_cast_or_null<GlobalVariable>(getNamedValue(Name));
if (GV == 0) {
@@ -436,7 +424,7 @@ namespace {
// To avoid walking constant expressions multiple times and other IR
// objects, we keep several helper maps.
DenseSet<const Value*> VisitedConstants;
- DenseSet<const Type*> VisitedTypes;
+ DenseSet<Type*> VisitedTypes;
std::vector<StructType*> &StructTypes;
public:
@@ -549,5 +537,3 @@ namespace {
void Module::findUsedStructTypes(std::vector<StructType*> &StructTypes) const {
TypeFinder(StructTypes).run(*this);
}
-
-
diff --git a/lib/VMCore/PassManager.cpp b/lib/VMCore/PassManager.cpp
index 5cf2905..ecedb1d 100644
--- a/lib/VMCore/PassManager.cpp
+++ b/lib/VMCore/PassManager.cpp
@@ -28,7 +28,6 @@
#include "llvm/Support/Mutex.h"
#include "llvm/ADT/StringMap.h"
#include <algorithm>
-#include <cstdio>
#include <map>
using namespace llvm;
@@ -167,8 +166,8 @@ class BBPassManager : public PMDataManager, public FunctionPass {
public:
static char ID;
- explicit BBPassManager(int Depth)
- : PMDataManager(Depth), FunctionPass(ID) {}
+ explicit BBPassManager()
+ : PMDataManager(), FunctionPass(ID) {}
/// Execute all of the passes scheduled for execution. Keep track of
/// whether any of the passes modifies the function, and if so, return true.
@@ -193,7 +192,7 @@ public:
// Print passes managed by this manager
void dumpPassStructure(unsigned Offset) {
- llvm::dbgs() << std::string(Offset*2, ' ') << "BasicBlockPass Manager\n";
+ llvm::dbgs().indent(Offset*2) << "BasicBlockPass Manager\n";
for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) {
BasicBlockPass *BP = getContainedPass(Index);
BP->dumpPassStructure(Offset + 1);
@@ -228,9 +227,9 @@ private:
bool wasRun;
public:
static char ID;
- explicit FunctionPassManagerImpl(int Depth) :
- Pass(PT_PassManager, ID), PMDataManager(Depth),
- PMTopLevelManager(new FPPassManager(1)), wasRun(false) {}
+ explicit FunctionPassManagerImpl() :
+ Pass(PT_PassManager, ID), PMDataManager(),
+ PMTopLevelManager(new FPPassManager()), wasRun(false) {}
/// add - Add a pass to the queue of passes to run. This passes ownership of
/// the Pass to the PassManager. When the PassManager is destroyed, the pass
@@ -303,8 +302,8 @@ char FunctionPassManagerImpl::ID = 0;
class MPPassManager : public Pass, public PMDataManager {
public:
static char ID;
- explicit MPPassManager(int Depth) :
- Pass(PT_PassManager, ID), PMDataManager(Depth) { }
+ explicit MPPassManager() :
+ Pass(PT_PassManager, ID), PMDataManager() { }
// Delete on the fly managers.
virtual ~MPPassManager() {
@@ -349,7 +348,7 @@ public:
// Print passes managed by this manager
void dumpPassStructure(unsigned Offset) {
- llvm::dbgs() << std::string(Offset*2, ' ') << "ModulePass Manager\n";
+ llvm::dbgs().indent(Offset*2) << "ModulePass Manager\n";
for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) {
ModulePass *MP = getContainedPass(Index);
MP->dumpPassStructure(Offset + 1);
@@ -388,9 +387,9 @@ class PassManagerImpl : public Pass,
public:
static char ID;
- explicit PassManagerImpl(int Depth) :
- Pass(PT_PassManager, ID), PMDataManager(Depth),
- PMTopLevelManager(new MPPassManager(1)) {}
+ explicit PassManagerImpl() :
+ Pass(PT_PassManager, ID), PMDataManager(),
+ PMTopLevelManager(new MPPassManager()) {}
/// add - Add a pass to the queue of passes to run. This passes ownership of
/// the Pass to the PassManager. When the PassManager is destroyed, the pass
@@ -1340,7 +1339,7 @@ bool BBPassManager::doFinalization(Function &F) {
/// Create new Function pass manager
FunctionPassManager::FunctionPassManager(Module *m) : M(m) {
- FPM = new FunctionPassManagerImpl(0);
+ FPM = new FunctionPassManagerImpl();
// FPM is the top level manager.
FPM->setTopLevelManager(FPM);
@@ -1532,7 +1531,7 @@ bool FPPassManager::runOnModule(Module &M) {
bool Changed = doInitialization(M);
for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I)
- runOnFunction(*I);
+ Changed |= runOnFunction(*I);
return doFinalization(M) || Changed;
}
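// The bug pattern fixed above, in miniature: per-iteration change flags were
// being discarded, so the manager could report "no change" after modifying
// functions. Standalone sketch, no LLVM types involved.
#include <cassert>

static bool step(int i) { return (i % 2) == 0; }  // stand-in for runOnFunction

int main() {
  bool Changed = false;
  for (int i = 0; i != 4; ++i)
    Changed |= step(i);  // was: step(i); -- the result was silently dropped
  assert(Changed);
  return 0;
}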
@@ -1626,7 +1625,7 @@ void MPPassManager::addLowerLevelRequiredPass(Pass *P, Pass *RequiredPass) {
FunctionPassManagerImpl *FPP = OnTheFlyManagers[P];
if (!FPP) {
- FPP = new FunctionPassManagerImpl(0);
+ FPP = new FunctionPassManagerImpl();
// FPP is the top level manager.
FPP->setTopLevelManager(FPP);
@@ -1635,9 +1634,11 @@ void MPPassManager::addLowerLevelRequiredPass(Pass *P, Pass *RequiredPass) {
FPP->add(RequiredPass);
// Register P as the last user of RequiredPass.
- SmallVector<Pass *, 1> LU;
- LU.push_back(RequiredPass);
- FPP->setLastUser(LU, P);
+ if (RequiredPass) {
+ SmallVector<Pass *, 1> LU;
+ LU.push_back(RequiredPass);
+ FPP->setLastUser(LU, P);
+ }
}
/// Return function pass corresponding to PassInfo PI, that is
@@ -1677,7 +1678,7 @@ bool PassManagerImpl::run(Module &M) {
/// Create new pass manager
PassManager::PassManager() {
- PM = new PassManagerImpl(0);
+ PM = new PassManagerImpl();
// PM is the top level manager
PM->setTopLevelManager(PM);
}
@@ -1761,13 +1762,23 @@ void PMStack::pop() {
// Push PM on the stack and set its top level manager.
void PMStack::push(PMDataManager *PM) {
assert(PM && "Unable to push. Pass Manager expected");
+ assert(PM->getDepth()==0 && "Pass Manager depth set too early");
if (!this->empty()) {
+ assert(PM->getPassManagerType() > this->top()->getPassManagerType()
+ && "pushing bad pass manager to PMStack");
PMTopLevelManager *TPM = this->top()->getTopLevelManager();
assert(TPM && "Unable to find top level manager");
TPM->addIndirectPassManager(PM);
PM->setTopLevelManager(TPM);
+ PM->setDepth(this->top()->getDepth()+1);
+ }
+ else {
+ assert((PM->getPassManagerType() == PMT_ModulePassManager
+ || PM->getPassManagerType() == PMT_FunctionPassManager)
+ && "pushing bad pass manager to PMStack");
+ PM->setDepth(1);
}
S.push_back(PM);
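// Shape of the invariant the new push() enforces: depth is assigned at push
// time and increases strictly down the stack, replacing the old pattern of
// threading a Depth argument through every manager constructor. Standalone
// sketch; Mgr is a hypothetical stand-in for PMDataManager.
#include <cassert>
#include <vector>

struct Mgr {
  unsigned Depth;
  Mgr() : Depth(0) {}
};

static void push(std::vector<Mgr*> &S, Mgr *PM) {
  assert(PM->Depth == 0 && "depth set too early");
  PM->Depth = S.empty() ? 1 : S.back()->Depth + 1;
  S.push_back(PM);
}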
@@ -1777,10 +1788,10 @@ void PMStack::push(PMDataManager *PM) {
void PMStack::dump() const {
for (std::vector<PMDataManager *>::const_iterator I = S.begin(),
E = S.end(); I != E; ++I)
- printf("%s ", (*I)->getAsPass()->getPassName());
+ dbgs() << (*I)->getAsPass()->getPassName() << ' ';
if (!S.empty())
- printf("\n");
+ dbgs() << '\n';
}
/// Find appropriate Module Pass Manager in the PM Stack and
@@ -1823,7 +1834,7 @@ void FunctionPass::assignPassManager(PMStack &PMS,
PMDataManager *PMD = PMS.top();
// [1] Create new Function Pass Manager
- FPP = new FPPassManager(PMD->getDepth() + 1);
+ FPP = new FPPassManager();
FPP->populateInheritedAnalysis(PMS);
// [2] Set up new manager's top level manager
@@ -1860,7 +1871,7 @@ void BasicBlockPass::assignPassManager(PMStack &PMS,
PMDataManager *PMD = PMS.top();
// [1] Create new Basic Block Manager
- BBP = new BBPassManager(PMD->getDepth() + 1);
+ BBP = new BBPassManager();
// [2] Set up new manager's top level manager
// Basic Block Pass Manager does not live by itself
diff --git a/lib/VMCore/PassRegistry.cpp b/lib/VMCore/PassRegistry.cpp
index fa92620..2df6557 100644
--- a/lib/VMCore/PassRegistry.cpp
+++ b/lib/VMCore/PassRegistry.cpp
@@ -20,6 +20,7 @@
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/StringMap.h"
+#include "llvm/Function.h"
#include <vector>
using namespace llvm;
diff --git a/lib/VMCore/Type.cpp b/lib/VMCore/Type.cpp
index f874d1b..10184bc 100644
--- a/lib/VMCore/Type.cpp
+++ b/lib/VMCore/Type.cpp
@@ -40,8 +40,8 @@ Type *Type::getPrimitiveType(LLVMContext &C, TypeID IDNumber) {
/// getScalarType - If this is a vector type, return the element type,
/// otherwise return this.
-const Type *Type::getScalarType() const {
- if (const VectorType *VTy = dyn_cast<VectorType>(this))
+Type *Type::getScalarType() {
+ if (VectorType *VTy = dyn_cast<VectorType>(this))
return VTy->getElementType();
return this;
}
@@ -77,7 +77,7 @@ bool Type::isFPOrFPVectorTy() const {
// canLosslesslyBitCastTo - Return true if this type can be converted to
// 'Ty' without any reinterpretation of bits. For example, i8* to i32*.
//
-bool Type::canLosslesslyBitCastTo(const Type *Ty) const {
+bool Type::canLosslesslyBitCastTo(Type *Ty) const {
// Identity cast means no change so return true
if (this == Ty)
return true;
@@ -146,7 +146,7 @@ unsigned Type::getPrimitiveSizeInBits() const {
/// getScalarSizeInBits - If this is a vector type, return the
/// getPrimitiveSizeInBits value for the element type. Otherwise return the
/// getPrimitiveSizeInBits value for this type.
-unsigned Type::getScalarSizeInBits() const {
+unsigned Type::getScalarSizeInBits() {
return getScalarType()->getPrimitiveSizeInBits();
}
@@ -306,7 +306,7 @@ APInt IntegerType::getMask() const {
// FunctionType Implementation
//===----------------------------------------------------------------------===//
-FunctionType::FunctionType(const Type *Result, ArrayRef<Type*> Params,
+FunctionType::FunctionType(Type *Result, ArrayRef<Type*> Params,
bool IsVarArgs)
: Type(Result->getContext(), FunctionTyID) {
Type **SubTys = reinterpret_cast<Type**>(this+1);
@@ -326,7 +326,7 @@ FunctionType::FunctionType(const Type *Result, ArrayRef<Type*> Params,
}
// FunctionType::get - The factory function for the FunctionType class.
-FunctionType *FunctionType::get(const Type *ReturnType,
+FunctionType *FunctionType::get(Type *ReturnType,
ArrayRef<Type*> Params, bool isVarArg) {
// TODO: This is brutally slow.
std::vector<Type*> Key;
@@ -351,21 +351,21 @@ FunctionType *FunctionType::get(const Type *ReturnType,
}
-FunctionType *FunctionType::get(const Type *Result, bool isVarArg) {
+FunctionType *FunctionType::get(Type *Result, bool isVarArg) {
return get(Result, ArrayRef<Type *>(), isVarArg);
}
/// isValidReturnType - Return true if the specified type is valid as a return
/// type.
-bool FunctionType::isValidReturnType(const Type *RetTy) {
+bool FunctionType::isValidReturnType(Type *RetTy) {
return !RetTy->isFunctionTy() && !RetTy->isLabelTy() &&
!RetTy->isMetadataTy();
}
/// isValidArgumentType - Return true if the specified type is valid as an
/// argument type.
-bool FunctionType::isValidArgumentType(const Type *ArgTy) {
+bool FunctionType::isValidArgumentType(Type *ArgTy) {
return ArgTy->isFirstClassType();
}
@@ -392,7 +392,7 @@ StructType *StructType::get(LLVMContext &Context, ArrayRef<Type*> ETypes,
// Value not found. Create a new type!
ST = new (Context.pImpl->TypeAllocator) StructType(Context);
- ST->setSubclassData(SCDB_IsAnonymous); // Anonymous struct.
+ ST->setSubclassData(SCDB_IsLiteral); // Literal struct.
ST->setBody(ETypes, isPacked);
return ST;
}
@@ -412,13 +412,6 @@ void StructType::setBody(ArrayRef<Type*> Elements, bool isPacked) {
NumContainedTys = Elements.size();
}
-StructType *StructType::createNamed(LLVMContext &Context, StringRef Name) {
- StructType *ST = new (Context.pImpl->TypeAllocator) StructType(Context);
- if (!Name.empty())
- ST->setName(Name);
- return ST;
-}
-
void StructType::setName(StringRef Name) {
if (Name == getName()) return;
@@ -461,6 +454,13 @@ void StructType::setName(StringRef Name) {
//===----------------------------------------------------------------------===//
// StructType Helper functions.
+StructType *StructType::create(LLVMContext &Context, StringRef Name) {
+ StructType *ST = new (Context.pImpl->TypeAllocator) StructType(Context);
+ if (!Name.empty())
+ ST->setName(Name);
+ return ST;
+}
+
StructType *StructType::get(LLVMContext &Context, bool isPacked) {
return get(Context, llvm::ArrayRef<Type*>(), isPacked);
}
@@ -478,21 +478,36 @@ StructType *StructType::get(Type *type, ...) {
return llvm::StructType::get(Ctx, StructFields);
}
-StructType *StructType::createNamed(LLVMContext &Context, StringRef Name,
- ArrayRef<Type*> Elements, bool isPacked) {
- StructType *ST = createNamed(Context, Name);
+StructType *StructType::create(LLVMContext &Context, ArrayRef<Type*> Elements,
+ StringRef Name, bool isPacked) {
+ StructType *ST = create(Context, Name);
ST->setBody(Elements, isPacked);
return ST;
}
-StructType *StructType::createNamed(StringRef Name, ArrayRef<Type*> Elements,
- bool isPacked) {
+StructType *StructType::create(LLVMContext &Context, ArrayRef<Type*> Elements) {
+ return create(Context, Elements, StringRef());
+}
+
+StructType *StructType::create(LLVMContext &Context) {
+ return create(Context, StringRef());
+}
+
+StructType *StructType::create(ArrayRef<Type*> Elements, StringRef Name,
+ bool isPacked) {
+ assert(!Elements.empty() &&
+ "This method may not be invoked with an empty list");
+ return create(Elements[0]->getContext(), Elements, Name, isPacked);
+}
+
+StructType *StructType::create(ArrayRef<Type*> Elements) {
assert(!Elements.empty() &&
"This method may not be invoked with an empty list");
- return createNamed(Elements[0]->getContext(), Name, Elements, isPacked);
+ return create(Elements[0]->getContext(), Elements, StringRef());
}
-StructType *StructType::createNamed(StringRef Name, Type *type, ...) {
+StructType *StructType::create(StringRef Name, Type *type, ...) {
assert(type != 0 && "Cannot create a struct type with no elements with this");
LLVMContext &Ctx = type->getContext();
va_list ap;
@@ -502,11 +517,12 @@ StructType *StructType::createNamed(StringRef Name, Type *type, ...) {
StructFields.push_back(type);
type = va_arg(ap, llvm::Type*);
}
- return llvm::StructType::createNamed(Ctx, Name, StructFields);
+ return llvm::StructType::create(Ctx, StructFields, Name);
}
+
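// The renamed factories in use, following the 3.0 signatures above; Ctx is a
// hypothetical context and the default isPacked argument is assumed.
#include "llvm/DerivedTypes.h"
#include "llvm/LLVMContext.h"
using namespace llvm;

void makeStructs(LLVMContext &Ctx) {
  Type *Fields[] = { Type::getInt32Ty(Ctx), Type::getInt8PtrTy(Ctx) };
  // Identified struct: was StructType::createNamed(Ctx, "my.pair", Fields).
  StructType *Named = StructType::create(Ctx, Fields, "my.pair");
  // Literal struct: uniqued by shape, never named (see isLiteral() above).
  StructType *Literal = StructType::get(Ctx, Fields);
  (void)Named; (void)Literal;
}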
StringRef StructType::getName() const {
- assert(!isAnonymous() && "Anonymous structs never have names");
+ assert(!isLiteral() && "Literal structs never have names");
if (SymbolTableEntry == 0) return StringRef();
return ((StringMapEntry<StructType*> *)SymbolTableEntry)->getKey();
@@ -524,14 +540,14 @@ void StructType::setBody(Type *type, ...) {
setBody(StructFields);
}
-bool StructType::isValidElementType(const Type *ElemTy) {
+bool StructType::isValidElementType(Type *ElemTy) {
return !ElemTy->isVoidTy() && !ElemTy->isLabelTy() &&
!ElemTy->isMetadataTy() && !ElemTy->isFunctionTy();
}
/// isLayoutIdentical - Return true if this is layout identical to the
/// specified struct.
-bool StructType::isLayoutIdentical(const StructType *Other) const {
+bool StructType::isLayoutIdentical(StructType *Other) const {
if (this == Other) return true;
if (isPacked() != Other->isPacked() ||
@@ -557,8 +573,8 @@ StructType *Module::getTypeByName(StringRef Name) const {
// CompositeType Implementation
//===----------------------------------------------------------------------===//
-Type *CompositeType::getTypeAtIndex(const Value *V) const {
- if (const StructType *STy = dyn_cast<StructType>(this)) {
+Type *CompositeType::getTypeAtIndex(const Value *V) {
+ if (StructType *STy = dyn_cast<StructType>(this)) {
unsigned Idx = (unsigned)cast<ConstantInt>(V)->getZExtValue();
assert(indexValid(Idx) && "Invalid structure index!");
return STy->getElementType(Idx);
@@ -566,8 +582,8 @@ Type *CompositeType::getTypeAtIndex(const Value *V) const {
return cast<SequentialType>(this)->getElementType();
}
-Type *CompositeType::getTypeAtIndex(unsigned Idx) const {
- if (const StructType *STy = dyn_cast<StructType>(this)) {
+Type *CompositeType::getTypeAtIndex(unsigned Idx) {
+ if (StructType *STy = dyn_cast<StructType>(this)) {
assert(indexValid(Idx) && "Invalid structure index!");
return STy->getElementType(Idx);
}
@@ -605,7 +621,7 @@ ArrayType::ArrayType(Type *ElType, uint64_t NumEl)
}
-ArrayType *ArrayType::get(const Type *elementType, uint64_t NumElements) {
+ArrayType *ArrayType::get(Type *elementType, uint64_t NumElements) {
Type *ElementType = const_cast<Type*>(elementType);
assert(isValidElementType(ElementType) && "Invalid type for array element!");
@@ -618,7 +634,7 @@ ArrayType *ArrayType::get(const Type *elementType, uint64_t NumElements) {
return Entry;
}
-bool ArrayType::isValidElementType(const Type *ElemTy) {
+bool ArrayType::isValidElementType(Type *ElemTy) {
return !ElemTy->isVoidTy() && !ElemTy->isLabelTy() &&
!ElemTy->isMetadataTy() && !ElemTy->isFunctionTy();
}
@@ -632,7 +648,7 @@ VectorType::VectorType(Type *ElType, unsigned NumEl)
NumElements = NumEl;
}
-VectorType *VectorType::get(const Type *elementType, unsigned NumElements) {
+VectorType *VectorType::get(Type *elementType, unsigned NumElements) {
Type *ElementType = const_cast<Type*>(elementType);
assert(NumElements > 0 && "#Elements of a VectorType must be greater than 0");
assert(isValidElementType(ElementType) &&
@@ -647,7 +663,7 @@ VectorType *VectorType::get(const Type *elementType, unsigned NumElements) {
return Entry;
}
-bool VectorType::isValidElementType(const Type *ElemTy) {
+bool VectorType::isValidElementType(Type *ElemTy) {
return ElemTy->isIntegerTy() || ElemTy->isFloatingPointTy();
}
@@ -655,8 +671,7 @@ bool VectorType::isValidElementType(const Type *ElemTy) {
// PointerType Implementation
//===----------------------------------------------------------------------===//
-PointerType *PointerType::get(const Type *eltTy, unsigned AddressSpace) {
- Type *EltTy = const_cast<Type*>(eltTy);
+PointerType *PointerType::get(Type *EltTy, unsigned AddressSpace) {
assert(EltTy && "Can't get a pointer to <null> type!");
assert(isValidElementType(EltTy) && "Invalid type for pointer element!");
@@ -677,11 +692,11 @@ PointerType::PointerType(Type *E, unsigned AddrSpace)
setSubclassData(AddrSpace);
}
-PointerType *Type::getPointerTo(unsigned addrs) const {
+PointerType *Type::getPointerTo(unsigned addrs) {
return PointerType::get(this, addrs);
}
-bool PointerType::isValidElementType(const Type *ElemTy) {
+bool PointerType::isValidElementType(Type *ElemTy) {
return !ElemTy->isVoidTy() && !ElemTy->isLabelTy() &&
!ElemTy->isMetadataTy();
}
diff --git a/lib/VMCore/Value.cpp b/lib/VMCore/Value.cpp
index f1815e3..2fa5f08 100644
--- a/lib/VMCore/Value.cpp
+++ b/lib/VMCore/Value.cpp
@@ -35,12 +35,12 @@ using namespace llvm;
// Value Class
//===----------------------------------------------------------------------===//
-static inline Type *checkType(const Type *Ty) {
+static inline Type *checkType(Type *Ty) {
assert(Ty && "Value defined with a null type: Error!");
return const_cast<Type*>(Ty);
}
-Value::Value(const Type *ty, unsigned scid)
+Value::Value(Type *ty, unsigned scid)
: SubclassID(scid), HasValueHandle(0),
SubclassOptionalData(0), SubclassData(0), VTy((Type*)checkType(ty)),
UseList(0), Name(0) {
@@ -369,7 +369,7 @@ bool Value::isDereferenceablePointer() const {
for (User::const_op_iterator I = GEP->op_begin()+1,
E = GEP->op_end(); I != E; ++I) {
Value *Index = *I;
- const Type *Ty = *GTI++;
+ Type *Ty = *GTI++;
// Struct indices can't be out of bounds.
if (isa<StructType>(Ty))
continue;
@@ -380,7 +380,7 @@ bool Value::isDereferenceablePointer() const {
if (CI->isZero())
continue;
// Check to see that it's within the bounds of an array.
- const ArrayType *ATy = dyn_cast<ArrayType>(Ty);
+ ArrayType *ATy = dyn_cast<ArrayType>(Ty);
if (!ATy)
return false;
if (CI->getValue().getActiveBits() > 64)
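
The isDereferenceablePointer hunk pairs each GEP index operand with the type it indexes into via gep_type_iterator; after this patch the iterator yields a plain Type*. A sketch of that traversal pattern, assuming LLVM 3.0 headers (the bounds checks themselves are elided):

    #include "llvm/Instructions.h"
    #include "llvm/Support/GetElementPtrTypeIterator.h"
    using namespace llvm;

    // Walk (index operand, indexed type) pairs of a GEP, as the loop above does.
    static void walkGEPIndices(GetElementPtrInst *GEP) {
      gep_type_iterator GTI = gep_type_begin(GEP);
      for (User::op_iterator I = GEP->op_begin() + 1, E = GEP->op_end();
           I != E; ++I, ++GTI) {
        Value *Index = *I;   // the operand used to index
        Type *Ty = *GTI;     // the aggregate level it indexes into
        (void)Index; (void)Ty;
      }
    }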
diff --git a/lib/VMCore/ValueTypes.cpp b/lib/VMCore/ValueTypes.cpp
index 21a1f03..e13bd7d 100644
--- a/lib/VMCore/ValueTypes.cpp
+++ b/lib/VMCore/ValueTypes.cpp
@@ -19,6 +19,12 @@
#include "llvm/Support/ErrorHandling.h"
using namespace llvm;
+EVT EVT::changeExtendedVectorElementTypeToInteger() const {
+ LLVMContext &Context = LLVMTy->getContext();
+ EVT IntTy = getIntegerVT(Context, getVectorElementType().getSizeInBits());
+ return getVectorVT(Context, IntTy, getVectorNumElements());
+}
+
EVT EVT::getExtendedIntegerVT(LLVMContext &Context, unsigned BitWidth) {
EVT VT;
VT.LLVMTy = IntegerType::get(Context, BitWidth);
@@ -77,9 +83,9 @@ unsigned EVT::getExtendedVectorNumElements() const {
unsigned EVT::getExtendedSizeInBits() const {
assert(isExtended() && "Type is not extended!");
- if (const IntegerType *ITy = dyn_cast<IntegerType>(LLVMTy))
+ if (IntegerType *ITy = dyn_cast<IntegerType>(LLVMTy))
return ITy->getBitWidth();
- if (const VectorType *VTy = dyn_cast<VectorType>(LLVMTy))
+ if (VectorType *VTy = dyn_cast<VectorType>(LLVMTy))
return VTy->getBitWidth();
assert(false && "Unrecognized extended type!");
return 0; // Suppress warnings.
@@ -140,7 +146,7 @@ std::string EVT::getEVTString() const {
/// getTypeForEVT - This method returns an LLVM type corresponding to the
/// specified EVT. For integer types, this returns an unsigned type. Note
/// that this will abort for types that cannot be represented.
-const Type *EVT::getTypeForEVT(LLVMContext &Context) const {
+Type *EVT::getTypeForEVT(LLVMContext &Context) const {
switch (V.SimpleTy) {
default:
assert(isExtended() && "Type is not extended!");
@@ -186,7 +192,7 @@ const Type *EVT::getTypeForEVT(LLVMContext &Context) const {
/// getEVT - Return the value type corresponding to the specified type. This
/// returns all pointers as MVT::iPTR. If HandleUnknown is true, unknown types
/// are returned as Other, otherwise they are invalid.
-EVT EVT::getEVT(const Type *Ty, bool HandleUnknown){
+EVT EVT::getEVT(Type *Ty, bool HandleUnknown){
switch (Ty->getTypeID()) {
default:
if (HandleUnknown) return MVT(MVT::Other);
@@ -204,7 +210,7 @@ EVT EVT::getEVT(const Type *Ty, bool HandleUnknown){
case Type::PPC_FP128TyID: return MVT(MVT::ppcf128);
case Type::PointerTyID: return MVT(MVT::iPTR);
case Type::VectorTyID: {
- const VectorType *VTy = cast<VectorType>(Ty);
+ VectorType *VTy = cast<VectorType>(Ty);
return getVectorVT(Ty->getContext(), getEVT(VTy->getElementType(), false),
VTy->getNumElements());
}
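
The new changeExtendedVectorElementTypeToInteger handles vector EVTs that have no simple MVT encoding, mapping e.g. v5f32 to v5i32. A sketch, assuming LLVM 3.0 headers and assuming the public changeVectorElementTypeToInteger wrapper in ValueTypes.h dispatches to this helper for non-simple types, as in release_30; v5f32 is chosen precisely because it is not a simple MVT:

    #include "llvm/LLVMContext.h"
    #include "llvm/CodeGen/ValueTypes.h"
    using namespace llvm;

    // Map an extended v5f32 to v5i32: same element count, i32 elements.
    static EVT floatsToInts(LLVMContext &Ctx) {
      EVT VF = EVT::getVectorVT(Ctx, MVT::f32, 5);   // extended EVT
      return VF.changeVectorElementTypeToInteger();  // routes to the helper above
    }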
diff --git a/lib/VMCore/Verifier.cpp b/lib/VMCore/Verifier.cpp
index b146b89..9564b7d 100644
--- a/lib/VMCore/Verifier.cpp
+++ b/lib/VMCore/Verifier.cpp
@@ -35,6 +35,12 @@
// * It is illegal to have a ret instruction that returns a value that does not
// agree with the function return value type.
// * Function call argument types match the function prototype
+// * A landing pad is defined by a landingpad instruction, and can be jumped to
+// only by the unwind edge of an invoke instruction.
+// * A landingpad instruction must be the first non-PHI instruction in the
+// block.
+// * All landingpad instructions within the same function must use the same
+//   personality function.
// * All other things that are tested by asserts spread about the code...
//
//===----------------------------------------------------------------------===//
@@ -131,18 +137,22 @@ namespace {
/// already.
SmallPtrSet<MDNode *, 32> MDNodes;
+ /// PersonalityFn - The personality function referenced by the
+ /// LandingPadInsts. All LandingPadInsts within the same function must use
+ /// the same personality function.
+ const Value *PersonalityFn;
+
Verifier()
- : FunctionPass(ID),
- Broken(false), RealPass(true), action(AbortProcessAction),
- Mod(0), Context(0), DT(0), MessagesStr(Messages) {
- initializeVerifierPass(*PassRegistry::getPassRegistry());
- }
+ : FunctionPass(ID), Broken(false), RealPass(true),
+ action(AbortProcessAction), Mod(0), Context(0), DT(0),
+ MessagesStr(Messages), PersonalityFn(0) {
+ initializeVerifierPass(*PassRegistry::getPassRegistry());
+ }
explicit Verifier(VerifierFailureAction ctn)
- : FunctionPass(ID),
- Broken(false), RealPass(true), action(ctn), Mod(0), Context(0), DT(0),
- MessagesStr(Messages) {
- initializeVerifierPass(*PassRegistry::getPassRegistry());
- }
+ : FunctionPass(ID), Broken(false), RealPass(true), action(ctn), Mod(0),
+ Context(0), DT(0), MessagesStr(Messages), PersonalityFn(0) {
+ initializeVerifierPass(*PassRegistry::getPassRegistry());
+ }
bool doInitialization(Module &M) {
Mod = &M;
@@ -165,6 +175,7 @@ namespace {
visit(F);
InstsInThisBlock.clear();
+ PersonalityFn = 0;
// If this is a real pass, in a pass manager, we must abort before
// returning back to the pass manager, or else the pass manager may try to
@@ -278,18 +289,22 @@ namespace {
void visitUserOp1(Instruction &I);
void visitUserOp2(Instruction &I) { visitUserOp1(I); }
void visitIntrinsicFunctionCall(Intrinsic::ID ID, CallInst &CI);
+ void visitAtomicCmpXchgInst(AtomicCmpXchgInst &CXI);
+ void visitAtomicRMWInst(AtomicRMWInst &RMWI);
+ void visitFenceInst(FenceInst &FI);
void visitAllocaInst(AllocaInst &AI);
void visitExtractValueInst(ExtractValueInst &EVI);
void visitInsertValueInst(InsertValueInst &IVI);
+ void visitLandingPadInst(LandingPadInst &LPI);
void VerifyCallSite(CallSite CS);
- bool PerformTypeCheck(Intrinsic::ID ID, Function *F, const Type *Ty,
+ bool PerformTypeCheck(Intrinsic::ID ID, Function *F, Type *Ty,
int VT, unsigned ArgNo, std::string &Suffix);
void VerifyIntrinsicPrototype(Intrinsic::ID ID, Function *F,
unsigned RetNum, unsigned ParamNum, ...);
- void VerifyParameterAttrs(Attributes Attrs, const Type *Ty,
+ void VerifyParameterAttrs(Attributes Attrs, Type *Ty,
bool isReturnValue, const Value *V);
- void VerifyFunctionAttrs(const FunctionType *FT, const AttrListPtr &Attrs,
+ void VerifyFunctionAttrs(FunctionType *FT, const AttrListPtr &Attrs,
const Value *V);
void WriteValue(const Value *V) {
@@ -302,7 +317,7 @@ namespace {
}
}
- void WriteType(const Type *T) {
+ void WriteType(Type *T) {
if (!T) return;
MessagesStr << ' ' << *T;
}
@@ -323,7 +338,7 @@ namespace {
}
void CheckFailed(const Twine &Message, const Value *V1,
- const Type *T2, const Value *V3 = 0) {
+ Type *T2, const Value *V3 = 0) {
MessagesStr << Message.str() << "\n";
WriteValue(V1);
WriteType(T2);
@@ -331,8 +346,8 @@ namespace {
Broken = true;
}
- void CheckFailed(const Twine &Message, const Type *T1,
- const Type *T2 = 0, const Type *T3 = 0) {
+ void CheckFailed(const Twine &Message, Type *T1,
+ Type *T2 = 0, Type *T3 = 0) {
MessagesStr << Message.str() << "\n";
WriteType(T1);
WriteType(T2);
@@ -421,9 +436,9 @@ void Verifier::visitGlobalVariable(GlobalVariable &GV) {
"invalid linkage for intrinsic global variable", &GV);
// Don't worry about emitting an error for it not being an array,
// visitGlobalValue will complain on appending non-array.
- if (const ArrayType *ATy = dyn_cast<ArrayType>(GV.getType())) {
- const StructType *STy = dyn_cast<StructType>(ATy->getElementType());
- const PointerType *FuncPtrTy =
+ if (ArrayType *ATy = dyn_cast<ArrayType>(GV.getType())) {
+ StructType *STy = dyn_cast<StructType>(ATy->getElementType());
+ PointerType *FuncPtrTy =
FunctionType::get(Type::getVoidTy(*Context), false)->getPointerTo();
Assert1(STy && STy->getNumElements() == 2 &&
STy->getTypeAtIndex(0u)->isIntegerTy(32) &&
@@ -514,7 +529,7 @@ void Verifier::visitMDNode(MDNode &MD, Function *F) {
// VerifyParameterAttrs - Check the given attributes for an argument or return
// value of the specified type. The value V is printed in error messages.
-void Verifier::VerifyParameterAttrs(Attributes Attrs, const Type *Ty,
+void Verifier::VerifyParameterAttrs(Attributes Attrs, Type *Ty,
bool isReturnValue, const Value *V) {
if (Attrs == Attribute::None)
return;
@@ -541,7 +556,7 @@ void Verifier::VerifyParameterAttrs(Attributes Attrs, const Type *Ty,
Attribute::getAsString(TypeI), V);
Attributes ByValI = Attrs & Attribute::ByVal;
- if (const PointerType *PTy = dyn_cast<PointerType>(Ty)) {
+ if (PointerType *PTy = dyn_cast<PointerType>(Ty)) {
Assert1(!ByValI || PTy->getElementType()->isSized(),
"Attribute " + Attribute::getAsString(ByValI) +
" does not support unsized types!", V);
@@ -554,7 +569,7 @@ void Verifier::VerifyParameterAttrs(Attributes Attrs, const Type *Ty,
// VerifyFunctionAttrs - Check parameter attributes against a function type.
// The value V is printed in error messages.
-void Verifier::VerifyFunctionAttrs(const FunctionType *FT,
+void Verifier::VerifyFunctionAttrs(FunctionType *FT,
const AttrListPtr &Attrs,
const Value *V) {
if (Attrs.isEmpty())
@@ -565,7 +580,7 @@ void Verifier::VerifyFunctionAttrs(const FunctionType *FT,
for (unsigned i = 0, e = Attrs.getNumSlots(); i != e; ++i) {
const AttributeWithIndex &Attr = Attrs.getSlot(i);
- const Type *Ty;
+ Type *Ty;
if (Attr.Index == 0)
Ty = FT->getReturnType();
else if (Attr.Index-1 < FT->getNumParams())
@@ -615,7 +630,7 @@ static bool VerifyAttributeCount(const AttrListPtr &Attrs, unsigned Params) {
//
void Verifier::visitFunction(Function &F) {
// Check function arguments.
- const FunctionType *FT = F.getFunctionType();
+ FunctionType *FT = F.getFunctionType();
unsigned NumArgs = F.arg_size();
Assert1(Context == &F.getContext(),
@@ -795,7 +810,7 @@ void Verifier::visitReturnInst(ReturnInst &RI) {
void Verifier::visitSwitchInst(SwitchInst &SI) {
// Check to make sure that all of the constants in the switch instruction
// have the same type as the switched-on value.
- const Type *SwitchTy = SI.getCondition()->getType();
+ Type *SwitchTy = SI.getCondition()->getType();
SmallPtrSet<ConstantInt*, 32> Constants;
for (unsigned i = 1, e = SI.getNumCases(); i != e; ++i) {
Assert1(SI.getCaseValue(i)->getType() == SwitchTy,
@@ -836,8 +851,8 @@ void Verifier::visitUserOp1(Instruction &I) {
void Verifier::visitTruncInst(TruncInst &I) {
// Get the source and destination types
- const Type *SrcTy = I.getOperand(0)->getType();
- const Type *DestTy = I.getType();
+ Type *SrcTy = I.getOperand(0)->getType();
+ Type *DestTy = I.getType();
// Get the size of the types in bits, we'll need this later
unsigned SrcBitSize = SrcTy->getScalarSizeInBits();
@@ -854,8 +869,8 @@ void Verifier::visitTruncInst(TruncInst &I) {
void Verifier::visitZExtInst(ZExtInst &I) {
// Get the source and destination types
- const Type *SrcTy = I.getOperand(0)->getType();
- const Type *DestTy = I.getType();
+ Type *SrcTy = I.getOperand(0)->getType();
+ Type *DestTy = I.getType();
// Get the size of the types in bits, we'll need this later
Assert1(SrcTy->isIntOrIntVectorTy(), "ZExt only operates on integer", &I);
@@ -872,8 +887,8 @@ void Verifier::visitZExtInst(ZExtInst &I) {
void Verifier::visitSExtInst(SExtInst &I) {
// Get the source and destination types
- const Type *SrcTy = I.getOperand(0)->getType();
- const Type *DestTy = I.getType();
+ Type *SrcTy = I.getOperand(0)->getType();
+ Type *DestTy = I.getType();
// Get the size of the types in bits, we'll need this later
unsigned SrcBitSize = SrcTy->getScalarSizeInBits();
@@ -890,8 +905,8 @@ void Verifier::visitSExtInst(SExtInst &I) {
void Verifier::visitFPTruncInst(FPTruncInst &I) {
// Get the source and destination types
- const Type *SrcTy = I.getOperand(0)->getType();
- const Type *DestTy = I.getType();
+ Type *SrcTy = I.getOperand(0)->getType();
+ Type *DestTy = I.getType();
// Get the size of the types in bits, we'll need this later
unsigned SrcBitSize = SrcTy->getScalarSizeInBits();
unsigned DestBitSize = DestTy->getScalarSizeInBits();
@@ -907,8 +922,8 @@ void Verifier::visitFPTruncInst(FPTruncInst &I) {
void Verifier::visitFPExtInst(FPExtInst &I) {
// Get the source and destination types
- const Type *SrcTy = I.getOperand(0)->getType();
- const Type *DestTy = I.getType();
+ Type *SrcTy = I.getOperand(0)->getType();
+ Type *DestTy = I.getType();
// Get the size of the types in bits, we'll need this later
unsigned SrcBitSize = SrcTy->getScalarSizeInBits();
@@ -925,8 +940,8 @@ void Verifier::visitFPExtInst(FPExtInst &I) {
void Verifier::visitUIToFPInst(UIToFPInst &I) {
// Get the source and destination types
- const Type *SrcTy = I.getOperand(0)->getType();
- const Type *DestTy = I.getType();
+ Type *SrcTy = I.getOperand(0)->getType();
+ Type *DestTy = I.getType();
bool SrcVec = SrcTy->isVectorTy();
bool DstVec = DestTy->isVectorTy();
@@ -948,8 +963,8 @@ void Verifier::visitUIToFPInst(UIToFPInst &I) {
void Verifier::visitSIToFPInst(SIToFPInst &I) {
// Get the source and destination types
- const Type *SrcTy = I.getOperand(0)->getType();
- const Type *DestTy = I.getType();
+ Type *SrcTy = I.getOperand(0)->getType();
+ Type *DestTy = I.getType();
bool SrcVec = SrcTy->isVectorTy();
bool DstVec = DestTy->isVectorTy();
@@ -971,8 +986,8 @@ void Verifier::visitSIToFPInst(SIToFPInst &I) {
void Verifier::visitFPToUIInst(FPToUIInst &I) {
// Get the source and destination types
- const Type *SrcTy = I.getOperand(0)->getType();
- const Type *DestTy = I.getType();
+ Type *SrcTy = I.getOperand(0)->getType();
+ Type *DestTy = I.getType();
bool SrcVec = SrcTy->isVectorTy();
bool DstVec = DestTy->isVectorTy();
@@ -994,8 +1009,8 @@ void Verifier::visitFPToUIInst(FPToUIInst &I) {
void Verifier::visitFPToSIInst(FPToSIInst &I) {
// Get the source and destination types
- const Type *SrcTy = I.getOperand(0)->getType();
- const Type *DestTy = I.getType();
+ Type *SrcTy = I.getOperand(0)->getType();
+ Type *DestTy = I.getType();
bool SrcVec = SrcTy->isVectorTy();
bool DstVec = DestTy->isVectorTy();
@@ -1017,8 +1032,8 @@ void Verifier::visitFPToSIInst(FPToSIInst &I) {
void Verifier::visitPtrToIntInst(PtrToIntInst &I) {
// Get the source and destination types
- const Type *SrcTy = I.getOperand(0)->getType();
- const Type *DestTy = I.getType();
+ Type *SrcTy = I.getOperand(0)->getType();
+ Type *DestTy = I.getType();
Assert1(SrcTy->isPointerTy(), "PtrToInt source must be pointer", &I);
Assert1(DestTy->isIntegerTy(), "PtrToInt result must be integral", &I);
@@ -1028,8 +1043,8 @@ void Verifier::visitPtrToIntInst(PtrToIntInst &I) {
void Verifier::visitIntToPtrInst(IntToPtrInst &I) {
// Get the source and destination types
- const Type *SrcTy = I.getOperand(0)->getType();
- const Type *DestTy = I.getType();
+ Type *SrcTy = I.getOperand(0)->getType();
+ Type *DestTy = I.getType();
Assert1(SrcTy->isIntegerTy(), "IntToPtr source must be an integral", &I);
Assert1(DestTy->isPointerTy(), "IntToPtr result must be a pointer",&I);
@@ -1039,8 +1054,8 @@ void Verifier::visitIntToPtrInst(IntToPtrInst &I) {
void Verifier::visitBitCastInst(BitCastInst &I) {
// Get the source and destination types
- const Type *SrcTy = I.getOperand(0)->getType();
- const Type *DestTy = I.getType();
+ Type *SrcTy = I.getOperand(0)->getType();
+ Type *DestTy = I.getType();
// Get the size of the types in bits, we'll need this later
unsigned SrcBitSize = SrcTy->getPrimitiveSizeInBits();
@@ -1090,11 +1105,11 @@ void Verifier::VerifyCallSite(CallSite CS) {
Assert1(CS.getCalledValue()->getType()->isPointerTy(),
"Called function must be a pointer!", I);
- const PointerType *FPTy = cast<PointerType>(CS.getCalledValue()->getType());
+ PointerType *FPTy = cast<PointerType>(CS.getCalledValue()->getType());
Assert1(FPTy->getElementType()->isFunctionTy(),
"Called function is not pointer to function type!", I);
- const FunctionType *FTy = cast<FunctionType>(FPTy->getElementType());
+ FunctionType *FTy = cast<FunctionType>(FPTy->getElementType());
// Verify that the correct number of arguments are being passed
if (FTy->isVarArg())
@@ -1152,6 +1167,12 @@ void Verifier::visitCallInst(CallInst &CI) {
void Verifier::visitInvokeInst(InvokeInst &II) {
VerifyCallSite(&II);
+
+ // Verify that there is a landingpad instruction as the first non-PHI
+ // instruction of the 'unwind' destination.
+ Assert1(II.getUnwindDest()->isLandingPad(),
+ "The unwind destination does not have a landingpad instruction!",&II);
+
visitTerminatorInst(II);
}
@@ -1219,8 +1240,8 @@ void Verifier::visitBinaryOperator(BinaryOperator &B) {
void Verifier::visitICmpInst(ICmpInst &IC) {
// Check that the operands are the same type
- const Type *Op0Ty = IC.getOperand(0)->getType();
- const Type *Op1Ty = IC.getOperand(1)->getType();
+ Type *Op0Ty = IC.getOperand(0)->getType();
+ Type *Op1Ty = IC.getOperand(1)->getType();
Assert1(Op0Ty == Op1Ty,
"Both operands to ICmp instruction are not of the same type!", &IC);
// Check that the operands are the right type
@@ -1236,8 +1257,8 @@ void Verifier::visitICmpInst(ICmpInst &IC) {
void Verifier::visitFCmpInst(FCmpInst &FC) {
// Check that the operands are the same type
- const Type *Op0Ty = FC.getOperand(0)->getType();
- const Type *Op1Ty = FC.getOperand(1)->getType();
+ Type *Op0Ty = FC.getOperand(0)->getType();
+ Type *Op1Ty = FC.getOperand(1)->getType();
Assert1(Op0Ty == Op1Ty,
"Both operands to FCmp instruction are not of the same type!", &FC);
// Check that the operands are the right type
@@ -1274,10 +1295,13 @@ void Verifier::visitShuffleVectorInst(ShuffleVectorInst &SV) {
}
void Verifier::visitGetElementPtrInst(GetElementPtrInst &GEP) {
+ Assert1(cast<PointerType>(GEP.getOperand(0)->getType())
+ ->getElementType()->isSized(),
+ "GEP into unsized type!", &GEP);
+
SmallVector<Value*, 16> Idxs(GEP.idx_begin(), GEP.idx_end());
- const Type *ElTy =
- GetElementPtrInst::getIndexedType(GEP.getOperand(0)->getType(),
- Idxs.begin(), Idxs.end());
+ Type *ElTy =
+ GetElementPtrInst::getIndexedType(GEP.getOperand(0)->getType(), Idxs);
Assert1(ElTy, "Invalid indices for GEP pointer type!", &GEP);
Assert2(GEP.getType()->isPointerTy() &&
cast<PointerType>(GEP.getType())->getElementType() == ElTy,
@@ -1286,26 +1310,44 @@ void Verifier::visitGetElementPtrInst(GetElementPtrInst &GEP) {
}
void Verifier::visitLoadInst(LoadInst &LI) {
- const PointerType *PTy = dyn_cast<PointerType>(LI.getOperand(0)->getType());
+ PointerType *PTy = dyn_cast<PointerType>(LI.getOperand(0)->getType());
Assert1(PTy, "Load operand must be a pointer.", &LI);
- const Type *ElTy = PTy->getElementType();
+ Type *ElTy = PTy->getElementType();
Assert2(ElTy == LI.getType(),
"Load result type does not match pointer operand type!", &LI, ElTy);
+ if (LI.isAtomic()) {
+ Assert1(LI.getOrdering() != Release && LI.getOrdering() != AcquireRelease,
+ "Load cannot have Release ordering", &LI);
+ Assert1(LI.getAlignment() != 0,
+ "Atomic load must specify explicit alignment", &LI);
+ } else {
+ Assert1(LI.getSynchScope() == CrossThread,
+ "Non-atomic load cannot have SynchronizationScope specified", &LI);
+ }
visitInstruction(LI);
}
void Verifier::visitStoreInst(StoreInst &SI) {
- const PointerType *PTy = dyn_cast<PointerType>(SI.getOperand(1)->getType());
+ PointerType *PTy = dyn_cast<PointerType>(SI.getOperand(1)->getType());
Assert1(PTy, "Store operand must be a pointer.", &SI);
- const Type *ElTy = PTy->getElementType();
+ Type *ElTy = PTy->getElementType();
Assert2(ElTy == SI.getOperand(0)->getType(),
"Stored value type does not match pointer operand type!",
&SI, ElTy);
+ if (SI.isAtomic()) {
+ Assert1(SI.getOrdering() != Acquire && SI.getOrdering() != AcquireRelease,
+ "Store cannot have Acquire ordering", &SI);
+ Assert1(SI.getAlignment() != 0,
+ "Atomic store must specify explicit alignment", &SI);
+ } else {
+ Assert1(SI.getSynchScope() == CrossThread,
+ "Non-atomic store cannot have SynchronizationScope specified", &SI);
+ }
visitInstruction(SI);
}
void Verifier::visitAllocaInst(AllocaInst &AI) {
- const PointerType *PTy = AI.getType();
+ PointerType *PTy = AI.getType();
Assert1(PTy->getAddressSpace() == 0,
"Allocation instruction pointer not in the generic address space!",
&AI);
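
For reference, a load that satisfies both new constraints (no release-class ordering, explicit alignment); a minimal sketch against the LLVM 3.0 builder API, with the pointer operand assumed to be an i32* supplied by the caller:

    #include "llvm/Instructions.h"
    #include "llvm/Support/IRBuilder.h"
    using namespace llvm;

    // Emit an acquire load the checks above accept.
    static LoadInst *emitAtomicLoad(IRBuilder<> &B, Value *Ptr) {
      LoadInst *LI = B.CreateLoad(Ptr, "val");
      LI->setAtomic(Acquire);  // Release/AcquireRelease are rejected on loads
      LI->setAlignment(4);     // atomic accesses must carry explicit alignment
      return LI;
    }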
@@ -1316,6 +1358,49 @@ void Verifier::visitAllocaInst(AllocaInst &AI) {
visitInstruction(AI);
}
+void Verifier::visitAtomicCmpXchgInst(AtomicCmpXchgInst &CXI) {
+ Assert1(CXI.getOrdering() != NotAtomic,
+ "cmpxchg instructions must be atomic.", &CXI);
+ Assert1(CXI.getOrdering() != Unordered,
+ "cmpxchg instructions cannot be unordered.", &CXI);
+ PointerType *PTy = dyn_cast<PointerType>(CXI.getOperand(0)->getType());
+ Assert1(PTy, "First cmpxchg operand must be a pointer.", &CXI);
+ Type *ElTy = PTy->getElementType();
+ Assert2(ElTy == CXI.getOperand(1)->getType(),
+ "Expected value type does not match pointer operand type!",
+ &CXI, ElTy);
+ Assert2(ElTy == CXI.getOperand(2)->getType(),
+ "Stored value type does not match pointer operand type!",
+ &CXI, ElTy);
+ visitInstruction(CXI);
+}
+
+void Verifier::visitAtomicRMWInst(AtomicRMWInst &RMWI) {
+ Assert1(RMWI.getOrdering() != NotAtomic,
+ "atomicrmw instructions must be atomic.", &RMWI);
+ Assert1(RMWI.getOrdering() != Unordered,
+ "atomicrmw instructions cannot be unordered.", &RMWI);
+ PointerType *PTy = dyn_cast<PointerType>(RMWI.getOperand(0)->getType());
+ Assert1(PTy, "First atomicrmw operand must be a pointer.", &RMWI);
+ Type *ElTy = PTy->getElementType();
+ Assert2(ElTy == RMWI.getOperand(1)->getType(),
+ "Argument value type does not match pointer operand type!",
+ &RMWI, ElTy);
+ Assert1(AtomicRMWInst::FIRST_BINOP <= RMWI.getOperation() &&
+ RMWI.getOperation() <= AtomicRMWInst::LAST_BINOP,
+ "Invalid binary operation!", &RMWI);
+ visitInstruction(RMWI);
+}
+
+void Verifier::visitFenceInst(FenceInst &FI) {
+ const AtomicOrdering Ordering = FI.getOrdering();
+ Assert1(Ordering == Acquire || Ordering == Release ||
+ Ordering == AcquireRelease || Ordering == SequentiallyConsistent,
+ "fence instructions may only have "
+ "acquire, release, acq_rel, or seq_cst ordering.", &FI);
+ visitInstruction(FI);
+}
+
void Verifier::visitExtractValueInst(ExtractValueInst &EVI) {
Assert1(ExtractValueInst::getIndexedType(EVI.getAggregateOperand()->getType(),
EVI.getIndices()) ==
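
The same shape for the new visitors: an atomicrmw whose value operand matches its pointer's pointee type, followed by a fence with one of the four permitted orderings. A sketch under the LLVM 3.0 builder API; Ptr and Val are assumed to be an i32* and an i32 from the caller:

    #include "llvm/Instructions.h"
    #include "llvm/Support/IRBuilder.h"
    using namespace llvm;

    // Emit an atomicrmw add plus a seq_cst fence that pass the checks above.
    static void emitRMWAndFence(IRBuilder<> &B, Value *Ptr, Value *Val) {
      B.CreateAtomicRMW(AtomicRMWInst::Add, Ptr, Val, SequentiallyConsistent);
      B.CreateFence(SequentiallyConsistent);  // acquire/release/acq_rel also legal
    }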
@@ -1334,6 +1419,55 @@ void Verifier::visitInsertValueInst(InsertValueInst &IVI) {
visitInstruction(IVI);
}
+void Verifier::visitLandingPadInst(LandingPadInst &LPI) {
+ BasicBlock *BB = LPI.getParent();
+
+ // The landingpad instruction is ill-formed if it doesn't have any clauses and
+ // isn't a cleanup.
+ Assert1(LPI.getNumClauses() > 0 || LPI.isCleanup(),
+ "LandingPadInst needs at least one clause or to be a cleanup.", &LPI);
+
+ // The landingpad instruction defines its parent as a landing pad block. The
+ // landing pad block may be branched to only by the unwind edge of an invoke.
+ for (pred_iterator I = pred_begin(BB), E = pred_end(BB); I != E; ++I) {
+ const InvokeInst *II = dyn_cast<InvokeInst>((*I)->getTerminator());
+ Assert1(II && II->getUnwindDest() == BB,
+ "Block containing LandingPadInst must be jumped to "
+ "only by the unwind edge of an invoke.", &LPI);
+ }
+
+ // The landingpad instruction must be the first non-PHI instruction in the
+ // block.
+ Assert1(LPI.getParent()->getLandingPadInst() == &LPI,
+ "LandingPadInst not the first non-PHI instruction in the block.",
+ &LPI);
+
+ // The personality functions for all landingpad instructions within the same
+ // function should match.
+ if (PersonalityFn)
+ Assert1(LPI.getPersonalityFn() == PersonalityFn,
+ "Personality function doesn't match others in function", &LPI);
+ PersonalityFn = LPI.getPersonalityFn();
+
+ // All operands must be constants.
+ Assert1(isa<Constant>(PersonalityFn), "Personality function is not constant!",
+ &LPI);
+ for (unsigned i = 0, e = LPI.getNumClauses(); i < e; ++i) {
+ Value *Clause = LPI.getClause(i);
+ Assert1(isa<Constant>(Clause), "Clause is not constant!", &LPI);
+ if (LPI.isCatch(i)) {
+ Assert1(isa<PointerType>(Clause->getType()),
+ "Catch operand does not have pointer type!", &LPI);
+ } else {
+ Assert1(LPI.isFilter(i), "Clause is neither catch nor filter!", &LPI);
+ Assert1(isa<ConstantArray>(Clause) || isa<ConstantAggregateZero>(Clause),
+ "Filter operand is not an array of constants!", &LPI);
+ }
+ }
+
+ visitInstruction(LPI);
+}
+
/// verifyInstruction - Verify that an instruction is well formed.
///
void Verifier::visitInstruction(Instruction &I) {
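
Putting the invoke and landingpad rules together: the block below sketches IR construction that passes every check above, with a single predecessor reaching the pad via an unwind edge, the landingpad first in its block, cleanup set because there are no clauses, and a constant personality. Assumes LLVM 3.0 headers; the callee and personality are caller-supplied, and the scaffolding is hypothetical:

    #include "llvm/Function.h"
    #include "llvm/Instructions.h"
    #include "llvm/Support/IRBuilder.h"
    using namespace llvm;

    static void buildInvokeWithPad(Function *F, Function *Callee,
                                   Constant *PersonalityFn) {
      LLVMContext &C = F->getContext();
      BasicBlock *Entry = BasicBlock::Create(C, "entry", F);
      BasicBlock *Cont  = BasicBlock::Create(C, "cont", F);
      BasicBlock *LPad  = BasicBlock::Create(C, "lpad", F);

      IRBuilder<> B(Entry);
      B.CreateInvoke(Callee, Cont, LPad);      // unwind edge targets %lpad

      B.SetInsertPoint(LPad);
      Type *PairTy = StructType::get(Type::getInt8PtrTy(C),   // { i8*, i32 }
                                     Type::getInt32Ty(C), NULL);
      LandingPadInst *LP = B.CreateLandingPad(PairTy, PersonalityFn, 0);
      LP->setCleanup(true);                    // zero clauses => must be a cleanup
      B.CreateResume(LP);

      B.SetInsertPoint(Cont);
      B.CreateRetVoid();
    }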
@@ -1588,20 +1722,20 @@ static std::string IntrinsicParam(unsigned ArgNo, unsigned NumRets) {
return "Intrinsic result type #" + utostr(ArgNo);
}
-bool Verifier::PerformTypeCheck(Intrinsic::ID ID, Function *F, const Type *Ty,
+bool Verifier::PerformTypeCheck(Intrinsic::ID ID, Function *F, Type *Ty,
int VT, unsigned ArgNo, std::string &Suffix) {
- const FunctionType *FTy = F->getFunctionType();
+ FunctionType *FTy = F->getFunctionType();
unsigned NumElts = 0;
- const Type *EltTy = Ty;
- const VectorType *VTy = dyn_cast<VectorType>(Ty);
+ Type *EltTy = Ty;
+ VectorType *VTy = dyn_cast<VectorType>(Ty);
if (VTy) {
EltTy = VTy->getElementType();
NumElts = VTy->getNumElements();
}
- const Type *RetTy = FTy->getReturnType();
- const StructType *ST = dyn_cast<StructType>(RetTy);
+ Type *RetTy = FTy->getReturnType();
+ StructType *ST = dyn_cast<StructType>(RetTy);
unsigned NumRetVals;
if (RetTy->isVoidTy())
NumRetVals = 0;
@@ -1618,7 +1752,7 @@ bool Verifier::PerformTypeCheck(Intrinsic::ID ID, Function *F, const Type *Ty,
// type.
if ((Match & (ExtendedElementVectorType |
TruncatedElementVectorType)) != 0) {
- const IntegerType *IEltTy = dyn_cast<IntegerType>(EltTy);
+ IntegerType *IEltTy = dyn_cast<IntegerType>(EltTy);
if (!VTy || !IEltTy) {
CheckFailed(IntrinsicParam(ArgNo, NumRetVals) + " is not "
"an integral vector type.", F);
@@ -1709,7 +1843,7 @@ bool Verifier::PerformTypeCheck(Intrinsic::ID ID, Function *F, const Type *Ty,
// Outside of TableGen, we don't distinguish iPTRAny (to any address space)
// and iPTR. In the verifier, we can not distinguish which case we have so
// allow either case to be legal.
- if (const PointerType* PTyp = dyn_cast<PointerType>(Ty)) {
+ if (PointerType *PTyp = dyn_cast<PointerType>(Ty)) {
EVT PointeeVT = EVT::getEVT(PTyp->getElementType(), true);
if (PointeeVT == MVT::Other) {
CheckFailed("Intrinsic has pointer to complex type.");
@@ -1757,7 +1891,7 @@ void Verifier::VerifyIntrinsicPrototype(Intrinsic::ID ID, Function *F,
unsigned NumParams, ...) {
va_list VA;
va_start(VA, NumParams);
- const FunctionType *FTy = F->getFunctionType();
+ FunctionType *FTy = F->getFunctionType();
// For overloaded intrinsics, the Suffix of the function name must match the
// types of the arguments. This variable keeps track of the expected
@@ -1769,8 +1903,8 @@ void Verifier::VerifyIntrinsicPrototype(Intrinsic::ID ID, Function *F,
return;
}
- const Type *Ty = FTy->getReturnType();
- const StructType *ST = dyn_cast<StructType>(Ty);
+ Type *Ty = FTy->getReturnType();
+ StructType *ST = dyn_cast<StructType>(Ty);
if (NumRetVals == 0 && !Ty->isVoidTy()) {
CheckFailed("Intrinsic should return void", F);